In [1]:
import polars as pl
import torch
import torch.nn as nn
from torch.utils.data import Dataset, DataLoader
from kshape_core_gpu import KShapeClusteringGPU
import sys

MAC_DIR = '/Users/igwanhyeong/PycharmProjects/data_research/data/'
WINDOW_DIR = 'C:/Users/USER/PycharmProjects/research/data/'

if sys.platform == 'win32':
    DIR = WINDOW_DIR
    print(torch.cuda.is_available())
    print(torch.cuda.device_count())
    print(torch.version.cuda)
    print(torch.__version__)
    print(torch.cuda.get_device_name(0))
    print(torch.__version__)
else:
    DIR = MAC_DIR

tb_bas_oper_part_mst = (pl.read_parquet(DIR + 'tb_bas_oper_part_mst.parquet')
                        .select(['OPER_PART_NO', 'OPER_PART_NM'])
                        .rename({'OPER_PART_NO': 'oper_part_no', 'OPER_PART_NM': 'oper_part_nm'}))
tb_dyn_fcst_demand_sellout = (pl.read_parquet(DIR + 'tb_dyn_fcst_dmnd_sellout.parquet')
                              .select(['PART_NO', 'DMND_QTY', 'DMND_DT', 'OPER_PART_NO'])
                              .rename({'PART_NO': 'part_no', 'OPER_PART_NO': 'oper_part_no', 'DMND_DT': 'demand_dt', 'DMND_QTY': 'demand_qty'})
                              .select(['part_no', 'oper_part_no', 'demand_dt', 'demand_qty']))

In [2]:
target_df = (tb_dyn_fcst_demand_sellout
    .with_columns(
        (pl.col("demand_dt").cast(pl.Int64) // 100).alias("demand_yyyymm")
    )
    .join(tb_bas_oper_part_mst, on = 'oper_part_no', how = 'left')
    .group_by(['oper_part_no', 'demand_yyyymm'])
    .agg(pl.col('demand_qty').sum().alias('demand_qty'))
    .sort(['oper_part_no', 'demand_yyyymm'])
    .with_columns(pl.col('demand_yyyymm').cast(pl.Utf8).str.strptime(pl.Date, '%Y%m').alias('month'))
)


min_month = target_df.select(pl.col('month').min())[0, 0]
max_month = target_df.select(pl.col('month').max())[0, 0]

full_months = pl.date_range(start = min_month, end = max_month, interval = '1mo', eager = True)
month_df = pl.DataFrame({'month': full_months})
unique_parts = target_df.select(pl.col('oper_part_no').unique())
base = unique_parts.join(month_df, how = 'cross')

aligned_df = (base
                .join(
                    target_df.select(['oper_part_no', 'month', 'demand_qty']),
                    on = ['oper_part_no', 'month'], how = 'left')
                .with_columns(pl.col('demand_qty').fill_null(0.0))
                .pivot(
                    values = 'demand_qty',
                    on = 'month',
                    aggregate_function = 'first'
                )
              )

In [3]:
aligned_df

oper_part_no,2017-12-01,2018-01-01,2018-02-01,2018-03-01,2018-04-01,2018-05-01,2018-06-01,2018-07-01,2018-08-01,2018-09-01,2018-10-01,2018-11-01,2018-12-01,2019-01-01,2019-02-01,2019-03-01,2019-04-01,2019-05-01,2019-06-01,2019-07-01,2019-08-01,2019-09-01,2019-10-01,2019-11-01,2019-12-01,2020-01-01,2020-02-01,2020-03-01,2020-04-01,2020-05-01,2020-06-01,2020-07-01,2020-08-01,2020-09-01,2020-10-01,2020-11-01,…,2024-02-01,2024-03-01,2024-04-01,2024-05-01,2024-06-01,2024-07-01,2024-08-01,2024-09-01,2024-10-01,2024-11-01,2024-12-01,2025-01-01,2025-02-01,2025-03-01,2025-04-01,2025-05-01,2025-06-01,2025-07-01,2025-08-01,2025-09-01,2025-10-01,2025-11-01,2025-12-01,2026-01-01,2026-02-01,2026-03-01,2026-04-01,2026-05-01,2026-06-01,2026-07-01,2026-08-01,2026-09-01,2026-10-01,2026-11-01,2026-12-01,2027-01-01,2027-02-01
str,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,…,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64
"""T4520-43021""",0.0,50.0,17.0,12.0,15.0,10.0,35.0,0.0,5.0,55.0,80.0,2.0,21.0,15.0,110.0,6.0,45.0,0.0,36.0,14.0,25.0,5.0,5.0,20.0,5.0,78.0,3.0,0.0,5.0,0.0,2.0,65.0,23.0,8.0,0.0,2.0,…,0.0,50.0,64.0,0.0,0.0,18.0,5.0,88.0,7.0,5.0,0.0,10.0,15.0,82.0,24.0,21.0,86.0,12.0,0.0,10.0,30.0,90.0,10.0,10.0,20.0,15.0,35.0,104.0,45.0,15.0,44.0,62.0,5.0,7.0,133.0,0.0,0.0
"""53450-6351-1""",0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,3.0,6.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,9.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,2.0,0.0,0.0,0.0,0.0,0.0,…,0.0,0.0,0.0,0.0,0.0,0.0,1.0,2.0,3.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,1.0,3.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
"""9967999""",0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,…,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
"""350058000""",0.0,0.0,0.0,0.0,1.0,0.0,2.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,2.0,0.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0,0.0,…,0.0,0.0,0.0,0.0,2.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,5.0,0.0,0.0,0.0,0.0,0.0,0.0
"""FTF31-1182B""",0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,…,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…
"""08101-06305""",0.0,190.0,50.0,141.0,0.0,0.0,645.0,230.0,375.0,875.0,180.0,10.0,75.0,90.0,30.0,85.0,83.0,6.0,795.0,144.0,97.0,824.0,141.0,10.0,123.0,280.0,38.0,205.0,18.0,60.0,596.0,15.0,491.0,625.0,115.0,28.0,…,0.0,130.0,55.0,62.0,755.0,125.0,412.0,704.0,118.0,61.0,200.0,130.0,50.0,101.0,120.0,71.0,921.0,155.0,195.0,1271.0,190.0,60.0,30.0,488.0,5.0,45.0,10.0,89.0,598.0,105.0,47.0,699.0,65.0,145.0,68.0,80.0,0.0
"""T4930-80651""",0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,5.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,2.0,…,0.0,0.0,3.0,1.0,0.0,1.0,0.0,0.0,0.0,2.0,0.0,2.0,0.0,1.0,0.0,0.0,0.0,0.0,2.0,0.0,0.0,3.0,0.0,3.0,1.0,0.0,0.0,0.0,0.0,2.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0
"""CE11-0319A""",0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,…,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
"""T4682-55393""",0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,2.0,1.0,0.0,0.0,0.0,0.0,0.0,…,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [4]:
part_numbers = aligned_df.get_column('oper_part_no')
X_np = aligned_df.select(pl.all().exclude("oper_part_no")).to_numpy()
X_tensor = torch.tensor(X_np, dtype=torch.float32).unsqueeze(-1)  # shape (N, T, 1)

In [5]:
model = KShapeClusteringGPU(n_clusters=5, centroid_init = 'random', max_iter=100)
model.fit(X_tensor)

labels = model.labels_
centroids = model.centroids_

result = pl.DataFrame({
    'oper_part_no': part_numbers,
    'cluster_label': labels.astype(int)
})

result

  x = torch.tensor(x, device = device, dtype = torch.float32)


NotImplementedError: The operator 'aten::_linalg_eigh.eigenvalues' is not currently implemented for the MPS device. If you want this op to be considered for addition please comment on https://github.com/pytorch/pytorch/issues/141287 and mention use-case, that resulted in missing op as well as commit hash e2d141dbde55c2a4370fac5165b0561b6af4798b. As a temporary fix, you can set the environment variable `PYTORCH_ENABLE_MPS_FALLBACK=1` to use the CPU as a fallback for this op. WARNING: this will be slower than running natively on MPS.