# Convert CSV to Parquet

In [10]:
import polars as pl
from_path = 'D:/data/dsio'
to_path = '/modeling_module/data'

(
    pl.scan_csv(f'{from_path}/tb_dyn_fcst_dmnd.csv'
                , has_header=True
                , infer_schema_length = 100_000
                , dtypes = {'CONT_AMT': pl.Float64})
 .collect().write_parquet(f"{to_path}/tb_dyn_fcst_dmnd.parquet")
)

In [12]:
(
    pl.scan_csv(f'{from_path}/tb_dyn_fcst_dmnd_sellout.csv'
                , has_header = True
                , infer_schema_length = 100_000
                , dtypes = {'CONT_AMT': pl.Float64})
    .collect().write_parquet(f"{to_path}/tb_dyn_fcst_dmnd_sellout.parquet")
)

In [13]:
(
    pl.scan_csv(f'{from_path}/tb_bas_oper_part_mst.csv'
                , has_header = True
                , infer_schema_length = 100_000
                , dtypes = {'CONT_AMT': pl.Float64})
    .collect().write_parquet(f"{to_path}/tb_bas_oper_part_mst.parquet")
)

In [4]:
import torch

print(torch.cuda.is_available())
print(torch.cuda.device_count())
print(torch.version.cuda)
print(torch.__version__)
print(torch.cuda.get_device_name(0))
print(torch.__version__)

False
0
None
2.6.0+cpu


AssertionError: Torch not compiled with CUDA enabled

In [3]:
import sys
print(sys.executable)

C:\Users\USER\python\py312\python.exe


In [1]:
import polars as pl

In [4]:
import polars as pl

# 모든 값의 dtype은 Int64이며, NULL 포함
df_bad = pl.DataFrame({
    "oper_part_no":      ["A",   "B",   "A",     "B",   "A",   "B"],
    "order_yyyyww":      [202240, 202035, 202241, 202036, 202242, 202037],
    "wty_start_yyyyww":  [None,  202030, 202211,  None,  None,  None],  # 모두 Int64 또는 Null
}, schema={
    "oper_part_no": pl.Utf8,
    "order_yyyyww": pl.Int64,
    "wty_start_yyyyww": pl.Int64,   # ← dtype 고정(Int64)
})

print("원본")
print(df_bad)

# (문제) 그룹/정렬 없이 바로 backward 채움
df_wrong = df_bad.with_columns(
    pl.col("wty_start_yyyyww")
      .fill_null(strategy="backward")
      .alias("wty_filled")
)

print("\n그룹/정렬 없이 backward 채움 (문제 재현)")
print(df_wrong)

원본
shape: (6, 3)
┌──────────────┬──────────────┬──────────────────┐
│ oper_part_no ┆ order_yyyyww ┆ wty_start_yyyyww │
│ ---          ┆ ---          ┆ ---              │
│ str          ┆ i64          ┆ i64              │
╞══════════════╪══════════════╪══════════════════╡
│ A            ┆ 202240       ┆ null             │
│ B            ┆ 202035       ┆ 202030           │
│ A            ┆ 202241       ┆ 202211           │
│ B            ┆ 202036       ┆ null             │
│ A            ┆ 202242       ┆ null             │
│ B            ┆ 202037       ┆ null             │
└──────────────┴──────────────┴──────────────────┘

그룹/정렬 없이 backward 채움 (문제 재현)
shape: (6, 4)
┌──────────────┬──────────────┬──────────────────┬────────────┐
│ oper_part_no ┆ order_yyyyww ┆ wty_start_yyyyww ┆ wty_filled │
│ ---          ┆ ---          ┆ ---              ┆ ---        │
│ str          ┆ i64          ┆ i64              ┆ i64        │
╞══════════════╪══════════════╪══════════════════╪════════════╡
│ A    

In [5]:
df_fixed = (
    df_bad
    .sort(["oper_part_no", "order_yyyyww"])   # 파트별 시간 정렬
    .with_columns(
        pl.col("wty_start_yyyyww")
          .fill_null(strategy="backward")
          .over("oper_part_no")               # ← 파트별로만 채움(타 파트 유입 방지)
          .alias("wty_filled_bwd")
    )
)

print("\n해결1: 정렬 + over('oper_part_no')로 backward 채움")
print(df_fixed)


해결1: 정렬 + over('oper_part_no')로 backward 채움
shape: (6, 4)
┌──────────────┬──────────────┬──────────────────┬────────────────┐
│ oper_part_no ┆ order_yyyyww ┆ wty_start_yyyyww ┆ wty_filled_bwd │
│ ---          ┆ ---          ┆ ---              ┆ ---            │
│ str          ┆ i64          ┆ i64              ┆ i64            │
╞══════════════╪══════════════╪══════════════════╪════════════════╡
│ A            ┆ 202240       ┆ null             ┆ 202211         │
│ A            ┆ 202241       ┆ 202211           ┆ 202211         │
│ A            ┆ 202242       ┆ null             ┆ null           │
│ B            ┆ 202035       ┆ 202030           ┆ 202030         │
│ B            ┆ 202036       ┆ null             ┆ null           │
│ B            ┆ 202037       ┆ null             ┆ null           │
└──────────────┴──────────────┴──────────────────┴────────────────┘
