In [1]:
# # -*- coding: utf-8 -*-
# """
# 하루 단위 → 시간 단위(시/스케줄 블록) 공정 시뮬레이션 (일/시간 슬롯 분해 강제)
# - 입력 CSV: subset_20240101_20250630_fullcols.csv (24-01-01 ~ 25-06-30 날짜 포함)
# - 입력이 이미 '하루 1행'이면 집계를 건너뛰고 곧장 슬롯 분해
# - 출력은 날짜×슬롯 개수만큼 행 생성

# [중요 변경]
# - Queue 분배를 L=λ×W 대신 '하루 평균 Queue 비율 보존' 방식으로 변경
#   * 작업 슬롯(work)에는 prod_weight 비례 분배
#   * 휴식/식사 슬롯(rest)은 직전 값 유지(carry-over)
#   * 마지막에 전체 스케일 재보정(하루 평균 유지) → 원본 병목 경향 유지

# ※ 하루 창: 07:50 ~ (다음날) 07:40
#   - 자정 이후(00:00~07:40)는 자동으로 '다음날'로 정렬됨
#   - 20:10~20:40 공백은 '휴식' 블록으로 채움
# """

# import numpy as np
# import pandas as pd

# # =========================
# # 1) CONFIG
# # =========================
# INPUT_CSV  = "./subset_20240101_20250630_fullcols.csv"
# OUTPUT_CSV = "./FinalResults_time_scheduled_20240101_20250630.csv"
# GRANULARITY = "hourly"  # "hourly" or "block"

# # ▼ 현장 스케줄 + 20:10~20:40 교대 휴식 추가
# SCHEDULE_BLOCKS = [
#     ("07:50", "09:50", "주간"),
#     ("09:50", "10:00", "휴식"),
#     ("10:00", "11:50", "주간"),
#     ("11:50", "12:40", "중식"),
#     ("12:40", "14:40", "주간"),
#     ("14:40", "14:50", "휴식"),
#     ("14:50", "16:40", "주간"),
#     ("16:40", "17:10", "석식"),
#     ("17:10", "20:10", "잔업"),
#     ("20:10", "20:40", "휴식"),   # 교대 공백 보정
#     ("20:40", "00:00", "야간"),
#     ("00:00", "00:30", "야식"),   # 다음날
#     ("00:30", "04:00", "야간"),   # 다음날
#     ("04:00", "05:00", "휴식"),   # 다음날
#     ("05:00", "07:40", "야간"),   # 다음날
# ]

# # 근무·휴식 레이블별 가중치
# SHIFT_WEIGHTS = {
#     "주간": 1.0,
#     "잔업": 0.9,
#     "야간": 1.2,
#     "중식": 0.0,
#     "석식": 0.0,
#     "야식": 0.0,
#     "휴식": 0.0,
# }

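# # Util/Time 변동폭 (슬롯 계수 = 1 - BAND + 2*BAND*sw_norm)
# #   ※ sw_norm은 작업 슬롯 평균이 1이 되도록 정규화되므로 작업 슬롯의 평균 계수는 1 + BAND가 된다.
# #     즉, 일 평균을 중심으로 한 대칭 ±BAND 변동이 아니다.
# UTIL_BAND = 0.15     # 계수 폭 0.15 (작업 슬롯 평균 계수 ≈ 1.15, 이후 0~1로 클리핑)
# TIME_BAND = 0.10     # 계수 폭 0.10 (작업 슬롯 평균 계수 ≈ 1.10)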


# # =========================
# # 2) HELPERS
# # =========================
# def to_dt(s):
#     try:
#         return pd.to_datetime(s, errors="coerce")
#     except Exception:
#         return pd.to_datetime(s.astype(str), errors="coerce")

# def detect_date_col(df: pd.DataFrame) -> str:
#     for c in ["Time_Now", "Timestamp", "timestamp", "Date", "date"]:
#         if c in df.columns:
#             return c
#     return df.columns[0]

# def day_floor(dt: pd.Series) -> pd.Series:
#     return dt.dt.floor("D")

# def aggregate_to_daily(df: pd.DataFrame, date_col: str):
#     """일 단위 집계: 생산량 sum, 시간/유틸/큐 mean, 기타 first"""
#     dt = to_dt(df[date_col])
#     if dt.isna().all():
#         raise ValueError(f"Could not parse datetime from '{date_col}'")
#     df = df.copy()
#     df["_date"] = day_floor(dt)

#     count_cols = [c for c in df.columns if c.startswith("c_Cell") or c == "c_TotalProducts"]
#     time_cols  = [c for c in df.columns if c.endswith("_Time")]
#     util_cols  = [c for c in df.columns if c.endswith("_Util")]
#     queue_cols = [c for c in df.columns if c.endswith("_Queue")]
#     other_cols = sorted(set(df.columns) - set([date_col, "_date"] + count_cols + time_cols + util_cols + queue_cols))

#     agg = {}
#     for c in count_cols: agg[c] = "sum"
#     for c in time_cols + util_cols + queue_cols: agg[c] = "mean"
#     for c in other_cols: agg[c] = "first"

#     daily = df.groupby("_date", as_index=False).agg(agg).sort_values("_date").reset_index(drop=True)
#     daily[date_col] = daily["_date"]
#     return daily.drop(columns=["_date"]), count_cols, time_cols, util_cols, queue_cols, date_col

# def build_schedule_for_day(day: pd.Timestamp) -> pd.DataFrame:
#     """
#     'day'의 근무일을 07:50 ~ (다음날) 07:40 으로 정의하고,
#     SCHEDULE_BLOCKS(HH:MM)를 해당 윈도에 맞게 정렬/클램프하여 반환.
#     - 07:50보다 이른 HH:MM은 모두 '다음날'로 간주(+1일)
#     - 최종 윈도 밖 구간은 잘라냄
#     """
#     rows = []
#     anchor_start = pd.Timestamp(day.year, day.month, day.day, 7, 50)
#     anchor_end   = pd.Timestamp(day.year, day.month, day.day, 7, 40) + pd.Timedelta(days=1)

#     for start_str, end_str, label in SCHEDULE_BLOCKS:
#         s_h, s_m = map(int, start_str.split(":"))
#         e_h, e_m = map(int, end_str.split(":"))

#         # 시작/종료 시각
#         start = pd.Timestamp(day.year, day.month, day.day, s_h, s_m)
#         end   = pd.Timestamp(day.year, day.month, day.day, e_h, e_m)

#         # 자정/창 보정
#         if (s_h, s_m) < (7, 50): start += pd.Timedelta(days=1)
#         if (e_h, e_m) < (7, 50) or end <= start: end += pd.Timedelta(days=1)

#         # 창으로 클램프
#         s = max(start, anchor_start)
#         e = min(end, anchor_end)
#         if e <= s: 
#             continue

#         minutes = int((e - s).total_seconds() // 60)
#         rows.append({"start": s, "end": e, "label": label, "minutes": minutes})
#     return pd.DataFrame(rows)

# def explode_to_hours(sch: pd.DataFrame) -> pd.DataFrame:
#     """블록 → 정시 슬롯 분해 + 각 슬롯과의 겹침 분(min_overlap) 계산
#     (한 정시 슬롯에 여러 블록이 걸치면 (슬롯, 블록) 쌍마다 행이 따로 생성됨)"""
#     t_min = sch["start"].min().floor("h")
#     t_max = sch["end"].max().ceil("h")
#     hours = pd.date_range(t_min, t_max, freq="h")
#     slots = pd.DataFrame({"slot_start": hours[:-1], "slot_end": hours[1:]})

#     out = []
#     for _, blk in sch.iterrows():
#         for _, sl in slots.iterrows():
#             s = max(blk["start"], sl["slot_start"])
#             e = min(blk["end"], sl["slot_end"])
#             overlap = (e - s).total_seconds() / 60.0
#             if overlap > 0:
#                 out.append({
#                     "slot_start": sl["slot_start"],
#                     "slot_end": sl["slot_end"],
#                     "label": blk["label"],
#                     "min_overlap": overlap
#                 })
#     return pd.DataFrame(out)

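# def integer_allocate(total: float, weights: np.ndarray) -> np.ndarray:
#     """정수 분배(잔차 보정): total을 weights 비례로 내림(floor) 분배한 뒤,
#     남은 잔차를 비중이 큰 슬롯부터 1씩 더해 합계를 맞춤. 가중치 합 또는 total이 0 이하이면 전부 0."""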
#     w = np.clip(np.asarray(weights, float), 0, None)
#     if w.sum() <= 0 or total <= 0:
#         return np.zeros_like(w, dtype=int)
#     p = w / w.sum()
#     alloc = np.floor(total * p).astype(int)
#     residual = int(round(total - alloc.sum()))
#     if residual > 0:
#         idx = np.argsort(-p)[:residual]
#         alloc[idx] += 1
#     return alloc


# # =========================
# # 3) MAIN TRANSFORM
# # =========================
# def make_time_scheduled(df_daily: pd.DataFrame,
#                         count_cols, time_cols, util_cols, queue_cols,
#                         date_col: str,
#                         granularity: str = "hourly") -> pd.DataFrame:
#     rows = []

#     # SKU별 Wait 컬럼 매핑 (예: "SKU3_Wait_Time")
#     sku_wait_cols = {c.split("_")[0]: c for c in time_cols if c.startswith("SKU") and c.endswith("Wait_Time")}
#     overall_wait_name = next((c for c in time_cols if c.endswith("Wait_Time")), None)

#     for _, day_row in df_daily.iterrows():
#         day = pd.to_datetime(day_row[date_col]).floor("D")

#         # 1) 해당 일 스케줄 → 슬롯
#         sch = build_schedule_for_day(day)
#         if sch.empty:
#             continue

#         if granularity == "block":
#             slot_df = sch.rename(columns={"start":"slot_start","end":"slot_end"})
#             slot_df["min_overlap"] = (slot_df["slot_end"] - slot_df["slot_start"]).dt.total_seconds()/60.0
#             slot_df = slot_df[["slot_start","slot_end","label","min_overlap"]]
#         else:
#             slot_df = explode_to_hours(sch)

#         # 2) 생산 가중치
#         slot_df["shift_weight"] = slot_df["label"].map(SHIFT_WEIGHTS).fillna(0.0)
#         slot_df["work_minutes"] = slot_df["min_overlap"] * (slot_df["shift_weight"] > 0).astype(float)
#         slot_df["prod_weight"]  = slot_df["work_minutes"] * slot_df["shift_weight"]

#         # 3) count 분배(정수)
#         for c in count_cols:
#             total = float(day_row[c])
#             slot_df[c] = integer_allocate(total, slot_df["prod_weight"].values)

#         # 4) Util/Time 분배
#         safe_eps = 1e-12
#         work_mask = slot_df["shift_weight"].values > 0
#         sw = slot_df["shift_weight"].values.copy()
#         if work_mask.any():
#             sw_norm = np.zeros_like(sw, dtype=float)
#             mean_sw = sw[work_mask].mean()
#             sw_norm[work_mask] = sw[work_mask] / (mean_sw + safe_eps)
#         else:
#             sw_norm = np.zeros_like(sw, dtype=float)

#         for c in util_cols:
#             mean_u = float(day_row[c])
#             u = np.zeros(len(slot_df), dtype=float)
#             u[work_mask] = mean_u * (1.0 - UTIL_BAND + 2*UTIL_BAND * sw_norm[work_mask])
#             slot_df[c] = np.clip(u, 0, 1)

#         for c in time_cols:
#             mean_t = float(day_row[c])
#             t = np.zeros(len(slot_df), dtype=float)
#             if mean_t != 0 and work_mask.any():
#                 t[work_mask] = mean_t * (1.0 - TIME_BAND + 2*TIME_BAND * sw_norm[work_mask])
#             slot_df[c] = t

#         # 5) Queue 분배: 하루 비율 보존 + 휴식 carry-over + 스케일 재보정
#         #    - 하루 평균 queue(daily_mean_q)를 '작업 슬롯 prod_weight 비례'로 우선 분배
#         #    - 휴식/식사 슬롯은 직전 값 유지(carry-over)
#         #    - 전체 합/평균이 원래와 동일해지도록 마지막에 스케일 재보정
#         n_slots = len(slot_df)
#         for qc in queue_cols:
#             daily_mean_q = float(day_row[qc])

#             if daily_mean_q == 0.0:
#                 # 하루 평균이 0이면 전 슬롯 0
#                 slot_df[qc] = 0.0
#                 continue

#             # (a) 작업 슬롯에만 분배
#             w = slot_df["prod_weight"].values.astype(float)
#             work_idx = np.where(w > 0)[0]
#             rest_idx = np.where(w <= 0)[0]

#             q = np.zeros(n_slots, dtype=float)
#             if work_idx.size > 0 and w[work_idx].sum() > 0:
#                 w_norm = w[work_idx] / w[work_idx].sum()
#                 # 하루 평균 × 슬롯 수 = 하루 총합 → 작업 슬롯에 비례 분배
#                 q[work_idx] = daily_mean_q * n_slots * w_norm
#             else:
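#                 # 전부 휴식인 재난적 상황: 일단 균등 분배
#                 # (주의: 이 경우 모든 슬롯이 rest_idx에 속하므로, 아래 (b) carry-over에서 0번 슬롯이 0.0으로
#                 #  덮이고 이후 슬롯이 그 값을 이어받아 최종 결과는 전부 0이 된다)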
#                 q[:] = daily_mean_q

#             # (b) 휴식/식사 슬롯 carry-over (직전 값 유지)
#             #     맨 앞이 휴식이면 뒤에서 첫 작업 값을 찾아 적용(없으면 0 유지)
#             for idx in rest_idx:
#                 if idx == 0:
#                     # 처음부터 휴식이면 다음에 나오는 첫 작업 값으로 채움(없으면 0)
#                     next_work = q[work_idx[0]] if work_idx.size > 0 else 0.0
#                     q[idx] = next_work
#                 else:
#                     q[idx] = q[idx-1]

#             # (c) 스케일 재보정: 최종 평균 = daily_mean_q 로 맞춤
#             target_sum = daily_mean_q * n_slots
#             cur_sum = q.sum()
#             if cur_sum > 0:
#                 q *= (target_sum / cur_sum)

#             slot_df[qc] = q

#         # 6) 출력 누적
#         for _, r in slot_df.iterrows():
#             out = {"Time_Now": r["slot_start"], "Time_End": r["slot_end"], "ShiftLabel": r["label"]}
#             for c in count_cols + util_cols + time_cols + queue_cols:
#                 out[c] = r.get(c, 0)
#             rows.append(out)

#     result = pd.DataFrame(rows).sort_values(["Time_Now"]).reset_index(drop=True)

#     # 컬럼 순서 정리
#     keep_order, all_cols = [], set(count_cols + util_cols + time_cols + queue_cols)
#     for c in df_daily.columns:
#         if c in all_cols:
#             keep_order.append(c)
#     for c in (set(all_cols) - set(keep_order)):
#         keep_order.append(c)
#     result = result.reindex(columns=["Time_Now","Time_End","ShiftLabel"] + keep_order)
#     return result


# # =========================
# # 4) RUN
# # =========================
# if __name__ == "__main__":
#     # 1) 로드
#     full = pd.read_csv(INPUT_CSV, low_memory=False)

#     # 2) 날짜 컬럼 탐지 및 datetime 변환
#     date_col = detect_date_col(full)
#     full[date_col] = to_dt(full[date_col])
#     if full[date_col].isna().all():
#         raise ValueError(f"날짜 파싱 실패: '{date_col}' 컬럼을 datetime으로 변환할 수 없습니다.")
#     full["_date"] = full[date_col].dt.floor("D")

#     # 3) 입력 형태 판단
#     nunique_dates = full["_date"].nunique()
#     is_daily_like = (len(full) == nunique_dates)

#     # 4) 열 그룹화
#     count_cols = [c for c in full.columns if c.startswith("c_Cell") or c == "c_TotalProducts"]
#     time_cols  = [c for c in full.columns if c.endswith("_Time")]
#     util_cols  = [c for c in full.columns if c.endswith("_Util")]
#     queue_cols = [c for c in full.columns if c.endswith("_Queue")]

#     # 5) 일 집계/스킵
#     if is_daily_like:
#         df_daily = full.drop(columns=["_date"]).copy()
#         df_daily[date_col] = full["_date"].values
#     else:
#         df_daily = full.drop(columns=["_date"]).copy()
#         df_daily, count_cols, time_cols, util_cols, queue_cols, date_col = aggregate_to_daily(df_daily, date_col)

#     # 6) 시간표 기반 분해
#     out = make_time_scheduled(
#         df_daily,
#         count_cols=count_cols,
#         time_cols=time_cols,
#         util_cols=util_cols,
#         queue_cols=queue_cols,
#         date_col=date_col,
#         granularity=GRANULARITY,
#     )

#     # 7) 저장 (Excel 한글 안 깨지게)
#     out.to_csv(OUTPUT_CSV, index=False, encoding="utf-8-sig")

#     # 8) 진단 로그
#     sample_day = pd.to_datetime(df_daily[date_col].iloc[0])
#     sample_slots = (explode_to_hours(build_schedule_for_day(sample_day))
#                     if GRANULARITY == "hourly"
#                     else build_schedule_for_day(sample_day))
#     slots_per_day = len(sample_slots) if GRANULARITY == "hourly" else len(SCHEDULE_BLOCKS)

#     print(f"[OK] Saved -> {OUTPUT_CSV}")
#     print(f"입력 날짜 수: {nunique_dates:,} | 대표 하루 슬롯 수: {slots_per_day}")
#     print(f"예상 행수 ≈ {nunique_dates * slots_per_day:,} | 실제 행수 = {len(out):,}")

[OK] Saved -> ./FinalResults_time_scheduled_20240101_20250630.csv
입력 날짜 수: 547 | 대표 하루 슬롯 수: 35
예상 행수 ≈ 19,145 | 실제 행수 = 19,145


In [3]:
# # -*- coding: utf-8 -*-
# """
# 하루 → 시간(정시/블록) 분해 + '하루 병목 경향' 유지용 그룹 우선도 적용
# - 입력: subset_20240101_20250630_fullcols.csv
# - 출력: FinalResults_time_scheduled_20240101_20250630.csv

# 핵심:
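# - Queue 분배: 작업 슬롯(prod_weight) 비례 × 그룹 우선도(bias)
# - 휴식/식사 슬롯: 직전 값 유지(carry-over)
# - 최종 스케일 재보정(하루 평균 유지)
#   ※ 주의: bias는 한 큐 컬럼의 모든 작업 슬롯에 동일한 배수로 곱해지고(휴식 슬롯은 그 값을 복사),
#     재보정 단계에서 합계를 다시 daily_mean_q × n_slots로 맞추므로 bias는 최종 값에서 상쇄된다.
#     즉, 현재 구현에서는 그룹 우선도가 출력 값에 영향을 주지 않는다.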
# - 그룹 우선도: 일 단위에서 '그룹별 일평균 Queue 합'으로 일별 Top 그룹을 집계 → 빈도 기반 가중치
#   + 필요시 PRIORITY_OVERRIDE로 수동 보정(예: 창고/포크리프트 ↑, 프레스 ↓)
# """

# import numpy as np
# import pandas as pd
# import re

# # =========================
# # 1) CONFIG
# # =========================
# INPUT_CSV  = "./subset_20240101_20250630_fullcols.csv"
# OUTPUT_CSV = "./FinalResults_time_scheduled_20240101_20250630.csv"
# GRANULARITY = "hourly"  # "hourly" or "block"

# # 스케줄 (07:50 ~ 다음날 07:40, 교대 휴식 포함)
# SCHEDULE_BLOCKS = [
#     ("07:50", "09:50", "주간"),
#     ("09:50", "10:00", "휴식"),
#     ("10:00", "11:50", "주간"),
#     ("11:50", "12:40", "중식"),
#     ("12:40", "14:40", "주간"),
#     ("14:40", "14:50", "휴식"),
#     ("14:50", "16:40", "주간"),
#     ("16:40", "17:10", "석식"),
#     ("17:10", "20:10", "잔업"),
#     ("20:10", "20:40", "휴식"),
#     ("20:40", "00:00", "야간"),
#     ("00:00", "00:30", "야식"),
#     ("00:30", "04:00", "야간"),
#     ("04:00", "05:00", "휴식"),
#     ("05:00", "07:40", "야간"),
# ]

# # 근무·휴식 레이블별 가중치
# SHIFT_WEIGHTS = {"주간":1.0, "잔업":0.9, "야간":1.2, "중식":0.0, "석식":0.0, "야식":0.0, "휴식":0.0}

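# # Util/Time 변동폭 (슬롯 계수 = 1 - BAND + 2*BAND*sw_norm)
# #   ※ sw_norm은 작업 슬롯 평균이 1이 되도록 정규화되므로 작업 슬롯의 평균 계수는 1 + BAND가 된다.
# #     즉, 일 평균을 중심으로 한 대칭 ±BAND 변동이 아니다.
# UTIL_BAND = 0.15     # 계수 폭 0.15 (작업 슬롯 평균 계수 ≈ 1.15, 이후 0~1로 클리핑)
# TIME_BAND = 0.10     # 계수 폭 0.10 (작업 슬롯 평균 계수 ≈ 1.10)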

# # 우선도 범위(자동 산출)
# AUTO_BIAS_MIN = 0.85
# AUTO_BIAS_MAX = 1.15

# # 수동 보정(원하면 값 조정; 키는 그룹명)
# #   예: 창고/포크리프트 ↑, 프레스/셀 ↓
# PRIORITY_OVERRIDE = {
#     "Warehouse": 1.12,
#     "Forklift_Blanking": 1.10,
#     "Forklift_Press": 1.08,
#     "Forklift_Assembly": 1.08,
#     "Press": 0.95,
#     "Cell": 0.97,
#     # "Blanking": 1.00, "Quality": 1.00, "Paint": 0.90  # 필요 시
# }


# # =========================
# # 2) HELPERS
# # =========================
# def to_dt(s):
#     try:
#         return pd.to_datetime(s, errors="coerce")
#     except Exception:
#         return pd.to_datetime(s.astype(str), errors="coerce")

# def detect_date_col(df: pd.DataFrame) -> str:
#     for c in ["Time_Now", "Timestamp", "timestamp", "Date", "date"]:
#         if c in df.columns:
#             return c
#     return df.columns[0]

# def day_floor(dt: pd.Series) -> pd.Series:
#     return dt.dt.floor("D")

# def build_schedule_for_day(day: pd.Timestamp) -> pd.DataFrame:
#     rows = []
#     anchor_start = pd.Timestamp(day.year, day.month, day.day, 7, 50)
#     anchor_end   = pd.Timestamp(day.year, day.month, day.day, 7, 40) + pd.Timedelta(days=1)
#     for start_str, end_str, label in SCHEDULE_BLOCKS:
#         s_h, s_m = map(int, start_str.split(":"))
#         e_h, e_m = map(int, end_str.split(":"))
#         start = pd.Timestamp(day.year, day.month, day.day, s_h, s_m)
#         end   = pd.Timestamp(day.year, day.month, day.day, e_h, e_m)
#         if (s_h, s_m) < (7, 50): start += pd.Timedelta(days=1)
#         if (e_h, e_m) < (7, 50) or end <= start: end += pd.Timedelta(days=1)
#         s = max(start, anchor_start); e = min(end, anchor_end)
#         if e <= s: 
#             continue
#         minutes = int((e - s).total_seconds() // 60)
#         rows.append({"start": s, "end": e, "label": label, "minutes": minutes})
#     return pd.DataFrame(rows)

# def explode_to_hours(sch: pd.DataFrame) -> pd.DataFrame:
#     t_min = sch["start"].min().floor("h")
#     t_max = sch["end"].max().ceil("h")
#     hours = pd.date_range(t_min, t_max, freq="h")
#     slots = pd.DataFrame({"slot_start": hours[:-1], "slot_end": hours[1:]})
#     out = []
#     for _, blk in sch.iterrows():
#         for _, sl in slots.iterrows():
#             s = max(blk["start"], sl["slot_start"])
#             e = min(blk["end"], sl["slot_end"])
#             overlap = (e - s).total_seconds() / 60.0
#             if overlap > 0:
#                 out.append({
#                     "slot_start": sl["slot_start"],
#                     "slot_end": sl["slot_end"],
#                     "label": blk["label"],
#                     "min_overlap": overlap
#                 })
#     return pd.DataFrame(out)

# def integer_allocate(total: float, weights: np.ndarray) -> np.ndarray:
#     w = np.clip(np.asarray(weights, float), 0, None)
#     if w.sum() <= 0 or total <= 0:
#         return np.zeros_like(w, dtype=int)
#     p = w / w.sum()
#     base = np.floor(total * p).astype(int)
#     residual = int(round(total - base.sum()))
#     if residual > 0:
#         idx = np.argsort(-p)[:residual]
#         base[idx] += 1
#     return base

# # --- 컬럼 → 그룹명 매핑 ---
# # 그룹 기준: Blanking, Press, Cell, Paint, Quality, Warehouse, Forklift_Blanking, Forklift_Press, Forklift_Assembly
# def map_col_to_group(col: str) -> str:
#     low = col.lower()
#     if "forklift_blanking" in low: return "Forklift_Blanking"
#     if "forklift_press" in low:    return "Forklift_Press"
#     if "forklift_assembly" in low: return "Forklift_Assembly"
#     if "warehouse" in low:         return "Warehouse"
#     if low.startswith("blanking") or low == "blanking_queue": return "Blanking"
#     if low.startswith("press"):    return "Press"
#     if low.startswith("cell"):     return "Cell"
#     if low.startswith("paint"):    return "Paint"
#     if "quality" in low:           return "Quality"
#     return "Other"

# def build_group_map(queue_cols):
#     grp = {}
#     for c in queue_cols:
#         g = map_col_to_group(c)
#         grp.setdefault(g, []).append(c)
#     # 빈 "Other" 그룹 제거용 방어 코드
#     # (위에서 setdefault 직후 append하므로 빈 리스트는 생기지 않아 이 분기는 실제로 실행되지 않음)
#     if "Other" in grp and len(grp["Other"]) == 0:
#         grp.pop("Other", None)
#     return grp

# # --- 일 단위에서 '탑 그룹' 빈도로 우선도 산출 ---
# def derive_group_bias_from_daily(df_daily: pd.DataFrame, queue_cols) -> dict:
#     group_map = build_group_map(queue_cols)
#     if not group_map:
#         return {}

#     # 각 일(row)마다: 그룹별 일평균 Queue 합 → 최대 그룹 1표
#     votes = {g: 0 for g in group_map}
#     for _, r in df_daily.iterrows():
#         scores = {}
#         for g, cols in group_map.items():
#             # 일 평균(집계 테이블은 이미 mean), 여러 컬럼이면 합으로 그룹 점수
#             vals = [float(r[c]) for c in cols if c in r.index]
#             if len(vals) == 0: 
#                 continue
#             scores[g] = np.nansum(vals)
#         if scores:
#             gtop = max(scores, key=scores.get)
#             votes[gtop] += 1

#     # 빈도 → [AUTO_BIAS_MIN, AUTO_BIAS_MAX] 선형 매핑
#     freqs = np.array(list(votes.values()), dtype=float)
#     if freqs.max() == freqs.min():  # 전부 동일할 때
#         auto = {g: 1.0 for g in votes}
#     else:
#         fmin, fmax = freqs.min(), freqs.max()
#         auto = {}
#         for g, v in votes.items():
#             # min-max 정규화(0~1) → [AUTO_BIAS_MIN, AUTO_BIAS_MAX] 구간으로 선형 스케일
#             norm = (v - fmin) / (fmax - fmin + 1e-9)
#             auto[g] = AUTO_BIAS_MIN + (AUTO_BIAS_MAX - AUTO_BIAS_MIN) * norm

#     # 수동 보정 곱해주기(키가 없으면 1.0)
#     out = {}
#     for g in auto:
#         out[g] = float(auto[g]) * float(PRIORITY_OVERRIDE.get(g, 1.0))
#     return out


# # =========================
# # 3) DAILY AGG (필요시)
# # =========================
# def aggregate_to_daily(df: pd.DataFrame, date_col: str):
#     dt = to_dt(df[date_col])
#     if dt.isna().all():
#         raise ValueError(f"Could not parse datetime from '{date_col}'")
#     df = df.copy()
#     df["_date"] = day_floor(dt)

#     count_cols = [c for c in df.columns if c.startswith("c_Cell") or c == "c_TotalProducts"]
#     time_cols  = [c for c in df.columns if c.endswith("_Time")]
#     util_cols  = [c for c in df.columns if c.endswith("_Util")]
#     queue_cols = [c for c in df.columns if c.endswith("_Queue")]
#     other_cols = sorted(set(df.columns) - set([date_col, "_date"] + count_cols + time_cols + util_cols + queue_cols))

#     agg = {}
#     for c in count_cols: agg[c] = "sum"
#     for c in time_cols + util_cols + queue_cols: agg[c] = "mean"
#     for c in other_cols: agg[c] = "first"

#     daily = df.groupby("_date", as_index=False).agg(agg).sort_values("_date").reset_index(drop=True)
#     daily[date_col] = daily["_date"]
#     return daily.drop(columns=["_date"]), count_cols, time_cols, util_cols, queue_cols, date_col


# # =========================
# # 4) MAIN TRANSFORM
# # =========================
# def make_time_scheduled(df_daily: pd.DataFrame,
#                         count_cols, time_cols, util_cols, queue_cols,
#                         date_col: str,
#                         granularity: str = "hourly") -> pd.DataFrame:
#     rows = []

#     # 그룹 우선도(일 단위 경향 기반) 계산
#     group_bias = derive_group_bias_from_daily(df_daily, queue_cols)

#     for _, day_row in df_daily.iterrows():
#         day = pd.to_datetime(day_row[date_col]).floor("D")

#         # 스케줄 → 슬롯
#         sch = build_schedule_for_day(day)
#         if sch.empty:
#             continue
#         if granularity == "block":
#             slot_df = sch.rename(columns={"start":"slot_start","end":"slot_end"})
#             slot_df["min_overlap"] = (slot_df["slot_end"] - slot_df["slot_start"]).dt.total_seconds()/60.0
#             slot_df = slot_df[["slot_start","slot_end","label","min_overlap"]]
#         else:
#             slot_df = explode_to_hours(sch)

#         # 생산 가중치
#         slot_df["shift_weight"] = slot_df["label"].map(SHIFT_WEIGHTS).fillna(0.0)
#         slot_df["work_minutes"] = slot_df["min_overlap"] * (slot_df["shift_weight"] > 0).astype(float)
#         slot_df["prod_weight"]  = slot_df["work_minutes"] * slot_df["shift_weight"]

#         # count 분배(정수)
#         for c in count_cols:
#             total = float(day_row[c])
#             slot_df[c] = integer_allocate(total, slot_df["prod_weight"].values)

#         # Util/Time 분배
#         safe_eps = 1e-12
#         work_mask = slot_df["shift_weight"].values > 0
#         sw = slot_df["shift_weight"].values.copy()
#         if work_mask.any():
#             sw_norm = np.zeros_like(sw, dtype=float)
#             mean_sw = sw[work_mask].mean()
#             sw_norm[work_mask] = sw[work_mask] / (mean_sw + safe_eps)
#         else:
#             sw_norm = np.zeros_like(sw, dtype=float)

#         for c in util_cols:
#             mean_u = float(day_row[c])
#             u = np.zeros(len(slot_df), dtype=float)
#             u[work_mask] = mean_u * (1.0 - UTIL_BAND + 2*UTIL_BAND * sw_norm[work_mask])
#             slot_df[c] = np.clip(u, 0, 1)

#         for c in time_cols:
#             mean_t = float(day_row[c])
#             t = np.zeros(len(slot_df), dtype=float)
#             if mean_t != 0 and work_mask.any():
#                 t[work_mask] = mean_t * (1.0 - TIME_BAND + 2*TIME_BAND * sw_norm[work_mask])
#             slot_df[c] = t

#         # Queue 분배: prod_weight × 그룹 우선도 → 휴식 carry-over → 스케일 재보정
#         n_slots = len(slot_df)
#         w = slot_df["prod_weight"].values.astype(float)
#         work_idx = np.where(w > 0)[0]
#         rest_idx = np.where(w <= 0)[0]

#         for qc in queue_cols:
#             daily_mean_q = float(day_row[qc])
#             if daily_mean_q == 0.0:
#                 slot_df[qc] = 0.0
#                 continue

#             # 컬럼의 그룹 찾기 → 그룹 우선도 적용
#             g = map_col_to_group(qc)
#             bias = float(group_bias.get(g, 1.0))

#             q = np.zeros(n_slots, dtype=float)
#             if work_idx.size > 0 and w[work_idx].sum() > 0:
#                 w_norm = w[work_idx] / w[work_idx].sum()
#                 # 하루 평균 × 슬롯 수 = 하루 총합 → 작업 슬롯에 분배 + 그룹 우선도
#                 q[work_idx] = daily_mean_q * n_slots * w_norm * bias
#             else:
#                 q[:] = daily_mean_q * bias

#             # 휴식 carry-over
#             for idx in rest_idx:
#                 if idx == 0:
#                     next_val = q[work_idx[0]] if work_idx.size > 0 else 0.0
#                     q[idx] = next_val
#                 else:
#                     q[idx] = q[idx-1]

#             # 스케일 재보정(하루 평균 유지)
#             # ※ q 전체가 bias에 비례하므로 여기서 target_sum / cur_sum 을 곱하면 bias가 상쇄된다.
#             target_sum = daily_mean_q * n_slots
#             cur_sum = q.sum()
#             if cur_sum > 0:
#                 q *= (target_sum / cur_sum)

#             slot_df[qc] = q

#         # 출력 누적
#         for _, r in slot_df.iterrows():
#             out = {"Time_Now": r["slot_start"], "Time_End": r["slot_end"], "ShiftLabel": r["label"]}
#             for c in count_cols + util_cols + time_cols + queue_cols:
#                 out[c] = r.get(c, 0)
#             rows.append(out)

#     result = pd.DataFrame(rows).sort_values(["Time_Now"]).reset_index(drop=True)

#     # 컬럼 순서 정리
#     keep_order, all_cols = [], set(count_cols + util_cols + time_cols + queue_cols)
#     for c in df_daily.columns:
#         if c in all_cols:
#             keep_order.append(c)
#     for c in (set(all_cols) - set(keep_order)):
#         keep_order.append(c)
#     result = result.reindex(columns=["Time_Now","Time_End","ShiftLabel"] + keep_order)
#     return result


# # =========================
# # 5) RUN
# # =========================
# if __name__ == "__main__":
#     # 1) 로드
#     full = pd.read_csv(INPUT_CSV, low_memory=False)

#     # 2) 날짜 컬럼
#     date_col = detect_date_col(full)
#     full[date_col] = to_dt(full[date_col])
#     if full[date_col].isna().all():
#         raise ValueError(f"날짜 파싱 실패: '{date_col}'를 datetime으로 변환할 수 없습니다.")
#     full["_date"] = full[date_col].dt.floor("D")

#     # 3) 형태 판단
#     nunique_dates = full["_date"].nunique()
#     is_daily_like = (len(full) == nunique_dates)

#     # 4) 열 그룹화
#     count_cols = [c for c in full.columns if c.startswith("c_Cell") or c == "c_TotalProducts"]
#     time_cols  = [c for c in full.columns if c.endswith("_Time")]
#     util_cols  = [c for c in full.columns if c.endswith("_Util")]
#     queue_cols = [c for c in full.columns if c.endswith("_Queue")]

#     # 5) 일 집계/스킵
#     if is_daily_like:
#         df_daily = full.drop(columns=["_date"]).copy()
#         df_daily[date_col] = full["_date"].values
#     else:
#         df_daily = full.drop(columns=["_date"]).copy()
#         df_daily, count_cols, time_cols, util_cols, queue_cols, date_col = aggregate_to_daily(df_daily, date_col)

#     # 6) 시간 분해
#     out = make_time_scheduled(
#         df_daily,
#         count_cols=count_cols,
#         time_cols=time_cols,
#         util_cols=util_cols,
#         queue_cols=queue_cols,
#         date_col=date_col,
#         granularity=GRANULARITY,
#     )

#     # 7) 저장
#     out.to_csv(OUTPUT_CSV, index=False, encoding="utf-8-sig")

#     # 8) 로그
#     sample_day = pd.to_datetime(df_daily[date_col].iloc[0])
#     sample_slots = (explode_to_hours(build_schedule_for_day(sample_day))
#                     if GRANULARITY == "hourly"
#                     else build_schedule_for_day(sample_day))
#     slots_per_day = len(sample_slots) if GRANULARITY == "hourly" else len(SCHEDULE_BLOCKS)

#     print(f"[OK] Saved -> {OUTPUT_CSV}")
#     print(f"입력 날짜 수: {nunique_dates:,} | 대표 하루 슬롯 수: {slots_per_day}")
#     print(f"예상 행수 ≈ {nunique_dates * slots_per_day:,} | 실제 행수 = {len(out):,}")

[OK] Saved -> ./FinalResults_time_scheduled_20240101_20250630.csv
입력 날짜 수: 547 | 대표 하루 슬롯 수: 35
예상 행수 ≈ 19,145 | 실제 행수 = 19,145


In [4]:
# # -*- coding: utf-8 -*-
# """
# 하루 → 시간 슬롯 분해 (병목 빈도 반영 버전)
# - 원본(하루 1행)에서 '날짜별 최댓값 큐' 빈도로 큐별 지수 α를 자동 산출
# - 시간 분해 시 L = (λ×W)^α 로 큐별 스파이크를 보정
# - 각 큐의 '일 평균'은 그대로 유지(스케일링)하되, 시간대별 최댓값 경쟁 결과가 일빈도 경향을 따르도록 유도
# - 스케줄 창: 07:50 ~ (다음날) 07:40, 20:10~20:40은 휴식으로 보정
# """

# import numpy as np
# import pandas as pd

# # =========================
# # 1) CONFIG
# # =========================
# INPUT_CSV  = "./subset_20240101_20250630_fullcols.csv"
# OUTPUT_CSV = "./FinalResults_time_scheduled_20240101_20250630.csv"
# GRANULARITY = "hourly"  # "hourly" or "block"

# SCHEDULE_BLOCKS = [
#     ("07:50", "09:50", "주간"),
#     ("09:50", "10:00", "휴식"),
#     ("10:00", "11:50", "주간"),
#     ("11:50", "12:40", "중식"),
#     ("12:40", "14:40", "주간"),
#     ("14:40", "14:50", "휴식"),
#     ("14:50", "16:40", "주간"),
#     ("16:40", "17:10", "석식"),
#     ("17:10", "20:10", "잔업"),
#     ("20:10", "20:40", "휴식"),   # 교대 30분
#     ("20:40", "00:00", "야간"),
#     ("00:00", "00:30", "야식"),   # 다음날
#     ("00:30", "04:00", "야간"),   # 다음날
#     ("04:00", "05:00", "휴식"),   # 다음날
#     ("05:00", "07:40", "야간"),   # 다음날
# ]

# SHIFT_WEIGHTS = {
#     "주간": 1.0,
#     "잔업": 0.9,
#     "야간": 1.2,
#     "중식": 0.0,
#     "석식": 0.0,
#     "야식": 0.0,
#     "휴식": 0.0,
# }

# UTIL_BAND = 0.15
# TIME_BAND = 0.10

# # α 범위(자동 배정용)
# ALPHA_MIN = 0.60   # 거의 병목이 아닌 큐 (스파이크 완화)
# ALPHA_MAX = 1.10   # 자주 병목이던 큐 (스파이크 보존)

# # =========================
# # 2) HELPERS
# # =========================
# def to_dt(s):
#     try:
#         return pd.to_datetime(s, errors="coerce")
#     except Exception:
#         return pd.to_datetime(s.astype(str), errors="coerce")

# def detect_date_col(df: pd.DataFrame) -> str:
#     for c in ["Time_Now", "Timestamp", "timestamp", "Date", "date"]:
#         if c in df.columns:
#             return c
#     return df.columns[0]

# def day_floor(dt: pd.Series) -> pd.Series:
#     return dt.dt.floor("D")

# def aggregate_to_daily(df: pd.DataFrame, date_col: str):
#     """일 단위 집계: 생산량 sum, 시간/유틸/큐 mean, 기타 first"""
#     dt = to_dt(df[date_col])
#     if dt.isna().all():
#         raise ValueError(f"Could not parse datetime from '{date_col}'")
#     df = df.copy()
#     df["_date"] = day_floor(dt)

#     count_cols = [c for c in df.columns if c.startswith("c_Cell") or c == "c_TotalProducts"]
#     time_cols  = [c for c in df.columns if c.endswith("_Time")]
#     util_cols  = [c for c in df.columns if c.endswith("_Util")]
#     queue_cols = [c for c in df.columns if c.endswith("_Queue")]
#     other_cols = sorted(set(df.columns) - set([date_col, "_date"] + count_cols + time_cols + util_cols + queue_cols))

#     agg = {}
#     for c in count_cols: agg[c] = "sum"
#     for c in time_cols + util_cols + queue_cols: agg[c] = "mean"
#     for c in other_cols: agg[c] = "first"

#     daily = df.groupby("_date", as_index=False).agg(agg).sort_values("_date").reset_index(drop=True)
#     daily[date_col] = daily["_date"]
#     return daily.drop(columns=["_date"]), count_cols, time_cols, util_cols, queue_cols, date_col

# def build_schedule_for_day(day: pd.Timestamp) -> pd.DataFrame:
#     """스케줄을 07:50~(다음날)07:40 윈도로 정렬/클램프"""
#     rows = []
#     anchor_start = pd.Timestamp(day.year, day.month, day.day, 7, 50)
#     anchor_end   = pd.Timestamp(day.year, day.month, day.day, 7, 40) + pd.Timedelta(days=1)

#     for start_str, end_str, label in SCHEDULE_BLOCKS:
#         s_h, s_m = map(int, start_str.split(":"))
#         e_h, e_m = map(int, end_str.split(":"))

#         start = pd.Timestamp(day.year, day.month, day.day, s_h, s_m)
#         if (s_h, s_m) < (7, 50): start += pd.Timedelta(days=1)

#         end = pd.Timestamp(day.year, day.month, day.day, e_h, e_m)
#         if (e_h, e_m) < (7, 50) or end <= start: end += pd.Timedelta(days=1)

#         s = max(start, anchor_start)
#         e = min(end, anchor_end)
#         if e <= s: continue

#         minutes = int((e - s).total_seconds() // 60)
#         rows.append({"start": s, "end": e, "label": label, "minutes": minutes})

#     return pd.DataFrame(rows)

# def explode_to_hours(sch: pd.DataFrame) -> pd.DataFrame:
#     """블록→정시 슬롯 폭발 + 겹치는 분 계산"""
#     t_min = sch["start"].min().floor("h")
#     t_max = sch["end"].max().ceil("h")
#     hours = pd.date_range(t_min, t_max, freq="h")
#     slots = pd.DataFrame({"slot_start": hours[:-1], "slot_end": hours[1:]})

#     out = []
#     for _, blk in sch.iterrows():
#         for _, sl in slots.iterrows():
#             s = max(blk["start"], sl["slot_start"])
#             e = min(blk["end"], sl["slot_end"])
#             overlap = (e - s).total_seconds() / 60.0
#             if overlap > 0:
#                 out.append({
#                     "slot_start": sl["slot_start"],
#                     "slot_end": sl["slot_end"],
#                     "label": blk["label"],
#                     "min_overlap": overlap
#                 })
#     return pd.DataFrame(out)

# def integer_allocate(total: float, weights: np.ndarray) -> np.ndarray:
#     """정수 분배(잔차 보정)"""
#     w = np.clip(np.asarray(weights, float), 0, None)
#     if w.sum() <= 0 or total <= 0:
#         return np.zeros_like(w, dtype=int)
#     p = w / w.sum()
#     alloc = np.floor(total * p).astype(int)
#     residual = int(round(total - alloc.sum()))
#     if residual > 0:
#         idx = np.argsort(-p)[:residual]
#         alloc[idx] += 1
#     return alloc

# def compute_alpha_by_queue(df_daily: pd.DataFrame, queue_cols: list) -> dict:
#     """
#     날짜별로 '최댓값 큐'를 집계해 빈도 순위를 만들고,
#     순위 기반으로 각 큐의 α를 [ALPHA_MIN, ALPHA_MAX] 사이로 선형 매핑.
#     - 상위(자주 병목)일수록 α ↑ (스파이크 보존)
#     - 하위(거의 병목 아님)일수록 α ↓ (스파이크 완화)
#     """
#     # 날짜별 최댓값 큐
#     tops = []
#     for d, g in df_daily.groupby(df_daily.columns[-1]):  # 마지막에 date_col이 들어가 있음
#         # g: 하루 1행 구조일 가능성 높음 (mean 적용돼있어도 OK)
#         row = g.iloc[0]
#         vals = {qc: float(row.get(qc, 0.0)) for qc in queue_cols}
#         if not vals: continue
#         top_q = max(vals, key=vals.get)
#         tops.append(top_q)
#     freq = pd.Series(tops).value_counts()
#     # 빈도가 없는 큐는 0으로 채움
#     freq = freq.reindex(queue_cols, fill_value=0)

#     # 순위(내림차순: 1이 가장 높음)
#     rank = freq.rank(ascending=False, method="dense").astype(int)
#     rmin, rmax = rank.min(), rank.max()
#     # 방어: 큐가 1개뿐인 경우 등
#     if rmin == rmax:
#         return {qc: 1.0 for qc in queue_cols}  # 모두 동일 α=1

#     # 선형 매핑: 높은 빈도(낮은 rank 숫자) → ALPHA_MAX, 낮은 빈도(높은 rank) → ALPHA_MIN
#     alpha = {}
#     for qc in queue_cols:
#         r = rank.loc[qc]
#         t = (r - rmin) / (rmax - rmin)  # 0~1
#         a = ALPHA_MAX - t*(ALPHA_MAX - ALPHA_MIN)
#         alpha[qc] = float(a)
#     return alpha

# # =========================
# # 3) MAIN TRANSFORM
# # =========================
# def make_time_scheduled(df_daily: pd.DataFrame,
#                         count_cols, time_cols, util_cols, queue_cols,
#                         date_col: str,
#                         granularity: str = "hourly") -> pd.DataFrame:
#     rows = []

#     # SKU별 Wait 컬럼 매핑
#     sku_wait_cols = {c.split("_")[0]: c for c in time_cols if c.startswith("SKU") and c.endswith("Wait_Time")}
#     overall_wait_name = next((c for c in time_cols if c.endswith("Wait_Time")), None)

#     # === 병목 빈도 기반 α 산출 ===
#     alpha_by_q = compute_alpha_by_queue(df_daily[[date_col] + queue_cols].rename(columns={date_col: "Date"}).assign(Date=lambda d:d["Date"]).rename(columns={"Date":date_col}),
#                                         queue_cols)

#     for _, day_row in df_daily.iterrows():
#         day = pd.to_datetime(day_row[date_col]).floor("D")
#         sch = build_schedule_for_day(day)
#         if sch.empty:
#             continue

#         if granularity == "block":
#             slot_df = sch.rename(columns={"start":"slot_start","end":"slot_end"})
#             slot_df["min_overlap"] = (slot_df["slot_end"] - slot_df["slot_start"]).dt.total_seconds()/60.0
#             slot_df = slot_df[["slot_start","slot_end","label","min_overlap"]]
#         else:
#             slot_df = explode_to_hours(sch)

#         # 생산 가중치
#         slot_df["shift_weight"] = slot_df["label"].map(SHIFT_WEIGHTS).fillna(0.0)
#         slot_df["work_minutes"] = slot_df["min_overlap"] * (slot_df["shift_weight"] > 0).astype(float)
#         slot_df["prod_weight"] = slot_df["work_minutes"] * slot_df["shift_weight"]

#         # count 분배(정수)
#         for c in count_cols:
#             total = float(day_row[c])
#             slot_df[c] = integer_allocate(total, slot_df["prod_weight"].values)

#         # Util/Time 분배
#         safe_eps = 1e-12
#         work_mask = slot_df["shift_weight"].values > 0
#         sw = slot_df["shift_weight"].values.copy()
#         if work_mask.any():
#             sw_norm = np.zeros_like(sw, dtype=float)
#             mean_sw = sw[work_mask].mean()
#             sw_norm[work_mask] = sw[work_mask] / (mean_sw + safe_eps)
#         else:
#             sw_norm = np.zeros_like(sw, dtype=float)

#         for c in util_cols:
#             mean_u = float(day_row[c])
#             u = np.zeros(len(slot_df), dtype=float)
#             u[work_mask] = mean_u * (1.0 - UTIL_BAND + 2*UTIL_BAND * sw_norm[work_mask])
#             slot_df[c] = np.clip(u, 0, 1)

#         for c in time_cols:
#             mean_t = float(day_row[c])
#             t = np.zeros(len(slot_df), dtype=float)
#             if mean_t != 0 and work_mask.any():
#                 t[work_mask] = mean_t * (1.0 - TIME_BAND + 2*TIME_BAND * sw_norm[work_mask])
#             slot_df[c] = t

#         # Queue: L = (λ × W) ** α_q  → 큐별 '모양' 보정
#         slot_df["slot_seconds"] = (slot_df["slot_end"] - slot_df["slot_start"]).dt.total_seconds()
#         per_slot_total = slot_df[count_cols].sum(axis=1).astype(float)
#         lam = (per_slot_total / slot_df["slot_seconds"].replace(0, np.nan)).fillna(0.0)

#         # 미리 λ×W(기본형) 저장해두고 큐별 α 적용
#         base_L = {}
#         for qc in queue_cols:
#             # Wait 추정
#             sku_tag = next((p for p in qc.split("_") if p.startswith("SKU")), None)
#             if sku_tag and sku_tag in sku_wait_cols:
#                 W = slot_df[sku_wait_cols[sku_tag]].astype(float)
#             elif overall_wait_name is not None:
#                 W = slot_df[overall_wait_name].astype(float)
#             else:
#                 W = pd.Series(np.zeros(len(slot_df)))
#             L0 = lam * W
#             a = float(alpha_by_q.get(qc, 1.0))
#             base_L[qc] = np.power(np.maximum(L0.values, 0.0), a)  # (λW)^α

#         # 큐별 일 평균(원본) 규모는 유지하되, 시간대 분포(shape)는 (λW)^α로
#         for qc in queue_cols:
#             daily_mean_q = float(day_row[qc])
#             v = base_L[qc]
#             if np.any(v > 0) and daily_mean_q != 0:
#                 scale = daily_mean_q / (np.mean(v) + 1e-9)
#                 q_vals = v * scale
#             else:
#                 # 생산 가중치 비례 분배(평균 유지)
#                 w = slot_df["prod_weight"].values
#                 w = w / (w.sum() + 1e-9)
#                 q_vals = w * daily_mean_q * len(slot_df)
#             slot_df[qc] = q_vals

#         # 출력
#         for _, r in slot_df.iterrows():
#             out = {"Time_Now": r["slot_start"], "Time_End": r["slot_end"], "ShiftLabel": r["label"]}
#             for c in count_cols + util_cols + time_cols + queue_cols:
#                 out[c] = r.get(c, 0)
#             rows.append(out)

#     result = pd.DataFrame(rows).sort_values(["Time_Now"]).reset_index(drop=True)

#     # 컬럼 순서
#     keep_order, all_cols = [], set(count_cols + util_cols + time_cols + queue_cols)
#     for c in df_daily.columns:
#         if c in all_cols:
#             keep_order.append(c)
#     for c in (set(all_cols) - set(keep_order)):
#         keep_order.append(c)
#     result = result.reindex(columns=["Time_Now","Time_End","ShiftLabel"] + keep_order)
#     return result

# # =========================
# # 4) RUN
# # =========================
# if __name__ == "__main__":
#     full = pd.read_csv(INPUT_CSV, low_memory=False)

#     date_col = detect_date_col(full)
#     full[date_col] = to_dt(full[date_col])
#     if full[date_col].isna().all():
#         raise ValueError(f"날짜 파싱 실패: '{date_col}'")
#     full["_date"] = full[date_col].dt.floor("D")

#     nunique_dates = full["_date"].nunique()
#     is_daily_like = (len(full) == nunique_dates)

#     count_cols = [c for c in full.columns if c.startswith("c_Cell") or c == "c_TotalProducts"]
#     time_cols  = [c for c in full.columns if c.endswith("_Time")]
#     util_cols  = [c for c in full.columns if c.endswith("_Util")]
#     queue_cols = [c for c in full.columns if c.endswith("_Queue")]

#     if is_daily_like:
#         df_daily = full.drop(columns=["_date"]).copy()
#         df_daily[date_col] = full["_date"].values
#     else:
#         df_daily = full.drop(columns=["_date"]).copy()
#         df_daily, count_cols, time_cols, util_cols, queue_cols, date_col = aggregate_to_daily(df_daily, date_col)

#     out = make_time_scheduled(
#         df_daily,
#         count_cols=count_cols,
#         time_cols=time_cols,
#         util_cols=util_cols,
#         queue_cols=queue_cols,
#         date_col=date_col,
#         granularity=GRANULARITY,
#     )

#     out.to_csv(OUTPUT_CSV, index=False, encoding="utf-8-sig")

#     # 진단
#     sample_day = pd.to_datetime(df_daily[date_col].iloc[0])
#     sample_slots = (explode_to_hours(build_schedule_for_day(sample_day))
#                     if GRANULARITY == "hourly"
#                     else build_schedule_for_day(sample_day))
#     slots_per_day = len(sample_slots) if GRANULARITY == "hourly" else len(SCHEDULE_BLOCKS)

#     print(f"[OK] Saved -> {OUTPUT_CSV}")
#     print(f"입력 날짜 수: {nunique_dates:,} | 대표 하루 슬롯 수: {slots_per_day}")
#     print(f"예상 행수 ≈ {nunique_dates * slots_per_day:,} | 실제 행수 = {len(out):,}")

#     # 추가: 2025-03-15 시간대별 최댓값 큐 빈도 간단 체크
#     try:
#         tgt = pd.Timestamp("2025-03-15")
#         one = out[out["Time_Now"].dt.floor("D") == tgt].copy()
#         qcols = [c for c in queue_cols if c in one.columns]
#         topcol = one[qcols].idxmax(axis=1)
#         print("\n[디버그] 2025-03-15 시간대 최댓값 큐 상위 10개:")
#         print(topcol.value_counts().head(10))
#     except Exception:
#         pass

[OK] Saved -> ./FinalResults_time_scheduled_20240101_20250630.csv
입력 날짜 수: 547 | 대표 하루 슬롯 수: 35
예상 행수 ≈ 19,145 | 실제 행수 = 19,145

[디버그] 2025-03-15 시간대 최댓값 큐 상위 10개:
Warehouse1_Queue       26
Blanking_SKU1_Queue     9
Name: count, dtype: int64


In [5]:
# try:
#     tgt = pd.Timestamp("2025-06-30")
#     one = out[out["Time_Now"].dt.floor("D") == tgt].copy()
#     qcols = [c for c in queue_cols if c in one.columns]
#     topcol = one[qcols].idxmax(axis=1)
#     print("\n[디버그] 2025-06-30 시간대 최댓값 큐 상위 10개:")
#     print(topcol.value_counts().head(10))
# except Exception:
#     pass


[디버그] 2025-06-30 시간대 최댓값 큐 상위 10개:
Warehouse1_Queue           24
Blanking_SKU1_Queue         9
Forklift_Press_Queue        1
Forklift_Blanking_Queue     1
Name: count, dtype: int64


In [6]:
# # -*- coding: utf-8 -*-
# """
# 하루 → 시간 슬롯 분해 (일 병목 빈도 반영·우선순위 가중 버전)
# - (λ×W)^α 로 큐별 '모양' 보정 + β 가중으로 '최댓값 경쟁력' 보정
# - 보정 후 큐별 일평균은 원본과 동일하게 재스케일 → 규모 보존, 분포만 수정
# - 스케줄 창: 07:50 ~ (다음날) 07:40, 20:10~20:40 휴식
# """

# import numpy as np
# import pandas as pd

# # =========================
# # 1) CONFIG
# # =========================
# INPUT_CSV  = "./subset_20240101_20250630_fullcols.csv"
# OUTPUT_CSV = "./FinalResults_time_scheduled_20240101_20250630.csv"
# GRANULARITY = "hourly"  # "hourly" or "block"

# # 사용자가 알려준 "생산량 기준 병목 빈도" 우선순위(상위→하위)
# # 비우면(None) 자동 추정(일별 최댓값 큐 빈도) 사용
# PREFERRED_PRIORITY = [
#     "Warehouse1_Queue",
#     "Forklift_Blanking_Queue",
#     "Warehouse_3_Queue",
#     "Warehouse_4_Queue",
#     "Forklift_Press_Queue",
#     "Press4_Queue",
#     "Press2_Queue",
#     "Blanking_SKU3_Queue",
# ]

# # 스케줄
# SCHEDULE_BLOCKS = [
#     ("07:50", "09:50", "주간"),
#     ("09:50", "10:00", "휴식"),
#     ("10:00", "11:50", "주간"),
#     ("11:50", "12:40", "중식"),
#     ("12:40", "14:40", "주간"),
#     ("14:40", "14:50", "휴식"),
#     ("14:50", "16:40", "주간"),
#     ("16:40", "17:10", "석식"),
#     ("17:10", "20:10", "잔업"),
#     ("20:10", "20:40", "휴식"),
#     ("20:40", "00:00", "야간"),
#     ("00:00", "00:30", "야식"),
#     ("00:30", "04:00", "야간"),
#     ("04:00", "05:00", "휴식"),
#     ("05:00", "07:40", "야간"),
# ]

# SHIFT_WEIGHTS = {"주간":1.0, "잔업":0.9, "야간":1.2, "중식":0.0, "석식":0.0, "야식":0.0, "휴식":0.0}

# UTIL_BAND = 0.15
# TIME_BAND = 0.10

# # α(모양 보정) 범위
# ALPHA_MIN = 0.70
# ALPHA_MAX = 1.15

# # β(최댓값 경쟁력) 범위
# BETA_MIN = 0.75
# BETA_MAX = 1.35

# # =========================
# # 2) HELPERS
# # =========================
# def to_dt(s):
#     try: return pd.to_datetime(s, errors="coerce")
#     except: return pd.to_datetime(s.astype(str), errors="coerce")

# def detect_date_col(df: pd.DataFrame) -> str:
#     for c in ["Time_Now", "Timestamp", "timestamp", "Date", "date"]:
#         if c in df.columns: return c
#     return df.columns[0]

# def day_floor(dt: pd.Series) -> pd.Series:
#     return dt.dt.floor("D")

# def aggregate_to_daily(df: pd.DataFrame, date_col: str):
#     """일 단위 집계: count sum / time/util/queue mean / 기타 first"""
#     dt = to_dt(df[date_col]); 
#     if dt.isna().all(): raise ValueError(f"Could not parse datetime from '{date_col}'")
#     df = df.copy(); df["_date"] = day_floor(dt)

#     count_cols = [c for c in df.columns if c.startswith("c_Cell") or c == "c_TotalProducts"]
#     time_cols  = [c for c in df.columns if c.endswith("_Time")]
#     util_cols  = [c for c in df.columns if c.endswith("_Util")]
#     queue_cols = [c for c in df.columns if c.endswith("_Queue")]
#     other_cols = sorted(set(df.columns) - set([date_col, "_date"] + count_cols + time_cols + util_cols + queue_cols))

#     agg = {}
#     for c in count_cols: agg[c] = "sum"
#     for c in time_cols + util_cols + queue_cols: agg[c] = "mean"
#     for c in other_cols: agg[c] = "first"

#     daily = df.groupby("_date", as_index=False).agg(agg).sort_values("_date").reset_index(drop=True)
#     daily[date_col] = daily["_date"]
#     return daily.drop(columns=["_date"]), count_cols, time_cols, util_cols, queue_cols, date_col

# def build_schedule_for_day(day: pd.Timestamp) -> pd.DataFrame:
#     """07:50~(다음날)07:40 창으로 스케줄 정렬/클램프"""
#     rows = []
#     anchor_start = pd.Timestamp(day.year, day.month, day.day, 7, 50)
#     anchor_end   = pd.Timestamp(day.year, day.month, day.day, 7, 40) + pd.Timedelta(days=1)
#     for start_str, end_str, label in SCHEDULE_BLOCKS:
#         s_h, s_m = map(int, start_str.split(":"))
#         e_h, e_m = map(int, end_str.split(":"))
#         start = pd.Timestamp(day.year, day.month, day.day, s_h, s_m)
#         end   = pd.Timestamp(day.year, day.month, day.day, e_h, e_m)
#         if (s_h, s_m) < (7, 50): start += pd.Timedelta(days=1)
#         if (e_h, e_m) < (7, 50) or end <= start: end += pd.Timedelta(days=1)
#         s = max(start, anchor_start); e = min(end, anchor_end)
#         if e <= s: continue
#         minutes = int((e - s).total_seconds() // 60)
#         rows.append({"start": s, "end": e, "label": label, "minutes": minutes})
#     return pd.DataFrame(rows)

# def explode_to_hours(sch: pd.DataFrame) -> pd.DataFrame:
#     """블록 → 정시 슬롯 분해 + 겹치는 분(min_overlap)"""
#     t_min = sch["start"].min().floor("h"); t_max = sch["end"].max().ceil("h")
#     hours = pd.date_range(t_min, t_max, freq="h")
#     slots = pd.DataFrame({"slot_start": hours[:-1], "slot_end": hours[1:]})
#     out=[]
#     for _, blk in sch.iterrows():
#         for _, sl in slots.iterrows():
#             s = max(blk["start"], sl["slot_start"]); e = min(blk["end"], sl["slot_end"])
#             ov = (e - s).total_seconds() / 60.0
#             if ov > 0:
#                 out.append({"slot_start": sl["slot_start"], "slot_end": sl["slot_end"], "label": blk["label"], "min_overlap": ov})
#     return pd.DataFrame(out)

# def integer_allocate(total: float, weights: np.ndarray) -> np.ndarray:
#     """정수 분배(잔차 보정)"""
#     w = np.clip(np.asarray(weights, float), 0, None)
#     if w.sum() <= 0 or total <= 0: return np.zeros_like(w, dtype=int)
#     p = w / w.sum()
#     alloc = np.floor(total * p).astype(int)
#     residual = int(round(total - alloc.sum()))
#     if residual > 0:
#         idx = np.argsort(-p)[:residual]; alloc[idx] += 1
#     return alloc

# def rank_from_daily_top(df_daily: pd.DataFrame, qcols: list, date_col: str) -> pd.Series:
#     """날짜별 최댓값 큐 빈도 → 순위(1이 최상)"""
#     tops=[]
#     for _, g in df_daily.groupby(df_daily[date_col].dt.floor("D")):
#         row=g.iloc[0]
#         vals={qc: float(row.get(qc,0.0)) for qc in qcols}
#         if vals: tops.append(max(vals, key=vals.get))
#     freq = pd.Series(tops).value_counts().reindex(qcols, fill_value=0)
#     return freq.rank(ascending=False, method="dense").astype(int)

# def make_alpha_beta(qcols: list, df_daily: pd.DataFrame, date_col: str) -> tuple[dict, dict]:
#     """
#     α: 일 최댓값 빈도 순위(또는 사용자 우선순위) → [ALPHA_MIN, ALPHA_MAX]
#     β: 사용자 우선순위(있으면) 우선 적용 → [BETA_MIN, BETA_MAX]
#     """
#     # 1) 기본 순위(자동): 일 최댓값 빈도
#     auto_rank = rank_from_daily_top(df_daily, qcols, date_col)  # 1이 최상
#     rmin, rmax = auto_rank.min(), auto_rank.max()

#     def lin_map(val, vmin, vmax, omin, omax):
#         if vmax == vmin: return (omin+omax)/2
#         t = (val - vmin) / (vmax - vmin)
#         return omin + (1 - t) * (omax - omin)  # 낮은 rank(=더 상위) → 큰 값

#     alpha = {}
#     for qc in qcols:
#         r = auto_rank.loc[qc]
#         alpha[qc] = float(lin_map(r, rmin, rmax, ALPHA_MIN, ALPHA_MAX))

#     # 2) β: 사용자 우선순위가 있으면 그걸 강하게 반영
#     if PREFERRED_PRIORITY:
#         # 리스트 앞쪽일수록 높은 β
#         idx_map = {qc:i for i, qc in enumerate(PREFERRED_PRIORITY)}
#         kmin, kmax = 0, max(0, len(PREFERRED_PRIORITY)-1)
#         beta = {qc: float(lin_map(idx_map.get(qc, kmax), kmin, kmax, BETA_MIN, BETA_MAX)) for qc in qcols}
#     else:
#         # 없으면 α와 같은 순위를 약하게 반영
#         beta = {qc: float(lin_map(auto_rank.loc[qc], rmin, rmax, BETA_MIN, BETA_MAX)) for qc in qcols}

#     return alpha, beta

# # =========================
# # 3) MAIN TRANSFORM
# # =========================
# def make_time_scheduled(df_daily: pd.DataFrame,
#                         count_cols, time_cols, util_cols, queue_cols,
#                         date_col: str,
#                         granularity: str = "hourly") -> pd.DataFrame:
#     rows = []

#     # SKU별 Wait 매핑
#     sku_wait_cols = {c.split("_")[0]: c for c in time_cols if c.startswith("SKU") and c.endswith("Wait_Time")}
#     overall_wait_name = next((c for c in time_cols if c.endswith("Wait_Time")), None)

#     # === α, β 산출 ===
#     alpha_by_q, beta_by_q = make_alpha_beta(queue_cols, df_daily, date_col)

#     for _, day_row in df_daily.iterrows():
#         day = pd.to_datetime(day_row[date_col]).floor("D")
#         sch = build_schedule_for_day(day)
#         if sch.empty: continue

#         if granularity == "block":
#             slot_df = sch.rename(columns={"start":"slot_start","end":"slot_end"})
#             slot_df["min_overlap"] = (slot_df["slot_end"] - slot_df["slot_start"]).dt.total_seconds()/60.0
#             slot_df = slot_df[["slot_start","slot_end","label","min_overlap"]]
#         else:
#             slot_df = explode_to_hours(sch)

#         # 생산 가중치
#         slot_df["shift_weight"] = slot_df["label"].map(SHIFT_WEIGHTS).fillna(0.0)
#         slot_df["work_minutes"] = slot_df["min_overlap"] * (slot_df["shift_weight"] > 0).astype(float)
#         slot_df["prod_weight"] = slot_df["work_minutes"] * slot_df["shift_weight"]

#         # count 분배(정수)
#         for c in count_cols:
#             total = float(day_row[c])
#             slot_df[c] = integer_allocate(total, slot_df["prod_weight"].values)

#         # Util/Time 분배
#         safe_eps = 1e-12
#         work_mask = slot_df["shift_weight"].values > 0
#         sw = slot_df["shift_weight"].values.copy()
#         if work_mask.any():
#             sw_norm = np.zeros_like(sw, dtype=float)
#             mean_sw = sw[work_mask].mean()
#             sw_norm[work_mask] = sw[work_mask] / (mean_sw + safe_eps)
#         else:
#             sw_norm = np.zeros_like(sw, dtype=float)

#         for c in util_cols:
#             mean_u = float(day_row[c])
#             u = np.zeros(len(slot_df), dtype=float)
#             u[work_mask] = mean_u * (1.0 - UTIL_BAND + 2*UTIL_BAND * sw_norm[work_mask])
#             slot_df[c] = np.clip(u, 0, 1)

#         for c in time_cols:
#             mean_t = float(day_row[c])
#             t = np.zeros(len(slot_df), dtype=float)
#             if mean_t != 0 and work_mask.any():
#                 t[work_mask] = mean_t * (1.0 - TIME_BAND + 2*TIME_BAND * sw_norm[work_mask])
#             slot_df[c] = t

#         # ---- Queue 생성 ----
#         slot_df["slot_seconds"] = (slot_df["slot_end"] - slot_df["slot_start"]).dt.total_seconds()
#         per_slot_total = slot_df[count_cols].sum(axis=1).astype(float)
#         lam = (per_slot_total / slot_df["slot_seconds"].replace(0, np.nan)).fillna(0.0)

#         # 1) (λ×W)^α 계산
#         shaped = {}
#         for qc in queue_cols:
#             sku_tag = next((p for p in qc.split("_") if p.startswith("SKU")), None)
#             if sku_tag and sku_tag in sku_wait_cols:
#                 W = slot_df[sku_wait_cols[sku_tag]].astype(float)
#             elif overall_wait_name is not None:
#                 W = slot_df[overall_wait_name].astype(float)
#             else:
#                 W = pd.Series(np.zeros(len(slot_df)))
#             L0 = lam * W
#             a = float(alpha_by_q.get(qc, 1.0))
#             shaped[qc] = np.power(np.maximum(L0.values, 0.0), a)

#         # 2) β 가중(최댓값 경쟁력) → 3) 큐별 일평균 보존 재스케일
#         for qc in queue_cols:
#             daily_mean_q = float(day_row[qc])
#             v = shaped[qc] * float(beta_by_q.get(qc, 1.0))  # β 적용
#             if np.any(v > 0) and daily_mean_q != 0:
#                 scale = daily_mean_q / (np.mean(v) + 1e-9)
#                 q_vals = v * scale
#             else:
#                 # 생산 가중치 기반 균등 분배(평균 유지)
#                 w = slot_df["prod_weight"].values
#                 w = w / (w.sum() + 1e-9)
#                 q_vals = w * daily_mean_q * len(slot_df)
#             slot_df[qc] = q_vals

#         # 출력 누적
#         for _, r in slot_df.iterrows():
#             out = {"Time_Now": r["slot_start"], "Time_End": r["slot_end"], "ShiftLabel": r["label"]}
#             for c in count_cols + util_cols + time_cols + queue_cols:
#                 out[c] = r.get(c, 0)
#             rows.append(out)

#     result = pd.DataFrame(rows).sort_values(["Time_Now","Time_End","ShiftLabel"]).reset_index(drop=True)

#     # 컬럼 순서 정리
#     keep_order, all_cols = [], set(count_cols + util_cols + time_cols + queue_cols)
#     for c in df_daily.columns:
#         if c in all_cols: keep_order.append(c)
#     for c in (set(all_cols) - set(keep_order)): keep_order.append(c)
#     result = result.reindex(columns=["Time_Now","Time_End","ShiftLabel"] + keep_order)
#     return result

# # =========================
# # 4) RUN
# # =========================
# if __name__ == "__main__":
#     full = pd.read_csv(INPUT_CSV, low_memory=False)

#     date_col = detect_date_col(full)
#     full[date_col] = to_dt(full[date_col])
#     if full[date_col].isna().all():
#         raise ValueError(f"날짜 파싱 실패: '{date_col}'")
#     full["_date"] = full[date_col].dt.floor("D")

#     nunique_dates = full["_date"].nunique()
#     is_daily_like = (len(full) == nunique_dates)

#     count_cols = [c for c in full.columns if c.startswith("c_Cell") or c == "c_TotalProducts"]
#     time_cols  = [c for c in full.columns if c.endswith("_Time")]
#     util_cols  = [c for c in full.columns if c.endswith("_Util")]
#     queue_cols = [c for c in full.columns if c.endswith("_Queue")]

#     if is_daily_like:
#         df_daily = full.drop(columns=["_date"]).copy()
#         df_daily[date_col] = full["_date"].values
#     else:
#         df_daily = full.drop(columns=["_date"]).copy()
#         df_daily, count_cols, time_cols, util_cols, queue_cols, date_col = aggregate_to_daily(df_daily, date_col)

#     out = make_time_scheduled(
#         df_daily,
#         count_cols=count_cols,
#         time_cols=time_cols,
#         util_cols=util_cols,
#         queue_cols=queue_cols,
#         date_col=date_col,
#         granularity=GRANULARITY,
#     )

#     out.to_csv(OUTPUT_CSV, index=False, encoding="utf-8-sig")

#     # 진단: 특정일 시간대 최댓값 큐 카운트
#     for tgt in ["2025-03-15", "2025-06-30"]:
#         try:
#             d = pd.Timestamp(tgt)
#             one = out[out["Time_Now"].dt.floor("D") == d].copy()
#             qcols = [c for c in queue_cols if c in one.columns]
#             topcol = one[qcols].idxmax(axis=1)
#             print(f"\n[디버그] {tgt} 시간대 최댓값 큐 상위 10개:")
#             print(topcol.value_counts().head(10))
#         except Exception:
#             pass

#     # 요약
#     sample_day = pd.to_datetime(df_daily[date_col].iloc[0])
#     sample_slots = (explode_to_hours(build_schedule_for_day(sample_day))
#                     if GRANULARITY == "hourly"
#                     else build_schedule_for_day(sample_day))
#     slots_per_day = len(sample_slots) if GRANULARITY == "hourly" else len(SCHEDULE_BLOCKS)
#     print(f"\n[OK] Saved -> {OUTPUT_CSV}")
#     print(f"입력 날짜 수: {nunique_dates:,} | 대표 하루 슬롯 수: {slots_per_day}")
#     print(f"예상 행수 ≈ {nunique_dates * slots_per_day:,} | 실제 행수 = {len(out):,}")


[디버그] 2025-03-15 시간대 최댓값 큐 상위 10개:
Warehouse1_Queue       26
Blanking_SKU1_Queue     9
Name: count, dtype: int64

[디버그] 2025-06-30 시간대 최댓값 큐 상위 10개:
Warehouse1_Queue           24
Blanking_SKU1_Queue         9
Forklift_Blanking_Queue     2
Name: count, dtype: int64

[OK] Saved -> ./FinalResults_time_scheduled_20240101_20250630.csv
입력 날짜 수: 547 | 대표 하루 슬롯 수: 35
예상 행수 ≈ 19,145 | 실제 행수 = 19,145


In [1]:
# -*- coding: utf-8 -*-
"""
하루 → 시간 슬롯 분해 (일 병목 빈도 '강제 반영' 버전)
- (λ×W)^α + 우선순위 기반 β(강화) + 슬롯별 근접 우승자 승급
- 보정 후 큐별 '일평균'은 원본과 동일하게 재스케일 → 규모 보존
- 스케줄 창: 07:50 ~ (다음날) 07:40, 20:10~20:40 휴식
"""

import numpy as np
import pandas as pd

# =========================
# 1) CONFIG
# =========================
# Input (daily or finer-grained) CSV and the destination for the time-slotted output.
INPUT_CSV  = "./subset_20240101_20250630_fullcols.csv"
OUTPUT_CSV = "./FinalResults_time_scheduled_20240101_20250630.csv"
GRANULARITY = "hourly"  # "hourly" or "block"

# ★ User-supplied bottleneck-frequency priority by production volume (highest → lowest)
PREFERRED_PRIORITY = [
    "Warehouse1_Queue",
    "Forklift_Blanking_Queue",
    "Warehouse_3_Queue",
    "Warehouse_4_Queue",
    "Forklift_Press_Queue",
    "Press4_Queue",
    "Press2_Queue",
    "Blanking_SKU3_Queue",
]

# Queue "families" that should receive the preferred uplift more often
# (lowercase keywords, ANY match)
PREFERRED_FAMILIES = ["warehouse", "forklift"]

# (Optional) penalty that softens over-concentration on a family
# (pattern is matched against the lowercased queue name)
PENALIZE_PATTERNS = {
    "blanking_sku": 0.90,  # multiply every Blanking_SKU* queue by 0.9
}

# Shift schedule as (start "HH:MM", end "HH:MM", label) tuples.
# Label glossary: 주간 = day shift, 잔업 = overtime, 야간 = night shift,
# 중식 = lunch, 석식 = dinner, 야식 = late-night meal, 휴식 = break.
SCHEDULE_BLOCKS = [
    ("07:50", "09:50", "주간"),
    ("09:50", "10:00", "휴식"),
    ("10:00", "11:50", "주간"),
    ("11:50", "12:40", "중식"),
    ("12:40", "14:40", "주간"),
    ("14:40", "14:50", "휴식"),
    ("14:50", "16:40", "주간"),
    ("16:40", "17:10", "석식"),
    ("17:10", "20:10", "잔업"),
    ("20:10", "20:40", "휴식"),
    ("20:40", "00:00", "야간"),
    ("00:00", "00:30", "야식"),
    ("00:30", "04:00", "야간"),
    ("04:00", "05:00", "휴식"),
    ("05:00", "07:40", "야간"),
]

# Per-label production weight; meal and break labels carry zero weight.
SHIFT_WEIGHTS = {"주간":1.0, "잔업":0.9, "야간":1.2, "중식":0.0, "석식":0.0, "야식":0.0, "휴식":0.0}

# Variation bands for the utilisation and time columns.
UTIL_BAND = 0.15
TIME_BAND = 0.10

# α (shape correction) range — unchanged from the previous version
ALPHA_MIN = 0.70
ALPHA_MAX = 1.15

# ★ β (competitiveness for being the per-slot maximum) range — "strengthened"
BETA_MIN = 0.50
BETA_MAX = 3.00
BETA_BASE = 1.75  # base of the exponential weighting over the priority index

# ★ Near-winner uplift (per slot)
PROX_NEAR_FRAC = 0.60   # a queue at >= 60% of the current slot's max is an uplift candidate
PROX_UPLIFT    = 1.18   # extra multiplier for candidates (warehouse/forklift families take precedence)
PROX_UPLIFT_OTHERS = 1.08  # other families (lower to 1.0~1.05 if this is too aggressive)

# =========================
# 2) HELPERS
# =========================
def to_dt(s):
    """Parse *s* into pandas datetimes, coercing unparseable entries to ``NaT``.

    Args:
        s: Anything ``pd.to_datetime`` accepts (scalar, list-like, Series).

    Returns:
        The result of ``pd.to_datetime(s, errors="coerce")``. If that call
        raises, the input is converted to strings via ``s.astype(str)`` and
        parsed again.
    """
    try:
        return pd.to_datetime(s, errors="coerce")
    except Exception:
        # Catch ``Exception`` rather than using a bare ``except`` so that
        # KeyboardInterrupt / SystemExit are never swallowed by the fallback.
        return pd.to_datetime(s.astype(str), errors="coerce")

def detect_date_col(df: pd.DataFrame) -> str:
    """Pick the column that holds the timestamp.

    The well-known names are tried in a fixed preference order; when none of
    them exists the first column of *df* is returned.
    """
    known_names = ("Time_Now", "Timestamp", "timestamp", "Date", "date")
    present = [name for name in known_names if name in df.columns]
    return present[0] if present else df.columns[0]

def day_floor(dt: pd.Series) -> pd.Series:
    """Truncate every timestamp in *dt* down to the start of its day."""
    floored = dt.dt.floor("D")
    return floored

def aggregate_to_daily(df: pd.DataFrame, date_col: str):
    """Collapse *df* to one row per calendar day.

    Aggregation rules: count columns are summed, time/util/queue columns are
    averaged, and every remaining column keeps its first value of the day.

    Returns:
        A tuple ``(daily, count_cols, time_cols, util_cols, queue_cols,
        date_col)`` where ``daily[date_col]`` holds the floored day.

    Raises:
        ValueError: if no value in ``df[date_col]`` parses as a datetime.
    """
    parsed = to_dt(df[date_col])
    if parsed.isna().all():
        raise ValueError(f"Could not parse datetime from '{date_col}'")

    # Work on a copy so the caller's frame never gains the helper column.
    work = df.copy()
    work["_date"] = day_floor(parsed)
    names = list(work.columns)

    def with_suffix(suffix):
        return [name for name in names if name.endswith(suffix)]

    count_cols = [name for name in names if name.startswith("c_Cell") or name == "c_TotalProducts"]
    time_cols = with_suffix("_Time")
    util_cols = with_suffix("_Util")
    queue_cols = with_suffix("_Queue")

    classified = {date_col, "_date", *count_cols, *time_cols, *util_cols, *queue_cols}
    other_cols = sorted(set(names) - classified)

    # Later groups override earlier ones for a column that matches several rules.
    agg = {
        **{name: "sum" for name in count_cols},
        **{name: "mean" for name in time_cols + util_cols + queue_cols},
        **{name: "first" for name in other_cols},
    }

    grouped = work.groupby("_date", as_index=False).agg(agg)
    daily = grouped.sort_values("_date").reset_index(drop=True)
    daily[date_col] = daily["_date"]
    daily = daily.drop(columns=["_date"])
    return daily, count_cols, time_cols, util_cols, queue_cols, date_col

def build_schedule_for_day(day: pd.Timestamp, blocks=None) -> pd.DataFrame:
    """Lay the shift blocks onto the 07:50 -> (next day) 07:40 window of *day*.

    Args:
        day: Any timestamp on the production day; only year/month/day are used.
        blocks: Iterable of ``(start "HH:MM", end "HH:MM", label)`` tuples.
            Defaults to the module-level ``SCHEDULE_BLOCKS``.

    Returns:
        DataFrame with columns ``start``, ``end``, ``label``, ``minutes``
        (whole minutes of the clamped block). Blocks that fall outside the
        window are dropped; the columns are present even when no block
        survives.
    """
    if blocks is None:
        blocks = SCHEDULE_BLOCKS

    one_day = pd.Timedelta(days=1)
    window_open = (7, 50)
    anchor_start = pd.Timestamp(day.year, day.month, day.day, 7, 50)
    anchor_end = pd.Timestamp(day.year, day.month, day.day, 7, 40) + one_day

    rows = []
    for start_str, end_str, label in blocks:
        s_h, s_m = map(int, start_str.split(":"))
        e_h, e_m = map(int, end_str.split(":"))
        start = pd.Timestamp(day.year, day.month, day.day, s_h, s_m)
        end = pd.Timestamp(day.year, day.month, day.day, e_h, e_m)

        # Clock times before 07:50 belong to the following calendar day.
        if (s_h, s_m) < window_open:
            start += one_day
        # The same applies to the end; a block wrapping past midnight also rolls over.
        if (e_h, e_m) < window_open or end <= start:
            end += one_day

        # Clamp to the production-day window and skip blocks left empty by it.
        s = max(start, anchor_start)
        e = min(end, anchor_end)
        if e <= s:
            continue

        minutes = int((e - s).total_seconds() // 60)
        rows.append({"start": s, "end": e, "label": label, "minutes": minutes})

    return pd.DataFrame(rows, columns=["start", "end", "label", "minutes"])

def explode_to_hours(sch: pd.DataFrame) -> pd.DataFrame:
    """Split schedule blocks into on-the-hour slots.

    Args:
        sch: Frame with ``start``, ``end`` and ``label`` columns, one row per
            schedule block.

    Returns:
        DataFrame with columns ``slot_start``, ``slot_end``, ``label`` and
        ``min_overlap`` (minutes of the block that fall inside that hour).
        Rows are ordered block by block, then by hour. An empty schedule
        yields an empty frame that still carries these columns.
    """
    columns = ["slot_start", "slot_end", "label", "min_overlap"]
    records = []
    if sch is None or sch.empty:
        return pd.DataFrame(records, columns=columns)

    one_hour = pd.Timedelta(hours=1)
    for blk_start, blk_end, label in zip(sch["start"], sch["end"], sch["label"]):
        # Walk only the hours this block can touch instead of scanning every
        # slot of the day for every block.
        slot_start = blk_start.floor("h")
        while slot_start < blk_end:
            slot_end = slot_start + one_hour
            overlap = (min(blk_end, slot_end) - max(blk_start, slot_start)).total_seconds() / 60.0
            if overlap > 0:
                records.append({
                    "slot_start": slot_start,
                    "slot_end": slot_end,
                    "label": label,
                    "min_overlap": overlap,
                })
            slot_start = slot_end

    return pd.DataFrame(records, columns=columns)

def integer_allocate(total: float, weights: np.ndarray) -> np.ndarray:
    """Split *total* into integers proportional to *weights*.

    Each entry first receives ``floor(total * share)``; the units still
    missing are then handed out one apiece to the entries with the largest
    shares.

    Args:
        total: Quantity to distribute. Non-finite or non-positive totals
            yield an all-zero allocation.
        weights: Relative weights. Negative and non-finite entries are
            treated as zero.

    Returns:
        Integer array with the same shape as *weights*.
    """
    w = np.asarray(weights, float)
    # NaN/inf weights would poison the normalisation below, so drop them.
    w = np.clip(np.where(np.isfinite(w), w, 0.0), 0, None)

    # A NaN total slips past ``total <= 0`` and turns into garbage integers
    # once cast, so reject it explicitly.
    if not np.isfinite(total) or total <= 0 or w.sum() <= 0:
        return np.zeros_like(w, dtype=int)

    p = w / w.sum()
    alloc = np.floor(total * p).astype(int)
    residual = int(round(total - alloc.sum()))
    if residual > 0:
        # Give the leftover units to the largest shares, one each.
        idx = np.argsort(-p)[:residual]
        alloc[idx] += 1
    return alloc

def rank_from_daily_top(df_daily: pd.DataFrame, qcols: list, date_col: str) -> pd.Series:
    """Rank queue columns by how often each one is the daily maximum.

    For every calendar day (``date_col`` floored to the day) the first row of
    that day is inspected and the queue column holding the largest value is
    recorded; ties go to the column that appears first in ``qcols``. The
    win counts are then dense-ranked in descending order, so rank 1 is the
    most frequent daily winner.

    Args:
        df_daily: Frame containing ``date_col`` (datetime-like) and the queue
            columns. A queue column missing from a row is read as 0.0.
        qcols: Queue column names to compare.
        date_col: Name of the datetime column.

    Returns:
        Integer Series indexed by ``qcols`` holding each column's rank.
    """
    tops = []
    for _, g in df_daily.groupby(df_daily[date_col].dt.floor("D")):
        row = g.iloc[0]
        vals = {qc: float(row.get(qc, 0.0)) for qc in qcols}
        # NaN never compares greater or smaller, so leaving it in would make
        # the winner depend on column order; drop it before taking the max.
        usable = {qc: v for qc, v in vals.items() if not pd.isna(v)}
        if usable:
            tops.append(max(usable, key=usable.get))
    # dtype=object keeps the empty case from falling back to a float Series.
    freq = pd.Series(tops, dtype=object).value_counts().reindex(qcols, fill_value=0)
    return freq.rank(ascending=False, method="dense").astype(int)

def lin_map(val, vmin, vmax, omin, omax):
    """Map ``val`` from [vmin, vmax] onto [omin, omax] with the direction reversed.

    ``val == vmin`` yields ``omax`` and ``val == vmax`` yields ``omin``, so a
    low input (e.g. a top rank of 1) receives the large output. When the input
    range is degenerate the midpoint of the output range is returned.
    """
    if vmin == vmax:
        return (omin + omax) / 2
    position = (val - vmin) / (vmax - vmin)
    # (1 - position) flips the direction: smaller inputs land nearer omax.
    return omin + (1 - position) * (omax - omin)

def make_alpha_beta(qcols: list, df_daily: pd.DataFrame, date_col: str) -> tuple[dict, dict]:
    """Build the per-queue exponent (alpha) and multiplier (beta) tables.

    alpha: each queue's dense rank from ``rank_from_daily_top`` is passed
        through ``lin_map`` onto the ALPHA_MIN..ALPHA_MAX range.
    beta: when PREFERRED_PRIORITY is non-empty, the queues listed there get
        the score ``BETA_BASE ** (len(order) - position)``, every other queue
        gets 1.0, and the scores are passed through ``lin_map`` onto the
        BETA_MIN..BETA_MAX range. Otherwise the alpha ranks are reused with
        the beta range.

    Returns:
        ``(alpha, beta)``: two dicts keyed by queue column name.
    """
    ranks = rank_from_daily_top(df_daily, qcols, date_col)  # rank 1 = most frequent daily top
    rank_lo, rank_hi = ranks.min(), ranks.max()

    def _scaled_ranks(out_lo, out_hi):
        # Rank-driven table shared by alpha and the no-priority beta fallback.
        return {
            name: float(lin_map(ranks.loc[name], rank_lo, rank_hi, out_lo, out_hi))
            for name in qcols
        }

    alpha = _scaled_ranks(ALPHA_MIN, ALPHA_MAX)

    if not PREFERRED_PRIORITY:
        # No explicit priority list: reuse the automatic ranks for beta.
        return alpha, _scaled_ranks(BETA_MIN, BETA_MAX)

    # Keep only the prioritised queues that exist; fall back to every queue.
    ordered = [q for q in PREFERRED_PRIORITY if q in qcols] or list(qcols)
    depth = len(ordered)

    # Exponential score: the earlier a queue is listed, the larger its score.
    scores = {}
    for pos, q in enumerate(ordered):
        scores[q] = BETA_BASE ** (depth - pos)
    for q in qcols:
        if q not in scores:
            scores[q] = 1.0

    score_arr = np.array(list(scores.values()), float)
    score_lo, score_hi = score_arr.min(), score_arr.max()
    # NOTE(review): lin_map reverses direction, so the largest score is mapped
    # to BETA_MIN and the smallest to BETA_MAX. Confirm this is intended.
    beta = {}
    for q in qcols:
        beta[q] = float(lin_map(scores[q], score_lo, score_hi, BETA_MIN, BETA_MAX))
    return alpha, beta

def apply_penalties(qc: str, val: np.ndarray) -> np.ndarray:
    """Scale ``val`` by the factor of the first penalty pattern found in ``qc``.

    The queue name is lower-cased and checked against the keys of
    PENALIZE_PATTERNS in dict order; the first key that is a substring of the
    name decides the factor. Without a match ``val`` is returned unchanged.
    """
    lowered = qc.lower()
    no_match = object()
    factor = next(
        (f for pattern, f in PENALIZE_PATTERNS.items() if pattern in lowered),
        no_match,
    )
    if factor is no_match:
        return val
    return val * float(factor)

def family_boost_name(name: str) -> float:
    """Return the proximity uplift factor for a queue name.

    PROX_UPLIFT is returned when any keyword of PREFERRED_FAMILIES occurs in
    the lower-cased name, PROX_UPLIFT_OTHERS otherwise.
    """
    lowered = name.lower()
    for keyword in PREFERRED_FAMILIES:
        if keyword in lowered:
            return PROX_UPLIFT
    return PROX_UPLIFT_OTHERS

# =========================
# 3) MAIN TRANSFORM
# =========================
def make_time_scheduled(df_daily: pd.DataFrame,
                        count_cols, time_cols, util_cols, queue_cols,
                        date_col: str,
                        granularity: str = "hourly") -> pd.DataFrame:
    """Expand one-row-per-day data into per-slot rows following the shift schedule.

    For every row of ``df_daily`` the day's schedule is built with
    ``build_schedule_for_day`` and converted to slots: the schedule blocks
    themselves when ``granularity == "block"``, otherwise the result of
    ``explode_to_hours``. The daily values are then spread over the slots:

    * count columns: integer allocation proportional to ``prod_weight``
      (working minutes x shift weight) via ``integer_allocate``;
    * util columns: ``daily * (1 - UTIL_BAND + 2*UTIL_BAND*sw_norm)`` on
      working slots, 0 elsewhere, clipped to [0, 1];
    * time columns: the same formula with TIME_BAND, without clipping;
    * queue columns: ``(lambda * W) ** alpha``, multiplied by beta, passed
      through ``apply_penalties``, boosted when close to the per-slot maximum,
      and finally rescaled so the mean over the day's slots equals the daily
      value.

    Args:
        df_daily: One row per day; must contain ``date_col`` and every column
            named in the four column lists.
        count_cols, time_cols, util_cols, queue_cols: Column-name lists (they
            are concatenated with ``+``).
        date_col: Name of the date column in ``df_daily``.
        granularity: ``"block"`` keeps schedule blocks as slots; any other
            value uses hour slots.

    Returns:
        Frame with ``Time_Now``, ``Time_End``, ``ShiftLabel`` followed by the
        value columns, sorted by those three keys.
    """
    rows = []

    # Map the first "_"-separated token of each SKU*...Wait_Time column to that column
    sku_wait_cols = {c.split("_")[0]: c for c in time_cols if c.startswith("SKU") and c.endswith("Wait_Time")}
    # First Wait_Time column of any kind, used when a queue has no SKU-specific one
    overall_wait_name = next((c for c in time_cols if c.endswith("Wait_Time")), None)

    # alpha (exponent) and beta (multiplier) per queue column
    alpha_by_q, beta_by_q = make_alpha_beta(queue_cols, df_daily, date_col)

    for _, day_row in df_daily.iterrows():
        day = pd.to_datetime(day_row[date_col]).floor("D")
        sch = build_schedule_for_day(day)
        if sch.empty: continue

        if granularity == "block":
            # Block mode: each schedule block is one slot, overlap = full block length
            slot_df = sch.rename(columns={"start":"slot_start","end":"slot_end"})
            slot_df["min_overlap"] = (slot_df["slot_end"] - slot_df["slot_start"]).dt.total_seconds()/60.0
            slot_df = slot_df[["slot_start","slot_end","label","min_overlap"]]
        else:
            slot_df = explode_to_hours(sch)

        # Production weights: labels missing from SHIFT_WEIGHTS count as 0 (non-working)
        slot_df["shift_weight"] = slot_df["label"].map(SHIFT_WEIGHTS).fillna(0.0)
        slot_df["work_minutes"] = slot_df["min_overlap"] * (slot_df["shift_weight"] > 0).astype(float)
        slot_df["prod_weight"] = slot_df["work_minutes"] * slot_df["shift_weight"]

        # Distribute counts (integers)
        for c in count_cols:
            total = float(day_row[c])
            slot_df[c] = integer_allocate(total, slot_df["prod_weight"].values)

        # Distribute Util/Time
        safe_eps = 1e-12
        work_mask = slot_df["shift_weight"].values > 0
        sw = slot_df["shift_weight"].values.copy()
        if work_mask.any():
            # Shift weight relative to the mean weight of the working slots
            sw_norm = np.zeros_like(sw, dtype=float)
            mean_sw = sw[work_mask].mean()
            sw_norm[work_mask] = sw[work_mask] / (mean_sw + safe_eps)
        else:
            sw_norm = np.zeros_like(sw, dtype=float)

        # NOTE(review): with sw_norm == 1 the factor below is 1 + BAND rather
        # than 1, so the band does not look centred on the daily value - confirm.
        for c in util_cols:
            mean_u = float(day_row[c])
            u = np.zeros(len(slot_df), dtype=float)
            u[work_mask] = mean_u * (1.0 - UTIL_BAND + 2*UTIL_BAND * sw_norm[work_mask])
            slot_df[c] = np.clip(u, 0, 1)

        for c in time_cols:
            mean_t = float(day_row[c])
            t = np.zeros(len(slot_df), dtype=float)
            if mean_t != 0 and work_mask.any():
                t[work_mask] = mean_t * (1.0 - TIME_BAND + 2*TIME_BAND * sw_norm[work_mask])
            slot_df[c] = t

        # ---- Queue generation ----
        slot_df["slot_seconds"] = (slot_df["slot_end"] - slot_df["slot_start"]).dt.total_seconds()
        # NOTE(review): this sums every count column; if count_cols holds both
        # per-cell counts and a grand total the rate is counted twice - confirm.
        per_slot_total = slot_df[count_cols].sum(axis=1).astype(float)
        # lambda = allocated count per second of slot; zero-length slots yield 0
        lam = (per_slot_total / slot_df["slot_seconds"].replace(0, np.nan)).fillna(0.0)

        # 1) (lambda x W)^alpha
        shaped = {}
        for qc in queue_cols:
            # Prefer the Wait_Time column of the SKU named in the queue column
            sku_tag = next((p for p in qc.split("_") if p.startswith("SKU")), None)
            if sku_tag and sku_tag in sku_wait_cols:
                W = slot_df[sku_wait_cols[sku_tag]].astype(float)
            elif overall_wait_name is not None:
                W = slot_df[overall_wait_name].astype(float)
            else:
                W = pd.Series(np.zeros(len(slot_df)))
            L0 = lam * W
            a = float(alpha_by_q.get(qc, 1.0))
            val = np.power(np.maximum(L0.values, 0.0), a)
            shaped[qc] = val

        # 2) beta weighting + (optional) penalty
        for qc in queue_cols:
            b = float(beta_by_q.get(qc, 1.0))
            shaped[qc] = apply_penalties(qc, shaped[qc] * b)

        # 3) Per-slot boost for queues near the slot maximum
        #    (original note: favours the warehouse/forklift families in particular)
        mat = np.column_stack([shaped[qc] for qc in queue_cols])  # (slots × Q)
        if mat.size > 0:
            max_col = mat.max(axis=1, keepdims=True) + 1e-12
            near_mask = (mat >= PROX_NEAR_FRAC * max_col).astype(float)
            # Per-family factor
            fam_boost = np.array([family_boost_name(qc) for qc in queue_cols], float)  # (Q,)
            mat = mat * (1.0 + (near_mask * (fam_boost - 1.0)))
            # Convert back to the dict form
            for j, qc in enumerate(queue_cols):
                shaped[qc] = mat[:, j]

        # 4) Per-queue rescale that preserves the daily mean
        for qc in queue_cols:
            daily_mean_q = float(day_row[qc])
            v = shaped[qc]
            if np.any(v > 0) and daily_mean_q != 0:
                scale = daily_mean_q / (np.mean(v) + 1e-9)
                q_vals = v * scale
            else:
                # Fallback: spread by production weight (mean is kept)
                w = slot_df["prod_weight"].values
                w = w / (w.sum() + 1e-9)
                q_vals = w * daily_mean_q * len(slot_df)
            slot_df[qc] = q_vals

        # Accumulate output rows
        for _, r in slot_df.iterrows():
            out = {"Time_Now": r["slot_start"], "Time_End": r["slot_end"], "ShiftLabel": r["label"]}
            for c in count_cols + util_cols + time_cols + queue_cols:
                out[c] = r.get(c, 0)
            rows.append(out)

    result = pd.DataFrame(rows).sort_values(["Time_Now","Time_End","ShiftLabel"]).reset_index(drop=True)

    # Column order: value columns in df_daily order first, then any leftovers
    keep_order, all_cols = [], set(count_cols + util_cols + time_cols + queue_cols)
    for c in df_daily.columns:
        if c in all_cols: keep_order.append(c)
    # NOTE(review): iterating a set gives no guaranteed order for leftovers.
    for c in (set(all_cols) - set(keep_order)): keep_order.append(c)
    result = result.reindex(columns=["Time_Now","Time_End","ShiftLabel"] + keep_order)
    return result

# =========================
# 4) RUN
# =========================
if __name__ == "__main__":
    # Script entry point: load the input CSV, bring it to one row per day if
    # needed, expand it to schedule slots, save the result and print a summary.
    full = pd.read_csv(INPUT_CSV, low_memory=False)

    date_col = detect_date_col(full)
    full[date_col] = to_dt(full[date_col])
    if full[date_col].isna().all():
        raise ValueError(f"날짜 파싱 실패: '{date_col}'")
    full["_date"] = full[date_col].dt.floor("D")

    # The input is treated as already daily when every row has its own date.
    nunique_dates = full["_date"].nunique()
    is_daily_like = (len(full) == nunique_dates)

    # Column families are recognised by name prefix/suffix.
    count_cols = [c for c in full.columns if c.startswith("c_Cell") or c == "c_TotalProducts"]
    time_cols  = [c for c in full.columns if c.endswith("_Time")]
    util_cols  = [c for c in full.columns if c.endswith("_Util")]
    queue_cols = [c for c in full.columns if c.endswith("_Queue")]

    df_daily = full.drop(columns=["_date"]).copy()
    if is_daily_like:
        # Already one row per day: only normalise the date column to midnight.
        df_daily[date_col] = full["_date"].values
    else:
        df_daily, count_cols, time_cols, util_cols, queue_cols, date_col = aggregate_to_daily(df_daily, date_col)

    out = make_time_scheduled(
        df_daily,
        count_cols=count_cols,
        time_cols=time_cols,
        util_cols=util_cols,
        queue_cols=queue_cols,
        date_col=date_col,
        granularity=GRANULARITY,
    )

    out.to_csv(OUTPUT_CSV, index=False, encoding="utf-8-sig")

    # Debug aid: for selected days, count how often each queue is the slot maximum.
    for tgt in ["2025-03-15", "2025-06-30"]:
        try:
            d = pd.Timestamp(tgt)
            one = out[out["Time_Now"].dt.floor("D") == d].copy()
            qcols = [c for c in queue_cols if c in one.columns]
            topcol = one[qcols].idxmax(axis=1)
        except Exception as exc:
            # Best-effort diagnostics: report why the day was skipped instead
            # of hiding the failure, then carry on with the next day.
            print(f"\n[디버그] {tgt} 건너뜀: {exc!r}")
            continue
        print(f"\n[디버그] {tgt} 시간대 최댓값 큐 상위 10개:")
        print(topcol.value_counts().head(10))

    # Summary: compare the expected row count with the actual one.
    sample_day = pd.to_datetime(df_daily[date_col].iloc[0])
    sample_sch = build_schedule_for_day(sample_day)
    sample_slots = explode_to_hours(sample_sch) if GRANULARITY == "hourly" else sample_sch
    # Count the slots actually generated for the sample day in either mode, so
    # the estimate follows the built schedule rather than the raw block list.
    slots_per_day = len(sample_slots)
    print(f"\n[OK] Saved -> {OUTPUT_CSV}")
    print(f"입력 날짜 수: {nunique_dates:,} | 대표 하루 슬롯 수: {slots_per_day}")
    print(f"예상 행수 ≈ {nunique_dates * slots_per_day:,} | 실제 행수 = {len(out):,}")


[디버그] 2025-03-15 시간대 최댓값 큐 상위 10개:
Warehouse1_Queue       26
Blanking_SKU1_Queue     9
Name: count, dtype: int64

[디버그] 2025-06-30 시간대 최댓값 큐 상위 10개:
Warehouse1_Queue           24
Blanking_SKU1_Queue         9
Forklift_Blanking_Queue     2
Name: count, dtype: int64

[OK] Saved -> ./FinalResults_time_scheduled_20240101_20250630.csv
입력 날짜 수: 547 | 대표 하루 슬롯 수: 35
예상 행수 ≈ 19,145 | 실제 행수 = 19,145
