In [3]:
import pandas as pd
import numpy as np
from pathlib import Path


In [10]:
# Dataset folder for this mock-up version; the input and output CSVs live side by side in it.
BASE = Path("Datasets") / "mockup_ver2"
# Raw store list that main() reads.
IN_PATH = BASE.joinpath("stores.csv")
# Enriched store list (presets + hourly weights) that main() writes.
OUT_PATH = BASE.joinpath("stores_with_patterns.csv")

In [7]:
# Demand presets keyed by the store's `profile` value.
# Each preset carries:
#   peaks          - list of (start_hour, end_hour) windows fed to hour_weights_from_peaks()
#   weekday_boost  - multiplier copied verbatim into the output column of the same name
#   weekend_boost  - multiplier copied verbatim into the output column of the same name
#   festival_boost - multiplier copied verbatim into the output column of the same name
PROFILE_PRESETS = {
    "Gas Station": dict(
        peaks=[(7, 9), (17, 21)],
        weekday_boost=1.00,
        weekend_boost=1.05,
        festival_boost=1.05,
    ),
    "Educational Institution": dict(
        peaks=[(7, 9), (11, 13), (15, 17)],
        weekday_boost=1.05,
        weekend_boost=0.90,
        festival_boost=1.00,
    ),
    "Hospital": dict(
        peaks=[(6, 9), (12, 14), (18, 21)],
        weekday_boost=1.00,
        weekend_boost=1.00,
        festival_boost=1.00,
    ),
    "Entertainment Venue": dict(
        # Evening window up to midnight plus a separate after-midnight window.
        peaks=[(18, 24), (0, 2)],
        weekday_boost=0.95,
        weekend_boost=1.10,
        festival_boost=1.10,
    ),
    "Office": dict(
        peaks=[(7, 9), (12, 14), (17, 19)],
        weekday_boost=1.10,
        weekend_boost=0.85,
        festival_boost=0.95,
    ),
    "Residential": dict(
        peaks=[(7, 9), (17, 21)],
        weekday_boost=0.98,
        weekend_boost=1.05,
        festival_boost=1.05,
    ),
    "Tourist Spot": dict(
        peaks=[(10, 12), (17, 21)],
        weekday_boost=1.00,
        weekend_boost=1.10,
        festival_boost=1.15,
    ),
    "Transport Hub": dict(
        peaks=[(6, 9), (17, 20)],
        weekday_boost=1.05,
        weekend_boost=1.00,
        festival_boost=1.05,
    ),
    "Factory": dict(
        peaks=[(5, 7), (11, 12), (17, 18)],
        weekday_boost=1.10,
        weekend_boost=0.90,
        festival_boost=0.95,
    ),
    "Market": dict(
        peaks=[(6, 9), (16, 19)],
        weekday_boost=0.98,
        weekend_boost=1.08,
        festival_boost=1.10,
    ),
}

# Fallback used by main() for any profile that has no entry in PROFILE_PRESETS.
DEFAULT_PRESET = dict(
    peaks=[(11, 13), (17, 20)],
    weekday_boost=1.00,
    weekend_boost=1.02,
    festival_boost=1.00,
)

# ========= FUNCTIONS =========
def normalize_store_columns(df: pd.DataFrame) -> pd.DataFrame:
    """Return a copy of ``df`` with canonical ``store_id`` and ``profile`` columns.

    Column labels are matched case-insensitively after trimming whitespace:

    * ``store_id``  <- ``store_id`` / ``branch_id`` / ``shop_id``
    * ``profile``   <- ``profile`` / ``stores.profile`` / ``store_profile`` / ``branch_profile``

    At most one source column is renamed per canonical name. A column that is
    already spelled exactly ``store_id`` / ``profile`` wins; otherwise the first
    matching alias in column order is used. Any further alias columns are left
    untouched, so the result never contains two columns with the same canonical
    label.

    Parameters
    ----------
    df : pd.DataFrame
        Raw stores table. Column labels may be of any type; non-string labels
        are compared by their ``str()`` form.

    Returns
    -------
    pd.DataFrame
        New frame (the input is not modified) whose ``store_id`` and ``profile``
        columns are whitespace-trimmed strings.

    Raises
    ------
    ValueError
        If no column can be mapped to ``store_id`` or to ``profile``.
    """
    aliases_by_target = {
        "store_id": ("store_id", "branch_id", "shop_id"),
        "profile": ("profile", "stores.profile", "store_profile", "branch_profile"),
    }

    colmap = {}
    for target, aliases in aliases_by_target.items():
        if target in df.columns:
            # Already canonical: renaming another alias on top of it would create
            # duplicate labels and make df[target] a DataFrame instead of a Series.
            continue
        for col in df.columns:
            # str() keeps integer/None labels from crashing the match.
            if str(col).strip().lower() in aliases:
                colmap[col] = target
                break

    # rename() always hands back a new frame, so the caller's object stays untouched.
    df = df.rename(columns=colmap)

    if "store_id" not in df.columns or "profile" not in df.columns:
        raise ValueError("ต้องมีคอลัมน์ store_id และ profile (รองรับชื่อ branch_id/branch_profile แล้วรีเนมให้)")

    # NOTE: astype(str) renders missing values as the literal text "nan".
    df["store_id"] = df["store_id"].astype(str).str.strip()
    df["profile"] = df["profile"].astype(str).str.strip()
    return df

def hour_weights_from_peaks(peaks, uplift=0.5):
    """
    Build a 24-element array of relative hourly weights from peak windows.

    Each peak is a half-open ``(start_hour, end_hour)`` window ``[start, end)``,
    e.g. ``(11, 13)`` covers 11:00-12:59. A window whose end is smaller than its
    start wraps past midnight, e.g. ``(22, 2)`` covers hours 22, 23, 0 and 1.

    Every hour starts at weight 1.0 and gains ``uplift`` for each window that
    covers it (overlapping windows stack). The array is then divided by its mean
    so the average weight is 1.

    Parameters
    ----------
    peaks : iterable of (int, int)
        Peak windows; both hours must lie in ``0..24``.
    uplift : float, default 0.5
        Amount added to the base weight of every hour inside a window.

    Returns
    -------
    numpy.ndarray
        Float array of shape ``(24,)`` indexed by hour of day.

    Raises
    ------
    ValueError
        If a window uses an hour outside ``0..24``. Without this check a negative
        hour would silently wrap to the end of the day through numpy indexing.
    """
    w = np.ones(24, dtype=float)
    for s, e in peaks:
        if not (0 <= s <= 24 and 0 <= e <= 24):
            raise ValueError(f"peak hours must be within 0..24, got ({s}, {e})")
        if e >= s:
            w[s:e] += uplift
        else:
            # Window crosses midnight: boost the tail of the day and the start of the next.
            w[s:] += uplift
            w[:e] += uplift
    return w / w.mean()

def encode_peaks(peaks):
    """Serialize peak windows as ``SS-EE`` pairs (zero-padded hours) joined by ``;``."""
    parts = []
    for start, end in peaks:
        parts.append("{:02d}-{:02d}".format(start, end))
    return ";".join(parts)


In [11]:
def main(in_path=None, out_path=None):
    """Enrich the stores CSV with per-profile demand presets and hourly weights.

    Reads the stores table, normalizes its key columns, then adds:

    * ``peaks_encoded``  - the preset's peak windows as text (see ``encode_peaks``)
    * ``weekday_boost`` / ``weekend_boost`` / ``festival_boost`` - preset multipliers
    * ``hour_w_00`` .. ``hour_w_23`` - hourly weights from ``hour_weights_from_peaks``

    Profiles without an entry in ``PROFILE_PRESETS`` use ``DEFAULT_PRESET``.
    The result is written as CSV without the index.

    Parameters
    ----------
    in_path : path-like, optional
        CSV to read. Defaults to the module-level ``IN_PATH``.
    out_path : path-like, optional
        CSV to write. Defaults to the module-level ``OUT_PATH``.
    """
    # Resolve defaults at call time so edits to the config cell take effect
    # without re-running this definition.
    in_path = IN_PATH if in_path is None else in_path
    out_path = OUT_PATH if out_path is None else out_path

    stores = normalize_store_columns(pd.read_csv(in_path))

    hour_cols = [f"hour_w_{h:02d}" for h in range(24)]
    feature_cols = ["peaks_encoded", "weekday_boost", "weekend_boost", "festival_boost", *hour_cols]

    # Features depend only on the profile, so compute them once per distinct
    # profile rather than once per row.
    profiles = stores["profile"].astype(str)
    features_by_profile = {}
    for prof in profiles.unique():
        preset = PROFILE_PRESETS.get(prof, DEFAULT_PRESET)
        record = {
            "peaks_encoded": encode_peaks(preset["peaks"]),
            "weekday_boost": float(preset["weekday_boost"]),
            "weekend_boost": float(preset["weekend_boost"]),
            "festival_boost": float(preset["festival_boost"]),
        }
        weights = hour_weights_from_peaks(preset["peaks"])
        record.update({col: float(wt) for col, wt in zip(hour_cols, weights)})
        features_by_profile[prof] = record

    # Broadcast each feature back to the rows through the profile column.
    for col in feature_cols:
        lookup = {prof: rec[col] for prof, rec in features_by_profile.items()}
        stores[col] = profiles.map(lookup)

    # Save the result.
    stores.to_csv(out_path, index=False)
    print(f"Saved -> {out_path}")

# Run the pipeline when this cell/script is executed directly; skipped when the code is imported as a module.
if __name__ == "__main__":
    main()

Saved -> Datasets\mockup_ver2\stores_with_patterns.csv
