# Task 4: DC-Level Daily Demand Simulator

**Objective**: Generate DC-level daily realized demand from the segment-level simulation.

**Pipeline**:

1. Run the modified OTD simulator (saves country and segment info)
2. Disaggregate segment demand to city level using population proportions
3. Map cities to DCs using the Task2 assignment file
4. Aggregate to DC-level daily demand

**Output**: `(sim, date, year, euro_dc_id, model, realized_units)`

**Configuration**:

- Simulations: 100
- Years: 2027-2034
- Adoption scenario: 'mp' (most probable)

In [None]:
import pandas as pdimport numpy as npimport openpyxlfrom datetime import date, datetime, timedeltaimport osimport gcfrom pathlib import Pathprint("Imports successful")print(f"Pandas version: {pd.__version__}")print(f"NumPy version: {np.__version__}")

## 1. Configuration & Constants

In [None]:
# ═══════════════════════════════════════════════════════════════
# CONFIGURATION
# ═══════════════════════════════════════════════════════════════
N_SIM = 100
SEED = 42
YEARS = list(range(2027, 2035))  # 2027-2034
ADOPTION_SCENARIO = 'mp'  # most probable
BATCH_SIZE = 10  # Save every 10 sims

# Paths
BASE_DIR = Path('/Users/tianyihu/Documents/Course_Files/6339/Case1')
TASK1_DIR = BASE_DIR / 'Task1'
TASK2_DIR = BASE_DIR / 'Task2'
TASK4_DIR = BASE_DIR / 'Task4'
OUTPUT_DIR = TASK4_DIR / 'dc_output'

# Create output directory. parents=True so the cell also works when the
# Task4 folder itself has not been created yet.
OUTPUT_DIR.mkdir(parents=True, exist_ok=True)

# ═══════════════════════════════════════════════════════════════
# CONSTANTS (from Task2)
# ═══════════════════════════════════════════════════════════════
CYBER_WEEK_SHARE = 0.18      # share of annual demand allocated to Cyber Week
CYBER_PRICE_DISCOUNT = 0.15  # price reduction applied to Cyber Week revenue

# Relative weight of each weekday (pandas dayofweek: 0 = Monday)
DOW_WEIGHTS = {
    0: 0.10,  # Monday
    1: 0.12,  # Tuesday
    2: 0.13,  # Wednesday
    3: 0.14,  # Thursday
    4: 0.18,  # Friday
    5: 0.19,  # Saturday
    6: 0.14,  # Sunday
}

# Market entry years by country
ENTRY_YEAR_MAP = {
    'BE': 2027, 'DE': 2027, 'LU': 2027, 'NL': 2027,
    'DK': 2028, 'EE': 2028, 'FI': 2028, 'LT': 2028, 'LV': 2028, 'SE': 2028,
    'AT': 2029, 'CZ': 2029, 'ES': 2029, 'FR': 2029, 'IT': 2029, 'PL': 2029, 'PT': 2029,
    'BG': 2030, 'GR': 2030, 'HR': 2030, 'HU': 2030, 'IE': 2030, 'RO': 2030, 'SI': 2030, 'SK': 2030,
}

print(f"Configuration loaded")
print(f"  Simulations: {N_SIM}")
print(f"  Years: {YEARS[0]}-{YEARS[-1]}")
print(f"  Adoption scenario: {ADOPTION_SCENARIO}")
print(f"  Output directory: {OUTPUT_DIR}")

## 2. Core Functions (from Task2 Simulator)

### 2.1 Calendar Function

In [None]:
def build_year_calendar(year: int) -> pd.DataFrame:
    """Build a daily calendar for ``year`` with Cyber Week and period labels.

    Args:
        year: Calendar year to expand (1 January through 31 December).

    Returns:
        DataFrame with one row per day and the columns ``date``,
        ``day_of_week`` (0 = Monday), ``week`` (ISO week number),
        ``is_cyber_week`` and ``period`` (1-13).
    """
    dates = pd.date_range(date(year, 1, 1), date(year, 12, 31), freq='D')

    cal = pd.DataFrame({'date': dates})
    cal['day_of_week'] = cal['date'].dt.dayofweek
    cal['week'] = cal['date'].dt.isocalendar().week

    # Cyber Week is the ISO week (Mon-Sun) that holds the 4th Thursday of
    # November. Black Friday is the day after that Thursday, so it falls in
    # the same week. Every November has at least four Thursdays, so no
    # fallback branch is needed.
    november_thursdays = dates[(dates.month == 11) & (dates.dayofweek == 3)]
    cyber_week = november_thursdays[3].isocalendar()[1]
    cal['is_cyber_week'] = cal['week'] == cyber_week

    # Period assignment (1-13): 28-day blocks; the leftover day(s) at the end
    # of the year are folded into period 13.
    cal['period'] = (((cal['date'].dt.dayofyear - 1) // 28) + 1).clip(upper=13)

    return cal


# Test
test_cal = build_year_calendar(2027)
print(f"Calendar test: {len(test_cal)} days in 2027")
print(f"Cyber Week days: {test_cal['is_cyber_week'].sum()}")

### 2.2 Adoption Rate Function

In [None]:
# Adoption rate per market year (index 0 = entry year) for each scenario.
# Kept at module level so the table is built once instead of on every call;
# the simulator looks a rate up for every city in every simulated year.
_ADOPTION_RATES = {
    'pes': (0.001, 0.0015, 0.002, 0.003, 0.004, 0.006, 0.008, 0.010),
    'mp':  (0.002, 0.003, 0.005, 0.008, 0.012, 0.018, 0.025, 0.032),
    'opt': (0.003, 0.006, 0.012, 0.020, 0.032, 0.048, 0.064, 0.080),
}


def adoption_rate_by_scenario(market_year: int, scenario: str = 'mp') -> float:
    """Return the adoption rate for a market year under a scenario.

    Args:
        market_year: 1-based year since market entry (1 = entry year).
            Values below 1 use the entry-year rate; values beyond the table
            use the last tabulated rate.
        scenario: 'pes' (pessimistic), 'mp' (most probable) or
            'opt' (optimistic). Case and surrounding whitespace are ignored.

    Returns:
        The adoption rate as a fraction.

    Raises:
        ValueError: If ``scenario`` is not one of the three known names.
    """
    scenario = scenario.lower().strip()

    try:
        rates = _ADOPTION_RATES[scenario]
    except KeyError:
        raise ValueError(f"Invalid scenario: {scenario}") from None

    # Clamp the 0-based index into the table's range
    idx = min(max(market_year - 1, 0), len(rates) - 1)
    return rates[idx]


# Test
for yr in [1, 3, 5, 8]:
    print(f"Market year {yr}: {adoption_rate_by_scenario(yr, 'mp'):.4f}")

### 2.3 Period Shares (Triangular Distribution)

In [None]:
def simulate_period_shares(n_sim: int = 1, seed: int = None) -> np.ndarray:
    """
    Draw random demand shares for the 13 periods of a year.

    Every row starts as 13 independent Triangular(0.5, 1.0, 1.5) draws and is
    then rescaled so that it sums to 1.0.

    Returns: array of shape (n_sim, 13)
    """
    generator = np.random.default_rng(seed)
    raw = generator.triangular(0.5, 1.0, 1.5, size=(n_sim, 13))
    row_totals = raw.sum(axis=1, keepdims=True)
    return raw / row_totals


# Test
test_shares = simulate_period_shares(n_sim=3, seed=42)
print(f"Period shares shape: {test_shares.shape}")
print(f"Sample shares (sim 0): {test_shares[0].round(4)}")
print(f"Sum: {test_shares[0].sum():.6f}")

### 2.4 Model Shares (Triangular Variation)

In [None]:
def triangular_model_shares(base_shares: np.ndarray, sim: int, year: int) -> np.ndarray:
    """
    Perturb the base model shares with triangular noise and renormalise.

    Each share is multiplied by a Triangular(0.8, 1.0, 1.2) draw. The generator
    is seeded from ``sim * 10000 + year``, so the same (sim, year) pair always
    produces the same shares.

    Returns: array with one share per entry of ``base_shares``, summing to 1.0
    """
    generator = np.random.default_rng(sim * 10000 + year)
    multipliers = generator.triangular(0.8, 1.0, 1.2, size=len(base_shares))
    perturbed = base_shares * multipliers
    total = perturbed.sum()
    return perturbed / total


# Test (will test after loading model_df)
print("Model shares function loaded")

### 2.5 OTD Conversion Rate

In [None]:
def get_otd_conversion_rate(segment: str, otd_days: float) -> float:
    """
    Look up the purchase probability for a segment at a given OTD.

    segment: 'Metro' or 'Non-Metro'; any other value uses a flat 0.7
    otd_days: order-to-delivery time in days
    """
    # Inclusive upper edges of OTD buckets 0, 1 and 2; anything slower
    # lands in bucket 3.
    bucket_edges = (1.0, 2.0, 3.0)
    bucket = next(
        (idx for idx, edge in enumerate(bucket_edges) if otd_days <= edge),
        len(bucket_edges),
    )

    # Conversion rate per bucket, by segment
    rates_by_segment = {
        'Metro': [1.00, 0.95, 0.85, 0.70],
        'Non-Metro': [1.00, 0.98, 0.93, 0.85],
    }
    if segment in rates_by_segment:
        rates = rates_by_segment[segment]
    else:
        rates = [0.7] * 4

    return rates[bucket]


# Test
print(f"Metro, 0.5 days: {get_otd_conversion_rate('Metro', 0.5):.2f}")
print(f"Metro, 2.5 days: {get_otd_conversion_rate('Metro', 2.5):.2f}")
print(f"Non-Metro, 3.5 days: {get_otd_conversion_rate('Non-Metro', 3.5):.2f}")

## 3. Data Loading

### 3.1 Load Population and Model Data

In [None]:
# Load BotWorld inputs
input_file = TASK1_DIR / 'BotWorld_inputs.xlsx'
print(f"Loading data from: {input_file}")

# Metro cities with population projections
metro_df = pd.read_excel(input_file, sheet_name='Metro')
print(f"  Metro cities: {len(metro_df)}")

# Non-metro population by country
nonmetro_df = pd.read_excel(input_file, sheet_name='NonMetro')
print(f"  Non-metro countries: {len(nonmetro_df)}")

# Product models with prices and market shares
model_df = pd.read_excel(input_file, sheet_name='Model')
print(f"  Product models: {len(model_df)}")

# Prepare model data: rescale the most-probable shares so they sum to 1
share_total = model_df['Share_MP'].sum()
model_df['Share_MP'] = model_df['Share_MP'] / share_total
model_codes = model_df['Model'].values
model_prices = model_df['Price_EUR'].values
model_shares_base = model_df['Share_MP'].values

category_counts = model_df['Category'].value_counts().to_dict()
print(f"\nModel categories: {category_counts}")
print(f"Price range: €{model_prices.min():.0f} - €{model_prices.max():.0f}")

### 3.2 Load DC Assignment Data

In [None]:
# Load city-to-DC assignment from Task2
assignment_file = TASK2_DIR / 'assignment_with_otd_prob_reachable.csv'
assignment_df = pd.read_csv(assignment_file)
print(f"Loaded assignment file: {len(assignment_df)} records")

# Country: the first 1-2 character token enclosed by underscores in node_id.
# NOTE(review): a node_id without such a token gets NaN here - confirm that
# every node_id in the assignment file follows this pattern.
country_codes = assignment_df['node_id'].str.extract(r'_(..?)_')
assignment_df['country'] = country_codes[0]

# Segment: 'metro' nodes are Metro, every other node type is Non-Metro
is_metro = assignment_df['node_type'].eq('metro')
assignment_df['segment'] = is_metro.map({True: 'Metro', False: 'Non-Metro'})

print(f"  Years: {sorted(assignment_df['year'].unique())}")
print(f"  Countries: {len(assignment_df['country'].unique())}")
print(f"  DCs: {len(assignment_df['assigned_cand'].unique())}")
print(f"  Node types: {assignment_df['node_type'].value_counts().to_dict()}")

# Sample
sample_columns = ['year', 'node_id', 'assigned_cand', 'units', 'country', 'segment']
print("\nSample assignments:")
print(assignment_df[sample_columns].head())

### 3.3 Prepare City Proportion Lookup

In [None]:
# Calculate city proportions within each (year, country, segment)
# This will be used to disaggregate segment-level demand to city-level
def calculate_city_proportions(assignment_df):
    """
    Compute each node's share of ``units`` within its (year, country, segment).

    Returns: DataFrame with columns
        [year, country, segment, node_id, proportion, assigned_cand, units]
    """
    group_keys = ['year', 'country', 'segment']
    output_columns = group_keys + ['node_id', 'proportion', 'assigned_cand', 'units']

    # Segment total, broadcast back onto every row of the segment
    group_units = assignment_df.groupby(group_keys)['units'].transform('sum')

    with_share = assignment_df.assign(proportion=assignment_df['units'] / group_units)
    return with_share[output_columns]


city_proportions = calculate_city_proportions(assignment_df)
print(f"City proportions calculated: {len(city_proportions)} entries")

# Validation: proportions should sum to 1.0 within each segment
validation = city_proportions.groupby(['year', 'country', 'segment'])['proportion'].sum()
print(f"  Proportion sums - Min: {validation.min():.6f}, Max: {validation.max():.6f}")

# Sample
print("\nSample proportions (Germany Metro 2027):")
is_de_metro_2027 = (
    (city_proportions['year'] == 2027)
    & (city_proportions['country'] == 'DE')
    & (city_proportions['segment'] == 'Metro')
)
sample = city_proportions[is_de_metro_2027].head(10)
print(sample[['node_id', 'proportion', 'assigned_cand']].to_string(index=False))

### 3.4 Load OTD Data (from Task2)

In [None]:
# Load OTD data - use assignment file's OTD information
# Create lookup dictionaries for OTD by city/country and year:
#   metro_city_otd[city][year]  -> promised OTD days of that metro city
#   nonmetro_otd[country][year] -> promised OTD days of the country's non-metro node
metro_city_otd = {}
nonmetro_otd = {}

otd_rows = zip(
    assignment_df['year'],
    assignment_df['node_id'],
    assignment_df['otd_days_promise'],
    assignment_df['segment'],
    assignment_df['country'],
)
for year, node_id, otd_days, segment, country in otd_rows:
    if segment == 'Metro':
        # Strip the 'METRO_' and '<country>_' parts to leave the city name
        city = node_id.replace('METRO_', '').replace(country + '_', '')
        metro_city_otd.setdefault(city, {})[year] = otd_days
    else:
        nonmetro_otd.setdefault(country, {})[year] = otd_days

print(f"OTD data loaded:")
print(f"  Metro cities with OTD: {len(metro_city_otd)}")
print(f"  Non-metro countries with OTD: {len(nonmetro_otd)}")


# For cities without year-specific OTD, we'll use a country-level average
# weighted by the assignment file's `units` column.
# Build this from assignment data
def _units_weighted_otd(group):
    """Units-weighted mean OTD of one (year, country) group; 2.0 if it has no units."""
    total_units = group['units'].sum()
    if total_units > 0:
        return (group['otd_days_promise'] * group['units']).sum() / total_units
    return 2.0


country_metro_otd_avg = (
    assignment_df[assignment_df['segment'] == 'Metro']
    .groupby(['year', 'country'])
    .apply(_units_weighted_otd)
    .to_dict()
)
print(f"  Country-level metro OTD averages: {len(country_metro_otd_avg)} entries")

## 4. Modified Simulator Function

**Key Modification**: Instead of aggregating to `(sim, date, model)`, we save `(sim, date, year, country, segment, model, sales_units)` to enable city-level disaggregation later.

In [None]:
def _annual_segment_demand(metro_df, nonmetro_df, entry_year_map, yr, adoption_scenario):
    """Return ``{(country, segment): annual demand units}`` for year ``yr``."""
    segment_annual = {}

    # Metro: sum population * adoption rate over the cities of each country
    for _, city_row in metro_df.iterrows():
        country = city_row['Country']
        if country not in entry_year_map:
            continue
        market_year = yr - entry_year_map[country] + 1
        if market_year < 1:
            continue  # country has not entered the market yet
        pop = city_row[f'Pop_{yr}']
        rate = adoption_rate_by_scenario(market_year, adoption_scenario)
        key = (country, 'Metro')
        segment_annual[key] = segment_annual.get(key, 0) + pop * rate

    # Non-metro: one row per country
    for _, nm_row in nonmetro_df.iterrows():
        country = nm_row['Country']
        if country not in entry_year_map:
            continue
        market_year = yr - entry_year_map[country] + 1
        if market_year < 1:
            continue
        pop = nm_row[f'NonMetroPop_{yr}']
        rate = adoption_rate_by_scenario(market_year, adoption_scenario)
        segment_annual[(country, 'Non-Metro')] = pop * rate

    return segment_annual


def _country_metro_otd(metro_df, metro_city_otd, yr):
    """Return the population-weighted metro OTD (days) per country for ``yr``."""
    weighted_sum = {}
    total_pop = {}
    for _, city_row in metro_df.iterrows():
        country = city_row['Country']
        pop = city_row[f'Pop_{yr}']
        # Cities without an OTD entry for this year default to 2.0 days
        city_otd = metro_city_otd.get(city_row['City'], {}).get(yr, 2.0)
        weighted_sum[country] = weighted_sum.get(country, 0.0) + city_otd * pop
        total_pop[country] = total_pop.get(country, 0.0) + pop

    return {
        country: (weighted_sum[country] / pop if pop > 0 else 2.0)
        for country, pop in total_pop.items()
    }


def _prepare_year(yr, metro_df, nonmetro_df, entry_year_map,
                  metro_city_otd, nonmetro_otd, adoption_scenario):
    """Build the inputs for ``yr`` that are identical in every simulation.

    Returns None when no country-segment has demand in that year.
    """
    segment_annual = _annual_segment_demand(
        metro_df, nonmetro_df, entry_year_map, yr, adoption_scenario
    )
    if not segment_annual:
        return None
    total_annual = sum(segment_annual.values())
    if total_annual == 0:
        return None

    cal = build_year_calendar(yr)
    cyber_days = cal[cal['is_cyber_week']]
    metro_otd = _country_metro_otd(metro_df, metro_city_otd, yr)

    # One tuple per country-segment:
    # (country, segment, share of annual demand, OTD days, conversion rate)
    segments = []
    for (country, segment), annual_units in segment_annual.items():
        if segment == 'Metro':
            otd_days = metro_otd.get(country, 2.0)
        else:
            otd_days = nonmetro_otd.get(country, {}).get(yr, 5.0)
        segments.append((
            country,
            segment,
            annual_units / total_annual,
            otd_days,
            get_otd_conversion_rate(segment, otd_days),
        ))

    return {
        'total_annual': total_annual,
        'regular_days': cal[~cal['is_cyber_week']],
        'cyber_days': cyber_days,
        # Timestamps, i.e. the same type as the keys of the daily demand dict,
        # so the membership test below can actually match.
        'cyber_dates': set(cyber_days['date']),
        'segments': segments,
    }


def _allocate_daily_demand(regular_days, cyber_days, total_annual, period_shares):
    """Spread annual demand over days; returns ``{Timestamp: units}``."""
    cyber_total = total_annual * CYBER_WEEK_SHARE
    regular_total = total_annual * (1.0 - CYBER_WEEK_SHARE)

    day_demand_total = {}

    # Regular demand: split by period share, then by weekday weight within
    # the period's non-Cyber-Week days.
    for p in range(1, 14):
        p_days = regular_days[regular_days['period'] == p]
        if len(p_days) == 0:
            continue
        period_units = regular_total * period_shares[p - 1]
        dow_w = p_days['day_of_week'].map(DOW_WEIGHTS).values
        dow_w = dow_w / dow_w.sum()
        for day, weight in zip(p_days['date'], dow_w):
            day_demand_total[day] = period_units * weight

    # Cyber Week demand: split by weekday weight over the Cyber Week days
    if len(cyber_days) > 0:
        cw_dow_w = cyber_days['day_of_week'].map(DOW_WEIGHTS).values
        cw_dow_w = cw_dow_w / cw_dow_w.sum()
        for day, weight in zip(cyber_days['date'], cw_dow_w):
            day_demand_total[day] = cyber_total * weight

    return day_demand_total


def _flush_batch(sim_records, batch_idx, out_dir, annual_summary, segment_summary):
    """Write one batch file and append its annual/segment summaries."""
    batch_df = pd.DataFrame(sim_records)

    # KEY MODIFICATION: Save (sim, date, year, country, segment, model, sales_units)
    # This preserves geographic information for later disaggregation
    segment_batch = (
        batch_df[['sim', 'date', 'year', 'country', 'segment', 'model', 'sales_units']]
        .groupby(['sim', 'date', 'year', 'country', 'segment', 'model'], as_index=False)
        .agg(sales_units=('sales_units', 'sum'))
    )

    out_path = os.path.join(out_dir, f'batch_{batch_idx:02d}.csv.gz')
    segment_batch.to_csv(out_path, index=False, compression='gzip')
    print(f'    → Saved batch {batch_idx} to {out_path}')

    # Annual summary
    annual_summary.append(
        batch_df.groupby(['sim', 'year']).agg(
            demand_units=('demand_units', 'sum'),
            sales_units=('sales_units', 'sum'),
            revenue=('revenue', 'sum'),
        ).reset_index()
    )

    # Segment summary
    segment_summary.append(
        batch_df.groupby(['sim', 'year', 'country', 'segment']).agg(
            demand_units=('demand_units', 'sum'),
            sales_units=('sales_units', 'sum'),
            otd_days=('otd_days', 'mean'),
        ).reset_index()
    )


def run_modified_simulator(
    metro_df, nonmetro_df, model_df, entry_year_map, years,
    metro_city_otd, nonmetro_otd, country_metro_otd_avg,
    n_sim=100, seed=42, batch_size=10, out_dir=None, adoption_scenario='mp'
):
    """
    Modified OTD simulator that saves (country, segment) information.

    Output per batch: (sim, date, year, country, segment, model, sales_units)

    Args:
        metro_df: One row per metro city with 'Country', 'City' and a
            'Pop_<year>' column for every simulated year.
        nonmetro_df: One row per country with 'Country' and a
            'NonMetroPop_<year>' column for every simulated year.
        model_df: Product table with 'Model', 'Price_EUR' and 'Share_MP'.
        entry_year_map: {country: market entry year}; countries missing from
            the map are skipped.
        years: Calendar years to simulate.
        metro_city_otd: {city: {year: OTD days}}; missing entries use 2.0.
        nonmetro_otd: {country: {year: OTD days}}; missing entries use 5.0.
        country_metro_otd_avg: Accepted for interface compatibility; not used.
        n_sim: Number of simulations.
        seed: Seed of the generator that seeds the period-share draws.
        batch_size: Simulations per batch file.
        out_dir: Directory for 'batch_XX.csv.gz' files. Batches are only
            written when both ``batch_size`` and ``out_dir`` are set.
        adoption_scenario: 'pes', 'mp' or 'opt'.

    Returns:
        {'annual': DataFrame, 'segment': DataFrame} when at least one batch
        was written, otherwise a DataFrame of the raw per-model records.
        Simulations left over after the last full batch are written as one
        additional batch, so with batching enabled every simulation ends up
        on disk and in the summaries.

    Raises:
        ValueError: If ``adoption_scenario`` is not a known scenario.
    """
    adoption_scenario = adoption_scenario.lower().strip()
    if adoption_scenario not in {'pes', 'mp', 'opt'}:
        raise ValueError(f"adoption_scenario must be one of {{'pes','mp','opt'}}, got '{adoption_scenario}'")

    if out_dir:
        os.makedirs(out_dir, exist_ok=True)

    rng_global = np.random.default_rng(seed)
    model_shares_base = model_df['Share_MP'].values
    model_codes = model_df['Model'].values
    model_prices = model_df['Price_EUR'].values

    sim_records = []
    annual_summary = []
    segment_summary = []

    # Year-level inputs (annual demand, calendar, OTD, conversion rates) do
    # not depend on the simulation index, so each year is prepared once.
    year_inputs = {}

    for sim in range(n_sim):
        print(f'  Sim {sim+1}/{n_sim}...', end=' ', flush=True)

        for yr in years:
            if yr not in year_inputs:
                year_inputs[yr] = _prepare_year(
                    yr, metro_df, nonmetro_df, entry_year_map,
                    metro_city_otd, nonmetro_otd, adoption_scenario,
                )
            ctx = year_inputs[yr]
            if ctx is None:
                continue

            # ── Period shares (one seed drawn per simulated year) ──
            period_shares = simulate_period_shares(
                n_sim=1, seed=int(rng_global.integers(0, 1_000_000))
            )[0]

            # ── Daily demand allocation (Cyber Week vs regular days) ──
            day_demand_total = _allocate_daily_demand(
                ctx['regular_days'], ctx['cyber_days'],
                ctx['total_annual'], period_shares,
            )

            # ── OTD conversion per segment, decomposed to products ──
            model_shares = triangular_model_shares(model_shares_base, sim, yr)
            cyber_dates = ctx['cyber_dates']
            for d, total_units in day_demand_total.items():
                is_cw = d in cyber_dates
                p_factor = (1.0 - CYBER_PRICE_DISCOUNT) if is_cw else 1.0
                for country, segment, seg_weight, otd_days, conversion_rate in ctx['segments']:
                    seg_demand = total_units * seg_weight
                    seg_sales = seg_demand * conversion_rate
                    ms_vec = seg_sales * model_shares
                    md_vec = seg_demand * model_shares
                    for m_idx, mdl in enumerate(model_codes):
                        ms = ms_vec[m_idx]
                        if ms < 1e-9:
                            continue  # skip negligible sales
                        sim_records.append({
                            'sim': sim,
                            'date': pd.Timestamp(d),
                            'year': yr,
                            'country': country,
                            'segment': segment,
                            'is_cyber_week': is_cw,
                            'demand_units': md_vec[m_idx],
                            'otd_days': otd_days,
                            'conversion_rate': conversion_rate,
                            'sales_units': ms,
                            'model': mdl,
                            'revenue': ms * model_prices[m_idx] * p_factor,
                        })

        print('Done')

        # ── Batch flush with country & segment preserved ──
        if batch_size and out_dir and (sim + 1) % batch_size == 0:
            batch_idx = (sim + 1) // batch_size - 1
            if sim_records:
                _flush_batch(sim_records, batch_idx, out_dir, annual_summary, segment_summary)

            # Free memory
            sim_records = []
            gc.collect()

    # Flush the simulations that did not fill a complete batch
    if batch_size and out_dir and sim_records:
        _flush_batch(sim_records, n_sim // batch_size, out_dir, annual_summary, segment_summary)
        sim_records = []
        gc.collect()

    # Return summaries
    if annual_summary:
        annual_df = pd.concat(annual_summary, ignore_index=True)
        segment_df = pd.concat(segment_summary, ignore_index=True)
        return {'annual': annual_df, 'segment': segment_df}
    else:
        return pd.DataFrame(sim_records)


print("Modified simulator function loaded")
print("Key change: Saves (sim, date, year, country, segment, model, sales_units)")

## 5. City-Level Disaggregation & DC Aggregation

### 5.1 Disaggregate Segment-Level to City-Level

In [None]:
def disaggregate_to_city_level(segment_df, city_proportions):
    """
    Disaggregate segment-level demand to city-level using city proportions.

    Input: (sim, date, year, country, segment, model, sales_units)
    Output: (sim, date, year, node_id, euro_dc_id, model, city_demand)

    Args:
        segment_df: Segment-level sales with the input columns above.
        city_proportions: One row per node with 'year', 'country', 'segment',
            'node_id', 'proportion' and 'assigned_cand'.

    Returns:
        DataFrame with one row per (segment row, node of that segment).
        Segment rows without a matching proportion are kept (left join) with
        NaN node, DC and demand; a warning with their count is printed.
    """
    print("\nDisaggregating segment-level demand to city-level...")

    # Merge with city proportions
    merged = segment_df.merge(
        city_proportions,
        on=['year', 'country', 'segment'],
        how='left'
    )

    # Calculate city-level demand
    merged['city_demand'] = merged['sales_units'] * merged['proportion']

    # Rows with NaN demand are left out of the conserved total below, so make
    # them visible instead of letting demand disappear silently.
    unallocated = int(merged['city_demand'].isna().sum())
    if unallocated:
        print(f"  WARNING: {unallocated:,} rows have no city proportion (demand is NaN)")

    # Select final columns
    city_level = merged[[
        'sim', 'date', 'year', 'node_id', 'assigned_cand', 'model', 'city_demand'
    ]].rename(columns={'assigned_cand': 'euro_dc_id'})

    n_in = len(segment_df)
    print(f"  Input records: {n_in:,}")
    print(f"  Output records: {len(city_level):,}")
    if n_in:
        print(f"  Expansion factor: {len(city_level)/n_in:.1f}x")
    else:
        print("  Expansion factor: n/a (no input records)")

    # Validation: total demand should be conserved
    total_in = segment_df['sales_units'].sum()
    total_out = city_level['city_demand'].sum()
    if total_in:
        print(f"  Demand conservation: {total_out/total_in*100:.2f}%")
    else:
        print("  Demand conservation: n/a (no input demand)")

    return city_level


print("City-level disaggregation function loaded")

### 5.2 Aggregate City-Level to DC-Level

In [None]:
def aggregate_to_dc_level(city_df):
    """
    Aggregate city-level demand to DC-level.

    Input: (sim, date, year, node_id, euro_dc_id, model, city_demand)
    Output: (sim, date, year, euro_dc_id, model, realized_units)

    Args:
        city_df: City-level demand with the input columns above.

    Returns:
        DataFrame with ``city_demand`` summed per
        (sim, date, year, euro_dc_id, model).
    """
    print("\nAggregating city-level demand to DC-level...")

    dc_level = city_df.groupby(
        ['sim', 'date', 'year', 'euro_dc_id', 'model'],
        as_index=False
    ).agg(realized_units=('city_demand', 'sum'))

    print(f"  Input records: {len(city_df):,}")
    print(f"  Output records: {len(dc_level):,}")
    if len(dc_level):
        print(f"  Reduction factor: {len(city_df)/len(dc_level):.1f}x")
    else:
        print("  Reduction factor: n/a (no output records)")

    # Validation
    total_in = city_df['city_demand'].sum()
    total_out = dc_level['realized_units'].sum()
    if total_in:
        print(f"  Demand conservation: {total_out/total_in*100:.2f}%")
    else:
        print("  Demand conservation: n/a (no input demand)")

    return dc_level


print("DC-level aggregation function loaded")

## 6. Main Execution Workflow

This section runs the complete pipeline:

1. Run the modified simulator → save batches with (country, segment)
2. Load all batches
3. Disaggregate to city level
4. Aggregate to DC level
5. Save the final output

In [None]:
# ═══════════════════════════════════════════════════════════════
# STEP 1: Run Modified Simulator
# ═══════════════════════════════════════════════════════════════
print("=" * 70)
print("STEP 1: Running Modified Simulator")
print("=" * 70)
print(f"Configuration:")
print(f"  Simulations: {N_SIM}")
print(f"  Years: {YEARS[0]}-{YEARS[-1]}")
print(f"  Adoption: {ADOPTION_SCENARIO}")
print(f"  Batch size: {BATCH_SIZE}")
print()

# Remove batch files left over from an earlier run. STEP 2 loads every
# batch_*.csv.gz in OUTPUT_DIR, so batches from a previous run with more
# simulations would otherwise be mixed into this run's results.
stale_batches = sorted(OUTPUT_DIR.glob('batch_*.csv.gz'))
for stale_batch in stale_batches:
    stale_batch.unlink()
if stale_batches:
    print(f"Removed {len(stale_batches)} stale batch file(s) from {OUTPUT_DIR}")
    print()

summary = run_modified_simulator(
    metro_df=metro_df,
    nonmetro_df=nonmetro_df,
    model_df=model_df,
    entry_year_map=ENTRY_YEAR_MAP,
    years=YEARS,
    metro_city_otd=metro_city_otd,
    nonmetro_otd=nonmetro_otd,
    country_metro_otd_avg=country_metro_otd_avg,
    n_sim=N_SIM,
    seed=SEED,
    batch_size=BATCH_SIZE,
    out_dir=str(OUTPUT_DIR),
    adoption_scenario=ADOPTION_SCENARIO
)

print("\n" + "=" * 70)
print("Simulation Complete!")
print("=" * 70)

In [None]:
# ═══════════════════════════════════════════════════════════════
# STEP 2: Load All Batch Files
# ═══════════════════════════════════════════════════════════════
print("\n" + "=" * 70)
print("STEP 2: Loading All Batch Files")
print("=" * 70)

batch_files = sorted(OUTPUT_DIR.glob('batch_*.csv.gz'))
print(f"Found {len(batch_files)} batch files")

# Fail with a clear message instead of pd.concat's "No objects to concatenate"
if not batch_files:
    raise FileNotFoundError(
        f"No batch_*.csv.gz files found in {OUTPUT_DIR}; run STEP 1 first"
    )

all_batches = []
for batch_file in batch_files:
    batch_df = pd.read_csv(batch_file)
    all_batches.append(batch_df)
    print(f"  Loaded {batch_file.name}: {len(batch_df):,} records")

segment_output = pd.concat(all_batches, ignore_index=True)

print(f"\nTotal records: {len(segment_output):,}")
print(f"Columns: {list(segment_output.columns)}")
print(f"\nSample:")
print(segment_output.head(10))

In [None]:
# ═══════════════════════════════════════════════════════════════
# STEP 3: Disaggregate to City-Level
# ═══════════════════════════════════════════════════════════════
step3_rule = "=" * 70
print("\n" + step3_rule)
print("STEP 3: Disaggregating to City-Level")
print(step3_rule)

# Spread every segment row over the nodes of that segment
city_output = disaggregate_to_city_level(segment_output, city_proportions)

print(f"\nCity-level output shape: {city_output.shape}")
print(f"\nSample:")
print(city_output.head(10))

In [None]:
# ═══════════════════════════════════════════════════════════════
# STEP 4: Aggregate to DC-Level
# ═══════════════════════════════════════════════════════════════
step4_rule = "=" * 70
print("\n" + step4_rule)
print("STEP 4: Aggregating to DC-Level")
print(step4_rule)

# Sum node-level demand per (sim, date, year, DC, model)
dc_output = aggregate_to_dc_level(city_output)

print(f"\nDC-level output shape: {dc_output.shape}")
print(f"\nColumns: {list(dc_output.columns)}")
print(f"\nSample:")
print(dc_output.head(20))

# Clean up memory: the intermediate frames are not needed past this point
del segment_output, city_output
gc.collect()

In [None]:
# ═══════════════════════════════════════════════════════════════
# STEP 5: Save Final Output
# ═══════════════════════════════════════════════════════════════
step5_rule = "=" * 70
print("\n" + step5_rule)
print("STEP 5: Saving Final Output")
print(step5_rule)

# Written next to the batch files; the name does not match 'batch_*', so the
# STEP 2 glob does not pick it up.
output_file = OUTPUT_DIR / 'dc_daily_demand.csv.gz'
dc_output.to_csv(output_file, index=False, compression='gzip')

print(f"Saved to: {output_file}")
print(f"File size: {output_file.stat().st_size / 1024 / 1024:.1f} MB")
print(f"Total records: {len(dc_output):,}")
print("\nOutput schema: (sim, date, year, euro_dc_id, model, realized_units)")

## 7. Validation & Sanity Checks

In [None]:
def _check_header(title):
    """Print a check title on its own paragraph, followed by a 70-dash rule."""
    print(f"\n{title}")
    print("-" * 70)


print("=" * 70)
print("VALIDATION & SANITY CHECKS")
print("=" * 70)

# Check 1: Simulation coverage
_check_header("1. Simulation Coverage")
sims = dc_output['sim'].unique()
print(f"  Number of simulations: {len(sims)}")
print(f"  Expected: {N_SIM}")
print(f"  Range: {sims.min()} - {sims.max()}")
assert len(sims) == N_SIM, "Missing simulations!"

# Check 2: Year coverage
_check_header("2. Year Coverage")
years = sorted(dc_output['year'].unique())
print(f"  Years present: {years}")
print(f"  Expected: {YEARS}")
assert years == YEARS, "Year mismatch!"

# Check 3: Date range
_check_header("3. Date Range")
min_date = dc_output['date'].min()
max_date = dc_output['date'].max()
print(f"  Min date: {min_date}")
print(f"  Max date: {max_date}")
print(f"  Total days: {(pd.to_datetime(max_date) - pd.to_datetime(min_date)).days + 1}")

# Check 4: DC coverage
_check_header("4. DC Coverage")
dcs = dc_output['euro_dc_id'].unique()
print(f"  Number of DCs: {len(dcs)}")
print(f"  DCs: {sorted(dcs)}")

# Check 5: Model coverage
_check_header("5. Model Coverage")
models = sorted(dc_output['model'].unique())
print(f"  Number of models: {len(models)}")
print(f"  Expected: 24")
print(f"  Models: {models}")
assert len(models) == 24, "Missing models!"

# Check 6: Realized units statistics
_check_header("6. Realized Units Statistics")
realized = dc_output['realized_units']
print(f"  Total realized units: {realized.sum():,.0f}")
print(f"  Mean per record: {realized.mean():.2f}")
print(f"  Median per record: {realized.median():.2f}")
print(f"  Min: {realized.min():.6f}")
print(f"  Max: {realized.max():.2f}")

# Check 7: Null values
_check_header("7. Null Value Check")
nulls = dc_output.isnull().sum()
print(nulls)
assert nulls.sum() == 0, "Null values found!"

print("\n" + "=" * 70)
print("✓ ALL VALIDATION CHECKS PASSED")
print("=" * 70)

## 8. Summary Statistics & Insights

In [None]:
print("=" * 70)
print("SUMMARY STATISTICS")
print("=" * 70)

# Aggregate by DC (sum over every simulation)
print("\n1. Total Demand by DC (across all sims)")
print("-" * 70)
dc_totals = dc_output.groupby('euro_dc_id')['realized_units'].sum().sort_values(ascending=False)
print(dc_totals)
print(f"\nTotal across all DCs: {dc_totals.sum():,.0f}")

# Aggregate by year. The sum is divided by N_SIM, so this is the average
# annual demand of one simulation, and the heading says so.
print("\n2. Average Annual Demand by Year (per sim)")
print("-" * 70)
year_totals = dc_output.groupby('year')['realized_units'].sum() / N_SIM
print(year_totals)

# Aggregate by model (sum over every simulation)
print("\n3. Total Demand by Model (top 10)")
print("-" * 70)
model_totals = dc_output.groupby('model')['realized_units'].sum().sort_values(ascending=False)
print(model_totals.head(10))

# Average daily demand by DC: sum the models per (sim, date, DC), then take
# the mean of those daily totals for each DC.
print("\n4. Average Daily Demand by DC (per sim)")
print("-" * 70)
dc_daily_avg = dc_output.groupby(['sim', 'date', 'euro_dc_id'])['realized_units'].sum().groupby('euro_dc_id').mean()
print(dc_daily_avg.sort_values(ascending=False))

## 9. Sample Visualizations (Optional)

In [None]:
# Note: Uncomment if matplotlib is available
# import matplotlib.pyplot as plt
#
# # Plot 1: Total demand over time (average across sims)
# daily_demand = dc_output.groupby('date')['realized_units'].sum() / N_SIM
#
# plt.figure(figsize=(14, 6))
# plt.plot(daily_demand.index, daily_demand.values)
# plt.title('Daily Total Realized Demand (Average across simulations)')
# plt.xlabel('Date')
# plt.ylabel('Realized Units')
# plt.grid(True, alpha=0.3)
# plt.tight_layout()
# plt.savefig(OUTPUT_DIR / 'daily_demand.png', dpi=150)
# print("Saved: daily_demand.png")
#
# # Plot 2: Demand by DC over years
# dc_yearly = dc_output.groupby(['year', 'euro_dc_id'])['realized_units'].sum() / N_SIM
# dc_yearly_pivot = dc_yearly.unstack(fill_value=0)
#
# plt.figure(figsize=(12, 8))
# dc_yearly_pivot.T.plot(kind='bar', stacked=True, ax=plt.gca())
# plt.title('Annual Demand by DC (Average across simulations)')
# plt.xlabel('DC')
# plt.ylabel('Realized Units')
# plt.legend(title='Year', bbox_to_anchor=(1.05, 1), loc='upper left')
# plt.tight_layout()
# plt.savefig(OUTPUT_DIR / 'dc_yearly_demand.png', dpi=150)
# print("Saved: dc_yearly_demand.png")

# The plotting code above stays disabled; only this notice is printed.
visualization_notice = "Visualization section (commented out by default)"
print(visualization_notice)

## 10. Final Summary

In [None]:
# Closing report: one entry per printed line, emitted in order
closing_rule = "=" * 70
final_report = [
    "\n" + closing_rule,
    "TASK 4 COMPLETE - DC DEMAND SIMULATION",
    closing_rule,
    "\n✓ Successfully generated DC-level daily demand",
    f"\nOutput file: {output_file}",
    f"  - Simulations: {N_SIM}",
    f"  - Years: {YEARS[0]}-{YEARS[-1]}",
    f"  - DCs: {len(dcs)}",
    f"  - Models: {len(models)}",
    f"  - Total records: {len(dc_output):,}",
    f"  - File size: {output_file.stat().st_size / 1024 / 1024:.1f} MB",
    "\nSchema: (sim, date, year, euro_dc_id, model, realized_units)",
    "\nNext steps:",
    "  1. Load dc_daily_demand.csv.gz for further analysis",
    "  2. Use for capacity planning, inventory management, etc.",
    "  3. Aggregate as needed for different planning horizons",
    "\n" + closing_rule,
]
for report_line in final_report:
    print(report_line)