In [None]:
import numpy as np
import pandas as pd
from joblib import Parallel, delayed
from scipy.stats import gaussian_kde
from sklearn.preprocessing import StandardScaler
import os
import sys
from typing import Dict, Any
# Make the repository root importable so the `src` package resolves.
# The root is taken as two directories above the kernel's current working
# directory, so this only works when the kernel is started from the notebook's
# own folder.
# NOTE(review): sys.path.append adds a duplicate entry every time the cell is re-run.
project_root = os.path.abspath(os.path.join(os.getcwd(), "../.."))
sys.path.append(project_root)
print(project_root)
from src.optimization.milp_schedulerv4_fixed import build_and_solve_schedule_v4
# build_and_solve_schedule_v4 is the only project-local dependency of this notebook.



/home/mak/Documents/Optimization/Project


In [3]:
!ls

MonteCarlo.ipynb  monteCarlov2.ipynb


# 1️⃣ PREP STAGE — Build sampling distributions

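We use:

✔ Block bootstrap for weather

Each scenario reuses one complete historical year of weather, which preserves the week-to-week correlation within the season.

✔ Lognormal for labor

Non-negative, asymmetric (right-skewed), and realistic for a labor-supply quantity that cannot fall below zero.

✔ KDE for forecast error

Forecasting model → residuals (actual − predicted) → kernel density estimate → sampled forecast errors.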

In [5]:
def prepare_uncertainty_models(weather_df, forecast_df, labor_df, target_year):
    """
    Build the sampling models for weather, labor, and forecast error.

    Parameters
    ----------
    weather_df : pandas.DataFrame
        Needs "year" and "week" columns. Only rows with ``year < target_year``
        are kept, so the simulated year never leaks into its own bootstrap pool.
    forecast_df : pandas.DataFrame
        Needs "actual" and "predicted" columns. The frame is NOT modified.
    labor_df : pandas.DataFrame
        Needs a "Value" column; NaN entries are ignored.
    target_year : int
        Year being simulated.

    Returns
    -------
    dict
        "weather_by_year" : {year: that year's weather rows sorted by "week"}
        "labor_mu", "labor_sigma" : parameters of the underlying normal of a
            lognormal whose mean/std match the observed labor values
        "error_kde" : scipy.stats.gaussian_kde fitted on actual - predicted

    Raises
    ------
    ValueError
        If there are no labor observations or their mean is not strictly
        positive (a lognormal cannot be moment-matched in that case).
    """
    # --- 1. WEATHER BLOCK BOOTSTRAP ---
    hist_weather = weather_df[weather_df["year"] < target_year].copy()

    weather_by_year = {
        yr: hist_weather[hist_weather["year"] == yr].sort_values("week")
        for yr in hist_weather["year"].unique()
    }

    # --- 2. LABOR (lognormal) ---
    labor_vals = labor_df["Value"].dropna().to_numpy(dtype=float)
    if labor_vals.size == 0:
        raise ValueError("labor_df['Value'] contains no usable observations")
    labor_mean = np.mean(labor_vals)
    labor_std = np.std(labor_vals)
    if not labor_mean > 0:
        raise ValueError(
            "labor_df['Value'] must have a strictly positive mean to fit a lognormal"
        )

    # Moment matching: for X ~ LogNormal(mu, sigma) with mean m and variance v,
    #   mu = ln(m^2 / sqrt(v + m^2)),  sigma^2 = ln(1 + v / m^2)
    phi = np.sqrt(labor_std**2 + labor_mean**2)
    mu = np.log(labor_mean**2 / phi)
    sigma = np.sqrt(np.log(phi**2 / labor_mean**2))

    # --- 3. FORECAST ERROR KDE ---
    # Compute residuals on a local array: writing an "error" column back into
    # forecast_df would silently mutate the caller's frame.
    errors = (forecast_df["actual"] - forecast_df["predicted"]).to_numpy(dtype=float)
    # Non-finite residuals (missing actuals/predictions) would poison the KDE covariance.
    errors = errors[np.isfinite(errors)]
    error_kde = gaussian_kde(errors)

    return {
        "weather_by_year": weather_by_year,
        "labor_mu": mu,
        "labor_sigma": sigma,
        "error_kde": error_kde,
    }


# 2️⃣ SAMPLERS — Draw uncertainty
✔ Weather is sampled as one whole historical-year block.

This preserves the week-to-week correlation automatically.

✔ Labor is sampled from the fitted lognormal distribution.

✔ Forecast error is sampled from the fitted KDE.

In [6]:
def sample_scenario(uncertainty_models, target_year, base_weekly_df, rng=None):
    """
    Draw one scenario: a weather path, a labor supply level, and a forecast error.

    Parameters
    ----------
    uncertainty_models : dict
        Output of prepare_uncertainty_models; the keys "weather_by_year",
        "labor_mu", "labor_sigma" and "error_kde" are read.
    target_year : int
        Year whose weeks (taken from ``base_weekly_df``) the weather is aligned to.
    base_weekly_df : pandas.DataFrame
        Needs "year" and "week" columns.
    rng : numpy.random.Generator or numpy.random.RandomState, optional
        Source of randomness. When omitted, NumPy's global random state is
        used, exactly as before; pass a seeded generator for reproducible draws.

    Returns
    -------
    (sampled_weather, labor_supply, forecast_error)
        sampled_weather : DataFrame indexed by the target year's weeks
        labor_supply    : float drawn from the lognormal
        forecast_error  : float drawn from the KDE

    Raises
    ------
    ValueError
        If no historical weather year is available to sample from.
    """
    # The np.random module exposes the same choice/lognormal API as a generator.
    draw = np.random if rng is None else rng

    # --- Weather block: pick a random historical year ---
    candidate_years = list(uncertainty_models["weather_by_year"].keys())
    if not candidate_years:
        raise ValueError("no historical weather years available to sample from")
    weather_year = draw.choice(candidate_years)
    sampled_weather = uncertainty_models["weather_by_year"][weather_year].copy()

    # Reindex to match target year's weeks; weeks missing from the sampled
    # year take the value of the nearest available week.
    in_target_year = base_weekly_df["year"] == target_year
    weeks = base_weekly_df.loc[in_target_year, "week"].unique()
    sampled_weather = sampled_weather.set_index("week").reindex(weeks, method="nearest")

    # --- Labor ---
    labor_supply = draw.lognormal(
        uncertainty_models["labor_mu"],
        uncertainty_models["labor_sigma"],
    )

    # --- Forecast error ---
    # resample() returns a 2-D (n_dims, n_samples) array; take the single element
    # explicitly, because float() on a 1-element array is deprecated in NumPy.
    error_draw = uncertainty_models["error_kde"].resample(1, seed=rng)
    forecast_error = float(np.asarray(error_draw).ravel()[0])

    return sampled_weather, labor_supply, forecast_error


# 3️⃣ APPLY SCENARIO TO WEEKLY MASTER (in-memory)

In [7]:
def apply_scenario_to_weekly(base_wm, sampled_weather, sampled_labor, forecast_error,
                             target_year, labor_baseline=6.0):
    """
    Apply weather, labor, and forecast errors to a copy of weekly master.
    No CSV writing. Fully in-memory; ``base_wm`` itself is left untouched.

    Parameters
    ----------
    base_wm : pandas.DataFrame
        Weekly master table with "year", "capacity_factor", "labor_hours" and
        "pct_planted_forecasted" columns (plus "week" when week alignment is needed).
    sampled_weather : pandas.DataFrame
        Needs a "capacity_factor" column. Used positionally when it has exactly
        one row per target-year row; otherwise its index is treated as week
        numbers and matched against ``base_wm["week"]``.
    sampled_labor : float
        Sampled labor level; divided by ``labor_baseline`` to get a multiplier.
    forecast_error : float
        Additive shift applied to "pct_planted_forecasted".
    target_year : int
        Only rows of this year are modified.
    labor_baseline : float, default 6.0
        Labor level that corresponds to a multiplier of 1.0.

    Returns
    -------
    pandas.DataFrame
        Modified copy of ``base_wm``.

    Raises
    ------
    ValueError
        If the sampled weather can be matched to the target-year rows neither
        by position nor by week.
    """
    wm = base_wm.copy()

    mask = wm["year"] == target_year

    # The adjustments below produce floats. Writing floats into an integer
    # column relies on silent upcasting, which pandas has deprecated, so
    # promote the affected columns up front.
    for col in ("capacity_factor", "labor_hours", "pct_planted_forecasted"):
        if col in wm.columns and not pd.api.types.is_float_dtype(wm[col]):
            wm[col] = wm[col].astype(float)

    # Weather adjustments
    clipped_cf = sampled_weather["capacity_factor"].clip(0.2, 1.2)
    n_target_rows = int(mask.sum())
    if len(clipped_cf) == n_target_rows:
        # One weather row per target-year row: positional assignment.
        wm.loc[mask, "capacity_factor"] = clipped_cf.to_numpy()
    elif "week" in wm.columns and clipped_cf.index.is_unique:
        # Several rows share a week: look each row's week up in the weather index.
        wm.loc[mask, "capacity_factor"] = wm.loc[mask, "week"].map(clipped_cf).to_numpy()
    else:
        raise ValueError(
            f"sampled_weather has {len(clipped_cf)} rows but {n_target_rows} rows "
            f"belong to year {target_year}, and they cannot be aligned by week"
        )

    # Labor adjustments: multiplier relative to the baseline, bounded to [0.5, 2.0]
    labor_scale = min(max(sampled_labor / labor_baseline, 0.5), 2.0)
    wm.loc[mask, "labor_hours"] = wm.loc[mask, "labor_hours"] * labor_scale

    # Forecast adjustments (affects planting load); kept inside 0-100 percent
    wm.loc[mask, "pct_planted_forecasted"] = (
        wm.loc[mask, "pct_planted_forecasted"] + forecast_error
    ).clip(0, 100)

    return wm


# 4️⃣ SINGLE SIMULATION

In [8]:
def run_single_simulation(
    sim_id,
    base_wm,
    fields_path,
    base_params,
    uncertainty_models,
    target_year
):
    """
    Run one Monte Carlo replicate: sample a scenario, apply it, solve the schedule.

    Parameters
    ----------
    sim_id : hashable
        Identifier echoed back in the result so replicates can be told apart.
    base_wm : pandas.DataFrame
        Unperturbed weekly master table.
    fields_path : str
        Passed through to build_and_solve_schedule_v4.
    base_params : dict
        Extra keyword arguments forwarded to build_and_solve_schedule_v4.
    uncertainty_models : dict
        Output of prepare_uncertainty_models.
    target_year : int
        Year being simulated.

    Returns
    -------
    dict
        On success: "sim_id", "makespan", "total_penalty", "late_fields"
        (rows with status == 3), "early_fields" (rows with status == 1),
        "forecast_error", "labor_sample".
        On failure: "sim_id", "error" (the exception message), "error_type"
        (exception class name) and "traceback" (formatted traceback).

    Notes
    -----
    Failures are reported in the result instead of raised so that one bad
    replicate does not abort a whole batch of simulations.
    """
    import traceback

    try:
        # --- SAMPLE SCENARIO ---
        sampled_weather, sampled_labor, forecast_error = sample_scenario(
            uncertainty_models, target_year, base_wm
        )

        # --- APPLY SCENARIO ---
        wm_scenario = apply_scenario_to_weekly(
            base_wm, sampled_weather, sampled_labor, forecast_error, target_year
        )

        # --- RUN MILP DIRECTLY WITH IN-MEMORY DF ---
        result_df = build_and_solve_schedule_v4(
            fields_path=fields_path,
            weekly_master_df=wm_scenario,     # modified function accepts DF!
            target_year=target_year,
            **base_params
        )

        return {
            "sim_id": sim_id,
            "makespan": result_df["objective_makespan"].max(),
            "total_penalty": result_df["penalty"].sum(),
            "late_fields": (result_df["status"] == 3).sum(),
            "early_fields": (result_df["status"] == 1).sum(),
            "forecast_error": forecast_error,
            "labor_sample": sampled_labor,
        }

    except Exception as e:
        # str(e) alone is often useless (a KeyError stringifies to just the
        # key), so also record the exception class and where it was raised.
        return {
            "sim_id": sim_id,
            "error": str(e),
            "error_type": type(e).__name__,
            "traceback": traceback.format_exc(),
        }


In [9]:
# Load the notebook inputs. Paths are relative to the kernel's working directory.
# NOTE(review): no forecast frame is loaded here, although
# prepare_uncertainty_models reads "actual" and "predicted" columns from one.
base_wm = pd.read_csv("../../data/processed/master_weekly_table_scaled.csv")  # weekly master table perturbed per scenario
weather_df = pd.read_csv("../../data/processed/noaa_il_weekly_agg.csv")  # historical weather pool for the block bootstrap
labor_df   = pd.read_csv("../../data/raw/no_of_worker2.csv")  # labor observations; the "Value" column is used


In [11]:
# Fit the weather / labor / forecast-error samplers for the simulated year.
# NOTE(review): `forecast_df` is not defined in any cell of this notebook, so this
# cell raises NameError on a fresh kernel. Load a DataFrame with "actual" and
# "predicted" columns before running it.
uncertainty_models = prepare_uncertainty_models(
    weather_df=weather_df,
    forecast_df=forecast_df,
    labor_df=labor_df,
    target_year=2017    # year being simulated; keep in sync with the target_year passed to run_single_simulation
)


NameError: name 'forecast_df' is not defined

In [10]:
# Fixed solver settings shared by every simulation; run_single_simulation
# forwards them to build_and_solve_schedule_v4 as keyword arguments.
base_params = dict(
    base_planter_capacity=1400,
    base_harvester_capacity=950,
    labor_plant_per_acre=0.15,
    labor_harvest_per_acre=0.20,
    early_penalty_weight=10,
    late_penalty_weight=5,
    time_limit=60,
)


In [None]:
# Smoke-test one replicate before running a batch of simulations.
# The result is either the metrics dict or, when anything inside the
# simulation raised, a dict holding "sim_id" and "error".
single_result = run_single_simulation(
    sim_id=1,
    base_wm=base_wm,
    fields_path="../../data/processed/illinois_corn_fields_clean.csv",
    base_params=base_params,
    uncertainty_models=uncertainty_models,
    target_year=2017
)

# Display the result as the cell output.
single_result
