In [2]:
# ==========================================
# NOTEBOOK 05: DIGITAL TWIN TRAINING (MACHINE LEARNING)
# ==========================================
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_absolute_error, r2_score
import joblib
import os

print("[SYSTEM] Initializing system and loading historical data...")

# 1. LOAD DATA & APPLY THERMODYNAMICS
# Map the Spanish CSV headers onto the English column names used by the
# rest of this cell. The rework column is only mapped for the batter file,
# so it is kept out of the shared mapping.
_BASE_COLUMNS = {
    'Peso (g)': 'Weight_g',
    'Temperatura (°C)': 'Temp_C',
    'Viscosidad (mPa.s)': 'Viscosity_mPas',
    'Presión (bar)': 'Pressure_bar',
}
_BATTER_COLUMNS = {**_BASE_COLUMNS, '% Reproceso': 'Rework_pct'}

# Each sampling file is read and renamed in one step; rename() silently
# skips any header that is not present in a given file.
df_batter = pd.read_csv('../data/Muestreo_Depositado_Presion.csv').rename(columns=_BATTER_COLUMNS)
df_cream = pd.read_csv('../data/Muestreo_Crema_Presion_V2.csv').rename(columns=_BASE_COLUMNS)
df_jam = pd.read_csv('../data/Muestreo_Mermelada_Presion.csv').rename(columns=_BASE_COLUMNS)

# Rework adjustment for Batter (labelled "Poiseuille physics" by the author;
# NOTE(review): the code itself is a plain linear correction - confirm the
# physical derivation and units of the two factors below).
VISCOSITY_SHIFT_PER_REWORK = 60   # added to Viscosity_mPas per unit of rework deviation
WEIGHT_SHIFT_PER_REWORK = 0.25    # subtracted from Weight_g per unit of rework deviation

# Deviation of each sample's rework percentage from the dataset mean.
rework_pct = df_batter['Rework_pct']
rework_diff = rework_pct.sub(rework_pct.mean())

# Above-average rework raises viscosity and lowers the deposited weight;
# below-average rework does the opposite.
df_batter['Viscosity_mPas'] = df_batter['Viscosity_mPas'].add(rework_diff.mul(VISCOSITY_SHIFT_PER_REWORK))
df_batter['Weight_g'] = df_batter['Weight_g'].sub(rework_diff.mul(WEIGHT_SHIFT_PER_REWORK))

# Registry of training sets: stage name -> source frame plus the ordered
# list of feature columns fed to that stage's model. Only Batter carries
# the rework percentage as a feature.
datasets = {
    'Batter': dict(
        data=df_batter,
        features=['Temp_C', 'Viscosity_mPas', 'Rework_pct', 'Pressure_bar'],
    ),
    'Cream': dict(
        data=df_cream,
        features=['Temp_C', 'Viscosity_mPas', 'Pressure_bar'],
    ),
    'Jam': dict(
        data=df_jam,
        features=['Temp_C', 'Viscosity_mPas', 'Pressure_bar'],
    ),
}

# 2. TRAIN ML MODELS & EXPORT
# Fits one random-forest regressor per stage to predict Weight_g, saves it
# under ../models and reports the mean absolute error on the held-out rows.
os.makedirs('../models', exist_ok=True)
print("\n=== TRAINING DIGITAL TWINS ===")

for stage, info in datasets.items():
    frame = info['data']
    X = frame[info['features']]
    y = frame['Weight_g']

    # 80/20 train/test split with a fixed seed so reruns are reproducible.
    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=0.2, random_state=42
    )

    model = RandomForestRegressor(
        n_estimators=150,
        max_depth=10,
        random_state=42,
    )
    model.fit(X_train, y_train)

    # Score on the held-out rows, then persist the fitted model; the file
    # name is derived from the lower-cased stage name.
    y_pred = model.predict(X_test)
    model_path = f'../models/optimizer_{stage.lower()}.pkl'
    joblib.dump(model, model_path)
    mae = mean_absolute_error(y_test, y_pred)
    print(f"[SUCCESS] [{stage}] Trained & Exported to .pkl | Error (MAE): {mae:.4f}g")

print("\n[SYSTEM] Phase 05 Complete. Models are ready for the interactive app.")

[SYSTEM] Initializing system and loading historical data...

=== TRAINING DIGITAL TWINS ===
[SUCCESS] [Batter] Trained & Exported to .pkl | Error (MAE): 0.7306g
[SUCCESS] [Cream] Trained & Exported to .pkl | Error (MAE): 0.4713g
[SUCCESS] [Jam] Trained & Exported to .pkl | Error (MAE): 0.4058g

[SYSTEM] Phase 05 Complete. Models are ready for the interactive app.
