In [3]:
import pandas as pd
import joblib
import os
import gc # Garbage collector to free up RAM
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_absolute_error

# Train a small demand model for campus 1 and export artifacts for the dashboard.
#
# Pipeline: load a row-capped slice of the raw consumption and weather CSVs,
# keep campus 1, inner-join on (campus_id, timestamp), derive hour and
# day-of-week features, fit a small RandomForestRegressor, report hold-out MAE,
# then save the fitted model and the last 200 cleaned rows.

print("1. Loading a memory-safe slice of the dataset...")
# Only load exactly the columns we need to save RAM
cons_cols = ['campus_id', 'timestamp', 'demand_kVA']
weather_cols = ['campus_id', 'timestamp', 'apparent_temperature', 'relative_humidity', 'wind_speed']

# Read only a fraction of the data to prevent 2GB RAM crash.
# NOTE: nrows caps the rows read *before* the campus filter below, so how many
# campus-1 rows survive depends on how the raw files are ordered.
cons_df = pd.read_csv('../data/raw/nmi_consumption.csv', usecols=cons_cols, nrows=100000)
weather_df = pd.read_csv('../data/raw/weather_data.csv', usecols=weather_cols, nrows=200000)

print("2. Filtering and merging data...")
cons_df = cons_df[cons_df['campus_id'] == 1.0]
weather_df = weather_df[weather_df['campus_id'] == 1.0]

# Parse both timestamp columns to datetimes so the join keys compare equal
# and the .dt accessors below work.
cons_df['timestamp'] = pd.to_datetime(cons_df['timestamp'])
weather_df['timestamp'] = pd.to_datetime(weather_df['timestamp'])

# Inner join: keep only timestamps present in both the consumption and the
# weather slice.
df_merged = pd.merge(cons_df, weather_df, on=['campus_id', 'timestamp'], how='inner')

# CRITICAL: Delete the raw data variables and empty the RAM immediately
del cons_df, weather_df
gc.collect()

print("3. Engineering features...")
df_merged['hour'] = df_merged['timestamp'].dt.hour
df_merged['day_of_week'] = df_merged['timestamp'].dt.dayofweek
df_merged = df_merged.dropna()

# Because both files are row-capped independently and then inner-joined, the
# result can legitimately be empty. Fail here with an explicit message instead
# of letting train_test_split raise a less obvious error about sample counts.
if df_merged.empty:
    raise ValueError(
        "No rows left after filtering to campus 1, merging on timestamp and "
        "dropping NaNs; the loaded slices of the two CSVs may not overlap."
    )

print("4. Training a lightweight AI model for your i5 processor...")
X = df_merged[['apparent_temperature', 'relative_humidity', 'wind_speed', 'hour', 'day_of_week']]
y = df_merged['demand_kVA']

# NOTE(review): train_test_split shuffles rows by default, so the hold-out set
# is interleaved in time with the training set. Confirm this is the intended
# evaluation for time-ordered demand data.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Very small, single-core Random Forest so the laptop doesn't freeze
rf_model = RandomForestRegressor(n_estimators=15, max_depth=10, n_jobs=1, random_state=42)
rf_model.fit(X_train, y_train)

mae = mean_absolute_error(y_test, rf_model.predict(X_test))
print(f"✅ AI Engine Trained! Model MAE: {mae:.2f} kVA")

print("5. Saving model and clean data for the dashboard...")
os.makedirs('../artifacts', exist_ok=True)
joblib.dump(rf_model, '../artifacts/econudge_rf_model.pkl')

os.makedirs('../data/processed', exist_ok=True)
# NOTE(review): tail(200) takes the last 200 rows in merge order; that is the
# most recent 200 readings only if the consumption CSV is sorted by timestamp
# -- TODO confirm.
df_merged.tail(200).to_csv('../data/processed/campus_1_clean.csv', index=False)

print("🚀 All done! The backend is ready.")

1. Loading a memory-safe slice of the dataset...
2. Filtering and merging data...
3. Engineering features...
4. Training a lightweight AI model for your i5 processor...
✅ AI Engine Trained! Model MAE: 45.19 kVA
5. Saving model and clean data for the dashboard...
🚀 All done! The backend is ready.
