# Energy Usage Forecasting — Demo Notebook

This notebook demonstrates a simple workflow: load the data, preprocess it, engineer features, train a model, evaluate it, and save the results. Replace the sample CSV with your real data in `data/`.

In [None]:
from pathlib import Path

import joblib
import numpy as np
import pandas as pd
from matplotlib import pyplot as plt
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_absolute_error, r2_score
from sklearn.model_selection import train_test_split

%matplotlib inline

In [None]:
# Read the sample CSV and convert the 'date' column to datetime in a single
# chain, so the frame is fully typed as soon as `df` exists.
df = (
    pd.read_csv(r'/mnt/data/data/sample_energy.csv')
    .assign(date=lambda frame: pd.to_datetime(frame['date']))
)
df.head()

## Exploratory checks

In [None]:
# Summary statistics and per-column null counts for a quick sanity check.
summary_stats = df.describe()
print(summary_stats)
print('\nMissing values:\n', df.isna().sum())

# Time-series view of usage, drawn on an explicitly created axes object.
fig, ax = plt.subplots(figsize=(10,3))
df.plot(x='date', y='energy_kwh', ax=ax)
ax.set_title('Daily energy usage (kWh)')
plt.show()

## Preprocessing & Feature Engineering

In [None]:
# A lag feature is only meaningful on chronologically ordered rows: shift(1)
# takes the value from the previous *row*, not the previous *date*. Sort by
# date first (stable, so same-day rows keep their file order) and reset the
# index so positional and label order agree. A no-op for already-sorted files.
df = df.sort_values('date', kind='stable').reset_index(drop=True)

# Calendar features derived from the timestamp.
df['day_of_week'] = df['date'].dt.weekday
df['day_of_year'] = df['date'].dt.dayofyear

# Previous row's usage; the first row has no predecessor, so it is filled
# with the series mean.
# NOTE(review): this mean is computed over the whole frame, so it includes
# rows that end up in the test split — acceptable for a demo, but consider
# dropping the first row or using a train-only statistic for real data.
df['lag1'] = df['energy_kwh'].shift(1)
df['lag1'] = df['lag1'].fillna(df['energy_kwh'].mean())
df.head()

## Train / Test split (time-based)

In [None]:
# Chronological hold-out: the earliest 80% of rows form the training set and
# the most recent 20% form the test set.
df_sorted = df.sort_values(by='date')
split_idx = int(len(df_sorted)*0.8)
train, test = df_sorted.iloc[:split_idx], df_sorted.iloc[split_idx:]
print(
    'Train days:', len(train),
    'Test days:', len(test),
)

In [None]:
# Model inputs (feature columns) and the regression target column.
features = ['temperature', 'day_of_week', 'day_of_year', 'lag1']
target_col = 'energy_kwh'

# Build the design matrices and target vectors for both splits.
X_train, y_train = train.loc[:, features], train[target_col]
X_test, y_test = test.loc[:, features], test[target_col]

(X_train.shape, X_test.shape)

## Model training

In [None]:
# Fit a random-forest regressor on the training window; the fixed
# random_state keeps repeated runs reproducible.
model = RandomForestRegressor(n_estimators=100, random_state=42)
model.fit(X_train, y_train)

# joblib.dump does not create missing parent directories, so make sure the
# results folder exists first — otherwise a fresh environment fails with
# FileNotFoundError after the (potentially slow) fit has already run.
model_path = '/mnt/data/results/energy_model.joblib'
Path(model_path).parent.mkdir(parents=True, exist_ok=True)
joblib.dump(model, model_path)
print(f'Model trained and saved to {model_path}')

## Evaluation

In [None]:
# Score the held-out period: mean absolute error (in kWh) and R².
preds = model.predict(X_test)
mae = mean_absolute_error(y_test, preds)
r2 = r2_score(y_test, preds)
print(f'MAE: {mae:.3f} kWh')
print(f'R2: {r2:.3f}')

# Overlay actual and predicted usage across the test dates.
test_dates = test['date']
fig, ax = plt.subplots(figsize=(10,3))
ax.plot(test_dates, y_test.to_numpy(), label='Actual')
ax.plot(test_dates, preds, label='Predicted')
ax.legend()
ax.set_title('Actual vs Predicted Energy Usage (Test set)')
plt.xticks(rotation=45)
fig.tight_layout()
plt.show()

## Next steps / Notes
- Replace the sample CSV with your real smart-meter data.
- Tune hyperparameters and add features (weather forecasts, holidays).
- Set up periodic retraining and monitoring in production.