# Power Consumption Prediction (Advanced Model)

In [None]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import r2_score
import pickle

### 1. Load Data

In [None]:
# Read the raw measurements from 'power.csv' in the current working directory.
# NOTE(review): the cells below expect at least the columns 'Datetime',
# 'PowerConsumption', 'Temperature', 'WindSpeed', 'GeneralDiffuseFlows' and
# 'DiffuseFlows' to be present -- confirm against the actual file.
df = pd.read_csv(filepath_or_buffer='power.csv')

### 2. Preprocessing & Feature Engineering

In [None]:
# Parse the 'Datetime' column into real timestamps and make it the index so
# the frame can be handled as a time series.
df['Datetime'] = pd.to_datetime(df['Datetime'])
df = df.set_index('Datetime')

# Put the data on a regular 10-minute grid; timestamps absent from the file
# become all-NaN rows. The row-count based lags created later rely on this
# fixed spacing.
# The '10min' alias is used because the single-letter 'T' alias for minutes
# is deprecated since pandas 2.2 (FutureWarning) and slated for removal;
# 'min' is accepted by older pandas versions as well.
df = df.asfreq('10min')

# Fill missing values (including rows introduced by asfreq) by interpolating
# between neighbouring observations, weighted by the time gap between them.
df = df.interpolate(method='time')

### 3. **INNOVATION: Create Lag and Rolling Features**

In [None]:
# All offsets below are counted in rows and assume one row per 10 minutes.

# Consumption one hour earlier: 6 rows * 10 min = 60 min.
df['Power_lag_1hr'] = df['PowerConsumption'].shift(periods=6)

# Consumption one day earlier: 144 rows * 10 min = 1440 min (24 hours).
df['Power_lag_24hr'] = df['PowerConsumption'].shift(periods=144)

# Mean temperature over the 18 most recent rows (current row included):
# 18 rows * 10 min = 180 min (3 hours).
df['Temp_rolling_avg_3hr'] = df['Temperature'].rolling(18).mean()

### 4. Create Original Time Feature

In [None]:
# Time of day expressed in fractional hours (e.g. 13:30 -> 13.5), derived
# from the hour and minute of the datetime index.
df['time_fraction'] = df.index.minute / 60.0 + df.index.hour

### 5. Final Data Prep

In [None]:
# Discard every row that still holds a missing value in any column (for
# example the leading rows left empty by shift() and rolling()), printing the
# row count before and after so the loss is visible.
print(f"Rows before dropping NaN: {len(df)}")
df = df.dropna(axis=0, how='any')
print(f"Rows after dropping NaN: {len(df)}")

# Columns fed to the model, in this order: four columns taken directly from
# the data, the time-of-day feature, and the three engineered lag/rolling
# features.
features = [
    'Temperature', 'WindSpeed', 'GeneralDiffuseFlows', 'DiffuseFlows',
    'time_fraction',
    'Power_lag_1hr', 'Power_lag_24hr', 'Temp_rolling_avg_3hr',
]

# X: feature matrix; y: the target the model learns to predict.
X = df.loc[:, features]
y = df.loc[:, 'PowerConsumption']

### 6. Train/Test Split

In [None]:
# Chronological hold-out: shuffle=False keeps the row order, so the earlier
# 80% of rows become the training set and the most recent 20% the test set.
# Shuffling would let the model train on rows that come after test rows.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    shuffle=False,
)

### 7. Model Training (Random Forest)

In [None]:
# Random forest with 100 trees; the fixed random_state makes runs repeatable
# and n_jobs=-1 lets scikit-learn use every available CPU core.
model = RandomForestRegressor(
    n_jobs=-1,
    random_state=42,
    n_estimators=100,
)
# Fit on the chronologically earlier portion of the data only.
model.fit(X_train, y_train)

### 8. Model Evaluation

In [None]:
# Predict on the held-out rows and score the predictions with the
# coefficient of determination.
y_pred = model.predict(X_test)
r2 = r2_score(y_test, y_pred)
# The label uses a proper superscript two; the mis-encoded "Â²" sequence
# (UTF-8 bytes decoded as Latin-1) would otherwise be printed verbatim.
print(f"Model R-squared (R²) score: {r2:.4f}")

### 9. Save the Advanced Model

In [None]:
# Serialize the fitted model to 'model_advanced.pkl' (binary mode) under its
# own file name; the context manager closes the file even if dumping fails.
with open('model_advanced.pkl', mode='wb') as model_file:
    pickle.dump(model, model_file)