# Energy Prediction Model for House Energy Consumption Optimization

In [19]:
import pandas as pd
import numpy as np
from sklearn.preprocessing import StandardScaler

In [20]:
df = pd.read_csv(r'C:\Users\LENOVO\Energyconsumption\Backend\ml_model\energydata_complete.csv')
df.head()



Unnamed: 0,Appliances,lights,T_in,RH_in,T_out,RH_out,Windspeed
0,60,30,19.89,47.596667,6.6,92.0,7.0
1,60,30,19.89,46.693333,6.48,92.0,6.666667
2,50,30,19.89,46.3,6.37,92.0,6.333333
3,50,40,19.89,46.066667,6.25,92.0,6.0
4,60,40,19.89,46.333333,6.13,92.0,5.666667


In [21]:
cols = [col for col in df.columns if col != 'date']
df = df[cols].round(2)

In [22]:
# Select features and target
features = ['lights', 'T_in', 'RH_in', 'T_out', 'Windspeed']  # Excluded RH_out
X = df[features]
y = df['Appliances']  # Target is the raw numerical value in Wh

In [24]:
# Scale the features
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)
scaled_df = pd.DataFrame(X_scaled, columns=features)
scaled_df['Appliances'] = y

In [25]:
# Split the data
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(X_scaled, y, test_size=0.3, random_state=42)

In [26]:
# Verify the features
print("X_train head:")
print(pd.DataFrame(X_train, columns=features).head())
print("\nX_test head:")
print(pd.DataFrame(X_test, columns=features).head())

X_train head:
     lights      T_in     RH_in     T_out  Windspeed
0 -0.549044  1.180176 -2.259513 -0.369220  -1.398616
1 -0.549044 -0.521587 -0.380509 -1.357738  -0.856082
2  1.677573  0.728145  0.938589  0.182511   0.409831
3  1.677573  1.007341 -1.288378 -0.525544  -0.613750
4 -0.549044  0.143164 -1.645831 -1.047390  -0.613750

X_test head:
     lights      T_in     RH_in     T_out  Windspeed
0 -0.549044 -1.126511 -0.981537 -1.465786  -1.036927
1 -0.549044  0.409065 -0.592451 -1.909469  -0.675238
2 -0.549044  0.056747  0.135110  0.504354   0.771520
3 -0.549044 -0.681127  0.555829  0.366421   0.048141
4 -0.549044  0.628433  1.008182  0.194005   0.228986


In [27]:
# Train the RandomForestRegressor with hyperparameter tuning
from sklearn.ensemble import RandomForestRegressor
from sklearn.model_selection import GridSearchCV
from sklearn.metrics import mean_squared_error, r2_score

In [28]:
# Define the parameter grid for tuning
param_grid = {
    'n_estimators': [100, 200],
    'max_depth': [10, 20, None],
    'min_samples_split': [2, 5],
    'min_samples_leaf': [1, 2]
}

In [29]:
# Initialize the RandomForestRegressor
rf = RandomForestRegressor(random_state=42)

In [30]:
# Perform GridSearchCV
grid_search = GridSearchCV(estimator=rf, param_grid=param_grid, cv=5, scoring='neg_mean_squared_error', n_jobs=-1)
grid_search.fit(X_train, y_train)

In [31]:
# Get the best model
best_rf = grid_search.best_estimator_
print("Best parameters:", grid_search.best_params_)

Best parameters: {'max_depth': None, 'min_samples_leaf': 1, 'min_samples_split': 2, 'n_estimators': 200}


In [32]:
# Make predictions with the best model
y_train_pred = best_rf.predict(X_train)
y_test_pred = best_rf.predict(X_test)

In [33]:
# Calculate metrics
train_mse = mean_squared_error(y_train, y_train_pred)
test_mse = mean_squared_error(y_test, y_test_pred)
train_r2 = r2_score(y_train, y_train_pred)
test_r2 = r2_score(y_test, y_test_pred)

In [34]:
# Print metrics
print('Random Forest Training MSE: {:.2f}'.format(train_mse))
print('Random Forest Testing MSE: {:.2f}'.format(test_mse))
print('Random Forest Training R2: {:.2f}'.format(train_r2))
print('Random Forest Testing R2: {:.2f}'.format(test_r2))

Random Forest Training MSE: 1174.13
Random Forest Testing MSE: 8055.66
Random Forest Training R2: 0.92
Random Forest Testing R2: 0.44


In [35]:
# Save the trained model and the scaler
import pickle
with open('random_forest_model.pkl', 'wb') as file:
    pickle.dump(best_rf, file)
with open('scaler.pkl', 'wb') as file:
    pickle.dump(scaler, file)

print("Model saved as random_forest_model.pkl")
print("Scaler saved as scaler.pkl")

Model saved as random_forest_model.pkl
Scaler saved as scaler.pkl
