# Importing Packages

In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

# loading the data

In [2]:
train_df = pd.read_csv('df_train.csv')

In [3]:
train_df.shape

(8763, 49)

In [4]:
test_df = pd.read_csv('df_test.csv')

In [5]:
import pickle
model_load_path = "saved_ridge.pkl"
with open(model_load_path,'rb') as file:
    unpickled_model = pickle.load(file)

In [6]:
# get the keys
unpickled_model.keys()

dict_keys(['mse_mlr_train', 'mse_mlr_test', 'rmse_mlr', 'mlr', 'test_clean_df', 'train_clean_df', 'mse_ridge_training', 'mse_ridge_test', 'rmse_ridge', 'ridge'])

In [7]:
# retrieve train data and test data
train_clean_df = unpickled_model['train_clean_df']
test_clean_df = unpickled_model['test_clean_df']

In [8]:
train_clean_df.shape

(8763, 51)

In [9]:
test_clean_df.shape

(2920, 50)

# Random Forest Model

In [44]:
# separate data into prdictors and respons
X = train_clean_df.drop("load_shortfall_3h", axis=1)
y = train_clean_df['load_shortfall_3h']

In [45]:
# train test split
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split

# standardize features
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)

# Train test split
x_train, x_test, y_train,y_test = train_test_split(X_scaled,y,test_size=0.2,random_state=42)

Without hyperparameter tuning

In [47]:
from sklearn.ensemble import RandomForestRegressor

# Our forest consists of 100 trees with a max depth of 5 in this example
RF = RandomForestRegressor(n_estimators=100, max_depth=None)
RF.fit(x_train,y_train)

RandomForestRegressor()

In [49]:
from sklearn.metrics import mean_squared_error
# Get predictions
y_pred = RF.predict(x_test)

# Compute RMSE
print("RMSE without tuning:",np.sqrt(mean_squared_error(y_test,y_pred)))

RMSE without tuning: 3657.303585556994


Gridsearchcv tuning

In [55]:
from sklearn.ensemble import RandomForestRegressor

# Our forest consists of 100 trees with a max depth of 5 in this example
grid_rf = RandomForestRegressor(bootstrap=False, max_features='sqrt', max_depth=None, min_samples_split=2, n_estimators=100)
grid_rf.fit(x_train,y_train)

RandomForestRegressor(bootstrap=False, max_features='sqrt')

In [56]:
from sklearn.metrics import mean_squared_error
# Get predictions
y_pred_grid = grid_rf.predict(x_test)

# Compute RMSE
print("RMSE without tuning:",np.sqrt(mean_squared_error(y_test,y_pred_grid)))

RMSE without tuning: 3645.540840532587


Randomized search

In [57]:
from sklearn.ensemble import RandomForestRegressor

# Our forest consists of 100 trees with a max depth of 5 in this example
random_rf = RandomForestRegressor(bootstrap=True, max_features='auto', max_depth=None, min_samples_split=4, n_estimators=100)
random_rf.fit(x_train,y_train)

RandomForestRegressor(min_samples_split=4)

In [58]:
from sklearn.metrics import mean_squared_error
# Get predictions
y_pred_random = random_rf.predict(x_test)

# Compute RMSE
print("RMSE without tuning:",np.sqrt(mean_squared_error(y_test,y_pred_random)))

RMSE without tuning: 3676.9336739795444
