In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import os
import warnings
warnings.filterwarnings('ignore')
from sklearn.preprocessing import LabelEncoder
from scipy.stats import zscore
from sklearn.preprocessing import power_transform
from statsmodels.tsa.stattools import adfuller
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.metrics import r2_score,mean_squared_error,mean_absolute_error
from sklearn.metrics import accuracy_score,classification_report,confusion_matrix
from sklearn.metrics import accuracy_score, recall_score, precision_score, f1_score
from sklearn.feature_selection import SelectKBest, mutual_info_classif
from sklearn.feature_selection import SelectKBest, chi2
from sklearn.feature_selection import RFE
from sklearn.ensemble import RandomForestRegressor
from sklearn.decomposition import PCA
from xgboost import XGBRegressor
from sklearn.tree import DecisionTreeRegressor
from sklearn.model_selection import GridSearchCV
import pickle

# Data Loading

Source:
    https://www.kaggle.com/datasets/nicholasjhana/energy-consumption-generation-prices-and-weather

    Sample (550 rows taken as sample)

In [2]:
weather_pred=pd.read_csv('weather_features.csv')
weather_pred

Unnamed: 0,dt_iso,city_name,temp,temp_min,temp_max,pressure,humidity,wind_speed,wind_deg,rain_1h,rain_3h,snow_3h,clouds_all,weather_id,weather_main,weather_description,weather_icon
0,2015-01-01 00:00:00+01:00,Valencia,270.4750,270.4750,270.4750,1001,77,1,62,0.0,0,0,0,800,clear,sky is clear,01n
1,2015-01-01 01:00:00+01:00,Valencia,270.4750,270.4750,270.4750,1001,77,1,62,0.0,0,0,0,800,clear,sky is clear,01n
2,2015-01-01 02:00:00+01:00,Valencia,269.6860,269.6860,269.6860,1002,78,0,23,0.0,0,0,0,800,clear,sky is clear,01n
3,2015-01-01 03:00:00+01:00,Valencia,269.6860,269.6860,269.6860,1002,78,0,23,0.0,0,0,0,800,clear,sky is clear,01n
4,2015-01-01 04:00:00+01:00,Valencia,269.6860,269.6860,269.6860,1002,78,0,23,0.0,0,0,0,800,clear,sky is clear,01n
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
544,2015-01-23 15:00:00+01:00,Valencia,285.7500,285.7500,285.7500,1018,31,3,22,0.0,0,0,0,800,clear,sky is clear,01d
545,2015-01-23 16:00:00+01:00,Valencia,286.0000,286.0000,286.0000,1018,32,4,176,0.0,0,0,0,800,clear,sky is clear,01
546,2015-01-23 17:00:00+01:00,Valencia,286.2500,286.2500,286.2500,1018,33,6,330,0.0,0,0,0,800,clear,sky is clear,01d
547,2015-01-23 18:00:00+01:00,Valencia,283.4785,283.4785,283.4785,1019,42,6,334,0.0,0,0,0,800,clear,sky is clear,01


# DataPreprocessing 

    1. Extracting date/time/month/year/hour/day/weekday/quarter

In [3]:
weather_pred['date']=pd.to_datetime(weather_pred['dt_iso'].str[:10])
weather_pred['time']=pd.to_datetime(weather_pred['dt_iso'].str[11:19])
weather_pred['month']=weather_pred['date'].dt.month
weather_pred['year']=weather_pred['date'].dt.year
weather_pred['hour']=weather_pred['time'].dt.hour
weather_pred['day']=weather_pred['date'].dt.day
weather_pred['weekday']=weather_pred['date'].dt.weekday
weather_pred['quarter']=weather_pred['date'].dt.quarter

In [4]:
weather_pred

Unnamed: 0,dt_iso,city_name,temp,temp_min,temp_max,pressure,humidity,wind_speed,wind_deg,rain_1h,...,weather_description,weather_icon,date,time,month,year,hour,day,weekday,quarter
0,2015-01-01 00:00:00+01:00,Valencia,270.4750,270.4750,270.4750,1001,77,1,62,0.0,...,sky is clear,01n,2015-01-01,2024-03-20 00:00:00,1,2015,0,1,3,1
1,2015-01-01 01:00:00+01:00,Valencia,270.4750,270.4750,270.4750,1001,77,1,62,0.0,...,sky is clear,01n,2015-01-01,2024-03-20 01:00:00,1,2015,1,1,3,1
2,2015-01-01 02:00:00+01:00,Valencia,269.6860,269.6860,269.6860,1002,78,0,23,0.0,...,sky is clear,01n,2015-01-01,2024-03-20 02:00:00,1,2015,2,1,3,1
3,2015-01-01 03:00:00+01:00,Valencia,269.6860,269.6860,269.6860,1002,78,0,23,0.0,...,sky is clear,01n,2015-01-01,2024-03-20 03:00:00,1,2015,3,1,3,1
4,2015-01-01 04:00:00+01:00,Valencia,269.6860,269.6860,269.6860,1002,78,0,23,0.0,...,sky is clear,01n,2015-01-01,2024-03-20 04:00:00,1,2015,4,1,3,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
544,2015-01-23 15:00:00+01:00,Valencia,285.7500,285.7500,285.7500,1018,31,3,22,0.0,...,sky is clear,01d,2015-01-23,2024-03-20 15:00:00,1,2015,15,23,4,1
545,2015-01-23 16:00:00+01:00,Valencia,286.0000,286.0000,286.0000,1018,32,4,176,0.0,...,sky is clear,01,2015-01-23,2024-03-20 16:00:00,1,2015,16,23,4,1
546,2015-01-23 17:00:00+01:00,Valencia,286.2500,286.2500,286.2500,1018,33,6,330,0.0,...,sky is clear,01d,2015-01-23,2024-03-20 17:00:00,1,2015,17,23,4,1
547,2015-01-23 18:00:00+01:00,Valencia,283.4785,283.4785,283.4785,1019,42,6,334,0.0,...,sky is clear,01,2015-01-23,2024-03-20 18:00:00,1,2015,18,23,4,1


In [5]:
weather_pred.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 549 entries, 0 to 548
Data columns (total 25 columns):
 #   Column               Non-Null Count  Dtype         
---  ------               --------------  -----         
 0   dt_iso               549 non-null    object        
 1   city_name            549 non-null    object        
 2   temp                 549 non-null    float64       
 3   temp_min             549 non-null    float64       
 4   temp_max             549 non-null    float64       
 5   pressure             549 non-null    int64         
 6   humidity             549 non-null    int64         
 7   wind_speed           549 non-null    int64         
 8   wind_deg             549 non-null    int64         
 9   rain_1h              549 non-null    float64       
 10  rain_3h              549 non-null    int64         
 11  snow_3h              549 non-null    int64         
 12  clouds_all           549 non-null    int64         
 13  weather_id           549 non-null  

In [6]:
# Prepare the data for analysis.
# Drop the columns that are not required for analysis.
weather_pred.drop(['dt_iso','city_name','weather_description','date','time'],axis=1,inplace=True)

In [7]:
weather_pred

Unnamed: 0,temp,temp_min,temp_max,pressure,humidity,wind_speed,wind_deg,rain_1h,rain_3h,snow_3h,clouds_all,weather_id,weather_main,weather_icon,month,year,hour,day,weekday,quarter
0,270.4750,270.4750,270.4750,1001,77,1,62,0.0,0,0,0,800,clear,01n,1,2015,0,1,3,1
1,270.4750,270.4750,270.4750,1001,77,1,62,0.0,0,0,0,800,clear,01n,1,2015,1,1,3,1
2,269.6860,269.6860,269.6860,1002,78,0,23,0.0,0,0,0,800,clear,01n,1,2015,2,1,3,1
3,269.6860,269.6860,269.6860,1002,78,0,23,0.0,0,0,0,800,clear,01n,1,2015,3,1,3,1
4,269.6860,269.6860,269.6860,1002,78,0,23,0.0,0,0,0,800,clear,01n,1,2015,4,1,3,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
544,285.7500,285.7500,285.7500,1018,31,3,22,0.0,0,0,0,800,clear,01d,1,2015,15,23,4,1
545,286.0000,286.0000,286.0000,1018,32,4,176,0.0,0,0,0,800,clear,01,1,2015,16,23,4,1
546,286.2500,286.2500,286.2500,1018,33,6,330,0.0,0,0,0,800,clear,01d,1,2015,17,23,4,1
547,283.4785,283.4785,283.4785,1019,42,6,334,0.0,0,0,0,800,clear,01,1,2015,18,23,4,1


    4. Encoding the columns

In [8]:
# label encoding
le=LabelEncoder()
weather_pred['weather_main']=le.fit_transform(weather_pred['weather_main'])
weather_pred['weather_icon']=le.fit_transform(weather_pred['weather_icon'])
weather_pred

Unnamed: 0,temp,temp_min,temp_max,pressure,humidity,wind_speed,wind_deg,rain_1h,rain_3h,snow_3h,clouds_all,weather_id,weather_main,weather_icon,month,year,hour,day,weekday,quarter
0,270.4750,270.4750,270.4750,1001,77,1,62,0.0,0,0,0,800,0,2,1,2015,0,1,3,1
1,270.4750,270.4750,270.4750,1001,77,1,62,0.0,0,0,0,800,0,2,1,2015,1,1,3,1
2,269.6860,269.6860,269.6860,1002,78,0,23,0.0,0,0,0,800,0,2,1,2015,2,1,3,1
3,269.6860,269.6860,269.6860,1002,78,0,23,0.0,0,0,0,800,0,2,1,2015,3,1,3,1
4,269.6860,269.6860,269.6860,1002,78,0,23,0.0,0,0,0,800,0,2,1,2015,4,1,3,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
544,285.7500,285.7500,285.7500,1018,31,3,22,0.0,0,0,0,800,0,1,1,2015,15,23,4,1
545,286.0000,286.0000,286.0000,1018,32,4,176,0.0,0,0,0,800,0,0,1,2015,16,23,4,1
546,286.2500,286.2500,286.2500,1018,33,6,330,0.0,0,0,0,800,0,1,1,2015,17,23,4,1
547,283.4785,283.4785,283.4785,1019,42,6,334,0.0,0,0,0,800,0,0,1,2015,18,23,4,1


In [9]:
weather_pred = pd.DataFrame(power_transform(weather_pred, method='yeo-johnson'), columns=weather_pred.columns)
weather_pred

Unnamed: 0,temp,temp_min,temp_max,pressure,humidity,wind_speed,wind_deg,rain_1h,rain_3h,snow_3h,clouds_all,weather_id,weather_main,weather_icon,month,year,hour,day,weekday,quarter
0,-2.130012,-2.130012,-2.130012,-2.087219e-14,0.607204,-0.864232,-2.149694,-0.256942,0.0,0.0,-0.803895,0.116101,-0.76672,-0.214447,0.0,0.0,-1.904047,-1.847277,0.037765,0.0
1,-2.130012,-2.130012,-2.130012,-2.087219e-14,0.607204,-0.864232,-2.149694,-0.256942,0.0,0.0,-0.803895,0.116101,-0.76672,-0.214447,0.0,0.0,-1.651731,-1.847277,0.037765,0.0
2,-2.299595,-2.299595,-2.299595,-1.948441e-14,0.663627,-2.738422,-2.255995,-0.256942,0.0,0.0,-0.803895,0.116101,-0.76672,-0.214447,0.0,0.0,-1.434315,-1.847277,0.037765,0.0
3,-2.299595,-2.299595,-2.299595,-1.948441e-14,0.663627,-2.738422,-2.255995,-0.256942,0.0,0.0,-0.803895,0.116101,-0.76672,-0.214447,0.0,0.0,-1.236856,-1.847277,0.037765,0.0
4,-2.299595,-2.299595,-2.299595,-1.948441e-14,0.663627,-2.738422,-2.255995,-0.256942,0.0,0.0,-0.803895,0.116101,-0.76672,-0.214447,0.0,0.0,-1.053022,-1.847277,0.037765,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
544,0.867362,0.867362,0.867362,1.942890e-15,-1.850405,0.417983,-2.257442,-0.256942,0.0,0.0,-0.803895,0.116101,-0.76672,-0.719072,0.0,0.0,0.566981,1.556753,0.532276,0.0
545,0.912252,0.912252,0.912252,1.942890e-15,-1.800659,0.737448,-1.268582,-0.256942,0.0,0.0,-0.803895,0.116101,-0.76672,-1.619615,0.0,0.0,0.694240,1.556753,0.532276,0.0
546,0.957019,0.957019,0.957019,1.942890e-15,-1.750698,1.151053,1.300765,-0.256942,0.0,0.0,-0.803895,0.116101,-0.76672,-0.719072,0.0,0.0,0.819400,1.556753,0.532276,0.0
547,0.453728,0.453728,0.453728,3.275158e-15,-1.292152,1.151053,1.388863,-0.256942,0.0,0.0,-0.803895,0.116101,-0.76672,-1.619615,0.0,0.0,0.942609,1.556753,0.532276,0.0


## Feature Selection

    Train/Test Splitting the Data

In [10]:
# columns: temp	temp_min, temp_max, pressure, humidity, wind_speed, wind_deg, rain_1h, rain_3h, snow_3h,clouds_all,	weather_id,weather_main,weather_icon,month,	year,hour,day,weekday,quarter

X= weather_pred.drop('temp',axis=1)
y= weather_pred['temp']

In [11]:
X_train,X_test,y_train,y_test=train_test_split(X,y,test_size=0.2,random_state=42)

In [12]:
k=5
model = RandomForestRegressor()
model.fit(X, y)
importances = model.feature_importances_
selected_feature_indices = np.argsort(importances)[-k:]
selected_features = X.columns[selected_feature_indices]

In [13]:
selected_features

Index(['day', 'wind_deg', 'humidity', 'temp_min', 'temp_max'], dtype='object')

# Model Selection and Training

### Depending on Data: Apply Suitable Models
    

    Apply Random Forest

In [14]:
# Apply random forest regressor on the features extracted from the above step.
rf = RandomForestRegressor()
rf.fit(X_train, y_train)
y_pred_rf = rf.predict(X_test)
y_pred_rf




array([-6.91683256e-01, -1.21950121e+00, -1.17970944e+00,  1.08722343e+00,
        3.71224030e-01, -4.23623629e-01,  1.42794137e+00,  8.12075337e-01,
        8.54921732e-01,  1.02838241e+00,  1.44283927e+00, -1.27983050e+00,
       -9.46275524e-01, -5.68062080e-01,  1.27622699e+00, -6.35170410e-01,
        1.71091796e-01, -2.17092655e+00, -9.21068062e-02, -2.47870563e-02,
        1.47467371e-01, -6.16740630e-01, -3.97673825e-01, -7.35836100e-01,
       -6.35170410e-01,  8.70868436e-01,  1.02724588e+00, -6.58978368e-03,
       -1.51437637e-01, -4.23330879e-01,  8.35778459e-01, -1.19856010e+00,
       -4.90363421e-01, -9.46275524e-01, -2.74103816e-01, -1.14662332e+00,
       -2.26966972e+00, -4.24034388e-01, -8.15529958e-01,  1.33245370e-01,
       -7.95655073e-01,  1.06000466e+00,  1.74618240e+00, -1.22250531e+00,
       -5.79351849e-01, -1.93512946e-01,  5.59046383e-01, -3.53667091e-01,
        1.55698986e+00,  5.21346243e-01, -5.40487066e-01, -6.81880011e-01,
        9.46653631e-01,  

    Model Evaluation: Random Forest Regressor

In [15]:

print('R2 score:',r2_score(y_test,y_pred_rf))
print('Mean Absolute Error:',mean_absolute_error(y_test,y_pred_rf))
print('Mean Squared Error:',mean_squared_error(y_test,y_pred_rf))
print('Root Mean Squared Error:',np.sqrt(mean_squared_error(y_test,y_pred_rf)))

R2 score: 0.9999405747542206
Mean Absolute Error: 0.004943421032607327
Mean Squared Error: 5.5910074251866284e-05
Root Mean Squared Error: 0.007477303942723359


In [16]:
print('Accuracy score:',rf.score(X_test,y_test))

Accuracy score: 0.9999405747542206


    Apply XGBoost

In [17]:
# apply xgboost regressor
xgb = XGBRegressor()
xgb.fit(X_train, y_train)
y_pred_xgb = xgb.predict(X_test)
y_pred_xgb


array([-0.68429613, -1.2609607 , -1.198984  ,  1.0868733 ,  0.34644493,
       -0.42407247,  1.4319285 ,  0.8335811 ,  0.8528752 ,  1.0617822 ,
        1.418614  , -1.2687613 , -0.94664794, -0.57606786,  1.2629614 ,
       -0.6362848 ,  0.17545748, -2.169089  , -0.12219261, -0.02623381,
        0.15592848, -0.647174  , -0.39152747, -0.7386956 , -0.6362848 ,
        0.8871129 ,  1.0703466 ,  0.01863823, -0.1562588 , -0.42038906,
        0.82296747, -1.1993845 , -0.51832235, -0.9454393 , -0.27191746,
       -1.1398311 , -2.2991025 , -0.42324513, -0.8193335 ,  0.13136177,
       -0.7891698 ,  1.0871425 ,  1.7154815 , -1.2608416 , -0.5776022 ,
       -0.18022189,  0.54315984, -0.38924247,  1.531399  ,  0.5273985 ,
       -0.5361079 , -0.68569946,  0.9214841 ,  0.77696764,  0.57975197,
       -1.8123077 , -0.40661824, -1.2047956 ,  1.5373126 , -0.22697082,
       -1.1444763 ,  1.6101233 , -2.1725419 , -1.1562362 , -0.774244  ,
       -1.2687613 , -0.42390302, -0.07791135, -0.33467302, -0.05

    Model Evaluation: XGBoost

In [18]:
print('R2 score:',r2_score(y_test,y_pred_rf))
print('Mean Absolute Error:',mean_absolute_error(y_test,y_pred_rf))
print('Mean Squared Error:',mean_squared_error(y_test,y_pred_rf))
print('Root Mean Squared Error:',np.sqrt(mean_squared_error(y_test,y_pred_rf)))

R2 score: 0.9999405747542206
Mean Absolute Error: 0.004943421032607327
Mean Squared Error: 5.5910074251866284e-05
Root Mean Squared Error: 0.007477303942723359


# Optimizing the above models by finding the best hyper parameters

    Random Forest (Optimised)

In [19]:
rf_op = RandomForestRegressor()

# Define the hyperparameter grid
parameters = {
    'n_estimators': [100, 200, 300],
    'max_depth': [None, 5, 10],
    'min_samples_split': [2, 4, 6],
    'min_samples_leaf': [1, 3, 5]
}

# Create the GridSearchCV object
grid_search = GridSearchCV(estimator=rf_op, param_grid=parameters, cv=5, n_jobs=-1)
grid_search.fit(X_train, y_train)

# Get the best parameters and best model
best_params = grid_search.best_params_
best_model = grid_search.best_estimator_

# Use the best model to make predictions
y_pred_rf_op = best_model.predict(X_test)



In [20]:
# Apply Random Forest Regressor on Optimised Parameters
rf_op = RandomForestRegressor()
rf_op.fit(X_train, y_train)
y_pred_rf_op = rf_op.predict(X_test)
y_pred_rf_op

array([-6.89480476e-01, -1.20840808e+00, -1.17588614e+00,  1.08832359e+00,
        3.70637897e-01, -4.22972975e-01,  1.43244508e+00,  8.09357995e-01,
        8.53723116e-01,  1.03124614e+00,  1.44386747e+00, -1.28125881e+00,
       -9.43951733e-01, -5.63805111e-01,  1.27397225e+00, -6.34389378e-01,
        1.70163891e-01, -2.16797918e+00, -9.52005395e-02, -2.73789837e-02,
        1.46466947e-01, -6.23713449e-01, -3.98700562e-01, -7.29417850e-01,
       -6.34389378e-01,  8.69185681e-01,  1.03000049e+00, -5.18504199e-03,
       -1.52735838e-01, -4.22997520e-01,  8.33035657e-01, -1.19230734e+00,
       -4.92067896e-01, -9.43951733e-01, -2.75731041e-01, -1.14523811e+00,
       -2.27765950e+00, -4.23248630e-01, -8.17749871e-01,  1.36291504e-01,
       -7.93837740e-01,  1.05806558e+00,  1.73909182e+00, -1.21133193e+00,
       -5.76688501e-01, -1.94512223e-01,  5.65381810e-01, -3.53524184e-01,
        1.56181446e+00,  5.23367276e-01, -5.40489291e-01, -6.83809399e-01,
        9.46373320e-01,  

In [21]:
# calculate accuracy
print('Accuracy score:',rf_op.score(X_test,y_test))
print('R2 score:',r2_score(y_test,y_pred_rf_op))
print('Mean Absolute Error:',mean_absolute_error(y_test,y_pred_rf_op))
print('Mean Squared Error:',mean_squared_error(y_test,y_pred_rf_op))
print('Root Mean Squared Error:',np.sqrt(mean_squared_error(y_test,y_pred_rf_op)))

Accuracy score: 0.9999239865154658
R2 score: 0.9999239865154658
Mean Absolute Error: 0.005643721715708343
Mean Squared Error: 7.151707172118292e-05
Root Mean Squared Error: 0.008456776674429976


    XGBoost (Optimised)

In [22]:
xgb_op=XGBRegressor()

# Define the hyperparameter grid
parameters = {
    'n_estimators': [100, 200, 300],
    'max_depth': [None, 5, 10],
    'learning_rate': [0.1, 0.2, 0.3]
}

# Create the GridSearchCV object
grid_search = GridSearchCV(estimator=xgb_op, param_grid=parameters, cv=5, n_jobs=-1)
grid_search.fit(X_train, y_train)

# Get the best parameters and best model
best_params = grid_search.best_params_
best_model = grid_search.best_estimator_

# Use the best model to make predictions
y_pred_xgb_op = best_model.predict(X_test)


In [23]:
# Apply XGBoost Regressor on Optimised Parameters
xgb_op = XGBRegressor()
xgb_op.fit(X_train, y_train)
y_pred_xgb_op = xgb_op.predict(X_test)
y_pred_xgb_op


array([-0.68429613, -1.2609607 , -1.198984  ,  1.0868733 ,  0.34644493,
       -0.42407247,  1.4319285 ,  0.8335811 ,  0.8528752 ,  1.0617822 ,
        1.418614  , -1.2687613 , -0.94664794, -0.57606786,  1.2629614 ,
       -0.6362848 ,  0.17545748, -2.169089  , -0.12219261, -0.02623381,
        0.15592848, -0.647174  , -0.39152747, -0.7386956 , -0.6362848 ,
        0.8871129 ,  1.0703466 ,  0.01863823, -0.1562588 , -0.42038906,
        0.82296747, -1.1993845 , -0.51832235, -0.9454393 , -0.27191746,
       -1.1398311 , -2.2991025 , -0.42324513, -0.8193335 ,  0.13136177,
       -0.7891698 ,  1.0871425 ,  1.7154815 , -1.2608416 , -0.5776022 ,
       -0.18022189,  0.54315984, -0.38924247,  1.531399  ,  0.5273985 ,
       -0.5361079 , -0.68569946,  0.9214841 ,  0.77696764,  0.57975197,
       -1.8123077 , -0.40661824, -1.2047956 ,  1.5373126 , -0.22697082,
       -1.1444763 ,  1.6101233 , -2.1725419 , -1.1562362 , -0.774244  ,
       -1.2687613 , -0.42390302, -0.07791135, -0.33467302, -0.05

In [24]:
# CALCULATE ACCURACY
print('Accuracy score:',xgb_op.score(X_test,y_test))
print('R2 score:',r2_score(y_test,y_pred_xgb_op))
print('Mean Absolute Error:',mean_absolute_error(y_test,y_pred_xgb_op))
print('Mean Squared Error:',mean_squared_error(y_test,y_pred_xgb_op))
print('Root Mean Squared Error:',np.sqrt(mean_squared_error(y_test,y_pred_xgb_op)))

Accuracy score: 0.9996949204928451
R2 score: 0.9996949204928451
Mean Absolute Error: 0.010975589937980932
Mean Squared Error: 0.00028703319059169717
Root Mean Squared Error: 0.016942053907118143


# Creating Pickle files

In [25]:
# Creating pickle files for Random Forest Regressor (optimised)
filename_rf = 'weather_pred_rf.pkl'
pickle.dump(rf_op, open(filename_rf, 'wb'))

# Creating pickle files for XGBoost Regressor (optimised)
filename_xgb = 'weather_pred_xgb.pkl'
pickle.dump(xgb_op, open(filename_xgb, 'wb'))



In [26]:
# load the model
loaded_model_rf = pickle.load(open(filename_rf, 'rb'))
result_rf = loaded_model_rf.score(X_test, y_test)
print(result_rf)

0.9999239865154658


In [27]:
# load the model
loaded_model_xgb = pickle.load(open(filename_xgb, 'rb'))
result_xgb = loaded_model_xgb.score(X_test, y_test)
print(result_xgb)

0.9996949204928451
