In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import os
import warnings
warnings.filterwarnings('ignore')
from sklearn.preprocessing import LabelEncoder
from scipy.stats import zscore
from sklearn.preprocessing import power_transform
from statsmodels.tsa.stattools import adfuller
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.metrics import r2_score,mean_squared_error,mean_absolute_error
from sklearn.metrics import accuracy_score,classification_report,confusion_matrix
from sklearn.metrics import accuracy_score, recall_score, precision_score, f1_score
from sklearn.feature_selection import SelectKBest, mutual_info_classif
from sklearn.feature_selection import SelectKBest, chi2
from sklearn.feature_selection import RFE
from sklearn.ensemble import RandomForestRegressor
from sklearn.decomposition import PCA
from xgboost import XGBRegressor
from sklearn.tree import DecisionTreeRegressor
from sklearn.model_selection import GridSearchCV
import pickle

# Data Loading

Source:
    https://www.kaggle.com/datasets/nicholasjhana/energy-consumption-generation-prices-and-weather

    Sample (550 rows taken as sample)

In [2]:
weather_pred=pd.read_csv('weather_features.csv')
weather_pred

Unnamed: 0,dt_iso,city_name,temp,temp_min,temp_max,pressure,humidity,wind_speed,wind_deg,rain_1h,rain_3h,snow_3h,clouds_all,weather_id,weather_main,weather_description,weather_icon
0,2015-01-01 00:00:00+01:00,Valencia,270.4750,270.4750,270.4750,1001,77,1,62,0.0,0,0,0,800,clear,sky is clear,01n
1,2015-01-01 01:00:00+01:00,Valencia,270.4750,270.4750,270.4750,1001,77,1,62,0.0,0,0,0,800,clear,sky is clear,01n
2,2015-01-01 02:00:00+01:00,Valencia,269.6860,269.6860,269.6860,1002,78,0,23,0.0,0,0,0,800,clear,sky is clear,01n
3,2015-01-01 03:00:00+01:00,Valencia,269.6860,269.6860,269.6860,1002,78,0,23,0.0,0,0,0,800,clear,sky is clear,01n
4,2015-01-01 04:00:00+01:00,Valencia,269.6860,269.6860,269.6860,1002,78,0,23,0.0,0,0,0,800,clear,sky is clear,01n
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
544,2015-01-23 15:00:00+01:00,Valencia,285.7500,285.7500,285.7500,1018,31,3,22,0.0,0,0,0,800,clear,sky is clear,01d
545,2015-01-23 16:00:00+01:00,Valencia,286.0000,286.0000,286.0000,1018,32,4,176,0.0,0,0,0,800,clear,sky is clear,01
546,2015-01-23 17:00:00+01:00,Valencia,286.2500,286.2500,286.2500,1018,33,6,330,0.0,0,0,0,800,clear,sky is clear,01d
547,2015-01-23 18:00:00+01:00,Valencia,283.4785,283.4785,283.4785,1019,42,6,334,0.0,0,0,0,800,clear,sky is clear,01


# DataPreprocessing 

    1. Extracting date/time/month/year/hour/day/weekday/quarter

In [3]:
weather_pred['date']=pd.to_datetime(weather_pred['dt_iso'].str[:10])
weather_pred['time']=pd.to_datetime(weather_pred['dt_iso'].str[11:19])
weather_pred['month']=weather_pred['date'].dt.month
weather_pred['year']=weather_pred['date'].dt.year
weather_pred['hour']=weather_pred['time'].dt.hour
weather_pred['day']=weather_pred['date'].dt.day
weather_pred['weekday']=weather_pred['date'].dt.weekday
weather_pred['quarter']=weather_pred['date'].dt.quarter

In [4]:
weather_pred

Unnamed: 0,dt_iso,city_name,temp,temp_min,temp_max,pressure,humidity,wind_speed,wind_deg,rain_1h,...,weather_description,weather_icon,date,time,month,year,hour,day,weekday,quarter
0,2015-01-01 00:00:00+01:00,Valencia,270.4750,270.4750,270.4750,1001,77,1,62,0.0,...,sky is clear,01n,2015-01-01,2024-03-20 00:00:00,1,2015,0,1,3,1
1,2015-01-01 01:00:00+01:00,Valencia,270.4750,270.4750,270.4750,1001,77,1,62,0.0,...,sky is clear,01n,2015-01-01,2024-03-20 01:00:00,1,2015,1,1,3,1
2,2015-01-01 02:00:00+01:00,Valencia,269.6860,269.6860,269.6860,1002,78,0,23,0.0,...,sky is clear,01n,2015-01-01,2024-03-20 02:00:00,1,2015,2,1,3,1
3,2015-01-01 03:00:00+01:00,Valencia,269.6860,269.6860,269.6860,1002,78,0,23,0.0,...,sky is clear,01n,2015-01-01,2024-03-20 03:00:00,1,2015,3,1,3,1
4,2015-01-01 04:00:00+01:00,Valencia,269.6860,269.6860,269.6860,1002,78,0,23,0.0,...,sky is clear,01n,2015-01-01,2024-03-20 04:00:00,1,2015,4,1,3,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
544,2015-01-23 15:00:00+01:00,Valencia,285.7500,285.7500,285.7500,1018,31,3,22,0.0,...,sky is clear,01d,2015-01-23,2024-03-20 15:00:00,1,2015,15,23,4,1
545,2015-01-23 16:00:00+01:00,Valencia,286.0000,286.0000,286.0000,1018,32,4,176,0.0,...,sky is clear,01,2015-01-23,2024-03-20 16:00:00,1,2015,16,23,4,1
546,2015-01-23 17:00:00+01:00,Valencia,286.2500,286.2500,286.2500,1018,33,6,330,0.0,...,sky is clear,01d,2015-01-23,2024-03-20 17:00:00,1,2015,17,23,4,1
547,2015-01-23 18:00:00+01:00,Valencia,283.4785,283.4785,283.4785,1019,42,6,334,0.0,...,sky is clear,01,2015-01-23,2024-03-20 18:00:00,1,2015,18,23,4,1


In [5]:
weather_pred.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 549 entries, 0 to 548
Data columns (total 25 columns):
 #   Column               Non-Null Count  Dtype         
---  ------               --------------  -----         
 0   dt_iso               549 non-null    object        
 1   city_name            549 non-null    object        
 2   temp                 549 non-null    float64       
 3   temp_min             549 non-null    float64       
 4   temp_max             549 non-null    float64       
 5   pressure             549 non-null    int64         
 6   humidity             549 non-null    int64         
 7   wind_speed           549 non-null    int64         
 8   wind_deg             549 non-null    int64         
 9   rain_1h              549 non-null    float64       
 10  rain_3h              549 non-null    int64         
 11  snow_3h              549 non-null    int64         
 12  clouds_all           549 non-null    int64         
 13  weather_id           549 non-null  

In [6]:
# Prepare the data for analysis.
# Drop the columns that are not required for analysis.
weather_pred.drop(['dt_iso','city_name','weather_description','date','time'],axis=1,inplace=True)

In [7]:
weather_pred

Unnamed: 0,temp,temp_min,temp_max,pressure,humidity,wind_speed,wind_deg,rain_1h,rain_3h,snow_3h,clouds_all,weather_id,weather_main,weather_icon,month,year,hour,day,weekday,quarter
0,270.4750,270.4750,270.4750,1001,77,1,62,0.0,0,0,0,800,clear,01n,1,2015,0,1,3,1
1,270.4750,270.4750,270.4750,1001,77,1,62,0.0,0,0,0,800,clear,01n,1,2015,1,1,3,1
2,269.6860,269.6860,269.6860,1002,78,0,23,0.0,0,0,0,800,clear,01n,1,2015,2,1,3,1
3,269.6860,269.6860,269.6860,1002,78,0,23,0.0,0,0,0,800,clear,01n,1,2015,3,1,3,1
4,269.6860,269.6860,269.6860,1002,78,0,23,0.0,0,0,0,800,clear,01n,1,2015,4,1,3,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
544,285.7500,285.7500,285.7500,1018,31,3,22,0.0,0,0,0,800,clear,01d,1,2015,15,23,4,1
545,286.0000,286.0000,286.0000,1018,32,4,176,0.0,0,0,0,800,clear,01,1,2015,16,23,4,1
546,286.2500,286.2500,286.2500,1018,33,6,330,0.0,0,0,0,800,clear,01d,1,2015,17,23,4,1
547,283.4785,283.4785,283.4785,1019,42,6,334,0.0,0,0,0,800,clear,01,1,2015,18,23,4,1


    4. Encoding the columns

In [8]:
# label encoding
le=LabelEncoder()
weather_pred['weather_main']=le.fit_transform(weather_pred['weather_main'])
weather_pred['weather_icon']=le.fit_transform(weather_pred['weather_icon'])
weather_pred

Unnamed: 0,temp,temp_min,temp_max,pressure,humidity,wind_speed,wind_deg,rain_1h,rain_3h,snow_3h,clouds_all,weather_id,weather_main,weather_icon,month,year,hour,day,weekday,quarter
0,270.4750,270.4750,270.4750,1001,77,1,62,0.0,0,0,0,800,0,2,1,2015,0,1,3,1
1,270.4750,270.4750,270.4750,1001,77,1,62,0.0,0,0,0,800,0,2,1,2015,1,1,3,1
2,269.6860,269.6860,269.6860,1002,78,0,23,0.0,0,0,0,800,0,2,1,2015,2,1,3,1
3,269.6860,269.6860,269.6860,1002,78,0,23,0.0,0,0,0,800,0,2,1,2015,3,1,3,1
4,269.6860,269.6860,269.6860,1002,78,0,23,0.0,0,0,0,800,0,2,1,2015,4,1,3,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
544,285.7500,285.7500,285.7500,1018,31,3,22,0.0,0,0,0,800,0,1,1,2015,15,23,4,1
545,286.0000,286.0000,286.0000,1018,32,4,176,0.0,0,0,0,800,0,0,1,2015,16,23,4,1
546,286.2500,286.2500,286.2500,1018,33,6,330,0.0,0,0,0,800,0,1,1,2015,17,23,4,1
547,283.4785,283.4785,283.4785,1019,42,6,334,0.0,0,0,0,800,0,0,1,2015,18,23,4,1


In [9]:
weather_pred = pd.DataFrame(power_transform(weather_pred, method='yeo-johnson'), columns=weather_pred.columns)
weather_pred

Unnamed: 0,temp,temp_min,temp_max,pressure,humidity,wind_speed,wind_deg,rain_1h,rain_3h,snow_3h,clouds_all,weather_id,weather_main,weather_icon,month,year,hour,day,weekday,quarter
0,-2.130012,-2.130012,-2.130012,-2.087219e-14,0.607204,-0.864232,-2.149694,-0.256942,0.0,0.0,-0.803895,0.116101,-0.76672,-0.214447,0.0,0.0,-1.904047,-1.847277,0.037765,0.0
1,-2.130012,-2.130012,-2.130012,-2.087219e-14,0.607204,-0.864232,-2.149694,-0.256942,0.0,0.0,-0.803895,0.116101,-0.76672,-0.214447,0.0,0.0,-1.651731,-1.847277,0.037765,0.0
2,-2.299595,-2.299595,-2.299595,-1.948441e-14,0.663627,-2.738422,-2.255995,-0.256942,0.0,0.0,-0.803895,0.116101,-0.76672,-0.214447,0.0,0.0,-1.434315,-1.847277,0.037765,0.0
3,-2.299595,-2.299595,-2.299595,-1.948441e-14,0.663627,-2.738422,-2.255995,-0.256942,0.0,0.0,-0.803895,0.116101,-0.76672,-0.214447,0.0,0.0,-1.236856,-1.847277,0.037765,0.0
4,-2.299595,-2.299595,-2.299595,-1.948441e-14,0.663627,-2.738422,-2.255995,-0.256942,0.0,0.0,-0.803895,0.116101,-0.76672,-0.214447,0.0,0.0,-1.053022,-1.847277,0.037765,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
544,0.867362,0.867362,0.867362,1.942890e-15,-1.850405,0.417983,-2.257442,-0.256942,0.0,0.0,-0.803895,0.116101,-0.76672,-0.719072,0.0,0.0,0.566981,1.556753,0.532276,0.0
545,0.912252,0.912252,0.912252,1.942890e-15,-1.800659,0.737448,-1.268582,-0.256942,0.0,0.0,-0.803895,0.116101,-0.76672,-1.619615,0.0,0.0,0.694240,1.556753,0.532276,0.0
546,0.957019,0.957019,0.957019,1.942890e-15,-1.750698,1.151053,1.300765,-0.256942,0.0,0.0,-0.803895,0.116101,-0.76672,-0.719072,0.0,0.0,0.819400,1.556753,0.532276,0.0
547,0.453728,0.453728,0.453728,3.275158e-15,-1.292152,1.151053,1.388863,-0.256942,0.0,0.0,-0.803895,0.116101,-0.76672,-1.619615,0.0,0.0,0.942609,1.556753,0.532276,0.0


## Feature Selection

    Train/Test Splitting the Data

In [11]:
# columns: temp	temp_min, temp_max, pressure, humidity, wind_speed, wind_deg, rain_1h, rain_3h, snow_3h,clouds_all,	weather_id,weather_main,weather_icon,month,	year,hour,day,weekday,quarter

X= weather_pred.drop('temp',axis=1)
y= weather_pred['temp']

In [12]:
X_train,X_test,y_train,y_test=train_test_split(X,y,test_size=0.2,random_state=42)

In [13]:
k=5
model = RandomForestRegressor()
model.fit(X, y)
importances = model.feature_importances_
selected_feature_indices = np.argsort(importances)[-k:]
selected_features = X.columns[selected_feature_indices]

In [14]:
selected_features

Index(['day', 'wind_deg', 'humidity', 'temp_min', 'temp_max'], dtype='object')

# Model Selection and Training

### Depending on Data: Apply Suitable Models
    

    Apply Random Forest

In [15]:
# Apply random forest regressor on the features extracted from the above step.
rf = RandomForestRegressor()
rf.fit(X_train, y_train)
y_pred_rf = rf.predict(X_test)
y_pred_rf




array([-6.89548825e-01, -1.21829234e+00, -1.18001627e+00,  1.08745797e+00,
        3.70637363e-01, -4.22868726e-01,  1.44023624e+00,  8.13828779e-01,
        8.53296871e-01,  1.03141587e+00,  1.44833171e+00, -1.27663083e+00,
       -9.45567294e-01, -5.64725683e-01,  1.27524835e+00, -6.33390052e-01,
        1.71464672e-01, -2.16716315e+00, -9.64411074e-02, -2.89239823e-02,
        1.46018231e-01, -6.20890307e-01, -3.97544485e-01, -7.37014238e-01,
       -6.33390052e-01,  8.70093613e-01,  1.03393076e+00, -8.04488432e-03,
       -1.52232884e-01, -4.22744949e-01,  8.36598334e-01, -1.19862334e+00,
       -4.92235747e-01, -9.45567294e-01, -2.73428926e-01, -1.14034913e+00,
       -2.26709648e+00, -4.22888004e-01, -8.15815707e-01,  1.32268142e-01,
       -7.93972094e-01,  1.06065737e+00,  1.74395283e+00, -1.22203773e+00,
       -5.77429219e-01, -1.92890718e-01,  5.61790491e-01, -3.54705052e-01,
        1.55189974e+00,  5.20993721e-01, -5.41270121e-01, -6.83058647e-01,
        9.48591390e-01,  

    Model Evaluation: Random Forest Regressor

In [16]:

print('R2 score:',r2_score(y_test,y_pred_rf))
print('Mean Absolute Error:',mean_absolute_error(y_test,y_pred_rf))
print('Mean Squared Error:',mean_squared_error(y_test,y_pred_rf))
print('Root Mean Squared Error:',np.sqrt(mean_squared_error(y_test,y_pred_rf)))

R2 score: 0.9999271361491838
Mean Absolute Error: 0.005044151559974761
Mean Squared Error: 6.855374775450056e-05
Root Mean Squared Error: 0.008279719062534704


In [17]:
print('Accuracy score:',rf.score(X_test,y_test))

Accuracy score: 0.9999271361491838


    Apply XGBoost

In [18]:
# apply xgboost regressor
xgb = XGBRegressor()
xgb.fit(X_train, y_train)
y_pred_xgb = xgb.predict(X_test)
y_pred_xgb


array([-0.68429613, -1.2609607 , -1.198984  ,  1.0868733 ,  0.34644493,
       -0.42407247,  1.4319285 ,  0.8335811 ,  0.8528752 ,  1.0617822 ,
        1.418614  , -1.2687613 , -0.94664794, -0.57606786,  1.2629614 ,
       -0.6362848 ,  0.17545748, -2.169089  , -0.12219261, -0.02623381,
        0.15592848, -0.647174  , -0.39152747, -0.7386956 , -0.6362848 ,
        0.8871129 ,  1.0703466 ,  0.01863823, -0.1562588 , -0.42038906,
        0.82296747, -1.1993845 , -0.51832235, -0.9454393 , -0.27191746,
       -1.1398311 , -2.2991025 , -0.42324513, -0.8193335 ,  0.13136177,
       -0.7891698 ,  1.0871425 ,  1.7154815 , -1.2608416 , -0.5776022 ,
       -0.18022189,  0.54315984, -0.38924247,  1.531399  ,  0.5273985 ,
       -0.5361079 , -0.68569946,  0.9214841 ,  0.77696764,  0.57975197,
       -1.8123077 , -0.40661824, -1.2047956 ,  1.5373126 , -0.22697082,
       -1.1444763 ,  1.6101233 , -2.1725419 , -1.1562362 , -0.774244  ,
       -1.2687613 , -0.42390302, -0.07791135, -0.33467302, -0.05

    Model Evaluation: XGBoost

In [19]:
print('R2 score:',r2_score(y_test,y_pred_rf))
print('Mean Absolute Error:',mean_absolute_error(y_test,y_pred_rf))
print('Mean Squared Error:',mean_squared_error(y_test,y_pred_rf))
print('Root Mean Squared Error:',np.sqrt(mean_squared_error(y_test,y_pred_rf)))

R2 score: 0.9999271361491838
Mean Absolute Error: 0.005044151559974761
Mean Squared Error: 6.855374775450056e-05
Root Mean Squared Error: 0.008279719062534704


# Optimizing the above models by finding the best hyper parameters

    Random Forest (Optimised)

In [20]:
rf_op = RandomForestRegressor()

# Define the hyperparameter grid
parameters = {
    'n_estimators': [100, 200, 300],
    'max_depth': [None, 5, 10],
    'min_samples_split': [2, 4, 6],
    'min_samples_leaf': [1, 3, 5]
}

# Create the GridSearchCV object
grid_search = GridSearchCV(estimator=rf_op, param_grid=parameters, cv=5, n_jobs=-1)
grid_search.fit(X_train, y_train)

# Get the best parameters and best model
best_params = grid_search.best_params_
best_model = grid_search.best_estimator_

# Use the best model to make predictions
y_pred_rf_op = best_model.predict(X_test)



In [21]:
# Apply Random Forest Regressor on Optimised Parameters
rf_op = RandomForestRegressor()
rf_op.fit(X_train, y_train)
y_pred_rf_op = rf_op.predict(X_test)
y_pred_rf_op

array([-0.6940495 , -1.22177968, -1.18646698,  1.08688497,  0.36952017,
       -0.4228962 ,  1.43417766,  0.812285  ,  0.85663385,  1.03313163,
        1.44616997, -1.27562078, -0.94627592, -0.5643998 ,  1.27328842,
       -0.63300841,  0.17345848, -2.1699699 , -0.09498586, -0.02858037,
        0.14385862, -0.61764275, -0.39770968, -0.73386909, -0.63300841,
        0.87013567,  1.03233539, -0.00912186, -0.15122168, -0.42305444,
        0.83342164, -1.19796949, -0.49146895, -0.94627592, -0.27499802,
       -1.14535266, -2.27307392, -0.42281826, -0.81763045,  0.13380929,
       -0.79637317,  1.06048613,  1.74494909, -1.22440216, -0.57749473,
       -0.19453949,  0.56097   , -0.35606042,  1.55948518,  0.52177781,
       -0.5404749 , -0.68767888,  0.94649282,  0.77598127,  0.57525125,
       -1.80831987, -0.42268543, -1.20233485,  1.51125141, -0.22886288,
       -1.17247914,  1.66058687, -2.16852448, -1.15953677, -0.77301374,
       -1.27510899, -0.42411787, -0.08216066, -0.34689419, -0.08

In [22]:
# calculate accuracy
print('Accuracy score:',rf_op.score(X_test,y_test))
print('R2 score:',r2_score(y_test,y_pred_rf_op))
print('Mean Absolute Error:',mean_absolute_error(y_test,y_pred_rf_op))
print('Mean Squared Error:',mean_squared_error(y_test,y_pred_rf_op))
print('Root Mean Squared Error:',np.sqrt(mean_squared_error(y_test,y_pred_rf_op)))

Accuracy score: 0.9999419580019461
R2 score: 0.9999419580019461
Mean Absolute Error: 0.004991386169919285
Mean Squared Error: 5.460864954548276e-05
Root Mean Squared Error: 0.007389766542014894


    XGBoost (Optimised)

In [23]:
xgb_op=XGBRegressor()

# Define the hyperparameter grid
parameters = {
    'n_estimators': [100, 200, 300],
    'max_depth': [None, 5, 10],
    'learning_rate': [0.1, 0.2, 0.3]
}

# Create the GridSearchCV object
grid_search = GridSearchCV(estimator=xgb_op, param_grid=parameters, cv=5, n_jobs=-1)
grid_search.fit(X_train, y_train)

# Get the best parameters and best model
best_params = grid_search.best_params_
best_model = grid_search.best_estimator_

# Use the best model to make predictions
y_pred_xgb_op = best_model.predict(X_test)


In [24]:
# Apply XGBoost Regressor on Optimised Parameters
xgb_op = XGBRegressor()
xgb_op.fit(X_train, y_train)
y_pred_xgb_op = xgb_op.predict(X_test)
y_pred_xgb_op


array([-0.68429613, -1.2609607 , -1.198984  ,  1.0868733 ,  0.34644493,
       -0.42407247,  1.4319285 ,  0.8335811 ,  0.8528752 ,  1.0617822 ,
        1.418614  , -1.2687613 , -0.94664794, -0.57606786,  1.2629614 ,
       -0.6362848 ,  0.17545748, -2.169089  , -0.12219261, -0.02623381,
        0.15592848, -0.647174  , -0.39152747, -0.7386956 , -0.6362848 ,
        0.8871129 ,  1.0703466 ,  0.01863823, -0.1562588 , -0.42038906,
        0.82296747, -1.1993845 , -0.51832235, -0.9454393 , -0.27191746,
       -1.1398311 , -2.2991025 , -0.42324513, -0.8193335 ,  0.13136177,
       -0.7891698 ,  1.0871425 ,  1.7154815 , -1.2608416 , -0.5776022 ,
       -0.18022189,  0.54315984, -0.38924247,  1.531399  ,  0.5273985 ,
       -0.5361079 , -0.68569946,  0.9214841 ,  0.77696764,  0.57975197,
       -1.8123077 , -0.40661824, -1.2047956 ,  1.5373126 , -0.22697082,
       -1.1444763 ,  1.6101233 , -2.1725419 , -1.1562362 , -0.774244  ,
       -1.2687613 , -0.42390302, -0.07791135, -0.33467302, -0.05

In [25]:
# CALCULATE ACCURACY
print('Accuracy score:',xgb_op.score(X_test,y_test))
print('R2 score:',r2_score(y_test,y_pred_xgb_op))
print('Mean Absolute Error:',mean_absolute_error(y_test,y_pred_xgb_op))
print('Mean Squared Error:',mean_squared_error(y_test,y_pred_xgb_op))
print('Root Mean Squared Error:',np.sqrt(mean_squared_error(y_test,y_pred_xgb_op)))

Accuracy score: 0.9996949204928451
R2 score: 0.9996949204928451
Mean Absolute Error: 0.010975589937980932
Mean Squared Error: 0.00028703319059169717
Root Mean Squared Error: 0.016942053907118143


# Creating Pickle files

In [26]:
# Creating pickle files for Random Forest Regressor (optimised)
filename_rf = 'weather_pred_rf.pkl'
pickle.dump(rf_op, open(filename_rf, 'wb'))

# Creating pickle files for XGBoost Regressor (optimised)
filename_xgb = 'weather_pred_xgb.pkl'
pickle.dump(xgb_op, open(filename_xgb, 'wb'))



In [27]:
# load the model
loaded_model_rf = pickle.load(open(filename_rf, 'rb'))
result_rf = loaded_model_rf.score(X_test, y_test)
print(result_rf)

0.9999419580019461


In [28]:
# load the model
loaded_model_xgb = pickle.load(open(filename_xgb, 'rb'))
result_xgb = loaded_model_xgb.score(X_test, y_test)
print(result_xgb)

0.9996949204928451


In [29]:
!pip3 install sklearn

Collecting sklearn
  Downloading sklearn-0.0.post12.tar.gz (2.6 kB)
  Installing build dependencies: started
  Installing build dependencies: finished with status 'done'
  Getting requirements to build wheel: started
  Getting requirements to build wheel: finished with status 'error'


  error: subprocess-exited-with-error
  
  × Getting requirements to build wheel did not run successfully.
  │ exit code: 1
  ╰─> [15 lines of output]
      The 'sklearn' PyPI package is deprecated, use 'scikit-learn'
      rather than 'sklearn' for pip commands.
      
      Here is how to fix this error in the main use cases:
      - use 'pip install scikit-learn' rather than 'pip install sklearn'
      - replace 'sklearn' by 'scikit-learn' in your pip requirements files
        (requirements.txt, setup.py, setup.cfg, Pipfile, etc ...)
      - if the 'sklearn' package is used by one of your dependencies,
        it would be great if you take some time to track which package uses
        'sklearn' instead of 'scikit-learn' and report it to their issue tracker
      - as a last resort, set the environment variable
        SKLEARN_ALLOW_DEPRECATED_SKLEARN_PACKAGE_INSTALL=True to avoid this error
      
      More information is available at
      https://github.com/scikit-learn/sklearn-

In [31]:
!pip3 install scikit-learn==1.3.2

