In [2]:
import pandas as pd
import numpy as np
import seaborn as sns

import matplotlib.pyplot as plt
import warnings

warnings.filterwarnings("ignore")
%matplotlib inline


from math import sqrt
from sklearn.neighbors import KNeighborsRegressor
from sklearn.model_selection import cross_val_score
from sklearn.metrics import accuracy_score
from sklearn.metrics import mean_squared_error
from sklearn.metrics import r2_score

### Data

In [3]:
df_UK = pd.read_csv("UK_cleaned.csv")
df_UK.drop(columns=['Unnamed: 0'],inplace=True)
df_UK.head()

Unnamed: 0,Name,Date time,Maximum Temperature,Minimum Temperature,Temperature,Precipitation,Wind Speed,Visibility,Cloud Cover,Relative Humidity,Conditions,Clear,Overcast,Partially cloudy,Rain,"Rain, Overcast","Rain, Partially cloudy",Snow,"Snow, Overcast","Snow, Partially cloudy"
0,uk,01/01/1973,44.6,30.3,40.4,0.0,11.3,1.6,65.2,87.83,Partially cloudy,0,0,1,0,0,0,0,0,0
1,uk,01/02/1973,46.5,31.3,41.8,0.01,11.4,2.0,51.7,87.27,"Rain, Partially cloudy",0,0,0,0,0,1,0,0,0
2,uk,01/03/1973,53.6,46.4,49.6,0.0,11.4,5.4,90.5,84.87,Overcast,0,1,0,0,0,0,0,0,0
3,uk,01/04/1973,51.9,45.7,50.4,0.0,5.6,1.3,97.5,90.75,Overcast,0,1,0,0,0,0,0,0,0
4,uk,01/05/1973,48.3,39.3,43.7,0.0,14.9,3.9,77.8,81.33,Overcast,0,1,0,0,0,0,0,0,0


In [4]:
df_UK.shape

(17472, 20)

## K-NN Regression for Temperature

### Create Train and Test set

In [5]:
#Considering Visibilty, Cloud Cover and Humidity
X_temp=df_UK.drop(columns=['Maximum Temperature','Minimum Temperature','Temperature','Precipitation','Conditions',
                            'Name','Date time','Wind Speed'],axis=1)
Y_temp=df_UK.Temperature

In [6]:
from sklearn.model_selection import train_test_split
train_X_temp,test_X_temp,train_y_temp,test_y_temp = train_test_split(X_temp,Y_temp,test_size=0.4)

In [7]:
train_X_temp.head()

Unnamed: 0,Visibility,Cloud Cover,Relative Humidity,Clear,Overcast,Partially cloudy,Rain,"Rain, Overcast","Rain, Partially cloudy",Snow,"Snow, Overcast","Snow, Partially cloudy"
15897,24.5,0.3,69.48,0,0,0,1,0,0,0,0,0
15199,18.9,0.3,69.68,0,0,0,1,0,0,0,0,0
4632,12.7,46.7,62.03,0,0,1,0,0,0,0,0,0
5870,8.0,67.7,83.76,0,0,0,0,0,1,0,0,0
7429,26.9,17.0,58.52,1,0,0,0,0,0,0,0,0


### Preprocessing – Scaling the features

In [9]:
from sklearn.preprocessing import StandardScaler
from sklearn.preprocessing import MinMaxScaler
x_scaled= MinMaxScaler()
train_X_temp[['Visibility','Relative Humidity']] = x_scaled.fit_transform(train_X_temp[['Visibility','Relative Humidity']])

x_test_scaled= MinMaxScaler()
test_X_temp[['Visibility','Relative Humidity']]=x_test_scaled.fit_transform(test_X_temp[['Visibility','Relative Humidity']])

In [10]:
train_X_temp.shape

(10483, 12)

In [11]:
test_X_temp.shape

(6989, 12)

## Implementing RandomizedSearchCV

### For Decision Tree

In [12]:
#Note: Max_depth can be done
from sklearn.tree import DecisionTreeRegressor
from sklearn.model_selection import RandomizedSearchCV
from sklearn.model_selection import GridSearchCV
dc= DecisionTreeRegressor(random_state=1234)
params = {'max_features': ['auto', 'sqrt', 'log2'],
          'min_samples_split': [2,3,4,5,6,7,8,9,10,11,12,13,14,15], 
          'min_samples_leaf':[1,2,3,4,5,6,7,8,9,10,11],
          'max_depth':[i for i in range(1,15)]}
#Making models with hyper parameters sets
dc = RandomizedSearchCV(dc, param_distributions=params,cv=10, n_jobs=-1)
#Learning
dc.fit(train_X_temp,train_y_temp)
#The best hyper parameters set
print("Best Hyper Parameters:",dc.best_params_)

Best Hyper Parameters: {'min_samples_split': 5, 'min_samples_leaf': 4, 'max_features': 'sqrt', 'max_depth': 6}


### For Knn

In [13]:
from sklearn.model_selection import GridSearchCV
from sklearn.model_selection import RandomizedSearchCV
knn = KNeighborsRegressor()
params = {'n_neighbors':[i for i in range(5,20)],
         'leaf_size':[1,2,3,5],
          'weights':['uniform', 'distance'],
          'algorithm':['auto', 'ball_tree','kd_tree','brute'],
          'n_jobs':[-1]}


knn = RandomizedSearchCV(knn,param_distributions=params,cv=10)
knn.fit(train_X_temp,train_y_temp)
knn.best_params_

{'weights': 'uniform',
 'n_neighbors': 19,
 'n_jobs': -1,
 'leaf_size': 1,
 'algorithm': 'auto'}

### For SVM

In [14]:
from sklearn import svm
#making the instance
svm_r=svm.SVR()
#Hyper Parameters Set
params = {'C': [6,7,8,9,10,11,12], 
          'kernel': ['linear','rbf']}
#Making models with hyper parameters sets
svm_r = RandomizedSearchCV(svm_r, param_distributions=params,cv=10, n_jobs=-1)
#Learning
svm_r.fit(train_X_temp,train_y_temp)
#The best hyper parameters set
print("Best Hyper Parameters:\n",svm_r.best_params_)

Best Hyper Parameters:
 {'kernel': 'rbf', 'C': 11}


In [None]:
#Todo: Tuning For XGBoost