## Using SVM as Model

In [1]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.model_selection import GridSearchCV
from sklearn.svm import SVC
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score, accuracy_score

In [2]:
DF = pd.read_csv('ClimateChange.csv')

DF.tail()

Unnamed: 0,year,month,day,week,temp_2,temp_1,average,actual
343,2019,12,27,Tues,42,42,45.2,47
344,2019,12,28,Wed,42,47,45.3,48
345,2019,12,29,Thurs,47,48,45.3,48
346,2019,12,30,Fri,48,48,45.4,57
347,2019,12,31,Sat,48,57,45.5,40


In [3]:
# Preprocessing the data
DF = pd.get_dummies(DF)

DF.tail()

Unnamed: 0,year,month,day,temp_2,temp_1,average,actual,week_Fri,week_Mon,week_Sat,week_Sun,week_Thurs,week_Tues,week_Wed
343,2019,12,27,42,42,45.2,47,False,False,False,False,False,True,False
344,2019,12,28,42,47,45.3,48,False,False,False,False,False,False,True
345,2019,12,29,47,48,45.3,48,False,False,False,False,True,False,False
346,2019,12,30,48,48,45.4,57,True,False,False,False,False,False,False
347,2019,12,31,48,57,45.5,40,False,False,True,False,False,False,False


In [4]:
Y = DF.iloc[:, 6]
X = DF.drop(columns=['actual'])

Y.tail()

343    47
344    48
345    48
346    57
347    40
Name: actual, dtype: int64

In [5]:
x_train, x_test, y_train, y_test = train_test_split(X, Y, train_size=0.7, random_state=1)

x_test.tail()

Unnamed: 0,year,month,day,temp_2,temp_1,average,week_Fri,week_Mon,week_Sat,week_Sun,week_Thurs,week_Tues,week_Wed
41,2019,2,11,62,56,49.5,False,False,False,False,True,False,False
127,2019,5,10,63,67,63.6,False,False,False,False,False,True,False
333,2019,12,17,39,35,45.2,False,False,True,False,False,False,False
295,2019,11,9,63,71,52.4,False,False,False,False,False,False,True
248,2019,9,22,67,68,68.7,False,False,False,False,True,False,False


In [6]:
# Creating model with tuned hyperparameters
param_grid = {
    'C': [0.1, 1, 10, 100],
    'gamma': [1, 0.1, 0.01, 0.001],
    'kernel': ['rbf']
}

grid = GridSearchCV(SVC(), param_grid, refit=True, verbose=2, cv=5)
grid.fit(x_train, y_train)

Fitting 5 folds for each of 16 candidates, totalling 80 fits
[CV] END .........................C=0.1, gamma=1, kernel=rbf; total time=   0.0s
[CV] END .........................C=0.1, gamma=1, kernel=rbf; total time=   0.0s
[CV] END .........................C=0.1, gamma=1, kernel=rbf; total time=   0.0s
[CV] END .........................C=0.1, gamma=1, kernel=rbf; total time=   0.0s
[CV] END .........................C=0.1, gamma=1, kernel=rbf; total time=   0.0s
[CV] END .......................C=0.1, gamma=0.1, kernel=rbf; total time=   0.0s
[CV] END .......................C=0.1, gamma=0.1, kernel=rbf; total time=   0.0s
[CV] END .......................C=0.1, gamma=0.1, kernel=rbf; total time=   0.0s
[CV] END .......................C=0.1, gamma=0.1, kernel=rbf; total time=   0.0s
[CV] END .......................C=0.1, gamma=0.1, kernel=rbf; total time=   0.0s
[CV] END ......................C=0.1, gamma=0.01, kernel=rbf; total time=   0.0s
[CV] END ......................C=0.1, gamma=0.01



[CV] END .....................C=0.1, gamma=0.001, kernel=rbf; total time=   0.0s
[CV] END .....................C=0.1, gamma=0.001, kernel=rbf; total time=   0.0s
[CV] END .....................C=0.1, gamma=0.001, kernel=rbf; total time=   0.0s
[CV] END .....................C=0.1, gamma=0.001, kernel=rbf; total time=   0.0s
[CV] END ...........................C=1, gamma=1, kernel=rbf; total time=   0.0s
[CV] END ...........................C=1, gamma=1, kernel=rbf; total time=   0.0s
[CV] END ...........................C=1, gamma=1, kernel=rbf; total time=   0.0s
[CV] END ...........................C=1, gamma=1, kernel=rbf; total time=   0.0s
[CV] END ...........................C=1, gamma=1, kernel=rbf; total time=   0.0s
[CV] END .........................C=1, gamma=0.1, kernel=rbf; total time=   0.0s
[CV] END .........................C=1, gamma=0.1, kernel=rbf; total time=   0.0s
[CV] END .........................C=1, gamma=0.1, kernel=rbf; total time=   0.0s
[CV] END ...................

In [7]:
# Best parameters and model
print(grid.best_params_)
best_svc = grid.best_estimator_

{'C': 10, 'gamma': 0.001, 'kernel': 'rbf'}


In [8]:
svm_model = SVC(kernel='rbf', C=10, gamma=0.001)

svm_model.fit(x_train, y_train)

In [10]:
pred = svm_model.predict(x_test)

In [11]:
mae = mean_absolute_error(y_test, pred)
print('Mean Absolute Error:', round(mae, 2))

Mean Absolute Error: 4.75


In [12]:
mse = mean_squared_error(y_test, pred)
print('Mean Squared Error:', round(mse, 2))

Mean Squared Error: 38.18


In [13]:
r2 = r2_score(y_test, pred)
print('R-squared:', round(r2, 2))

R-squared: 0.76


In [14]:
accuracy = accuracy_score(y_test, pred)
print('Accuracy:', round(accuracy, 2))

Accuracy: 0.09
