# SVM fitting

In [15]:
import pandas as pd
from sklearn.model_selection import train_test_split

# Settings for loading the data and for the hold-out split.
DATA_PATH = "DataSets/weatherAUSProcessed.csv"
TEST_FRACTION = 0.25
SPLIT_SEED = 452

# Read the full data set from the CSV file.
df = pd.read_csv(DATA_PATH)

# Split the rows into a training frame and a test frame; the fixed seed makes
# the split repeatable between runs.
train_df, test_df = train_test_split(
    df,
    test_size=TEST_FRACTION,
    random_state=SPLIT_SEED,
)

In [16]:
from sklearn.svm import SVC

# NOTE(review): 'RainTomorrow' is also the target assigned to train_y below, so
# the feature matrix contains the label itself (target leakage). The column is
# kept here only because the test-set cell builds test_x with this same
# six-column layout and predict() needs matching columns; it should be dropped
# from every feature list in one go.
train_x = train_df[['Pressure3pm', 'Temp3pm', 'WindSpeed3pm', 'Humidity3pm', 'WindDir3pm', 'RainTomorrow']]
train_y = train_df['RainTomorrow']

# Fit on the training split only. X and y belong to the cross-validation cell
# further down: they are undefined on a fresh top-to-bottom run and are built
# from the full data frame, which includes the held-out test rows.
svm_classifier = SVC()
svm_classifier.fit(train_x, train_y)

SVC()

## Test against Training set

In [17]:
from sklearn.metrics import (
    accuracy_score,
    confusion_matrix,
    f1_score,
    precision_score,
    recall_score,
)

# Predict on the training split and show the confusion matrix
# (rows: true class, columns: predicted class).
y_predicted = svm_classifier.predict(train_x)
matrix = confusion_matrix(train_y, y_predicted)
print(matrix)

# Accuracy takes no averaging option; precision, recall and F1 are averaged
# over the classes with average="weighted".
weighted = {"average": "weighted"}
for label, scorer, options in (
    ("Accuracy is ", accuracy_score, {}),
    ("Precision is ", precision_score, weighted),
    ("Sensitivity is ", recall_score, weighted),
    ("F1 is ", f1_score, weighted),
):
    print(label, scorer(train_y, y_predicted, **options))

[[10691     0]
 [ 2809     0]]
Accuracy is  0.7919259259259259
Precision is  0.6271466721536351
Sensitivity is  0.7919259259259259
F1 is  0.6999694162353002


  _warn_prf(average, modifier, msg_start, len(result))


## Test against Test set

In [20]:
# Build the test inputs and target from the held-out frame.
# NOTE(review): the column list contains 'RainTomorrow', which is also the
# target below - confirm whether the label is meant to be a feature.
test_feature_columns = ['Pressure3pm', 'Temp3pm', 'WindSpeed3pm', 'Humidity3pm', 'WindDir3pm', 'RainTomorrow']
test_x = test_df[test_feature_columns]
test_y = test_df['RainTomorrow']

# Predict on the test split and show the confusion matrix
# (rows: true class, columns: predicted class).
y_predicted = svm_classifier.predict(test_x)
matrix = confusion_matrix(test_y, y_predicted)
print(matrix)

# Accuracy takes no averaging option; precision, recall and F1 are averaged
# over the classes with average="weighted".
weighted = {"average": "weighted"}
for label, scorer, options in (
    ("Accuracy is ", accuracy_score, {}),
    ("Precision is ", precision_score, weighted),
    ("Sensitivity is ", recall_score, weighted),
    ("F1 is ", f1_score, weighted),
):
    print(label, scorer(test_y, y_predicted, **options))

[[3503    0]
 [ 997    0]]
Accuracy is  0.7784444444444445
Precision is  0.6059757530864198
Sensitivity is  0.7784444444444445
F1 is  0.6814671720327099


  _warn_prf(average, modifier, msg_start, len(result))


In [19]:
from sklearn.model_selection import KFold

X = df[['Pressure3pm', 'Temp3pm', 'WindSpeed3pm', 'Humidity3pm', 'WindDir3pm', 'RainTomorrow']]
y = df['RainTomorrow']

# X contains 'RainTomorrow', which is also the target y. Fitting on it would
# hand the label to the model (target leakage), so the folds are built from a
# copy without that column. X itself is left unchanged for any other cell
# that reads it.
# NOTE(review): the hold-out feature lists (train_x / test_x) carry the same
# column and should be cleaned up the same way.
cv_features = X.drop(columns=['RainTomorrow'])

# One entry per fold.
validation_accuracy = []
validation_f1 = []

# 5 shuffled folds; the fixed seed makes the fold assignment repeatable.
fold_and_validate = KFold(n_splits=5, shuffle=True, random_state=145)
for train_set_indices, validation_set_indices in fold_and_validate.split(cv_features):
    # Fit a fresh classifier on the training part of this fold.
    cv_train_set = cv_features.iloc[train_set_indices]
    cv_train_target = y.iloc[train_set_indices]

    cv_svc = SVC()
    cv_svc.fit(cv_train_set, cv_train_target)

    # Score it on the part of the data that was held out for this fold.
    cv_xvalidation = cv_features.iloc[validation_set_indices]
    cv_y_true = y.iloc[validation_set_indices]
    cv_y_predicted = cv_svc.predict(cv_xvalidation)

    cv_accuracy_score = accuracy_score(cv_y_true, cv_y_predicted)
    cv_f1_score = f1_score(cv_y_true, cv_y_predicted, average="weighted")
    validation_accuracy.append(cv_accuracy_score)
    validation_f1.append(cv_f1_score)

print("Cross validation accuracies are: ", validation_accuracy)
print("Cross validation f1 scores  are: ", validation_f1)
    

Cross validation accuracies are:  [0.7894444444444444, 0.7980555555555555, 0.785, 0.7691666666666667, 0.8011111111111111]
Cross validation f1 scores  are:  [0.6965542102176687, 0.7084237945654599, 0.6904481792717087, 0.6688090752080389, 0.7126478853931044]


## Result
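Training accuracy (0.79), test accuracy (0.78) and the cross-validation accuracies (roughly 0.77–0.80) are close to each other, so there are no signs of overfitting. However, both confusion matrices have an all-zero second column: the SVM predicts the first class for every sample, so the accuracy simply equals that class's share of the data (10691/13500 on the training set, 3503/4500 on the test set) and the second class is never identified. The model is therefore underfitting rather than behaving normally.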