### Note: Due to long runtimes, we applied SVC on a subset of the data which is 100000 datapoints.

In [1]:
import pandas as pd
from sklearn import svm
from sklearn.model_selection import train_test_split
from sklearn import metrics
import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline

In [2]:
df = pd.read_csv('preprocessed_data.csv')

In [3]:
df.head()

Unnamed: 0.1,Unnamed: 0,Time_of_departure,Length_of_Fligh,Airline_ID,AirportFrom,AirportTo,DayOfWeek,Class
0,0,0.89993,0.215267,0.294118,0.054795,0.441781,0.0,0
1,1,0.244927,0.222901,0.705882,0.222603,0.712329,0.5,0
2,2,0.811756,0.218321,0.176471,0.119863,0.205479,0.333333,0
3,3,0.979706,0.525191,0.823529,0.695205,0.743151,0.833333,0
4,4,0.477257,0.149618,0.470588,0.109589,0.054795,0.5,0


In [4]:
df.drop(['Unnamed: 0'], axis=1, inplace=True)

In [5]:
df.head()

Unnamed: 0,Time_of_departure,Length_of_Fligh,Airline_ID,AirportFrom,AirportTo,DayOfWeek,Class
0,0.89993,0.215267,0.294118,0.054795,0.441781,0.0,0
1,0.244927,0.222901,0.705882,0.222603,0.712329,0.5,0
2,0.811756,0.218321,0.176471,0.119863,0.205479,0.333333,0
3,0.979706,0.525191,0.823529,0.695205,0.743151,0.833333,0
4,0.477257,0.149618,0.470588,0.109589,0.054795,0.5,0


In [6]:
df = df.sample(frac = 1)    

In [7]:
df.head()

Unnamed: 0,Time_of_departure,Length_of_Fligh,Airline_ID,AirportFrom,AirportTo,DayOfWeek,Class
127995,0.874738,0.203053,0.764706,0.273973,0.712329,1.0,0
402570,0.804059,0.154198,0.470588,0.054795,0.15411,0.333333,0
490729,0.951714,0.076336,0.058824,0.273973,0.69863,0.833333,0
414420,0.755073,0.169466,0.294118,0.832192,0.893836,0.666667,1
140870,0.76627,0.10687,0.941176,0.462329,0.130137,0.333333,1


In [8]:
X = df.drop(['Class'], axis=1)[:100000]
Y = df['Class'][:100000]
X_train, X_test, Y_train, Y_test = train_test_split(X, Y, test_size=0.3, random_state=101)

### Choosing Kernel

In [9]:
kernels = ['linear', 'rbf', 'poly']

In [10]:
for kernel in kernels:
    svc_1 = svm.SVC(kernel=kernel).fit(X_train, Y_train)
    Y_pred = svc_1.predict(X_test)
    print('Accuracy for the {0} kernel is: {1}'.format(kernel,metrics.accuracy_score(Y_test, Y_pred)))
    print('Precision for the {0} kernel is: {1}'.format(kernel,metrics.precision_score(Y_test, Y_pred)))

Accuracy for the linear kernel is: 0.5764666666666667
Precision for the linear kernel is: 0.5299273440202975
Accuracy for the rbf kernel is: 0.6063666666666667
Precision for the rbf kernel is: 0.5843057870921649
Accuracy for the poly kernel is: 0.5920666666666666
Precision for the poly kernel is: 0.5683802133850631


### Choosing Gamma for the rbf Kernel

In [11]:
gammas = ['scale', 'auto', 0.1, 1, 10, 100]

In [12]:
for gamma in gammas:
    svc_1 = svm.SVC(kernel='rbf', gamma=gamma).fit(X_train, Y_train)
    Y_pred = svc_1.predict(X_test)
    print('Accuracy for the {0} gamma is: {1}'.format(gamma,metrics.accuracy_score(Y_test, Y_pred)))
    print('Precision for the {0} gamma is: {1}'.format(gamma,metrics.precision_score(Y_test, Y_pred)))

Accuracy for the scale gamma is: 0.6063666666666667
Precision for the scale gamma is: 0.5843057870921649
Accuracy for the auto gamma is: 0.5848666666666666
Precision for the auto gamma is: 0.5644541046647717
Accuracy for the 0.1 gamma is: 0.5830666666666666
Precision for the 0.1 gamma is: 0.5615028306742151
Accuracy for the 1 gamma is: 0.5995666666666667
Precision for the 1 gamma is: 0.5820915740991601
Accuracy for the 10 gamma is: 0.6221
Precision for the 10 gamma is: 0.6022973558734287
Accuracy for the 100 gamma is: 0.6179666666666667
Precision for the 100 gamma is: 0.581081081081081


### Choosing C for the rbf Kernel

In [13]:
cs = [0.1, 1, 10, 100, 1000]

In [15]:
for c in cs:
    svc_1 = svm.SVC(kernel='rbf').fit(X_train, Y_train)
    Y_pred = svc_1.predict(X_test)
    print('Accuracy for the {0} c is: {1}'.format(c,metrics.accuracy_score(Y_test, Y_pred)))
    print('Precision for the {0} c is: {1}'.format(c,metrics.precision_score(Y_test, Y_pred)))

Accuracy for the 0.1 c is: 0.6063666666666667
Precision for the 0.1 c is: 0.5843057870921649
Accuracy for the 1 c is: 0.6063666666666667
Precision for the 1 c is: 0.5843057870921649
Accuracy for the 10 c is: 0.6063666666666667
Precision for the 10 c is: 0.5843057870921649
Accuracy for the 100 c is: 0.6063666666666667
Precision for the 100 c is: 0.5843057870921649
Accuracy for the 1000 c is: 0.6063666666666667
Precision for the 1000 c is: 0.5843057870921649


## The chosen parameters for SVC

In [16]:
kernel = 'rbf'
gamma = 10
c = 1.0 

In [18]:
svc = svm.SVC(kernel='rbf', gamma=10, C=1.0).fit(X_train, Y_train)
Y_pred = svc.predict(X_test)
print('Accuracy = {0}'.format(metrics.accuracy_score(Y_test, Y_pred)))
print('Precision = {0}'.format(metrics.precision_score(Y_test, Y_pred)))

Accuracy = 0.6221
Precision = 0.6022973558734287
