# SVM

## Imports and Data Loading

In [1]:
import sys
sys.path.append('/home/apoorva/Desktop/Work/olr')

In [1]:
import numpy as np
from sklearn.svm import SVC
from sklearn.metrics import confusion_matrix, classification_report
from sklearn.model_selection import train_test_split

from scripts.utils.load import load_pca_anomaly

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
pca_x, olr_labels = load_pca_anomaly()
pca_x.shape, olr_labels.shape

((5960, 5960), (40, 135))

In [3]:
pca_x_50 = pca_x[:, :50]
pca_x_50.shape

(5960, 50)

In [19]:
def svm_classifier(X_train, y_train, X_test, y_test, kernel='sigmoid'):
    classifier = SVC(random_state=1337, kernel=kernel)
    classifier.fit(X_train, y_train)

    y_pred = classifier.predict(X_train)
    print("Training\n")
    print(confusion_matrix(y_pred=y_pred, y_true=y_train))
    print(classification_report(y_pred=y_pred, y_true=y_train))

    y_pred = classifier.predict(X_test)
    print("Testing\n")
    print(confusion_matrix(y_pred=y_pred, y_true=y_test))
    print(classification_report(y_pred=y_pred, y_true=y_test))

In [5]:
def pentad_data(count):
    '''
    count is 0-indexed
    count = 0 corresponds to first leading pentad
    count = 1 corresponds to second leading pentad
    count = 2 corresponds to third leading pentad
    '''
    global olr_labels, pca_x
    assert count == 0 or count == 1 or count == 2
    pca_x_50 = pca_x[:, :50]
    pca_x_50 = np.array([pca_x_50[i*40+j:i*40+j+15, :] for j in range(134 - (5*count)) for i in range(40)])
    labels = np.reshape(np.reshape(olr_labels, (40, 135))[:, 1+(5*count):], (-1))
    X_train, X_test, y_train, y_test = train_test_split(pca_x_50, labels, random_state=1337, train_size=0.875, stratify=labels)
    X_train = np.reshape(X_train, (X_train.shape[0], -1))
    X_test = np.reshape(X_test, (X_test.shape[0], -1))
    return X_train, X_test, y_train, y_test

## First Pentad

In [6]:
X_train, X_test, y_train, y_test = pentad_data(0)

In [7]:
svm_classifier(X_train, y_train, X_test, y_test, kernel='sigmoid')

Training

[[   4  592   18]
 [  44 3267   90]
 [   9  657    9]]
              precision    recall  f1-score   support

         0.0       0.07      0.01      0.01       614
         1.0       0.72      0.96      0.83      3401
         2.0       0.08      0.01      0.02       675

    accuracy                           0.70      4690
   macro avg       0.29      0.33      0.29      4690
weighted avg       0.54      0.70      0.60      4690

Testing

[[  1  81   6]
 [  5 472   9]
 [  1  94   1]]
              precision    recall  f1-score   support

         0.0       0.14      0.01      0.02        88
         1.0       0.73      0.97      0.83       486
         2.0       0.06      0.01      0.02        96

    accuracy                           0.71       670
   macro avg       0.31      0.33      0.29       670
weighted avg       0.56      0.71      0.61       670



In [8]:
svm_classifier(X_train, y_train, X_test, y_test, kernel='rbf')

Training

[[  24  586    4]
 [   3 3398    0]
 [   5  654   16]]
              precision    recall  f1-score   support

         0.0       0.75      0.04      0.07       614
         1.0       0.73      1.00      0.85      3401
         2.0       0.80      0.02      0.05       675

    accuracy                           0.73      4690
   macro avg       0.76      0.35      0.32      4690
weighted avg       0.74      0.73      0.63      4690

Testing

[[  1  86   1]
 [  2 483   1]
 [  1  95   0]]
              precision    recall  f1-score   support

         0.0       0.25      0.01      0.02        88
         1.0       0.73      0.99      0.84       486
         2.0       0.00      0.00      0.00        96

    accuracy                           0.72       670
   macro avg       0.33      0.34      0.29       670
weighted avg       0.56      0.72      0.61       670



In [9]:
svm_classifier(X_train, y_train, X_test, y_test, kernel='poly')

Training

[[  69  537    8]
 [  11 3386    4]
 [  10  610   55]]
              precision    recall  f1-score   support

         0.0       0.77      0.11      0.20       614
         1.0       0.75      1.00      0.85      3401
         2.0       0.82      0.08      0.15       675

    accuracy                           0.75      4690
   macro avg       0.78      0.40      0.40      4690
weighted avg       0.76      0.75      0.67      4690

Testing

[[  4  82   2]
 [ 14 463   9]
 [  5  90   1]]
              precision    recall  f1-score   support

         0.0       0.17      0.05      0.07        88
         1.0       0.73      0.95      0.83       486
         2.0       0.08      0.01      0.02        96

    accuracy                           0.70       670
   macro avg       0.33      0.34      0.31       670
weighted avg       0.56      0.70      0.61       670



## Second Pentad

In [10]:
X_train, X_test, y_train, y_test = pentad_data(1)

In [11]:
svm_classifier(X_train, y_train, X_test, y_test, kernel='sigmoid')

Training

[[  16  566    8]
 [ 119 3063   68]
 [  15  654    6]]
              precision    recall  f1-score   support

         0.0       0.11      0.03      0.04       590
         1.0       0.72      0.94      0.81      3250
         2.0       0.07      0.01      0.02       675

    accuracy                           0.68      4515
   macro avg       0.30      0.33      0.29      4515
weighted avg       0.54      0.68      0.59      4515

Testing

[[  5  74   5]
 [ 21 435   9]
 [  6  87   3]]
              precision    recall  f1-score   support

         0.0       0.16      0.06      0.09        84
         1.0       0.73      0.94      0.82       465
         2.0       0.18      0.03      0.05        96

    accuracy                           0.69       645
   macro avg       0.35      0.34      0.32       645
weighted avg       0.57      0.69      0.61       645



In [12]:
svm_classifier(X_train, y_train, X_test, y_test, kernel='rbf')

Training

[[  22  568    0]
 [   4 3246    0]
 [   3  644   28]]
              precision    recall  f1-score   support

         0.0       0.76      0.04      0.07       590
         1.0       0.73      1.00      0.84      3250
         2.0       1.00      0.04      0.08       675

    accuracy                           0.73      4515
   macro avg       0.83      0.36      0.33      4515
weighted avg       0.77      0.73      0.63      4515

Testing

[[  1  83   0]
 [  2 461   2]
 [  0  95   1]]
              precision    recall  f1-score   support

         0.0       0.33      0.01      0.02        84
         1.0       0.72      0.99      0.84       465
         2.0       0.33      0.01      0.02        96

    accuracy                           0.72       645
   macro avg       0.46      0.34      0.29       645
weighted avg       0.61      0.72      0.61       645



In [13]:
svm_classifier(X_train, y_train, X_test, y_test, kernel='poly')

Training

[[  59  527    4]
 [  15 3229    6]
 [   6  622   47]]
              precision    recall  f1-score   support

         0.0       0.74      0.10      0.18       590
         1.0       0.74      0.99      0.85      3250
         2.0       0.82      0.07      0.13       675

    accuracy                           0.74      4515
   macro avg       0.77      0.39      0.38      4515
weighted avg       0.75      0.74      0.65      4515

Testing

[[  1  82   1]
 [  9 450   6]
 [  1  94   1]]
              precision    recall  f1-score   support

         0.0       0.09      0.01      0.02        84
         1.0       0.72      0.97      0.82       465
         2.0       0.12      0.01      0.02        96

    accuracy                           0.70       645
   macro avg       0.31      0.33      0.29       645
weighted avg       0.55      0.70      0.60       645



## Third Pentad

In [14]:
X_train, X_test, y_train, y_test = pentad_data(1)

In [15]:
svm_classifier(X_train, y_train, X_test, y_test, kernel='sigmoid')

Training

[[  16  566    8]
 [ 119 3063   68]
 [  15  654    6]]
              precision    recall  f1-score   support

         0.0       0.11      0.03      0.04       590
         1.0       0.72      0.94      0.81      3250
         2.0       0.07      0.01      0.02       675

    accuracy                           0.68      4515
   macro avg       0.30      0.33      0.29      4515
weighted avg       0.54      0.68      0.59      4515

Testing

[[  5  74   5]
 [ 21 435   9]
 [  6  87   3]]
              precision    recall  f1-score   support

         0.0       0.16      0.06      0.09        84
         1.0       0.73      0.94      0.82       465
         2.0       0.18      0.03      0.05        96

    accuracy                           0.69       645
   macro avg       0.35      0.34      0.32       645
weighted avg       0.57      0.69      0.61       645



In [16]:
svm_classifier(X_train, y_train, X_test, y_test, kernel='rbf')

Training

[[  22  568    0]
 [   4 3246    0]
 [   3  644   28]]
              precision    recall  f1-score   support

         0.0       0.76      0.04      0.07       590
         1.0       0.73      1.00      0.84      3250
         2.0       1.00      0.04      0.08       675

    accuracy                           0.73      4515
   macro avg       0.83      0.36      0.33      4515
weighted avg       0.77      0.73      0.63      4515

Testing

[[  1  83   0]
 [  2 461   2]
 [  0  95   1]]
              precision    recall  f1-score   support

         0.0       0.33      0.01      0.02        84
         1.0       0.72      0.99      0.84       465
         2.0       0.33      0.01      0.02        96

    accuracy                           0.72       645
   macro avg       0.46      0.34      0.29       645
weighted avg       0.61      0.72      0.61       645



In [17]:
svm_classifier(X_train, y_train, X_test, y_test, kernel='poly')

Training

[[  59  527    4]
 [  15 3229    6]
 [   6  622   47]]
              precision    recall  f1-score   support

         0.0       0.74      0.10      0.18       590
         1.0       0.74      0.99      0.85      3250
         2.0       0.82      0.07      0.13       675

    accuracy                           0.74      4515
   macro avg       0.77      0.39      0.38      4515
weighted avg       0.75      0.74      0.65      4515

Testing

[[  1  82   1]
 [  9 450   6]
 [  1  94   1]]
              precision    recall  f1-score   support

         0.0       0.09      0.01      0.02        84
         1.0       0.72      0.97      0.82       465
         2.0       0.12      0.01      0.02        96

    accuracy                           0.70       645
   macro avg       0.31      0.33      0.29       645
weighted avg       0.55      0.70      0.60       645

