# SVM

## Imports and Data Loading

In [1]:
import sys
sys.path.append('/home/apoorva/Desktop/Work/olr')

In [2]:
import numpy as np
from sklearn.svm import SVC
from sklearn.metrics import confusion_matrix, classification_report
from sklearn.model_selection import train_test_split
import optuna
from sklearn.metrics import f1_score
from scripts.utils.load import load_pca_anomaly

In [2]:
pca_x, olr_labels = load_pca_anomaly()
pca_x.shape, olr_labels.shape

((5960, 5960), (40, 135))

In [4]:
pca_x_50 = pca_x[:, :50]
pca_x_50.shape

(5920, 50)

In [9]:
def svm_classifier(X_train, y_train, X_test, y_test):
    def objective(trial):
        """Define the objective function"""

        params = {
            'kernel': trial.suggest_categorical('kernel', ['sigmoid', 'rbf', 'poly']),
            'C': trial.suggest_float('C', 1e-2, 1e2),
            'degree': trial.suggest_int('degree', 2, 8)
        }

        # Fit the model
        optuna_model = SVC(**params)
        optuna_model.fit(X_train, y_train)

        # Make predictions
        y_pred = optuna_model.predict(X_test)

        # Evaluate predictions
        accuracy = f1_score(y_test, y_pred, average='macro')
        return accuracy

    study = optuna.create_study(direction='maximize')
    study.optimize(objective, n_trials=50, n_jobs=-1)
    print('Number of finished trials: {}'.format(len(study.trials)))
    print('Best trial:')
    trial = study.best_trial

    print('  Value: {}'.format(trial.value))
    print('  Params: ')

    for key, value in trial.params.items():
        print('    {}: {}'.format(key, value))

    params = trial.params
    classifier = SVC(**params)
    classifier.fit(X_train, y_train)
    y_pred = classifier.predict(X_train)
    print("Training\n")
    print(confusion_matrix(y_pred=y_pred, y_true=y_train))
    print(classification_report(y_pred=y_pred, y_true=y_train))

    print("Testing\n")
    y_pred = classifier.predict(X_test)
    print(confusion_matrix(y_pred=y_pred, y_true=y_test))
    print(classification_report(y_pred=y_pred, y_true=y_test))

In [6]:
def pentad_data(count):
    '''
    count is 0-indexed
    count = 0 corresponds to first leading pentad
    count = 1 corresponds to second leading pentad
    count = 2 corresponds to third leading pentad
    '''
    global olr_labels, pca_x
    assert count == 0 or count == 1 or count == 2
    pca_x_50 = pca_x[:, :50]
    pca_x_50 = np.array([pca_x_50[i*40+j:i*40+j+15, :] for i in range(40) for j in range(134 - (5*count))])
    labels = np.reshape(np.reshape(olr_labels, (40, 134))[:, (5*count):], (-1))
    X_train, X_test, y_train, y_test = train_test_split(pca_x_50, labels, random_state=1337, train_size=0.875, stratify=labels)
    X_train = np.reshape(X_train, (X_train.shape[0], -1))
    X_test = np.reshape(X_test, (X_test.shape[0], -1))
    return X_train, X_test, y_train, y_test

## First Pentad

In [7]:
X_train, X_test, y_train, y_test = pentad_data(0)

In [10]:
svm_classifier(X_train, y_train, X_test, y_test)

[I 2024-05-25 03:18:46,387] A new study created in memory with name: no-name-28f41c5b-a881-4fd0-a0ee-6ae5c176f2ea
[I 2024-05-25 03:18:58,571] Trial 9 finished with value: 0.34491755044202677 and parameters: {'kernel': 'sigmoid', 'C': 75.1692722918331, 'degree': 8}. Best is trial 9 with value: 0.34491755044202677.
[I 2024-05-25 03:18:59,079] Trial 1 finished with value: 0.3222574402902272 and parameters: {'kernel': 'sigmoid', 'C': 57.34220527663144, 'degree': 7}. Best is trial 9 with value: 0.34491755044202677.
[I 2024-05-25 03:18:59,312] Trial 4 finished with value: 0.3374156344762375 and parameters: {'kernel': 'sigmoid', 'C': 25.088646932059152, 'degree': 5}. Best is trial 9 with value: 0.34491755044202677.
[I 2024-05-25 03:18:59,955] Trial 11 finished with value: 0.3419163450100746 and parameters: {'kernel': 'sigmoid', 'C': 31.326479099077932, 'degree': 5}. Best is trial 9 with value: 0.34491755044202677.
[I 2024-05-25 03:19:00,721] Trial 3 finished with value: 0.32822827874409577 an

Number of finished trials: 50
Best trial:
  Value: 0.45750149924949507
  Params: 
    kernel: poly
    C: 71.98130321544383
    degree: 2
Training

[[ 203  381   30]
 [  69 3254   78]
 [  34  456  185]]
              precision    recall  f1-score   support

         0.0       0.66      0.33      0.44       614
         1.0       0.80      0.96      0.87      3401
         2.0       0.63      0.27      0.38       675

    accuracy                           0.78      4690
   macro avg       0.70      0.52      0.56      4690
weighted avg       0.75      0.78      0.74      4690

Testing

[[ 22  62   4]
 [ 34 427  25]
 [  7  70  19]]
              precision    recall  f1-score   support

         0.0       0.35      0.25      0.29        88
         1.0       0.76      0.88      0.82       486
         2.0       0.40      0.20      0.26        96

    accuracy                           0.70       670
   macro avg       0.50      0.44      0.46       670
weighted avg       0.66      0.70  

## Second Pentad

In [11]:
X_train, X_test, y_train, y_test = pentad_data(1)

In [12]:
svm_classifier(X_train, y_train, X_test, y_test)

[I 2024-05-25 03:20:10,762] A new study created in memory with name: no-name-dea47288-2802-4f3e-b2fc-c7cc814f5f73
[I 2024-05-25 03:20:21,099] Trial 9 finished with value: 0.34908144218489046 and parameters: {'kernel': 'sigmoid', 'C': 27.763689985136185, 'degree': 8}. Best is trial 9 with value: 0.34908144218489046.
[I 2024-05-25 03:20:21,478] Trial 3 finished with value: 0.32950412384917177 and parameters: {'kernel': 'sigmoid', 'C': 73.49992051231689, 'degree': 2}. Best is trial 9 with value: 0.34908144218489046.
[I 2024-05-25 03:20:21,526] Trial 5 finished with value: 0.3280427960437003 and parameters: {'kernel': 'sigmoid', 'C': 12.579574659841274, 'degree': 3}. Best is trial 9 with value: 0.34908144218489046.
[I 2024-05-25 03:20:23,071] Trial 2 finished with value: 0.3432025747312372 and parameters: {'kernel': 'sigmoid', 'C': 33.40828295668278, 'degree': 6}. Best is trial 9 with value: 0.34908144218489046.
[I 2024-05-25 03:20:24,681] Trial 6 finished with value: 0.4317032760003729 an

Number of finished trials: 50
Best trial:
  Value: 0.4426327857457702
  Params: 
    kernel: rbf
    C: 2.149011331759734
    degree: 7
Training

[[ 172  397   21]
 [  47 3147   56]
 [  30  474  171]]
              precision    recall  f1-score   support

         0.0       0.69      0.29      0.41       590
         1.0       0.78      0.97      0.87      3250
         2.0       0.69      0.25      0.37       675

    accuracy                           0.77      4515
   macro avg       0.72      0.50      0.55      4515
weighted avg       0.76      0.77      0.73      4515

Testing

[[ 18  59   7]
 [ 19 420  26]
 [  7  73  16]]
              precision    recall  f1-score   support

         0.0       0.41      0.21      0.28        84
         1.0       0.76      0.90      0.83       465
         2.0       0.33      0.17      0.22        96

    accuracy                           0.70       645
   macro avg       0.50      0.43      0.44       645
weighted avg       0.65      0.70    

## Third Pentad

In [13]:
X_train, X_test, y_train, y_test = pentad_data(2)

In [14]:
svm_classifier(X_train, y_train, X_test, y_test)

[I 2024-05-25 03:21:32,324] A new study created in memory with name: no-name-4622af0a-80fd-4423-b9e0-4684f63d1b24
[I 2024-05-25 03:21:43,180] Trial 11 finished with value: 0.3732837723150258 and parameters: {'kernel': 'sigmoid', 'C': 49.93355693446433, 'degree': 5}. Best is trial 11 with value: 0.3732837723150258.
[I 2024-05-25 03:21:43,425] Trial 1 finished with value: 0.37896497110349486 and parameters: {'kernel': 'sigmoid', 'C': 63.8635393062005, 'degree': 2}. Best is trial 1 with value: 0.37896497110349486.
[I 2024-05-25 03:21:43,432] Trial 4 finished with value: 0.3557095566666644 and parameters: {'kernel': 'sigmoid', 'C': 63.98344862983269, 'degree': 5}. Best is trial 1 with value: 0.37896497110349486.
[I 2024-05-25 03:21:43,855] Trial 0 finished with value: 0.3746755929284857 and parameters: {'kernel': 'sigmoid', 'C': 97.90849806333738, 'degree': 6}. Best is trial 1 with value: 0.37896497110349486.
[I 2024-05-25 03:21:43,928] Trial 8 finished with value: 0.33735827554067765 and 

Number of finished trials: 50
Best trial:
  Value: 0.4507074929316058
  Params: 
    kernel: poly
    C: 56.760460237880196
    degree: 3
Training

[[ 160  378   30]
 [  39 2991   67]
 [  25  448  202]]
              precision    recall  f1-score   support

         0.0       0.71      0.28      0.40       568
         1.0       0.78      0.97      0.87      3097
         2.0       0.68      0.30      0.41       675

    accuracy                           0.77      4340
   macro avg       0.72      0.52      0.56      4340
weighted avg       0.76      0.77      0.73      4340

Testing

[[ 21  57   3]
 [ 20 395  28]
 [  7  74  15]]
              precision    recall  f1-score   support

         0.0       0.44      0.26      0.33        81
         1.0       0.75      0.89      0.82       443
         2.0       0.33      0.16      0.21        96

    accuracy                           0.70       620
   macro avg       0.50      0.44      0.45       620
weighted avg       0.64      0.70  