In [None]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here are several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
# Walk the Kaggle input directory and print the full path of every file found.
for folder, _subdirs, names in os.walk('/kaggle/input'):
    full_paths = (os.path.join(folder, name) for name in names)
    for path in full_paths:
        print(path)

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

In [None]:
import numpy as np
from sklearn.svm import SVC
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report, accuracy_score, confusion_matrix

In [None]:
# Load the encoded features. Each row is one sample: the last column holds the
# class label and every other column is a feature value.
encoded_features_path = '/kaggle/input/ds2-extracted-features/encoded_features_7.npy'
features_array = np.load(encoded_features_path)

data = features_array
# The labels below are compared against string symbols, so the array is
# presumably stored with a text dtype (TODO confirm). Cast the feature columns
# to float explicitly so the classifier receives numeric input rather than
# strings; this is a no-op copy if the array is already numeric.
X = data[:, :-1].astype(np.float64)
y_ = data[:, -1]  # Last column is the label

# Class symbol -> integer class id used by the classifier.
label_to_index = {'N': 0, 'L': 1, 'R': 2, 'V': 3, 'A': 4}

# Fail loudly on labels outside the mapping. Silently skipping them would
# leave `y` shorter than `X` and misalign samples with their labels.
unknown_labels = sorted({str(label) for label in y_} - set(label_to_index))
if unknown_labels:
    raise ValueError(
        "Unexpected class labels in the last column: {}; expected one of {}".format(
            unknown_labels, sorted(label_to_index)
        )
    )

numeric_labels = [label_to_index[str(label)] for label in y_]

y = numeric_labels

In [None]:
# SVM configurations to compare, keyed by display name. Each value is the set
# of keyword arguments describing that kernel.
kernels = dict([
    ("Linear", dict(kernel="linear")),
    ("Quadratic", dict(kernel="poly", degree=2)),
    ("Fine Gaussian", dict(kernel="rbf", gamma=0.1)),
    ("Medium Gaussian", dict(kernel="rbf", gamma="scale")),
    ("Coarse Gaussian", dict(kernel="rbf", gamma=0.01)),
])

# One row of metrics per kernel is collected here.
results = list()

# Helper function to calculate sensitivity and specificity
def calculate_sensitivity_specificity(cm):
    """Return the macro-averaged sensitivity and specificity of a confusion matrix.

    Each class is treated one-vs-rest: row ``i`` of ``cm`` holds the samples
    whose true class is ``i`` and column ``i`` the samples predicted as ``i``.

    Parameters
    ----------
    cm : array-like of shape (n_classes, n_classes)
        Square confusion matrix (an ndarray or nested lists).

    Returns
    -------
    (avg_sensitivity, avg_specificity)
        Unweighted means over classes. A class whose denominator is zero
        (no positives for sensitivity, no negatives for specificity)
        contributes 0 to the corresponding mean.

    Raises
    ------
    ValueError
        If ``cm`` is not a square 2-D matrix.
    """
    cm = np.asarray(cm)
    if cm.ndim != 2 or cm.shape[0] != cm.shape[1]:
        raise ValueError(
            "cm must be a square 2-D confusion matrix, got shape {}".format(cm.shape)
        )

    tp = np.diag(cm)
    fn = cm.sum(axis=1) - tp  # rest of the row: true class i, predicted otherwise
    fp = cm.sum(axis=0) - tp  # rest of the column: predicted i, true class differs
    tn = cm.sum() - (tp + fn + fp)

    positives = tp + fn
    negatives = tn + fp

    # `where` skips zero denominators; those entries keep the 0 preset in `out`.
    sensitivities = np.divide(
        tp, positives, out=np.zeros(tp.shape, dtype=float), where=positives > 0
    )
    specificities = np.divide(
        tn, negatives, out=np.zeros(tp.shape, dtype=float), where=negatives > 0
    )

    avg_sensitivity = np.mean(sensitivities)
    avg_specificity = np.mean(specificities)
    return avg_sensitivity, avg_specificity

In [None]:
# Hold out 20% of the samples for testing; the fixed random_state makes the
# split repeatable across runs.
split = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)
X_train, X_test, y_train, y_test = split

In [None]:
# Train and evaluate one SVM per kernel configuration.
# Start from an empty list so that re-running this cell replaces the previous
# results instead of appending duplicate rows to them.
results = []
for kernel_name, params in kernels.items():
    # Initialize SVM model with specified kernel and parameters
    svm_model = SVC(**params, C=1.0, decision_function_shape='ovo')
    # Train the model
    svm_model.fit(X_train, y_train)
    # Predict on the test set
    y_pred = svm_model.predict(X_test)
    # Calculate accuracy
    accuracy = accuracy_score(y_test, y_pred)
    # Calculate confusion matrix
    cm = confusion_matrix(y_test, y_pred)
    # Calculate macro-averaged sensitivity and specificity
    sensitivity, specificity = calculate_sensitivity_specificity(cm)
    # One row per kernel: [name, accuracy, sensitivity, specificity]
    results.append([kernel_name, accuracy, sensitivity, specificity])

In [None]:
# Print results in a table format.
# Header and rows use the same column widths (15, 10, 15) so every value lines
# up under its heading; the hand-typed spacing used previously put the
# specificity value one character to the left of its header.
header_format = "{:<15} {:<10} {:<15} {:<15}"
row_format = "{:<15} {:<10.4f} {:<15.4f} {:.4f}"

print("Kernel Comparison Results for DS2:")
print(header_format.format("Kernel", "Accuracy", "Sensitivity", "Specificity"))
for kernel_name, accuracy, sensitivity, specificity in results:
    print(row_format.format(kernel_name, accuracy, sensitivity, specificity))