In [None]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split, GridSearchCV, StratifiedKFold
from sklearn.preprocessing import StandardScaler
from sklearn.decomposition import FastICA
from sklearn.svm import SVC
from sklearn.metrics import confusion_matrix, accuracy_score

# Read the wavelet-metrics table from disk
csv_file_path = 'wavelet_metrics.csv'  # Location of the input CSV file
data = pd.read_csv(filepath_or_buffer=csv_file_path)

# Feature matrix: every column except the 'Data' column and the target
X = data.drop(columns=['Data', 'label'])
# Target vector: the class label (0 or 1)
y = data['label']

# Separate the rows by class so that each class is split 50/50 on its own,
# which keeps the class proportions identical in the train and test sets.
class_0 = data[data['label'] == 0]
class_1 = data[data['label'] == 1]

# Split data for training and testing (half of each class on either side)
train_0, test_0 = train_test_split(class_0, test_size=0.5, random_state=0)
train_1, test_1 = train_test_split(class_1, test_size=0.5, random_state=0)

# Concatenate the training and testing data for both classes
train_data = pd.concat([train_0, train_1])
test_data = pd.concat([test_0, test_1])

# Shuffle the data so the two classes are interleaved.
# The shuffle is seeded: row order determines which rows land in which
# cross-validation fold, so an unseeded shuffle can change the selected
# hyperparameters from one run to the next.
train_data = train_data.sample(frac=1, random_state=0).reset_index(drop=True)
test_data = test_data.sample(frac=1, random_state=0).reset_index(drop=True)

# Extracting features and labels from the training and testing sets
X_train = train_data.drop(['Data', 'label'], axis=1)
y_train = train_data['label']
X_test = test_data.drop(['Data', 'label'], axis=1)
y_test = test_data['label']

# Standardizing the features: the scaler is fitted on the training split only
# and then applied unchanged to the test split.
# Note: X_train / X_test are NumPy arrays (not DataFrames) from here on.
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

# Accumulator for one summary record per evaluated n_components value
results = list()

# Candidate SVM hyperparameters handed to GridSearchCV
# (every C value is combined with every gamma value)
param_grid = dict(
    C=[0.01, 0.1, 1, 2, 3, 5, 10, 20, 30, 40, 50, 100, 200],
    gamma=[0.0001, 0.001, 0.01, 0.02, 0.05, 0.08, 0.1, 0.2, 0.3, 0.5, 1],
)

# StratifiedKFold keeps the class ratio in every fold; shuffling with a fixed
# seed makes the fold assignment repeatable.
cv = StratifiedKFold(n_splits=10, shuffle=True, random_state=42)

# Sweep the number of ICA components; for each setting tune an RBF-kernel SVM
# by grid search and evaluate the tuned model on the held-out test split.
for n in range(2, 17):  # Components from 2 to 16
    print(f"\nEvaluating ICA with n_components={n}")

    # Fit ICA on the training features only, then project both splits.
    # NOTE(review): ICA (like the scaler before it) is fitted on the whole
    # training set *before* GridSearchCV splits it into folds, so each
    # validation fold has influenced the transform. Wrapping scaler + ICA + SVC
    # in a Pipeline would remove that leakage from the CV scores.
    ica = FastICA(n_components=n, random_state=0)
    X_train_ica = ica.fit_transform(X_train)
    X_test_ica = ica.transform(X_test)

    # SVM with RBF kernel
    svm = SVC(kernel='rbf')

    # GridSearchCV to find the optimal hyperparameters
    grid_search = GridSearchCV(svm, param_grid, cv=cv, scoring='accuracy', verbose=1, n_jobs=-1)
    grid_search.fit(X_train_ica, y_train)

    # With the default refit=True, GridSearchCV has already retrained the best
    # candidate on the full training set, so no extra .fit() call is needed.
    best_svm = grid_search.best_estimator_

    # Predict on the test set
    y_pred = best_svm.predict(X_test_ica)

    # Compute the confusion matrix once. Passing labels=[0, 1] pins the layout
    # to [[tn, fp], [fn, tp]] so the 4-way unpack below cannot fail.
    conf_matrix = confusion_matrix(y_test, y_pred, labels=[0, 1])
    tn, fp, fn, tp = conf_matrix.ravel()

    # Calculate evaluation metrics
    sensitivity = tp / (tp + fn)  # True Positive Rate
    specificity = tn / (tn + fp)  # True Negative Rate
    accuracy = accuracy_score(y_test, y_pred)

    # Print and store the results
    print(f"Best Parameters for n_components={n}: {grid_search.best_params_}")
    print(f"Confusion Matrix:\n{conf_matrix}")
    print(f"Sensitivity: {sensitivity:.2f}, Specificity: {specificity:.2f}, Accuracy: {accuracy:.2f}")

    # Append results to list
    results.append({
        'n_components': n,
        'best_params': grid_search.best_params_,
        'sensitivity': sensitivity,
        'specificity': specificity,
        'accuracy': accuracy
    })

# Recap: one summary line per evaluated n_components setting
for entry in results:
    summary_line = (
        f"\nICA Components: {entry['n_components']}, "
        f"Accuracy: {entry['accuracy']:.2f}, "
        f"Sensitivity: {entry['sensitivity']:.2f}, "
        f"Specificity: {entry['specificity']:.2f}, "
        f"Best Params: {entry['best_params']}"
    )
    print(summary_line)


Fitting 10 folds for each of 112 candidates, totalling 1120 fits
Best Parameters: {'C': 30, 'gamma': 0.2}
Confusion Matrix:
[[400   0]
 [  0 400]]

Sensitivity: 1.00
Specificity: 1.00
Accuracy: 1.00


In [6]:
# Show the shuffled test split. A bare last expression lets the notebook render
# the DataFrame with its rich (HTML) display instead of a wrapped plain-text dump.
test_data

      Data      MAV_A5  Avg.Power_A5       SD_A5  MAV_Ratio_A5      MAV_D3  \
0    S0975   33.937457   1775.761246   41.206150      0.207859  152.622912   
1    Z0150   16.045263    417.767289   19.732992      2.136602    7.928320   
2    Z0713   33.031759   1635.871008   35.700885      2.468799   19.900849   
3    S0012  156.938298  40952.884466  198.482040      0.873709  203.521622   
4    Z0205   11.209478    192.299821   13.557591      1.402883    8.740105   
..     ...         ...           ...         ...           ...         ...   
795  Z0392   35.836916   1957.180340   44.048883      2.412510   15.235989   
796  S0962   47.935907   3347.448463   39.908755      1.127567   33.122354   
797  Z0503   23.384743    872.623085   29.529524      1.556031   15.085268   
798  Z0264   30.553623   1479.375151   36.458808      2.252864   15.477678   
799  Z0750   23.690524    856.399739   27.260689      1.798904   13.818283   

     Avg.Power_D3       SD_D3  MAV_Ratio_D3      MAV_D4   Avg.P