In [2]:
import pandas as pd
import numpy as np
from scipy.integrate import simpson as simps
from scipy.signal import find_peaks
from sklearn.svm import SVC
from sklearn.model_selection import train_test_split, cross_val_score, GridSearchCV
from sklearn.feature_selection import SelectKBest, f_classif
from sklearn.decomposition import PCA
from sklearn.linear_model import SGDClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.neural_network import MLPClassifier
from sklearn.metrics import accuracy_score
import warnings

In [7]:
import pandas as pd
import numpy as np

# Load CSV files (update the paths if needed)
w1 = pd.read_csv('data/w1.csv')
w2 = pd.read_csv('data/w2.csv')
w3 = pd.read_csv('data/w3.csv')
w4 = pd.read_csv('data/w4.csv')

# Combine the four CSV files into one DataFrame
df = pd.concat([w1, w2, w3, w4], ignore_index=True)

# Shuffle the data to ensure randomness
df = df.sample(frac=1, random_state=42).reset_index(drop=True)

# Save the combined and shuffled dataset (as 'combined_data.csv' and 'all_data.csv')
df.to_csv("combined_data.csv", index=False)
df.to_csv("all_data.csv", index=False)

print("Combined and shuffled dataset saved as 'combined_data.csv' and 'all_data.csv'")


Combined and shuffled dataset saved as 'combined_data.csv' and 'all_data.csv'


In [9]:
from sklearn.model_selection import train_test_split, cross_val_score, GridSearchCV
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score
from sklearn.feature_selection import SelectKBest, f_classif
from sklearn.decomposition import PCA
from sklearn.linear_model import SGDClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.neural_network import MLPClassifier

# Last column is the class label, separate features (X) and target (y)
X = df.iloc[:, :-1]
y = df.iloc[:, -1]

def evaluate_model(clf, X, y):
    # Split the data into 70% training and 30% testing
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)
    clf.fit(X_train, y_train)
    test_acc = accuracy_score(y_test, clf.predict(X_test))
    cross_val_acc = cross_val_score(clf, X, y, cv=10).mean()
    return round(test_acc * 100, 2), round(cross_val_acc * 100, 2)

# Dictionary to store SVM results
svm_results = {}

# 1. Base SVM with original features
svm_results["Original features"] = evaluate_model(SVC(), X, y)

# 2. SVM with Hyperparameter Tuning using GridSearchCV
param_grid = {
    'C': [0.1, 1, 10],
    'gamma': ['scale', 0.1, 1],
    'kernel': ['rbf']
}
grid_svm = GridSearchCV(SVC(), param_grid, cv=3)
grid_svm.fit(X, y)
best_svm = grid_svm.best_estimator_
svm_results["With hyper-parameter tuning"] = evaluate_model(best_svm, X, y)

# 3. SVM with Feature Selection and Hyperparameter Tuning
# (Selecting the 100 best features)
X_selected = SelectKBest(score_func=f_classif, k=100).fit_transform(X, y)
svm_results["With feature selection and hyperparameter tuning"] = evaluate_model(best_svm, X_selected, y)

# 4. SVM with PCA and Hyperparameter Tuning
# (Reducing to 10 principal components)
X_pca = PCA(n_components=10).fit_transform(X)
svm_results["With PCA and hyperparameter tuning"] = evaluate_model(best_svm, X_pca, y)

# Train and evaluate other classifiers
other_results = {
    "SVM": svm_results["With hyper-parameter tuning"],
    "SGD": evaluate_model(SGDClassifier(), X, y),
    "RandomForest": evaluate_model(RandomForestClassifier(), X, y),
    "MLP": evaluate_model(MLPClassifier(max_iter=500), X, y)
}

# Create summary tables
svm_summary = pd.DataFrame(svm_results, index=["Train-test split", "Cross-validation"]).T
other_summary = pd.DataFrame(other_results, index=["Train-test split", "Cross-validation"]).T

print("\n=== SVM Summary Table ===")
print(svm_summary)

print("\n=== Other Classifiers Summary Table ===")
print(other_summary)



=== SVM Summary Table ===
                                                  Train-test split  \
Original features                                            89.17   
With hyper-parameter tuning                                  89.48   
With feature selection and hyperparameter tuning             89.17   
With PCA and hyperparameter tuning                           90.03   

                                                  Cross-validation  
Original features                                            89.18  
With hyper-parameter tuning                                  89.59  
With feature selection and hyperparameter tuning             89.01  
With PCA and hyperparameter tuning                           89.98  

=== Other Classifiers Summary Table ===
              Train-test split  Cross-validation
SVM                      89.48             89.59
SGD                      89.02             82.68
RandomForest             92.63             92.67
MLP                      89.08          

In [10]:
# If you prefer to display the tables as DataFrames in Jupyter Notebook:
display(svm_summary)
display(other_summary)


Unnamed: 0,Train-test split,Cross-validation
Original features,89.17,89.18
With hyper-parameter tuning,89.48,89.59
With feature selection and hyperparameter tuning,89.17,89.01
With PCA and hyperparameter tuning,90.03,89.98


Unnamed: 0,Train-test split,Cross-validation
SVM,89.48,89.59
SGD,89.02,82.68
RandomForest,92.63,92.67
MLP,89.08,85.53
