# Importing packages

In [None]:
# !pip install mne

In [None]:
# !pip install pyxdf

In [None]:
import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)
import mne
import pyxdf
import glob
import os
import matplotlib.pyplot as plt
from scipy.io import loadmat
import scipy
import sklearn
# ------------------------------------------------------------------------------------------
from sklearn.preprocessing import LabelEncoder,StandardScaler
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis as lda
from sklearn.model_selection import ShuffleSplit, cross_val_score, train_test_split, GridSearchCV, StratifiedShuffleSplit
from sklearn.pipeline import Pipeline, make_pipeline
from sklearn.base import BaseEstimator, TransformerMixin
from sklearn.linear_model import LogisticRegression
from sklearn.neighbors import KNeighborsClassifier
from sklearn.svm import LinearSVC, SVC

# ------------------------------------------------------------------------------------------
from mne.decoding import CSP
from mne import Epochs, pick_types
from mne.channels import make_standard_montage
from mne.datasets import eegbci
from mne.io import concatenate_raws, read_raw_edf

In [None]:
from sklearn.ensemble import RandomForestClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.neural_network import MLPClassifier

In [None]:
from FBCSP_V5 import FBCSP_V5 as FBCSP

In [None]:
import warnings
warnings.filterwarnings('ignore') # to ignore warnings

In [None]:
verbose = False                    # global variable to suppress output display of MNE functions
mne.set_log_level(verbose=verbose) # to suppress large info outputs

# Data Loading

In [None]:
# Put subject name here for saving path 
sub_name = ''

In [None]:
# Directory that contains the recorded *eeg.xdf files -- set this before running.
data_path = ""
folder_path = data_path
# os.path.join keeps an empty data_path relative to the working directory;
# plain string concatenation turned "" into the pattern "/*eeg.xdf", i.e. a
# search of the filesystem root.
files = glob.glob(os.path.join(folder_path, '*eeg.xdf'))
len(files)  # 0 means that no recording matched the pattern

In [None]:
# files.sort()

In [None]:
files

In [None]:
# Keep the "Right Grasp" / "Right Release" recordings and drop their
# "B_"-prefixed counterparts.
wanted_tasks = ('Right Grasp', 'Right Release')
excluded_tasks = ('B_Right Grasp', 'B_Right Release')
filtered_paths = [
    path for path in files
    if any(task in path for task in wanted_tasks)
    and not any(task in path for task in excluded_tasks)
]

# To keep the baseline recordings as well, add 'Baseline' to wanted_tasks and
# 'B_Baseline' to excluded_tasks.

# Report how many paths survived the filter, then display them
print(f'Number of filtered paths: {len(filtered_paths)}')
filtered_paths

In [None]:
all_files = filtered_paths
len(all_files)

# from XDF to MNE

In [None]:
# Load the first recording; this cell reads the channel count and the sampling rate from it
streams, header = pyxdf.load_xdf(all_files[0])
# NOTE(review): assumes streams[0] is the EEG stream -- confirm for files that hold several streams
# time_series is transposed, presumably to (channels, samples) -- TODO confirm
data = streams[0]["time_series"].T
# check that it is 9 channels
# assert data.shape[0] == 9 # 9 raw EEG channels

# get the channel count
ch_count = int(streams[0]["info"]["channel_count"][0])
# (disabled) extract channel names
# ch_names = []
# for i in range(ch_count):
#   ch_names.append(streams[0]["info"]["desc"][0]["channels"][0]["channel"][i]["label"][0])
# extract the nominal sampling rate
fs = float(streams[0]["info"]["nominal_srate"][0])
# (disabled) create info
# info = mne.create_info(ch_names, fs, "eeg")

In [None]:
# Channel labels from the stream description; of each label only the text
# before the first newline is kept.
ch_names = [
    streams[0]["info"]["desc"][0]["channels"][0]["channel"][idx]["label"][0].split("\n")[0]
    for idx in range(ch_count)
]

In [None]:
ch_names

In [None]:
# info = mne.create_info(crack_channel_names, fs, "eeg")
info = mne.create_info(ch_names, fs, "eeg")

In [None]:
def extract_tasks_names(files, n_segments_per_file=2):
    """Return the task label of every recording, repeated once per cropped segment.

    The label is read from the file name: the third underscore-separated
    field counted from the end, of which only the text after the last hyphen
    is kept (``..._task-Right Grasp_run-001_eeg.xdf`` -> ``"Right Grasp"``).

    Parameters
    ----------
    files : iterable of str
        Recording paths. Forward and backward slashes are both accepted as
        directory separators.
    n_segments_per_file : int, default 2
        How many times each label is emitted in a row. The default matches the
        two segments that are cropped from every recording.

    Returns
    -------
    list of str
        ``n_segments_per_file`` consecutive copies of the label for each path,
        in the order of ``files``.

    Raises
    ------
    IndexError
        If a file name has fewer than three underscore-separated fields.
    """
    # ntpath splits on "/" and "\" alike, so Windows-style paths returned by
    # glob are reduced to the bare file name too (split('/') left the
    # directory part attached to the name).
    import ntpath

    task_names = []
    for path in files:
        filename = ntpath.basename(path)
        # e.g. "task-Right Grasp" -> "Right Grasp"
        task_name = filename.split('_')[-3].split('-')[-1]
        task_names.extend([task_name] * n_segments_per_file)
    return task_names

In [None]:
task_names = extract_tasks_names(all_files)
print(len(task_names))
task_names

In [None]:
# # List to store all task names
# all_task_names = []
# all_task_names.extend(task_names * 2)  # Duplicate the task names
# print(all_task_names)
# print(len(all_task_names))
# all_task_names

In [None]:
set(task_names)

In [None]:
# event_id = {'Left_Grasp': 1, 'Right_Grasp': 2, 'Right_Release': 3, 'Baseline': 4}
event_id = {'Right Grasp': 1, 'Right Release': 2, 'Baseline': 3}
# Convert the list of task names to the list of numbers
task_numbers = [event_id[task] for task in task_names]
print(task_numbers)

In [None]:
print(len(task_names))
# task_names

In [None]:
# List to store shapes of all data arrays
shapes = []

for fname in all_files:
    streams, header = pyxdf.load_xdf(fname)
    data = streams[0]["time_series"].T
    shapes.append(data.shape)
    # print(data.shape)

# Calculate the minimum shape
min_rows = min(shape[0] for shape in shapes)
min_cols = min(shape[1] for shape in shapes)

print(f"The minimum shape is: ({min_rows}, {min_cols})")

In [None]:
# all_data=[]
# for fname in all_files:
#     streams, header = pyxdf.load_xdf(fname)
#     data = streams[0]["time_series"].T
#     print(data.shape)

In [None]:
# crop parameters: two time windows per recording; each start/end time is
# multiplied by the sampling rate fs to obtain a sample index
t_1_start = 1
t_1_end = 3

idx_1_start = int(fs*t_1_start)
idx_1_end = int(fs*t_1_end)

# second window
t_2_start = 3
t_2_end = 5
idx_2_start = int(fs*t_2_start)
idx_2_end = int(fs*t_2_end)

In [None]:
# Cut two fixed-length segments out of every recording (t_1_start..t_1_end and
# t_2_start..t_2_end seconds). The two segments of a file are appended back to
# back, which is the order in which extract_tasks_names() repeats the labels.
all_data = []
for fname in all_files:
    streams, header = pyxdf.load_xdf(fname)
    data = streams[0]["time_series"].T  # transposed so that samples run along axis 1

    # Slicing silently truncates a recording that is too short, which later
    # shows up as an obscure shape error in np.stack; fail here instead and
    # name the offending file.
    last_needed = max(idx_1_end, idx_2_end)
    if data.shape[1] <= last_needed:
        raise ValueError(
            f"{fname}: recording has {data.shape[1]} samples, "
            f"but the crop windows need at least {last_needed + 1}"
        )

    # "+ 1" makes each window include its end sample
    cropped_data_1 = data[:, idx_1_start:idx_1_end + 1]
    cropped_data_2 = data[:, idx_2_start:idx_2_end + 1]
    all_data.append(cropped_data_1)
    all_data.append(cropped_data_2)

In [None]:
len(all_data)
for data in all_data:
  print(data.shape)

In [None]:
# Combine into a 3D array
combined_array = np.stack(all_data, axis=0)
print(combined_array.shape)

In [None]:
eeg_data = combined_array

In [None]:
epochs = mne.EpochsArray(eeg_data, info, verbose=verbose, tmin=0)

In [None]:
# epochs.set_montage('standard_1020')
# High-pass only: lower edge 1 Hz, no upper edge (second argument is None)
epochs.filter(1., None)
# epochs.apply_baseline(baseline=(-.250, 0)) # linear baseline correction

# Attach the class labels: write the numeric task code of each epoch into the
# third column of the events array and register the name -> code mapping.
# task_numbers must be in the same order as the epochs in eeg_data.
epochs.event_id = event_id
epochs.events[:,2] = task_numbers

In [None]:
epochs.filter(7.0, 32.0)

In [None]:
epochs

## Apply FBCSP

In [None]:
verbose_clf = False # control output of FBCSP function
freqs_band = np.linspace(8, 32, 7) # filter bank choice
cv = 10

In [None]:
freqs_band

## FBCSP
The FBCSP class is initialized with the training set packed in a dictionary. The keys of the dictionary are the labels of the two classes, and each value is a NumPy array of shape "n. trials x n. channels x n. samples". The class must also receive the sampling frequency of the signal.

The original FBCSP implementation has built-in random splitting, so no manual splitting is done here.

In [None]:
data, labels = epochs.get_data(), epochs.events[:,-1]

In [None]:
print(data.shape)
print(labels.shape)
labels

In [None]:
data_dict = {'Right Grasp':  epochs['Right Grasp'].get_data(),
            'Right Release': epochs['Right Release'].get_data()}


In [None]:
epochs['Right Grasp'][0].get_data().shape

In [None]:
epochs.get_data()[0].shape

In [None]:
print(data_dict["Right Grasp"].shape)

In [None]:
data_dict["Right Grasp"][0].shape

In [None]:
fs = epochs.info['sfreq']
fs

In [None]:
event_id = {'Right Grasp': 1, 'Right Release': 2, 'Baseline': 3}


## FBCSP

In [None]:
fbcsp_clf = FBCSP(data_dict, fs, freqs_band=freqs_band)

In [None]:
# epochs['Right Grasp'][0].get_data()

In [None]:
data_matrix = fbcsp_clf.tmp_data_matrix
labels = fbcsp_clf.tmp_label
# labels_dict = fbcsp_clf.tmp_label_dict
# labels_dict = fbcsp_clf.n_features_for_classification

In [None]:
labels

In [None]:
X = data_matrix
y = labels

In [None]:
print(data_matrix.shape)

## Normal Train Test Split

In [None]:
X_train, X_test, y_train, y_test = train_test_split(X, y, shuffle=True, stratify=labels,test_size=0.4, random_state=42)

In [None]:
print(X_train.shape)
print(X_test.shape)

In [None]:
print(y_train.shape)
print(y_train)
print(y_test.shape)
print(y_test)

In [None]:
print(type(X_train))
print(type(y_train))

In [None]:
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis as LDA
from sklearn.metrics import accuracy_score

lda_classifier = LDA()
lda_classifier.fit(X_train, y_train)

In [None]:
# Step 7: Evaluate the Model
y_pred = lda_classifier.predict(X_test)
accuracy = accuracy_score(y_test, y_pred)
print(f'Accuracy: {accuracy * 100:.2f}%')

In [None]:
kappa_scorer = sklearn.metrics.make_scorer(sklearn.metrics.cohen_kappa_score)
accuracy_scorer = sklearn.metrics.make_scorer(sklearn.metrics.accuracy_score)

In [None]:
# Accuracy Score
train_score = accuracy_scorer(lda_classifier, X_train, y_train)
test_score = accuracy_scorer(lda_classifier, X_test, y_test)
print("Accuracy Score on Training set: ", train_score)
print("Accuracy Score on Test set: ", test_score)

In [None]:
# Kappa Score
train_score = kappa_scorer(lda_classifier, X_train, y_train)
test_score = kappa_scorer(lda_classifier, X_test, y_test)
print("Kappa Score on Training set: ", train_score)
print("Kappa Score on Test set: ", test_score)


### All Models

In [None]:
from sklearn.model_selection import train_test_split
from sklearn.svm import SVC, LinearSVC
from sklearn.ensemble import RandomForestClassifier, ExtraTreesClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis
from sklearn.naive_bayes import GaussianNB
# from xgboost import XGBClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.neural_network import MLPClassifier
from sklearn.metrics import accuracy_score

In [None]:
# Dictionary to hold models and their names
models = {
    'SVM': SVC(kernel='rbf', C=1, gamma='scale'),
    'Linear SVC': LinearSVC(max_iter=10000, random_state=42),
    'Random Forest': RandomForestClassifier(n_estimators=100, random_state=42),
    'k-NN': KNeighborsClassifier(n_neighbors=5),
    'Logistic Regression': LogisticRegression(solver='lbfgs', max_iter=1000),
    'LDA': LinearDiscriminantAnalysis(),
    'Naive Bayes': GaussianNB(),
    'Extra Trees': ExtraTreesClassifier(n_estimators=100, random_state=42),
    'Decision Tree': DecisionTreeClassifier(random_state=42),
    'MLP Classifier': MLPClassifier(hidden_layer_sizes=(100,), max_iter=300, random_state=42),
}

In [None]:
X_train, X_test, y_train, y_test = train_test_split(X, y, shuffle=True, stratify=labels,test_size=0.3, random_state=42)
print("Train Shape: ", X_train.shape)
print("Test Shape: ", X_test.shape)

In [None]:
# Dictionary to store the trained models and their accuracies
# trained_models = {}
# model_accuracies = {}

# # Train and evaluate each model
# for name, model in models.items():
#     model.fit(X_train, y_train)  # Train the model
#     y_pred = model.predict(X_test)  # Predict on test data
#     accuracy = accuracy_score(y_test, y_pred)  # Calculate accuracy
#     print(f'{name} Accuracy: {accuracy * 100:.2f}%')

#     # Save the trained model to the dictionary
#     trained_models[name] = model
#     # Save the accuracy to the dictionary
#     model_accuracies[name] = accuracy * 100


In [None]:
# # Find the model with the highest accuracy
# best_model_name = max(model_accuracies, key=model_accuracies.get)
# max_accuracy = model_accuracies[best_model_name]

# # Print the model name and its maximum accuracy
# print(f'Best Model: {best_model_name} with Accuracy: {max_accuracy:.2f}%')

## StratifiedKFold Cross Validation

In [None]:
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis as LDA
from sklearn.model_selection import cross_val_score, StratifiedKFold
from sklearn.metrics import accuracy_score
import numpy as np

In [None]:
X = data_matrix
y = labels

In [None]:
print(X.shape)

In [None]:
X_train, X_test, y_train, y_test = train_test_split(X, y, shuffle=True, stratify=labels,test_size=0.3, random_state=42)
print("Train Shape: ", X_train.shape)
print("Test Shape: ", X_test.shape)

In [None]:
# # Define the LDA classifier
# cv_lda_classifier = LDA()

# # Use StratifiedKFold to maintain class distribution in each fold
# kf = StratifiedKFold(n_splits=5, shuffle=True, random_state=43)

# # Perform cross-validation and compute accuracy for each fold
# cv_scores = cross_val_score(cv_lda_classifier, X, y, cv=kf, scoring='accuracy')
# # Print cross-validation scores
# print(f'Cross-validation scores for each fold: {cv_scores}')
# print(f'Average cross-validation accuracy: {cv_scores.mean():.2f} ± {cv_scores.std():.2f}')

In [None]:
# Fit every candidate on the hold-out split, score it on the test part, and
# additionally estimate its accuracy with stratified 5-fold cross-validation.
trained_models = {}    # name -> estimator fitted on X_train
model_accuracies = {}  # name -> test accuracy as a fraction in [0, 1]
cv_results = {}        # name -> array of per-fold CV accuracies
kappa_results = {}     # name -> Cohen's kappa on the test split

# Best mean cross-validation accuracy seen so far and the model that reached it
best_cv_score = 0
best_cv_model_name = ""

# One splitter with a fixed seed: every model is evaluated on the same folds
kf = StratifiedKFold(n_splits=5, shuffle=True, random_state=43)

# Train and evaluate each model
for name, model in models.items():
    model.fit(X_train, y_train)  # Train the model
    y_pred = model.predict(X_test)  # Predict on test data
    accuracy = accuracy_score(y_test, y_pred)  # Calculate accuracy
    print(f'{name} Accuracy: {accuracy * 100:.2f}%')

    # Kappa of the model of THIS iteration. The scorer used to be called with
    # the stand-alone lda_classifier, so every model reported the same kappa.
    kappa_score = kappa_scorer(model, X_test, y_test)
    print("Kappa Score on Test set: ", kappa_score)

    # Keep the fitted model and its test metrics (both stored as fractions)
    trained_models[name] = model
    model_accuracies[name] = accuracy
    kappa_results[name] = kappa_score

    # Stratified K-Fold cross-validation on the full feature matrix;
    # cross_val_score fits clones, so the model stored above is not refitted.
    cv_scores = cross_val_score(model, X, y, cv=kf, scoring='accuracy')
    cv_results[name] = cv_scores

    # Check if this model has the best cross-validation score
    if cv_scores.mean() > best_cv_score:
        best_cv_score = cv_scores.mean()
        best_cv_model_name = name

    # Print cross-validation results
    print(f'Cross-validation scores for {name}: {cv_scores}')
    print(f'Average cross-validation accuracy for {name}: {cv_scores.mean():.2f} ± {cv_scores.std():.2f}')

    print("--"*50)

## Best Results

In [None]:
# Print the model with the highest average cross-validation score
print(f'Best Model based on cross-validation: {best_cv_model_name} with Average CV Accuracy: {best_cv_score:.2f}')

In [None]:
# Find the model with the highest test set accuracy
best_model_name = max(model_accuracies, key=model_accuracies.get)
max_accuracy = model_accuracies[best_model_name]
# model_accuracies holds fractions in [0, 1]; scale by 100 so that the number
# agrees with the "%" sign in the message.
print(f'Best Model based on test accuracy: {best_model_name} with Accuracy: {max_accuracy * 100:.2f}%')

In [None]:
# Find the model with the highest Cohen's kappa on the test set
best_kappa_model_name = max(kappa_results, key=kappa_results.get)
max_kappa = kappa_results[best_kappa_model_name]
# Kappa is an agreement coefficient, not a percentage, so no "%" is printed.
print(f'Best Model based on test Kappa : {best_kappa_model_name} with Kappa: {max_kappa:.2f}')

In [None]:
# # Optionally, you can save the best model for later use
# best_model = trained_models[best_model_name]
# joblib.dump(best_model, f'{best_model_name}_model.joblib')

# To load and use the best model later
# loaded_best_model = joblib.load(f'{best_model_name}_model.joblib')
# predictions_transformed = loaded_best_model.predict(new_data)  # Replace 'new_data' with your actual data
# predictions = label_encoder.inverse_transform(predictions_transformed)

## Stacking

In [None]:
from sklearn.ensemble import StackingClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.svm import SVC, LinearSVC
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis
from sklearn.ensemble import ExtraTreesClassifier
from sklearn.neural_network import MLPClassifier
from sklearn.model_selection import train_test_split
from sklearn.datasets import load_iris
from sklearn.metrics import accuracy_score


# Define base models
base_models = [
    ('svc', SVC(kernel='rbf', probability=True)),
    # ('linear_svc', LinearSVC(max_iter=10000)),
    ('log_reg', LogisticRegression(max_iter=10000)),
    ('lda', LinearDiscriminantAnalysis()),
    ('extra_trees', ExtraTreesClassifier(n_estimators=100, random_state=42)),
    ('mlp', MLPClassifier(max_iter=1000, random_state=42))
]

# Define meta-learner
meta_learner = LogisticRegression()

# Initialize and train the stacking model
stacking_model = StackingClassifier(estimators=base_models, final_estimator=meta_learner)
stacking_model.fit(X_train, y_train)

# Make predictions and evaluate
y_pred = stacking_model.predict(X_test)
print(f'Accuracy: {accuracy_score(y_test, y_pred)}')

In [None]:
# Print accuracy
accuracy_stack = accuracy_score(y_test, y_pred)
print("Accuracy for stackL: ", accuracy_stack)

In [None]:
# Kappa Score
kappa_score = kappa_scorer(stacking_model, X_test, y_test)
print("Kappa Score on Test set: ", kappa_score)

## My Stacking

In [None]:
import pickle
from sklearn.ensemble import StackingClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split
from sklearn.datasets import load_iris
from sklearn.metrics import accuracy_score
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import StandardScaler


In [None]:
# Load pre-trained models (replace model_dir / file names with your own).
# NOTE: pickle.load executes code stored in the file -- only load pickles that
# you created yourself.
model_dir = 'magdy'


def _load_pickle(file_name):
    """Unpickle and return the object stored in ``file_name`` inside ``model_dir``."""
    # os.path.join instead of a literal backslash: 'magdy\m...' relied on an
    # invalid escape sequence and only resolved to a sub-directory on Windows.
    with open(os.path.join(model_dir, file_name), 'rb') as f:
        return pickle.load(f)


svc = _load_pickle('magdy_best_linear_svm.pkl')
linear_svc = _load_pickle('magdy_best_linear_svc.pkl')
log_reg = _load_pickle('magdy_best_lr.pkl')
lda = _load_pickle('magdy_best_lda.pkl')
extra_trees = _load_pickle('magdy_best_extra_trees.pkl')
mlp = _load_pickle('magdy_best_linear_mlp.pkl')


In [None]:
models = {}

models['LDA'] = lda
models['MLP'] = mlp
models['Linear_SVC'] = linear_svc
models['Linear_SVM'] = svc
models['LR'] = log_reg
# models['extra_trees'] = extra_trees

In [None]:
# Base models for the stack; all except LDA are wrapped in a pipeline that
# standardizes the features first.
# NOTE: StackingClassifier.fit trains fresh clones of these estimators, so only
# the hyper-parameters of the unpickled models are reused, not their fitted state.
base_models = [
    ('svc', make_pipeline(StandardScaler(), svc)),
    ('linear_svc', make_pipeline(StandardScaler(), linear_svc)),
    ('log_reg', make_pipeline(StandardScaler(), log_reg)),
    ('lda', lda),
    # ('extra_trees', extra_trees),
    ('mlp', make_pipeline(StandardScaler(), mlp))
]

# Meta-learner that combines the base-model outputs
meta_learner = LogisticRegression(max_iter=10000)

# Initialize and train the stacking model (5-fold CV predictions feed the meta-learner)
stacking_model = StackingClassifier(estimators=base_models, final_estimator=meta_learner, cv=5)
stacking_model.fit(X_train, y_train)

# Make predictions on the test split and report the accuracy as a fraction
y_pred = stacking_model.predict(X_test)
print(f'Accuracy: {accuracy_score(y_test, y_pred)}')


In [None]:
# Kappa Score
kappa_score = kappa_scorer(stacking_model, X_test, y_test)
print("Kappa Score on Test set: ", kappa_score)

## My Stacking Manual

In [None]:
# from collections import Counter

# # Dictionary to hold predictions from each model
# predictions = {}

# # Predict using each model and store the results
# for model_name, model in models.items():
#     prediction = model.predict(X_test[0])
#     predictions[model_name] = int(prediction[0])
#     print(f"{model_name}: Predicted class: {prediction[0]}")

# # Get the mode of the classifications
# prediction_counts = Counter(predictions.values())
# if prediction_counts[1] > prediction_counts[2]:
#     final_prediction = "Grasp"
# elif prediction_counts[1] < prediction_counts[2]:
#     final_prediction = "Release"


In [None]:
models = {}

models['LDA'] = lda
models['MLP'] = mlp
# models['Linear_SVC'] = linear_svc
models['Linear_SVM'] = svc
models['LR'] = log_reg
models['extra_trees'] = extra_trees

In [None]:
from collections import Counter


def predict_ensemble(models, X):
    """Return the majority-vote class of ``models`` for the first sample of ``X``.

    Every estimator in the ``models`` mapping predicts ``X``; only the first
    element of each prediction is used, converted to ``int``. The most
    frequent class wins, and on a tie the class that was voted for first
    (in the iteration order of ``models``) is returned.
    """
    votes = Counter(int(estimator.predict(X)[0]) for estimator in models.values())
    winner, _count = votes.most_common(1)[0]
    return winner

In [None]:
# List to hold final predictions
final_predictions = []

# Make predictions on the test data
for i in range(X_test.shape[0]):
    final_prediction = predict_ensemble(models, [X_test[i]])
    final_predictions.append(final_prediction)

# Calculate accuracy
accuracy = accuracy_score(y_test, final_predictions)
print(f'Ensemble Model Accuracy: {accuracy}')

In [None]:
# NOTE(review): unlike predict_ensemble(), this cell runs every model on the
# whole feature matrix X but keeps only the prediction for its first row, so
# final_prediction is the majority vote for X[0] alone. It looks like a
# leftover experiment -- confirm before relying on it.
predictions = {}
for model_name, model in models.items():
    prediction = model.predict(X)
    predictions[model_name] = int(prediction[0])

# Get the mode of the classifications
prediction_counts = Counter(predictions.values())
final_prediction = prediction_counts.most_common(1)[0][0]

In [None]:
from sklearn.metrics import accuracy_score, cohen_kappa_score

# Calculate Cohen's Kappa score
kappa_score = cohen_kappa_score(y_test, final_predictions)
print(f'Cohen\'s Kappa Score: {kappa_score}')

## Hard Voting

In [None]:
from sklearn.ensemble import VotingClassifier

# Create a hard (majority-label) voting classifier from the loaded models.
# NOTE(review): linear_svc is presumably a LinearSVC, which has no
# predict_proba, so soft voting would not work with this set -- confirm.
voting_clf = VotingClassifier(estimators=[
    ('svc', svc),
    ('linear_svc', linear_svc),
    ('log_reg', log_reg),
    ('lda', lda),
    # ('extra_trees', extra_trees),
    ('mlp', mlp)
], voting='hard')

# Fit the classifier on the training data
voting_clf.fit(X_train, y_train)


In [None]:
# Make predictions on the test data
y_pred = voting_clf.predict(X_test)

In [None]:
# Calculate accuracy
accuracy = accuracy_score(y_test, y_pred)
print(f'Ensemble Model Accuracy: {accuracy}')

# Calculate Cohen's Kappa score
kappa_score = cohen_kappa_score(y_test, y_pred)
print(f'Cohen\'s Kappa Score: {kappa_score}')

## Soft Voting 

In [None]:
from sklearn.ensemble import VotingClassifier
from sklearn.svm import SVC
from sklearn.linear_model import LogisticRegression
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis as LDA
from sklearn.neural_network import MLPClassifier

# Define the individual classifiers
svc = SVC(probability=True)
linear_svc = SVC(kernel='linear', probability=True)
log_reg = LogisticRegression()
lda = LDA()
mlp = MLPClassifier()

# Create a soft voting classifier
voting_clf = VotingClassifier(estimators=[
    ('svc', svc),
    ('linear_svc', linear_svc),
    ('log_reg', log_reg),
    ('lda', lda),
    ('mlp', mlp)
], voting='soft')

# Fit the classifier on the training data
voting_clf.fit(X_train, y_train)

# Predict on new data
y_pred = voting_clf.predict(X_test)


In [None]:
# Calculate accuracy
accuracy = accuracy_score(y_test, y_pred)
print(f'Ensemble Model Accuracy: {accuracy}')

# Calculate Cohen's Kappa score
kappa_score = cohen_kappa_score(y_test, y_pred)
print(f'Cohen\'s Kappa Score: {kappa_score}')

## Trial Prediction Time

## Soft Voting 

In [None]:
from sklearn.ensemble import VotingClassifier
from sklearn.svm import SVC
from sklearn.linear_model import LogisticRegression
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis as LDA
from sklearn.neural_network import MLPClassifier

# Define the individual classifiers
svc = SVC(probability=True)
linear_svc = SVC(kernel='linear', probability=True)
log_reg = LogisticRegression()
lda = LDA()
mlp = MLPClassifier()

# Create a soft voting classifier
voting_clf = VotingClassifier(estimators=[
    ('svc', svc),
    ('linear_svc', linear_svc),
    ('log_reg', log_reg),
    ('lda', lda),
    ('mlp', mlp)
], voting='soft')

# Fit the classifier on the training data
voting_clf.fit(X_train, y_train)

# Predict on new data
y_pred = voting_clf.predict(X_test)


In [None]:
# Calculate accuracy
accuracy = accuracy_score(y_test, y_pred)
print(f'Ensemble Model Accuracy: {accuracy}')

# Calculate Cohen's Kappa score
kappa_score = cohen_kappa_score(y_test, y_pred)
print(f'Cohen\'s Kappa Score: {kappa_score}')

In [None]:
epochs['Right Grasp'][0].get_data().shape

In [None]:
# fbcsp_clf.evaluateTrial(epochs['Right Grasp'][0].get_data())

In [None]:
trial_features = fbcsp_clf.extractTrialFeatures(epochs['Right Grasp'][0].get_data())
# print(trial_features.shape)
trial_features

In [None]:
trained_models["LDA"].predict(trial_features)
# LDA.predict

In [None]:
trained_models["LDA"].predict_proba(trial_features)

### Release

In [None]:
# fbcsp_clf.evaluateTrial(epochs['Right Release'][9].get_data())

In [None]:
trial_features = fbcsp_clf.extractTrialFeatures(epochs['Right Release'][9].get_data())
# print(trial_features.shape)
trial_features

In [None]:
trained_models["LDA"].predict(trial_features)


## Save Models

In [None]:
import pickle

# Save the FBCSP to a file
with open(sub_name+'_fbcsp_clf.pkl', 'wb') as file:
    pickle.dump(fbcsp_clf, file)

In [None]:
# Save the trained model to a file
with open(sub_name+'_LDA_clf.pkl', 'wb') as file:
    pickle.dump(trained_models["LDA"], file)

## Load model

In [None]:
# Load the FBCSP from the file
# NOTE(review): the save cell writes sub_name+'_fbcsp_clf.pkl', while this reads
# a differently named file -- confirm 'fbcsp_clf_sherif.pkl' is the intended input.
with open('fbcsp_clf_sherif.pkl', 'rb') as file:
    loaded_fbcsp_clf = pickle.load(file)

In [None]:
# Load the trained model from the file
# NOTE(review): the save cell writes sub_name+'_LDA_clf.pkl', while this reads
# a differently named file -- confirm 'LDA_clf_sherif.pkl' is the intended input.
with open('LDA_clf_sherif.pkl', 'rb') as file:
    loaded_LDA_clf = pickle.load(file)

## Trial Prediction with loaded models

In [None]:
trial_features = loaded_fbcsp_clf.extractTrialFeatures(epochs['Right Release'][9].get_data())
# print(trial_features.shape)
trial_features

In [None]:
loaded_LDA_clf.predict(trial_features)

In [None]:
prediction = loaded_LDA_clf.predict(trial_features)
prediction[0]

In [None]:
type(prediction[0])

# Grid Search

In [None]:
# # Dictionary to hold models and their names
# models = {
#     'SVM': SVC(kernel='rbf', C=1, gamma='scale'),
#     'Linear SVC': LinearSVC(max_iter=10000, random_state=42),
#     'Random Forest': RandomForestClassifier(n_estimators=100, random_state=42),
#     'k-NN': KNeighborsClassifier(n_neighbors=5),
#     'Logistic Regression': LogisticRegression(solver='lbfgs', max_iter=1000),
#     'LDA': LinearDiscriminantAnalysis(),
#     'Naive Bayes': GaussianNB(),
#     'Extra Trees': ExtraTreesClassifier(n_estimators=100, random_state=42),
#     'Decision Tree': DecisionTreeClassifier(random_state=42),
#     'MLP Classifier': MLPClassifier(hidden_layer_sizes=(100,), max_iter=300, random_state=42),
# }

In [None]:
from sklearn.metrics import classification_report, accuracy_score
from sklearn.model_selection import GridSearchCV

## SVM

In [None]:
# Hyper-parameter grid for the SVC search.
# NOTE(review): every combination is fitted, including ones that differ only in
# a parameter the selected kernel ignores (gamma for 'linear', degree for every
# kernel except 'poly'), so many candidates are duplicates of each other.
param_grid = {
    'C': [0.01, 0.1, 1, 10, 100],
    'gamma': ['scale', 'auto', 1, 0.1, 0.01, 0.001],
    'kernel': ['linear', 'rbf', 'poly'],
    'tol': [1e-4, 1e-3, 1e-2],
    'max_iter': [1000, 2000, 3000],
    'degree': [2, 3, 4],  # Only used for 'poly' kernel
}

# Initialize the SVM classifier
svm = SVC()

# Exhaustive search with 5-fold CV; refit=True retrains the best candidate on all of X_train
grid_search_svm = GridSearchCV(svm, param_grid, refit=True, verbose=2, cv=5)

# Fit the model
grid_search_svm.fit(X_train, y_train)

# Print the best parameters and estimator
print("Best parameters found: ", grid_search_svm.best_params_)
print("Best estimator: ", grid_search_svm.best_estimator_)



# Best estimator of the search; despite the variable name, the selected kernel
# is not necessarily linear
best_linear_svm = grid_search_svm.best_estimator_

# Predict using the best model
y_pred_svm = best_linear_svm.predict(X_test)

# Print classification report
print("Classification report for SVM:\n", classification_report(y_test, y_pred_svm))

# Print accuracy
accuracy_svm = accuracy_score(y_test, y_pred_svm)
print("Accuracy for SVM: ", accuracy_svm)

In [None]:
# Save the best SVM model found by the grid search
with open(sub_name+'_best_linear_svm.pkl', 'wb') as file:
    pickle.dump(best_linear_svm, file)

- Best parameters found:  {'C': 1, 'degree': 2, 'gamma': 0.01, 'kernel': 'rbf', 'max_iter': 1000, 'tol': 0.01}
- Best estimator:  SVC(C=1, degree=2, gamma=0.01, max_iter=1000, tol=0.01)
- Accuracy: 0.69

## MLP

In [None]:
# Define the parameter grid
param_grid_mlp = {
    'hidden_layer_sizes': [(50,50,50), (50,100,50), (100,)],
    'activation': ['tanh', 'relu'],
    'solver': ['sgd', 'adam'],
    'alpha': [0.0001, 0.05],
    'learning_rate': ['constant','adaptive'],
}

# Initialize the MLP classifier
mlp = MLPClassifier(max_iter=100)

# Initialize the GridSearchCV object
grid_search_mlp = GridSearchCV(mlp, param_grid_mlp, refit=True, verbose=2, cv=5)

# Fit the model
grid_search_mlp.fit(X_train, y_train)

# Print the best parameters and estimator
print("Best parameters for MLP found: ", grid_search_mlp.best_params_)
print("Best MLP estimator: ", grid_search_mlp.best_estimator_)



# Use the best parameters to fit the model
best_linear_mlp = grid_search_mlp.best_estimator_

# Predict using the best model
y_pred_mlp = best_linear_mlp.predict(X_test)

# Print classification report
print("Classification report for MLP:\n", classification_report(y_test, y_pred_mlp))

# Print accuracy
accuracy_mlp = accuracy_score(y_test, y_pred_mlp)
print("Accuracy for MLP: ", accuracy_mlp)

In [None]:
# Save the best MLP model found by the grid search
with open(sub_name+'_best_linear_mlp.pkl', 'wb') as file:
    pickle.dump(best_linear_mlp, file)

- Best parameters for MLP found:  {'activation': 'tanh', 'alpha': 0.0001, 'hidden_layer_sizes': (50, 50, 50), 'learning_rate': 'adaptive', 'solver': 'sgd'}
- Best MLP estimator:  MLPClassifier(activation='tanh', hidden_layer_sizes=(50, 50, 50),
              learning_rate='adaptive', max_iter=100, solver='sgd')
- Accuracy: 0.7

## LDA

In [None]:
# Hyper-parameter grid for LDA.
param_grid_lda = {
    'solver': ['svd', 'lsqr', 'eigen'],
    'priors': [None],
    # n_components may not exceed min(n_classes - 1, n_features). The features
    # come from the two-class data_dict, so 1 is the largest valid value; the
    # former candidate 2 made every fit that used it fail.
    'n_components': [None, 1],
    'store_covariance': [False, True],
    # 0.0001 and 1e-4 are the same number; listing both only duplicated fits.
    'tol': [1e-4, 1e-3, 1e-2],
    'covariance_estimator': [None]
}

# Initialize the LDA classifier
lda = LinearDiscriminantAnalysis()

# Exhaustive search with 5-fold CV; refit=True retrains the best candidate on all of X_train
grid_search_lda = GridSearchCV(lda, param_grid_lda, refit=True, verbose=2, cv=5)

# Fit the model
grid_search_lda.fit(X_train, y_train)

# Print the best parameters and estimator
print("Best parameters for LDA found: ", grid_search_lda.best_params_)
print("Best LDA estimator: ", grid_search_lda.best_estimator_)

# Best estimator of the search (already refitted)
best_lda = grid_search_lda.best_estimator_

# Predict using the best model
y_pred_lda = best_lda.predict(X_test)

# Print classification report
print("Classification report for LDA:\n", classification_report(y_test, y_pred_lda))

# Print accuracy (the label used to say "LinearSVC")
accuracy_lda = accuracy_score(y_test, y_pred_lda)
print("Accuracy for LDA: ", accuracy_lda)

In [None]:
# Save the best LDA model
with open(sub_name+'_best_lda.pkl', 'wb') as file:
    pickle.dump(best_lda, file)

- Best parameters for LDA found:  {'covariance_estimator': None, 'n_components': None, 'priors': None, 'solver': 'svd', 'store_covariance': False, 'tol': 0.0001}
- Best LDA estimator:  LinearDiscriminantAnalysis()
- Accuracy: 0.77

## Linear SVC

In [None]:
# Hyper-parameter grid for LinearSVC.
# 'verbose' was removed from the grid: it only switches solver logging on, does
# not change the fitted model, and doubled the number of candidates.
# NOTE(review): 'random_state' is still searched like a hyper-parameter, which
# selects on seed noise -- consider fixing it on the estimator instead.
param_grid_linear_svc = {
    'penalty': ['l2'],
    'loss': ['squared_hinge'],
    # Both formulations are valid for l2 + squared_hinge; it is penalty='l1'
    # that would require dual=False.
    'dual': [True, False],
    'tol': [1e-4, 1e-3, 1e-2],
    'class_weight': [None, 'balanced'],
    'random_state': [None, 42],
    'max_iter': [1000, 2000, 3000]
}

# Initialize the LinearSVC classifier
linear_svc = LinearSVC()

# Exhaustive search with 5-fold CV; refit=True retrains the best candidate on all of X_train
grid_search_linear_svc = GridSearchCV(linear_svc, param_grid_linear_svc, refit=True, verbose=2, cv=5)

# Fit the model
grid_search_linear_svc.fit(X_train, y_train)

# Print the best parameters and estimator
print("Best parameters for LinearSVC found: ", grid_search_linear_svc.best_params_)
print("Best LinearSVC estimator: ", grid_search_linear_svc.best_estimator_)

# Best estimator of the search (already refitted)
best_linear_svc = grid_search_linear_svc.best_estimator_

# Predict using the best model
y_pred_linear_svc = best_linear_svc.predict(X_test)

# Print classification report
print("Classification report for LinearSVC:\n", classification_report(y_test, y_pred_linear_svc))

# Print accuracy
accuracy_linear_svc = accuracy_score(y_test, y_pred_linear_svc)
print("Accuracy for LinearSVC: ", accuracy_linear_svc)

In [None]:
# Save the best LinearSVC model found by the grid search
with open(sub_name+'_best_linear_svc.pkl', 'wb') as file:
    pickle.dump(best_linear_svc, file)

- Best parameters for LinearSVC found:  {'class_weight': None, 'dual': True, 'loss': 'squared_hinge', 'max_iter': 1000, 'penalty': 'l2', 'random_state': None, 'tol': 0.0001, 'verbose': 0}
- Best LinearSVC estimator:  LinearSVC(dual=True)
- Accuracy: 0.77

## LR

In [None]:
# Hyper-parameter grid for Logistic Regression.
# 'n_jobs' and 'warm_start' were removed from the grid: n_jobs only controls
# parallelism and warm_start has no effect on the freshly cloned estimators
# that GridSearchCV fits, so each of them merely doubled the candidates.
# NOTE(review): 'liblinear' does not support multi_class='multinomial'; those
# candidates fail to fit and are scored NaN. 'intercept_scaling' only matters
# for 'liblinear'. 'random_state' is searched like a hyper-parameter, which
# selects on seed noise.
param_grid_lr = {
    'solver': ['liblinear', 'lbfgs', 'newton-cg', 'sag', 'saga'],
    'tol': [1e-4, 1e-3, 1e-2],
    'intercept_scaling': [1, 10, 100],
    'class_weight': [None, 'balanced'],
    'random_state': [None, 42],
    'max_iter': [1000, 800, 500, 300, 200, 100],
    'multi_class': ['auto', 'ovr', 'multinomial'],
}


# Initialize the Logistic Regression classifier
lr = LogisticRegression(max_iter=1000)

# Exhaustive search with 5-fold CV; refit=True retrains the best candidate on all of X_train
grid_search_lr = GridSearchCV(lr, param_grid_lr, refit=True, verbose=2, cv=5)

# Fit the model
grid_search_lr.fit(X_train, y_train)

# Print the best parameters and estimator
print("Best parameters for Logistic Regression found: ", grid_search_lr.best_params_)
print("Best Logistic Regression estimator: ", grid_search_lr.best_estimator_)

# Best estimator of the search (already refitted)
best_lr = grid_search_lr.best_estimator_

# Predict using the best model
y_pred_lr = best_lr.predict(X_test)

# Print classification report (the labels below used to say "LinearSVC")
print("Classification report for Logistic Regression:\n", classification_report(y_test, y_pred_lr))

# Print accuracy
accuracy_lr = accuracy_score(y_test, y_pred_lr)
print("Accuracy for Logistic Regression: ", accuracy_lr)

In [None]:
# Save the best Logistic Regression model found by the grid search
with open(sub_name+'_best_lr.pkl', 'wb') as file:
    pickle.dump(best_lr, file)

- Best parameters for Logistic Regression found:  {'class_weight': None, 'intercept_scaling': 1, 'max_iter': 1000, 'multi_class': 'ovr', 'n_jobs': -1, 'random_state': None, 'solver': 'saga', 'tol': 0.01, 'warm_start': False}
- Best Logistic Regression estimator:  LogisticRegression(max_iter=1000, multi_class='ovr', n_jobs=-1, solver='saga',
                   tol=0.01)
- Accuracy: 0.77

## Extra Trees

In [None]:
from sklearn.ensemble import ExtraTreesClassifier

# Grid search over Extra Trees hyperparameters, followed by an evaluation of
# the best model on the held-out test split.

# Define the parameter grid
param_grid_extra_trees = {
    'n_estimators': [50, 100, 200],
    'max_depth': [None, 10, 20, 30],
    'min_samples_split': [2, 5, 10],
    # 'auto' is intentionally omitted: for classifiers it was an alias of
    # 'sqrt', deprecated in scikit-learn 1.1 and removed in 1.3, so it either
    # duplicates the 'sqrt' candidates or makes the fit fail.
    'max_features': ['sqrt', 'log2'],
    'max_leaf_nodes': [None, 10, 20, 30],
    'ccp_alpha': [0.0, 0.1, 0.2],
    # 'max_samples' is not searched: it only applies when bootstrap=True, and
    # the estimator below keeps the default bootstrap=False, so non-None
    # values are rejected (or ignored on older releases).
    #
    # Tried and disabled (left at estimator defaults):
    #     'criterion': ['gini', 'entropy', 'log_loss'],
    #     'min_samples_leaf': [1, 2, 4],
    #     'min_weight_fraction_leaf': [0.0, 0.1, 0.2],
    #     'min_impurity_decrease': [0.0, 0.1, 0.2],
    #     'bootstrap': [False, True],
    #     'oob_score': [False, True],
    #     'n_jobs': [None, -1],
    #     'random_state': [None, 42],
    #     'verbose': [0, 1],
    #     'warm_start': [False, True],
    #     'class_weight': [None, 'balanced'],
}

# Initialize the Extra Trees Classifier
extra_trees = ExtraTreesClassifier()

# Initialize the GridSearchCV object (5-fold CV, refit on the full training set)
grid_search_extra_trees = GridSearchCV(extra_trees, param_grid_extra_trees, refit=True, verbose=2, cv=5)

# Fit the model
grid_search_extra_trees.fit(X_train, y_train)

# Print the best parameters and estimator
print("Best parameters for Extra Trees found: ", grid_search_extra_trees.best_params_)
print("Best Extra Trees estimator: ", grid_search_extra_trees.best_estimator_)



# refit=True means best_estimator_ is already trained on all of X_train
best_extra_trees = grid_search_extra_trees.best_estimator_

# Predict using the best model
y_pred_extra_trees = best_extra_trees.predict(X_test)

# Print classification report
print("Classification report for Extra Trees:\n", classification_report(y_test, y_pred_extra_trees))

# Print accuracy
accuracy_extra_trees = accuracy_score(y_test, y_pred_extra_trees)
print("Accuracy for Extra Trees: ", accuracy_extra_trees)

In [None]:
# Save the best Extra Trees model found by the grid search.
# The file name is prefixed with sub_name and the path is relative,
# so the pickle is written to the current working directory.
with open(sub_name+'_best_extra_trees.pkl', 'wb') as file:
    pickle.dump(best_extra_trees, file)

- Best parameters for Extra Trees found:  {'ccp_alpha': 0.0, 'max_depth': 30, 'max_features': 'sqrt', 'max_leaf_nodes': 10, 'min_samples_split': 5, 'n_estimators': 50}
- Best Extra Trees estimator:  ExtraTreesClassifier(max_depth=30, max_leaf_nodes=10, min_samples_split=5,
                     n_estimators=50)
- Accuracy: 0.7

In [None]:
# Reduced Extra Trees parameter grid (no 'max_samples' search).
# Running this cell rebinds param_grid_extra_trees; ExtraTreesClassifier itself
# must already have been imported by an earlier cell.
# NOTE(review): the original header ended in "70" -- presumably the ~0.70
# accuracy obtained with this grid; confirm.

# Define the parameter grid
param_grid_extra_trees = {
    'n_estimators': [50, 100, 200],
    'max_depth': [None, 10, 20, 30],
    'min_samples_split': [2, 5, 10],
    # 'auto' is intentionally omitted: for classifiers it was an alias of
    # 'sqrt', deprecated in scikit-learn 1.1 and removed in 1.3.
    'max_features': ['sqrt', 'log2'],
    'max_leaf_nodes': [None, 10, 20, 30],
    'ccp_alpha': [0.0, 0.1, 0.2],
    # Tried and disabled (left at estimator defaults):
    #     'criterion': ['gini', 'entropy', 'log_loss'],
    #     'min_samples_leaf': [1, 2, 4],
    #     'min_weight_fraction_leaf': [0.0, 0.1, 0.2],
    #     'min_impurity_decrease': [0.0, 0.1, 0.2],
    #     'bootstrap': [False, True],
    #     'oob_score': [False, True],
    #     'n_jobs': [None, -1],
    #     'random_state': [None, 42],
    #     'verbose': [0, 1],
    #     'warm_start': [False, True],
    #     'class_weight': [None, 'balanced'],
    #     'max_samples': [None, 0.5, 0.7],
}


In [None]:
# Stand-alone walk-through of the GridSearchCV + LinearSVC workflow on the
# Iris dataset.
# NOTE: this cell rebinds data, X, y, X_train, X_test, y_train and y_test to
# Iris values, so any cell executed afterwards that reads those names will
# operate on the Iris split.
from sklearn.datasets import load_iris
from sklearn.metrics import accuracy_score, classification_report
from sklearn.model_selection import GridSearchCV, train_test_split
from sklearn.svm import LinearSVC

# Iris feature matrix and class labels
data = load_iris()
X, y = data.data, data.target

# Hold out 20% of the samples for testing (fixed seed for a repeatable split)
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

# Candidate regularisation strengths and iteration budgets
param_grid_linear_svc = dict(
    C=[0.1, 1, 10, 100],
    max_iter=[1000, 2000, 3000],
)

# Base estimator wrapped in a 5-fold grid search that refits the best candidate
linear_svc = LinearSVC()
grid_search_linear_svc = GridSearchCV(
    estimator=linear_svc,
    param_grid=param_grid_linear_svc,
    refit=True,
    verbose=2,
    cv=5,
)

# Run the search on the training split
grid_search_linear_svc.fit(X_train, y_train)

# Report the winning configuration
print("Best parameters for LinearSVC found: ", grid_search_linear_svc.best_params_)
print("Best LinearSVC estimator: ", grid_search_linear_svc.best_estimator_)

# The refitted best estimator is used for the test-set predictions
best_linear_svc = grid_search_linear_svc.best_estimator_
y_pred_linear_svc = best_linear_svc.predict(X_test)

# Per-class metrics on the held-out data
print(
    "Classification report for LinearSVC:\n",
    classification_report(y_test, y_pred_linear_svc),
)

# Overall accuracy on the held-out data
accuracy_linear_svc = accuracy_score(y_test, y_pred_linear_svc)
print("Accuracy for LinearSVC with best parameters: ", accuracy_linear_svc)
