In [689]:
import pandas as pd

transform_eeg_data = pd.read_csv('processed_data\\transformed_pca.csv')

In [690]:
# Must be removed

import numpy as np
from sklearn.metrics import confusion_matrix

def custom_classification_report(y_true, y_pred, target_names, class_indices):
    # Compute the confusion matrix
    cm = confusion_matrix(y_true, y_pred)

    # Initialize lists to store precision, recall, F1-score, and support for each class
    precision = []
    recall = []
    f1_score = []
    support = []

    # Calculate precision, recall, F1-score for each class
    for i in range(len(target_names)):
        true_positives = cm[i, i]
        false_positives = cm[:, i].sum() - true_positives
        false_negatives = cm[i, :].sum() - true_positives
        true_negatives = cm.sum() - (true_positives + false_positives + false_negatives)

        # Precision: TP / (TP + FP)
        if true_positives + false_positives > 0:
            precision_i = true_positives / (true_positives + false_positives)
        else:
            precision_i = 0.0

        # Recall: TP / (TP + FN)
        if true_positives + false_negatives > 0:
            recall_i = true_positives / (true_positives + false_negatives)
        else:
            recall_i = 0.0

        # F1-Score: 2 * (Precision * Recall) / (Precision + Recall)
        if precision_i + recall_i > 0:
            f1_i = 2 * (precision_i * recall_i) / (precision_i + recall_i)
        else:
            f1_i = 0.0

        # Support: The number of true instances of each class
        support_i = cm[i, :].sum()

        # Append calculated metrics for this class
        precision.append(precision_i)
        recall.append(recall_i)
        f1_score.append(f1_i)
        support.append(support_i)

    # Calculate accuracy
    accuracy = np.trace(cm) / np.sum(cm)

    # Calculate average F1-score for specified classes
    f1_average = np.mean([f1_score[i] for i in class_indices])

    # Print the aesthetically improved report
    print("\n" + "Classification Report".center(65, "="))
    print(f"{'Class':<15}{'Precision':>12}{'Recall':>12}{'F1-Score':>12}{'Support':>12}")
    print("=" * 65)
    for i, label in enumerate(target_names):
        print(f"{label:<15}{precision[i]:>12.4f}{recall[i]:>12.4f}{f1_score[i]:>12.4f}{support[i]:>12}")
    print("=" * 65)
    print(f"Average F1-Score for classes : {f1_average:.4f}")
    print(f"Accuracy : {accuracy:.4f}")
    print("=" * 65)

class_indices = [0, 1, 2]

In [691]:
import xgboost as xgb
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder

le = LabelEncoder()
transform_eeg_data['GROUP'] = le.fit_transform(transform_eeg_data['GROUP'])
# Convert object columns to numeric
for col in transform_eeg_data.columns:
    if transform_eeg_data[col].dtype == 'object':
        transform_eeg_data[col] = pd.to_numeric(transform_eeg_data[col], errors='coerce')

# Features and target
X = transform_eeg_data.drop('GROUP', axis=1)
y = transform_eeg_data['GROUP']


X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.4, random_state=42)


In [692]:
params = {
    'objective': 'multi:softmax',
    'num_class': 3,  # Number of classes
    'learning_rate': 0.001,
    'max_depth': 10,
    'subsample': 0.6478457734751482,
    'colsample_bytree': 0.9431214021788126,
    # 'device': 'cuda',  # Use GPU
    'random_state': 42,
}

train_data = xgb.DMatrix(X_train, label=y_train)
test_data = xgb.DMatrix(X_test, label=y_test)

xgb_model = xgb.train(params, train_data,num_boost_round=462)

# Predict the class labels
y_pred = xgb_model.predict(test_data)

In [693]:
print('Expected features:', xgb_model.feature_names)
print('Dataset columns:', transform_eeg_data.columns.tolist())

Expected features: ['Unnamed: 0', 'f7', 'cp5', 'p3', 'o1', 'o2', 'fc6', 'cp3', 'af8', 'empty']
Dataset columns: ['Unnamed: 0', 'f7', 'cp5', 'p3', 'o1', 'o2', 'fc6', 'cp3', 'af8', 'empty', 'GROUP']


In [694]:
target_names=list(map(str,le.classes_))

In [695]:
# Generate and print classification report
report = custom_classification_report(y_test, y_pred, target_names=target_names,class_indices = [0, 1, 2])


Class             Precision      Recall    F1-Score     Support
0                    0.7778      0.8750      0.8235           8
1                    0.6364      0.7778      0.7000           9
2                    0.0000      0.0000      0.0000           3
Average F1-Score for classes : 0.5078
Accuracy : 0.7000


In [696]:
from sklearn.ensemble import RandomForestClassifier


# creating a RF classifier
clf = RandomForestClassifier(n_estimators = 100)  
 
# Training the model on the training dataset
# fit function is used to train the model using the training sets as parameters
clf.fit(X_train, y_train)
 
# performing predictions on the test dataset
y_pred_rf = clf.predict(X_test)
 
custom_classification_report(y_test, y_pred_rf, target_names=target_names,class_indices = [0, 1, 2])


Class             Precision      Recall    F1-Score     Support
0                    0.7000      0.8750      0.7778           8
1                    0.7000      0.7778      0.7368           9
2                    0.0000      0.0000      0.0000           3
Average F1-Score for classes : 0.5049
Accuracy : 0.7000


In [697]:
from sklearn.linear_model import LogisticRegression

# creating a RF classifier
log_reg = LogisticRegression(random_state=42, max_iter=1000)
 
# Training the model on the training dataset
# fit function is used to train the model using the training sets as parameters
log_reg.fit(X_train, y_train)
 
# performing predictions on the test dataset
y_pred_log_reg = log_reg.predict(X_test)
 
custom_classification_report(y_test, y_pred_log_reg, target_names=target_names,class_indices = [0, 1, 2])


Class             Precision      Recall    F1-Score     Support
0                    0.6667      0.7500      0.7059           8
1                    0.6667      0.6667      0.6667           9
2                    0.5000      0.3333      0.4000           3
Average F1-Score for classes : 0.5908
Accuracy : 0.6500


In [698]:
X_train.shape

(28, 10)

In [699]:
import torch
from torch.utils.data import DataLoader, TensorDataset
from sklearn.metrics import f1_score, accuracy_score

import torch.nn as nn
import torch.optim as optim

# Define the LSTM model
class LSTMModel(nn.Module):
    def __init__(self, input_size, hidden_size, num_layers, num_classes):
        super(LSTMModel, self).__init__()
        self.lstm = nn.LSTM(input_size, hidden_size, num_layers, batch_first=True)
        self.fc = nn.Linear(hidden_size, num_classes)
    
    def forward(self, x):
        h0 = torch.zeros(num_layers, x.size(0), hidden_size).to(x.device)
        c0 = torch.zeros(num_layers, x.size(0), hidden_size).to(x.device)
        
        out, _ = self.lstm(x, (h0, c0))
        out = self.fc(out[:, -1, :])  # Extract the output from the last time step
        return out

# Prepare the data
X_train_tensor = torch.tensor(X_train.values, dtype=torch.float32).unsqueeze(1)
y_train_tensor = torch.tensor(y_train.values, dtype=torch.long)
X_test_tensor = torch.tensor(X_test.values, dtype=torch.float32).unsqueeze(1)
y_test_tensor = torch.tensor(y_test.values, dtype=torch.long)

train_dataset = TensorDataset(X_train_tensor, y_train_tensor)
test_dataset = TensorDataset(X_test_tensor, y_test_tensor)

train_loader = DataLoader(train_dataset, batch_size=4, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size=4, shuffle=True)

# Initialize the model, loss function, and optimizer
input_size = X_train.shape[1]
hidden_size = 128
num_layers = 4
num_classes = 3

model = LSTMModel(input_size, hidden_size, num_layers, num_classes)
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)

# Train the model
num_epochs = 50
for epoch in range(num_epochs):
    model.train()
    for X_batch, y_batch in train_loader:
        optimizer.zero_grad()
        outputs = model(X_batch)
        loss = criterion(outputs, y_batch)
        loss.backward()
        optimizer.step()
    
    print(f'Epoch [{epoch+1}/{num_epochs}], Loss: {loss.item():.4f}')

# Evaluate the model
model.eval()
y_true = []
y_pred = []
with torch.no_grad():
    for X_batch, y_batch in test_loader:
        outputs = model(X_batch)
        _, predicted = torch.max(outputs.data, 1)
        y_true.extend(y_batch.numpy())
        y_pred.extend(predicted.numpy())

# Calculate F1 score and accuracy
f1 = f1_score(y_true, y_pred, average='weighted')
accuracy = accuracy_score(y_true, y_pred)

print(f'F1 Score: {f1:.4f}')
print(f'Accuracy: {accuracy * 100:.2f}%')

Epoch [1/50], Loss: 1.0844
Epoch [2/50], Loss: 1.0598
Epoch [3/50], Loss: 1.0254
Epoch [4/50], Loss: 0.6307
Epoch [5/50], Loss: 1.1819
Epoch [6/50], Loss: 0.8507
Epoch [7/50], Loss: 0.7664
Epoch [8/50], Loss: 0.5546
Epoch [9/50], Loss: 0.6613
Epoch [10/50], Loss: 0.5728
Epoch [11/50], Loss: 0.8226
Epoch [12/50], Loss: 0.6433
Epoch [13/50], Loss: 0.8756
Epoch [14/50], Loss: 0.3103
Epoch [15/50], Loss: 1.2916
Epoch [16/50], Loss: 0.4477
Epoch [17/50], Loss: 0.6149
Epoch [18/50], Loss: 0.5584
Epoch [19/50], Loss: 0.7529
Epoch [20/50], Loss: 0.4820
Epoch [21/50], Loss: 0.7213
Epoch [22/50], Loss: 0.3418
Epoch [23/50], Loss: 0.5584
Epoch [24/50], Loss: 0.4458
Epoch [25/50], Loss: 0.7534
Epoch [26/50], Loss: 0.3004
Epoch [27/50], Loss: 0.0457
Epoch [28/50], Loss: 0.1519
Epoch [29/50], Loss: 0.0441
Epoch [30/50], Loss: 0.2120
Epoch [31/50], Loss: 0.2350
Epoch [32/50], Loss: 0.8322
Epoch [33/50], Loss: 0.0823
Epoch [34/50], Loss: 0.2149
Epoch [35/50], Loss: 0.7742
Epoch [36/50], Loss: 0.0980
E

In [700]:
# # Applying PCA function on training
# # and testing set of X component
# from sklearn.decomposition import PCA

# pca = PCA(n_components=0.90)

# X_train_pca = pca.fit_transform(X_train)
# X_test_pca = pca.transform(X_test)

# explained_variance = pca.explained_variance_ratio_

In [701]:
# explained_variance

In [702]:
# train_data_pca = xgb.DMatrix(X_train_pca, label=y_train)
# test_data_pca = xgb.DMatrix(X_test_pca, label=y_test)

# xgb_model_pca = xgb.train(params, train_data_pca,num_boost_round=462)

# # Predict the class labels
# y_pred_pca = xgb_model_pca.predict(test_data_pca)

In [703]:
# # Generate and print classification report
# report = custom_classification_report(y_test, y_pred_pca, target_names=target_names,class_indices = [0, 1, 2])

In [704]:
# from sklearn.ensemble import RandomForestClassifier


# # creating a RF classifier
# clf = RandomForestClassifier(n_estimators = 100)  
 
# # Training the model on the training dataset
# # fit function is used to train the model using the training sets as parameters
# clf.fit(X_train_pca, y_train)
 
# # performing predictions on the test dataset
# y_pred_rf = clf.predict(X_test_pca)
 
# custom_classification_report(y_test, y_pred_rf, target_names=target_names,class_indices = [0, 1, 2])

In [705]:
# from sklearn.linear_model import LogisticRegression

# # creating a RF classifier
# log_reg = LogisticRegression(random_state=42)
 
# # Training the model on the training dataset
# # fit function is used to train the model using the training sets as parameters
# log_reg.fit(X_train_pca, y_train)
 
# # performing predictions on the test dataset
# y_pred_log_reg = log_reg.predict(X_test_pca)
 
# custom_classification_report(y_test, y_pred_log_reg, target_names=target_names,class_indices = [0, 1, 2])

In [706]:
# import torch
# from torch.utils.data import DataLoader, TensorDataset
# from sklearn.metrics import f1_score, accuracy_score

# import torch.nn as nn
# import torch.optim as optim

# # Define the LSTM model
# class LSTMModel(nn.Module):
#     def __init__(self, input_size, hidden_size, num_layers, num_classes):
#         super(LSTMModel, self).__init__()
#         self.lstm = nn.LSTM(input_size, hidden_size, num_layers, batch_first=True)
#         self.fc = nn.Linear(hidden_size, num_classes)
    
#     def forward(self, x):
#         h0 = torch.zeros(num_layers, x.size(0), hidden_size).to(x.device)
#         c0 = torch.zeros(num_layers, x.size(0), hidden_size).to(x.device)
        
#         out, _ = self.lstm(x, (h0, c0))
#         out = self.fc(out[:, -1, :])  # Extract the output from the last time step
#         return out

# # Prepare the data
# X_train_pca_tensor = torch.tensor(X_train_pca, dtype=torch.float32).unsqueeze(1)
# y_train_pca_tensor = torch.tensor(y_train.values, dtype=torch.long)
# X_test_pca_tensor = torch.tensor(X_test_pca, dtype=torch.float32).unsqueeze(1)
# y_test_pca_tensor = torch.tensor(y_test.values, dtype=torch.long)

# train_pca_dataset = TensorDataset(X_train_pca_tensor, y_train_pca_tensor)
# test_pca_dataset = TensorDataset(X_test_pca_tensor, y_test_pca_tensor)

# train_pca_loader = DataLoader(train_pca_dataset, batch_size=4, shuffle=True)
# test_pca_loader = DataLoader(test_pca_dataset, batch_size=4, shuffle=True)

# # Initialize the model, loss function, and optimizer
# input_size = X_train.shape[1]
# hidden_size = 128
# num_layers = 4
# num_classes = 3

# model = LSTMModel(input_size, hidden_size, num_layers, num_classes)
# criterion = nn.CrossEntropyLoss()
# optimizer = optim.Adam(model.parameters(), lr=0.001)

# # Train the model
# num_epochs = 50
# for epoch in range(num_epochs):
#     model.train()
#     for X_batch, y_batch in train_loader:
#         optimizer.zero_grad()
#         outputs = model(X_batch)
#         loss = criterion(outputs, y_batch)
#         loss.backward()
#         optimizer.step()
    
#     print(f'Epoch [{epoch+1}/{num_epochs}], Loss: {loss.item():.4f}')

# # Evaluate the model
# model.eval()
# y_true = []
# y_pred = []
# with torch.no_grad():
#     for X_batch, y_batch in test_loader:
#         outputs = model(X_batch)
#         _, predicted = torch.max(outputs.data, 1)
#         y_true.extend(y_batch.numpy())
#         y_pred.extend(predicted.numpy())

# # Calculate F1 score and accuracy
# f1 = f1_score(y_true, y_pred, average='weighted')
# accuracy = accuracy_score(y_true, y_pred)

# print(f'F1 Score: {f1:.4f}')
# print(f'Accuracy: {accuracy * 100:.2f}%')