# In [None]:
import glob
import numpy as np
import scipy.io as sio
# Switch for the Google Drive mount below; set to False to skip it.
GOOGLE_COLAB = True
path = ""  # not referenced anywhere in the visible part of this script
if GOOGLE_COLAB:
    # Imported only when the flag is on, so google.colab is not required otherwise.
    # NOTE(review): `files` is imported but not used in the visible code.
    from google.colab import drive, files
    # Mount Drive so that the /content/drive/... patterns below can be globbed.
    drive.mount('/content/drive/')

# Glob patterns selecting the EEG recordings (*.txt) and the label files (*.lab).
EEG_data_directory_path = '/content/drive/MyDrive/ML/project/EEG_Data/25-users/*.txt'
Labels_directory_path = '/content/drive/MyDrive/ML/project/EEG_Data/labels/*.lab'
# For the EEG Data

# glob returns paths in arbitrary, filesystem-dependent order. Both listings
# are sorted so that runs are reproducible and row i of the EEG matrix is
# paired with label i.
# NOTE(review): this relies on recording and label files sharing a naming
# scheme so that both sort into the same order -- confirm against the dataset.
data_file_paths = sorted(glob.glob(EEG_data_directory_path))

# Every recording is flattened into a single row. The empty (0, 7168) seed
# keeps the result 2-D when no file matches and makes np.concatenate reject
# any recording that does not flatten to exactly 7168 values.
flattened_rows = [np.empty((0, 7168), dtype=float)]
flattened_rows.extend(
    np.loadtxt(data_file_path).reshape(1, -1)
    for data_file_path in data_file_paths
)

# Concatenate once at the end; concatenating inside the loop recopies the
# whole matrix for every file.
flattened_data = np.concatenate(flattened_rows, axis=0)

print("Shape of Flattened EEG Data:", flattened_data.shape)
import matplotlib.pyplot as plt
import seaborn as sns
import pandas as pd
# For Labels
label_file_paths = sorted(glob.glob(Labels_directory_path))

# Label word (lower-cased, stripped) -> class id. The spelling "disike" is
# kept for label files that contain it; the correct spelling maps to the
# same class so those files are no longer dropped.
label_mapping = {"disike": 0, "dislike": 0, "like": 1}

labels = []

for label_file_path in label_file_paths:
    with open(label_file_path, 'r') as label_file:
        label_word = label_file.read().strip().lower()
    if label_word not in label_mapping:
        # Skipping the file would shift every later label by one row, so
        # fail loudly instead of silently mislabelling the data.
        raise ValueError(
            f"Unrecognised label {label_word!r} in {label_file_path}; "
            f"expected one of {sorted(label_mapping)}"
        )
    labels.append(label_mapping[label_word])

# Column vector so it can be appended as the last column of the data matrix.
labels = np.array(labels).reshape(-1, 1)

print("Shape of Labels:", labels.shape)

if labels.shape[0] != flattened_data.shape[0]:
    raise ValueError(
        f"Found {flattened_data.shape[0]} EEG recordings but "
        f"{labels.shape[0]} labels; every recording needs exactly one label"
    )

# Final layout: one row per recording, 7168 signal values followed by the label.
combined_data = np.hstack((flattened_data, labels))

print("Shape of Combined Data:", combined_data.shape)
import matplotlib.pyplot as plt
import seaborn as sns
import pandas as pd
import numpy as np
import glob

# 'combined_data' holds one recording per row; the last column is the label
# and every column before it is a signal value.

# Extract EEG data and labels
eeg_data = combined_data[:, :-1]
labels = combined_data[:, -1]

# Upper bound on the number of recordings to plot (one figure per recording)
num_signals_to_plot = 40

# Sample-index axis shared by all plots; its length is taken from the data
# rather than hard-coded, and it is built once instead of once per figure.
time_axis = np.arange(0, eeg_data.shape[1])

# Plot the selected signals. The count is capped at the number of recordings
# actually loaded so a small dataset does not raise IndexError.
for i in range(min(num_signals_to_plot, eeg_data.shape[0])):
    # Extract the i-th signal
    signal = eeg_data[i, :]

    # Plot the signal
    plt.figure(figsize=(7, 4))
    plt.plot(time_axis, signal)
    plt.title(f'EEG Signal {i+1} - Label: {labels[i]}')
    plt.xlabel('Time')
    plt.ylabel('Amplitude')
    plt.show()

import mne
from scipy.signal import butter, filtfilt

# Function to apply bandpass filter
def bandpass_filter(data, lowcut, highcut, fs, order=4, axis=-1):
    """Apply a zero-phase Butterworth band-pass filter to ``data``.

    The filter is designed as second-order sections and run forward and
    backward (``sosfiltfilt``), so the output has no phase shift. The SOS
    form is used instead of ``(b, a)`` coefficients because the polynomial
    form loses precision for band-pass designs whose lower edge is close
    to 0 relative to the Nyquist frequency.

    Args:
        data: Array-like signal; may be multi-dimensional.
        lowcut: Lower cutoff frequency, in the same unit as ``fs``.
        highcut: Upper cutoff frequency, in the same unit as ``fs``.
        fs: Sampling frequency.
        order: Butterworth order passed to ``scipy.signal.butter``.
        axis: Axis of ``data`` along which to filter (default: last).

    Returns:
        Filtered array with the same shape as ``data``.

    Raises:
        ValueError: Propagated from SciPy when the normalised cutoffs are
            not inside (0, 1) or the signal is too short for edge padding.
    """
    # Local import: the file-level import only brings in butter and filtfilt.
    from scipy.signal import sosfiltfilt

    nyquist = 0.5 * fs
    # butter expects cutoffs as fractions of the Nyquist frequency.
    band = [lowcut / nyquist, highcut / nyquist]
    sos = butter(order, band, btype='band', output='sos')
    return sosfiltfilt(sos, data, axis=axis)

# Define the bandpass filter parameters
lowcut = 1.0  # lower cutoff frequency in Hz
highcut = 50.0  # upper cutoff frequency in Hz
fs = 128.0  # sampling frequency in Hz

# Apply the bandpass filter to every EEG signal in one call. The filter runs
# along the last axis, which for the (recordings, samples) matrix is the
# sample axis, so no per-row Python loop is needed.
# NOTE(review): each row is a recording that was flattened from the text
# file; if a file holds several channels, this filters across the joins
# between channels -- confirm the intended layout.
filtered_data = bandpass_filter(eeg_data, lowcut, highcut, fs)

# Wrap the filtered signals as MNE epochs: the inserted axis gives the array
# shape (recordings, 1, samples), i.e. one epoch per recording with a single
# channel named 'EEG'.
info = mne.create_info(ch_names=['EEG'], sfreq=fs, ch_types=['eeg'])
epochs = mne.EpochsArray(filtered_data[:, np.newaxis, :], info=info, tmin=0, verbose=False)

# ...

# Extract time-domain features (mean and standard deviation) over the sample
# axis. The epoch data is fetched once and reused for both statistics.
epoch_values = epochs.get_data()
mean_features = epoch_values.mean(axis=2)
std_features = epoch_values.std(axis=2)

# Reshape labels to a column so they can be stacked next to the features
labels_reshaped = labels.reshape(-1, 1)

# Combine extracted features: columns are mean, std, label
extracted_features = np.hstack((mean_features, std_features, labels_reshaped))

# # Plot the selected features
# for i in range(num_signals_to_plot):
#     plt.figure(figsize=(7, 4))
#     plt.bar(range(extracted_features.shape[1] - 1), extracted_features[i, :-1], tick_label=['Mean', 'Std'])
#     plt.title(f'Extracted Features for EEG Signal {i+1} - Label: {int(extracted_features[i, -1])}')
#     plt.xlabel('Feature')
#     plt.ylabel('Value')
#     plt.show()

# from sklearn.model_selection import train_test_split
# from sklearn.linear_model import LogisticRegression
# from sklearn.metrics import accuracy_score, confusion_matrix
# import seaborn as sns

# # ...

# # Reshape labels to match the shape of feature arrays
# labels_reshaped = labels.reshape(-1, 1)

# # Combine extracted features
# extracted_features = np.hstack((mean_features, std_features, labels_reshaped))

# # Split the data into training and testing sets
# X_train, X_test, y_train, y_test = train_test_split(extracted_features[:, :-1], extracted_features[:, -1], test_size=0.2, random_state=42)

# # Initialize and train the logistic regression model
# model = LogisticRegression()
# model.fit(X_train, y_train)

# # Make predictions on the test set
# y_pred = model.predict(X_test)

# # Calculate accuracy
# accuracy = accuracy_score(y_test, y_pred)
# print("Accuracy:", accuracy)

# # Plot the confusion matrix
# cm = confusion_matrix(y_test, y_pred)
# plt.figure(figsize=(6, 4))
# sns.heatmap(cm, annot=True, fmt='d', cmap='Blues', xticklabels=['Dislike', 'Like'], yticklabels=['Dislike', 'Like'])
# plt.title('Confusion Matrix')
# plt.xlabel('Predicted Label')
# plt.ylabel('True Label')
# plt.show()

# from sklearn.svm import SVC
# from sklearn.metrics import accuracy_score, confusion_matrix
# import seaborn as sns

# # ...

# # Split the data into training and testing sets
# X_train, X_test, y_train, y_test = train_test_split(extracted_features[:, :-1], extracted_features[:, -1], test_size=0.2, random_state=42)

# # Initialize and train the SVM model
# svm_model = SVC(kernel='linear', C=1.0)  # You can adjust kernel and C parameter based on your needs
# svm_model.fit(X_train, y_train)

# # Make predictions on the test set
# y_pred_svm = svm_model.predict(X_test)

# # Calculate accuracy
# accuracy_svm = accuracy_score(y_test, y_pred_svm)
# print("SVM Accuracy:", accuracy_svm)

# # Plot the confusion matrix for SVM
# cm_svm = confusion_matrix(y_test, y_pred_svm)
# plt.figure(figsize=(6, 4))
# sns.heatmap(cm_svm, annot=True, fmt='d', cmap='Blues', xticklabels=['Dislike', 'Like'], yticklabels=['Dislike', 'Like'])
# plt.title('SVM Confusion Matrix')
# plt.xlabel('Predicted Label')
# plt.ylabel('True Label')
# plt.show()

import tensorflow as tf
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, confusion_matrix
import seaborn as sns

# ...

# Split the data into training and testing sets (last column is the label)
X_train, X_test, y_train, y_test = train_test_split(extracted_features[:, :-1], extracted_features[:, -1], test_size=0.2, random_state=42)

# NOTE(review): normalize(axis=1) rescales each sample (row) to unit L2 norm
# across its features; it does not standardise each feature across samples.
# Confirm this is the intended scaling.
X_train_normalized = tf.keras.utils.normalize(X_train, axis=1)
X_test_normalized = tf.keras.utils.normalize(X_test, axis=1)

# Single hidden layer with dropout, one sigmoid unit for the binary output
model = tf.keras.Sequential([
    tf.keras.layers.Dense(128, activation='relu', input_shape=(X_train.shape[1],)),
    tf.keras.layers.Dropout(0.2),
    tf.keras.layers.Dense(1, activation='sigmoid')  # Sigmoid activation for binary classification
])

model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])

# 20% of the training split is held out by Keras for validation
model.fit(X_train_normalized, y_train, epochs=10, batch_size=32, validation_split=0.2, verbose=0)

_, accuracy_nn = model.evaluate(X_test_normalized, y_test)
print("Neural Network Accuracy:", accuracy_nn)

# Threshold the sigmoid outputs at 0.5 to get hard 0/1 predictions
y_pred_nn = (model.predict(X_test_normalized) > 0.5).astype(int)

# Plot the confusion matrix for the neural network.
# labels=[0, 1] pins the matrix to 2x2 even when the test split or the
# predictions contain only one class, so it always matches the two tick
# labels below. Both inputs are passed as 1-D integer arrays.
cm_nn = confusion_matrix(y_test.astype(int), y_pred_nn.ravel(), labels=[0, 1])
plt.figure(figsize=(6, 4))
sns.heatmap(cm_nn, annot=True, fmt='d', cmap='Blues', xticklabels=['Dislike', 'Like'], yticklabels=['Dislike', 'Like'])
plt.title('Neural Network Confusion Matrix')
plt.xlabel('Predicted Label')
plt.ylabel('True Label')
plt.show()
