In [1]:
import pandas as pd
import numpy as np
import os

folder_path = '/content/drive/My Drive/eeg data'
file_list = os.listdir(folder_path)
files = [f for f in file_list if f.endswith('.csv')]
final_df_list = []

st_marks = ['start zero.png', 'start one.png', 'start two.jpg', 'start three.jpg', 'start four.jpg',
            'start five.jpg', 'start six.jpg', 'start seven.jpg', 'start eight.jpg', 'start nine.jpg',
            'start animal.jpg']
end_marks = ['end zero.png', 'end one.png', 'end two.jpg', 'end three.jpg', 'end four.jpg',
             'end five.jpg', 'end six.jpg', 'end seven.jpg', 'end eight.jpg', 'end nine.jpg',
             'end animal.jpg']

labels = {mark: i for i, mark in enumerate(st_marks)}

# Parameters for time slicing
window_size = 100
step_size = 50

def segment_and_label_data(df, start_marks, end_marks, labels_map):
    segments = []
    for start, end in zip(start_marks, end_marks):
        start_idx = df[df['Output1'] == start].index
        end_idx = df[df['Output1'] == end].index
        for i, j in zip(start_idx, end_idx):
            print(f"Start index: {i}, End index: {j}")  # Print start and end indices
            segment = df.iloc[i+1:j].copy()  # Exclude start and end markers
            print(f"Extracted segment between {i} and {j} with length {len(segment)}")  # Print segment length
            segment['label'] = labels[start]
            segments.append(segment)

    return pd.concat(segments, ignore_index=True) if segments else pd.DataFrame()

def time_slice(segment, window_size, step_size):
    sliced_segments = []
    for start in range(0, len(segment) - window_size + 1, step_size):
        end = start + window_size
        sliced_segment = segment.iloc[start:end].copy()
        if len(sliced_segment) == window_size:
            sliced_segments.append(sliced_segment)
    return sliced_segments

for file in files:
    file_path = os.path.join(folder_path, file)
    df = pd.read_csv(file_path)
    df.columns = ['Output1', 'Output2', 'Output3', 'Output4', 'var1', 'var2', 'var3', 'var4']

    df = df.iloc[:, :4]

    # Segment and label the data
    labeled_df = segment_and_label_data(df, st_marks, end_marks, labels)
    final_segments = []

    # Apply time slicing to each labeled segment
    for label in labeled_df['label'].unique():
        label_df = labeled_df[labeled_df['label'] == label]
        sliced_segments = time_slice(label_df, window_size, step_size)
        final_segments.extend(sliced_segments)

    # Combine all sliced segments for the current file
    file_segments_df = pd.concat(final_segments, ignore_index=True)
    final_df_list.append(file_segments_df)

final_df = pd.concat(final_df_list, ignore_index=True)


final_df.to_csv('/content/drive/My Drive/eeg data/augmented_data.csv', index=False)

print("Data augmentation complete. Augmented data saved to 'augmented_data.csv'")



Start index: 18831, End index: 20112
Extracted segment between 18831 and 20112 with length 1280
Start index: 16909, End index: 18190
Extracted segment between 16909 and 18190 with length 1280
Start index: 5761, End index: 6914
Extracted segment between 5761 and 6914 with length 1152
Start index: 3455, End index: 4992
Extracted segment between 3455 and 4992 with length 1536
Start index: 9477, End index: 10758
Extracted segment between 9477 and 10758 with length 1280
Start index: 11352, End index: 12552
Extracted segment between 11352 and 12552 with length 1199
Start index: 22666, End index: 23956
Extracted segment between 22666 and 23956 with length 1289
Start index: 7555, End index: 8836
Extracted segment between 7555 and 8836 with length 1280
Start index: 15041, End index: 16268
Extracted segment between 15041 and 16268 with length 1226
Start index: 13193, End index: 14474
Extracted segment between 13193 and 14474 with length 1280
Start index: 20737, End index: 21906
Extracted segment

In [7]:
from scipy.signal import stft,welch
file_path = '/content/drive/My Drive/eeg data/augmented_data.csv'
augmented_data = pd.read_csv(file_path)
fs = 256  # Sampling rate
nperseg = 256
noverlap = 128
from scipy.stats import kurtosis,skew


def preprocess_eeg(eeg_epoch):
    eeg_epoch = eeg_epoch.astype(np.float64)  # Convert to float64
    eeg_epoch = (eeg_epoch - np.mean(eeg_epoch)) / np.std(eeg_epoch)
    return eeg_epoch
def extract_features(eeg_epoch,fs,nperseg,noverlap):
    try:
        f, t, Zxx = stft(eeg_epoch, fs=fs, nperseg=nperseg, noverlap=noverlap)
    except ValueError as e:
        print(f"ValueError: {e}")
        return None

    magnitude = np.abs(Zxx)

    mean_magnitude = np.mean(magnitude, axis=1)
    var_magnitude = np.var(magnitude, axis=1)
    kurtosis_magnitude = kurtosis(magnitude, axis=1)


    features = np.concatenate((mean_magnitude, var_magnitude, kurtosis_magnitude))

    return features





In [9]:
features = []
labels = []
expected_feature_shape = (387,)

for label in augmented_data['label'].unique():
    segments = augmented_data[augmented_data['label'] == label]

    for i, (_, segment) in enumerate(segments.groupby((segments.index // len(df)))):

            eeg_epoch = segment[['Output1', 'Output2', 'Output3', 'Output4']].values.T.flatten()
            eeg_epoch = preprocess_eeg(eeg_epoch)
            feat = extract_features(eeg_epoch,fs,nperseg,noverlap)
            if feat is None:
              continue

            # Check and ensure feat has a consistent shape
            if feat.shape != expected_feature_shape:
              print(f"Skipping segment {i} for label {label}: Inconsistent feature shape ({feat.shape} != {expected_feature_shape})")
              continue
            features.append(feat)
            labels.append(label)



features = np.array(features)
labels = np.array(labels)

print("Features shape:", features.shape)
print("Labels shape:", labels.shape)



Skipping segment 12 for label 0: Inconsistent feature shape ((333,) != (387,))
Features shape: (217, 387)
Labels shape: (217,)


In [10]:
from sklearn.decomposition import PCA
from sklearn.preprocessing import StandardScaler

sc= StandardScaler()
features_scaled = sc.fit_transform(features)


n_components = 50
pca = PCA(n_components=n_components)

pca.fit(features_scaled)

features_pca = pca.transform(features_scaled)

In [11]:
from sklearn.model_selection import train_test_split


X_train, X_test, y_train, y_test = train_test_split(features_pca, labels, test_size=0.2, random_state=42)


sc= StandardScaler()
X_train_scaled = sc.fit_transform(X_train)
X_test_scaled = sc.transform(X_test)

In [12]:
from sklearn.model_selection import GridSearchCV
from sklearn.svm import SVC
from sklearn.metrics import classification_report,accuracy_score



svm = SVC()

# Define the parameter grid to search through
param_grid = {
    'C': [0.1, 1, 10, 100],  # Regularization parameter
    'gamma': ['scale', 'auto', 0.1, 0.01],  # Kernel coefficient
    'kernel': ['linear', 'rbf']  # Kernel type
}

# Perform Grid Search with cross-validation
grid_search = GridSearchCV(estimator=svm, param_grid=param_grid, cv=5, scoring='accuracy')
grid_search.fit(features_pca, labels)

# Print the best parameters and best score
print("Best parameters found:", grid_search.best_params_)
print("Best cross-validation accuracy:", grid_search.best_score_)

# Evaluate the best model on your test set
best_model = grid_search.best_estimator_
y_pred = best_model.predict(X_test)
print("Classification Report:")
print(classification_report(y_test, y_pred))


Best parameters found: {'C': 10, 'gamma': 'auto', 'kernel': 'rbf'}
Best cross-validation accuracy: 0.11543340380549683
Classification Report:
              precision    recall  f1-score   support

           0       0.00      0.00      0.00         3
           1       0.50      0.33      0.40         3
           2       0.20      0.50      0.29         2
           3       0.20      0.25      0.22         4
           4       0.29      0.67      0.40         3
           5       0.00      0.00      0.00         5
           6       0.60      0.60      0.60         5
           7       0.43      0.60      0.50         5
           8       0.00      0.00      0.00         3
           9       1.00      0.57      0.73         7
          10       1.00      0.25      0.40         4

    accuracy                           0.36        44
   macro avg       0.38      0.34      0.32        44
weighted avg       0.45      0.36      0.36        44



  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


In [13]:
from sklearn.neighbors import KNeighborsClassifier
knn = KNeighborsClassifier()


# Define the parameter grid to search through
param_grid_knn = {
    'n_neighbors': [3, 5, 7, 10],  # Number of neighbors to consider
    'weights': ['uniform', 'distance'],  # Weight function used in prediction
    'metric': ['euclidean', 'manhattan']  # Distance metric
}

# Perform Grid Search with cross-validation
grid_search_knn = GridSearchCV(estimator=knn, param_grid=param_grid_knn, cv=5, scoring='accuracy')
grid_search_knn.fit(features_pca, labels)

# Print the best parameters and best score
print("Best parameters found for KNN:", grid_search_knn.best_params_)
print("Best cross-validation accuracy for KNN:", grid_search_knn.best_score_)

# Evaluate the best KNN model on your test set
best_knn = grid_search_knn.best_estimator_
y_pred_knn = best_knn.predict(X_test)
print("KNN Classification Report:")
print(classification_report(y_test, y_pred_knn))

Best parameters found for KNN: {'metric': 'manhattan', 'n_neighbors': 5, 'weights': 'uniform'}
Best cross-validation accuracy for KNN: 0.11553911205073994
KNN Classification Report:
              precision    recall  f1-score   support

           0       0.17      0.67      0.27         3
           1       0.25      0.67      0.36         3
           2       0.25      1.00      0.40         2
           3       0.33      0.25      0.29         4
           4       0.33      0.33      0.33         3
           5       0.50      0.20      0.29         5
           6       0.50      0.20      0.29         5
           7       1.00      0.20      0.33         5
           8       0.33      0.33      0.33         3
           9       1.00      0.29      0.44         7
          10       0.00      0.00      0.00         4

    accuracy                           0.32        44
   macro avg       0.42      0.38      0.30        44
weighted avg       0.50      0.32      0.31        44



  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
