# Frequency Domain Feature extraction 

In [None]:
# Build the feature-extracted dataset for one particular emotion


import scipy.io
import os
import numpy as np
from vmdpy import VMD
import matplotlib.pyplot as plt 
import pandas as pd

# Directory containing the EEG data files (path to the folder holding the recordings of one particular emotion)
directory_path = 'E:/BCI7thsem/emotionclips/filtered_data/filtered_data_mat/filter_wd_sgf_fir/SadColl'

# Only the recordings whose file name ends in '-2.mat' are processed
file_list = [f for f in os.listdir(directory_path) if f.endswith('-2.mat')]

# Sampling rate in Hz used for the window length and the FFT frequency axis
# (assumes the recordings are sampled at 128 Hz -- TODO confirm)
sampling_rate = 128

# Non-overlapping analysis windows of 3 seconds
window_size = int(3 * sampling_rate)

# Parameters for VMD (passed positionally to vmdpy.VMD below)
num_modes = 5   # Number of modes/components
alph = 500      # Secondary penalty factor for VMD
tau = 0
DC = 0
init = 1
tol = 1e-7

# Frequency bands in Hz, listed in the column order of the output matrix.
# Each entry is (low, high, low_edge_inclusive); the high edge is always inclusive.
# A bin lying exactly on a shared edge (4, 8, 12 or 30 Hz) is counted in the lower band only.
band_edges = [
    (8, 12, False),     # alpha
    (12, 30, False),    # beta
    (30, 100, False),   # gamma
    (0.5, 4, True),     # delta
    (4, 8, False),      # theta
]

# One output column per (band, mode) pair
num_features = len(band_edges) * num_modes


def band_powers(modes, fs):
    """Return the summed spectral power of every mode in every frequency band.

    Parameters
    ----------
    modes : 2-D array with one decomposed mode per row (modes x samples).
    fs : sampling rate in Hz.

    Returns
    -------
    1-D array of length ``len(band_edges) * modes.shape[0]`` in band-major
    order: all modes of the first band, then all modes of the second band, ...
    """
    spectrum = np.fft.fft(modes, axis=1)
    freqs = np.fft.fftfreq(modes.shape[1], d=1 / fs)
    # Squared FFT magnitude (unnormalised power per frequency bin)
    power = np.abs(spectrum) ** 2

    per_band = []
    for low, high, low_inclusive in band_edges:
        above_low = freqs >= low if low_inclusive else freqs > low
        # Negative-frequency bins never satisfy the lower bound, so only
        # the positive half of the spectrum contributes.
        in_band = above_low & (freqs <= high)
        per_band.append(power[:, in_band].sum(axis=1))
    return np.concatenate(per_band)


# Start from an empty matrix so no placeholder (all-zero) rows end up in the CSV
final_data = np.empty((0, num_features))

# Loop through each file in the directory
for file_name in file_list:
    file_path = os.path.join(directory_path, file_name)

    # Load EEG data from the current file
    mat = scipy.io.loadmat(file_path)
    # 'eeg_data' should be a 2D array with dimensions (time_points, channels)
    eeg_data = mat['artifact_free_data']
    num_time_points, num_channels = eeg_data.shape

    # Only complete windows are used; trailing samples are ignored
    num_windows = num_time_points // window_size

    # One feature row per (window, channel), window-major
    file_rows = []
    for window_index in range(num_windows):
        window_start = window_index * window_size
        window_end = window_start + window_size
        print(f"Window start: {window_start/sampling_rate}, Window end: {window_end/sampling_rate}")

        # Extract EEG segment within the window
        eeg_window = eeg_data[window_start:window_end, :]

        # Decompose every channel present in the recording (not a fixed 32)
        for channel in range(num_channels):
            u, u_hat, omega = VMD(eeg_window[:, channel], alph, tau, num_modes, DC, init, tol)
            file_rows.append(band_powers(u, sampling_rate))

    # reshape keeps the (rows, num_features) layout even when a file yields no window
    feature_map = np.array(file_rows).reshape(-1, num_features)

    final_data = np.concatenate((final_data, feature_map), axis=0)
    print("iteration done")
    print(final_data.shape)

# Columns: alpha, beta, gamma, delta, theta power, each for modes 0..num_modes-1
dfF = pd.DataFrame(final_data)
dfF.to_csv('E:/BCI7thsem/emotionclips/filtered_data/filtered_data_mat/filter_wd_sgf_fir/featureextractALL-EXP/Sad_freq_3s.csv', index=False)






# Preprocessing and final dataset making

In [None]:
# Combine the extracted datasets of all emotions
# Labels: happy 0, sad 1 and fear 2


from scipy import stats
import pandas as pd
from sklearn.preprocessing import MinMaxScaler
# Shared scaler; it is re-fitted for every emotion inside load_emotion_features
scaler = MinMaxScaler()


def load_emotion_features(csv_path, label):
    """Load one emotion's feature CSV, normalise it and attach its class label.

    Parameters
    ----------
    csv_path : path of the feature CSV written by the extraction step.
    label : integer class label stored in the new 'label' column.

    Returns
    -------
    DataFrame with the original feature columns scaled to [0, 1] plus 'label'.
    """
    raw = pd.read_csv(csv_path)

    # Drop all-zero placeholder rows BEFORE normalising, so they cannot
    # influence the mean/std of the z-score or the minimum of the min-max scaling.
    raw = raw.loc[(raw != 0).any(axis=1)]

    # NOTE(review): every emotion is standardised and scaled with its own
    # statistics, so the same raw value maps to different scaled values per
    # class -- confirm this is intended.
    z_scored = stats.zscore(raw)
    scaled = pd.DataFrame(scaler.fit_transform(z_scored), columns=raw.columns)

    # 'scaled' is a fresh frame, so adding a column does not touch a slice of 'raw'
    scaled['label'] = label
    return scaled


df1 = load_emotion_features('E:/BCI7thsem/emotionclips/filtered_data/filtered_data_mat/filter_wd_sgf_fir/featureextractALL-EXP/Happy_freq_15s.csv', 0)
df2 = load_emotion_features('E:/BCI7thsem/emotionclips/filtered_data/filtered_data_mat/filter_wd_sgf_fir/featureextractALL-EXP/Sad_freq_15s.csv', 1)
df3 = load_emotion_features('E:/BCI7thsem/emotionclips/filtered_data/filtered_data_mat/filter_wd_sgf_fir/featureextractALL-EXP/Fear_freq_15s.csv', 2)

# Stack the three emotions into one labelled dataset with a continuous index
df_fmHJ = pd.concat([df1, df2, df3], ignore_index=True)

df_fmHJ.to_csv('E:/BCI7thsem/emotionclips/filtered_data/filtered_data_mat/filter_wd_sgf_fir/featureextractALL-EXP/Frequency_features_15s.csv', index=False)


# ML

In [None]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split, cross_val_score, KFold
from sklearn.tree import DecisionTreeClassifier
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, confusion_matrix
import matplotlib.pyplot as plt
import seaborn as sns

# Read the combined feature table and discard every row that contains a NaN
dataset_path = "E:/BCI7thsem/emotionclips/filtered_data/filtered_data_mat/filter_wd_sgf_fir/featureextractALL-EXP/Freq_Features_1sec.csv"
df = pd.read_csv(dataset_path).dropna()

# The last column holds the class label; everything before it is a feature
feature_frame = df.iloc[:, :-1]
label_series = df.iloc[:, -1]
X = feature_frame.values
y = label_series.values

# Hold out 20 % of the rows for testing (fixed seed for reproducibility)
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)


In [None]:
# import xgboost as xgb
from sklearn.svm import SVC
# from sklearn.naive_bayes import GaussianNB

# Classifier under test: support-vector classifier with default settings.
# Alternatives that can be swapped in here:
#   DecisionTreeClassifier(), xgb.XGBClassifier(), GaussianNB()
classifier = SVC()

# 6-fold shuffled cross validation on the training split
k_fold = KFold(n_splits=6, shuffle=True, random_state=42)
fold_scores = cross_val_score(
    classifier,
    X_train,
    y_train,
    cv=k_fold,
    scoring='accuracy',
)

# Validation accuracy is the mean accuracy over the folds
validation_accuracy = np.mean(fold_scores)

# Fit on the complete training split, then predict both splits
classifier.fit(X_train, y_train)
y_pred_test = classifier.predict(X_test)
y_pred_train = classifier.predict(X_train)

# Test-set metrics; precision/recall/F1 are weighted by class support and
# report 0 instead of warning when a class is never predicted
weighted_kwargs = dict(average='weighted', zero_division=0)
test_accuracy = accuracy_score(y_test, y_pred_test)
precision = precision_score(y_test, y_pred_test, **weighted_kwargs)
recall = recall_score(y_test, y_pred_test, **weighted_kwargs)
f1 = f1_score(y_test, y_pred_test, **weighted_kwargs)

# Confusion matrices for the training and the testing split
conf_matrix_train = confusion_matrix(y_train, y_pred_train)
conf_matrix_test = confusion_matrix(y_test, y_pred_test)

# Plot Confusion Matrix
def plot_confusion_matrix(conf_matrix, title):
    """Display a confusion matrix as an annotated blue heatmap.

    Parameters
    ----------
    conf_matrix : 2-D array of integer counts (cells are formatted with "d").
    title : text shown above the plot.
    """
    _, axes = plt.subplots(figsize=(8, 6))
    sns.heatmap(conf_matrix, annot=True, fmt="d", cmap="Blues", ax=axes)
    axes.set_title(title)
    axes.set_xlabel('Predicted labels')
    axes.set_ylabel('True labels')
    plt.show()

# # Plot Confusion Matrix for training set
# plot_confusion_matrix(conf_matrix_train, title="Confusion Matrix - Training Set")

# # Plot Confusion Matrix for testing set
# plot_confusion_matrix(conf_matrix_test, title="Confusion Matrix - Testing Set")

# Report the cross-validation accuracy followed by the test-set metrics
metric_report = (
    ("Validation Accuracy:", validation_accuracy),
    ("Test Accuracy:", test_accuracy),
    ("Precision:", precision),
    ("Recall:", recall),
    ("F1 Score:", f1),
)
for metric_label, metric_value in metric_report:
    print(metric_label, metric_value)


# For each feature

In [None]:
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import cross_val_score, KFold
from sklearn.metrics import accuracy_score
import numpy as np

# Accuracies collected per feature, in column order
validation_accuracies = []
test_accuracies = []

# The same 6-fold shuffled split is reused for every feature
k_fold = KFold(n_splits=6, shuffle=True, random_state=42)

# Evaluate every feature column on its own
for column in range(X_train.shape[1]):
    # Slice (instead of index) so the single column stays 2-D: (n_samples, 1)
    X_train_feature = X_train[:, column:column + 1]
    X_test_feature = X_test[:, column:column + 1]

    # Support-vector classifier with default settings; other estimators
    # (random forest, k-NN, decision tree) can be swapped in here
    classifier = SVC()

    # Mean cross-validated accuracy on the training split
    fold_scores = cross_val_score(
        classifier,
        X_train_feature,
        y_train,
        cv=k_fold,
        scoring='accuracy',
    )
    validation_accuracy = np.mean(fold_scores)
    validation_accuracies.append(validation_accuracy)
    print(validation_accuracy)

    # Fit on the whole training split and score the held-out test split
    classifier.fit(X_train_feature, y_train)
    y_pred_test = classifier.predict(X_test_feature)
    test_accuracy = accuracy_score(y_test, y_pred_test)
    test_accuracies.append(test_accuracy)

# Summary: one line per feature, numbered from 1
for number, (val_acc, tst_acc) in enumerate(zip(validation_accuracies, test_accuracies), start=1):
    print(f"Feature {number}: Validation Accuracy: {val_acc}, Test Accuracy: {tst_acc}")


# For each power band

In [None]:
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import cross_val_score, KFold
from sklearn.metrics import accuracy_score
import numpy as np

# Accuracies collected per group of five consecutive columns
validation_accuracies = []
test_accuracies = []

# The same 6-fold shuffled split is reused for every group
k_fold = KFold(n_splits=6, shuffle=True, random_state=42)

# Walk over the feature matrix in blocks of five adjacent columns
group_width = 5
for first_column in range(0, X_train.shape[1], group_width):
    group = slice(first_column, first_column + group_width)
    X_train_features = X_train[:, group]
    X_test_features = X_test[:, group]

    # Support-vector classifier with default settings; other estimators
    # (random forest, decision tree, k-NN) can be swapped in here
    classifier = SVC()

    # Mean cross-validated accuracy on the training split
    fold_scores = cross_val_score(
        classifier,
        X_train_features,
        y_train,
        cv=k_fold,
        scoring='accuracy',
    )
    validation_accuracy = np.mean(fold_scores)
    validation_accuracies.append(validation_accuracy)
    print(validation_accuracy)

    # Fit on the whole training split and score the held-out test split
    classifier.fit(X_train_features, y_train)
    y_pred_test = classifier.predict(X_test_features)
    test_accuracy = accuracy_score(y_test, y_pred_test)
    test_accuracies.append(test_accuracy)

# Summary: one line per feature group, numbered from 1
for number, (val_acc, tst_acc) in enumerate(zip(validation_accuracies, test_accuracies), start=1):
    print(f"Feature Group {number}: Validation Accuracy: {val_acc}, Test Accuracy: {tst_acc}")


# For each IMF

In [None]:
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import cross_val_score, KFold
from sklearn.metrics import accuracy_score
import numpy as np

# Accuracies collected per feature set
validation_accuracies = []
test_accuracies = []

# The same 6-fold shuffled split is reused for every feature set
k_fold = KFold(n_splits=6, shuffle=True, random_state=42)

# Five sets of column indices with stride 5 over the 25 columns:
# [0, 5, 10, 15, 20], [1, 6, 11, 16, 21], ..., [4, 9, 14, 19, 24]
feature_sets = [list(range(offset, 25, 5)) for offset in range(5)]

# Evaluate each set of columns separately
for feature_indices in feature_sets:
    X_train_features = X_train[:, feature_indices]
    X_test_features = X_test[:, feature_indices]

    # Support-vector classifier with default settings; other estimators
    # (random forest, decision tree, k-NN) can be swapped in here
    classifier = SVC()

    # Mean cross-validated accuracy on the training split
    fold_scores = cross_val_score(
        classifier,
        X_train_features,
        y_train,
        cv=k_fold,
        scoring='accuracy',
    )
    validation_accuracy = np.mean(fold_scores)
    validation_accuracies.append(validation_accuracy)
    print(validation_accuracy)

    # Fit on the whole training split and score the held-out test split
    classifier.fit(X_train_features, y_train)
    y_pred_test = classifier.predict(X_test_features)
    test_accuracy = accuracy_score(y_test, y_pred_test)
    test_accuracies.append(test_accuracy)

# Summary: one line per feature set, numbered from 1
for number, (val_acc, tst_acc) in enumerate(zip(validation_accuracies, test_accuracies), start=1):
    print(f"Feature Set {number}: Validation Accuracy: {val_acc}, Test Accuracy: {tst_acc}")


In [3]:
import os
# Return the process's current working directory (shown as the cell output)
os.getcwd()

'C:\\Users\\saman'