In [None]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, LabelEncoder
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense, Dropout
import matplotlib.pyplot as plt

# Path where the CSV files are located
data_dir = 'C:/Users/ASUS/OneDrive/Desktop/GAMEEMO/Preprocessed EEG Data/.csv format/'

# Read and merge all the CSV files, and calculate the mean for each signal and group
for i in range(1, 28 + 1):  # Loop for signals S01 to S28
    for group_number in range(1, 4 + 1):  # Loop for groups G1 to G4
        signal = f"S{i:02}"
        group = f"G{group_number}"
        file_path = f'final_mean_{data_dir}{signal}.csv'

        # Read the data
        data = pd.read_csv(file_path)

        # Calculate mean values for all columns (excluding the label column, if exists)
        mean_values = data.mean()

        # Save the mean values to a new CSV file
        mean_values.to_csv(f'{data_dir}mean_{signal}{group}.csv', header=['Mean'])
        print(f"Mean values for {signal}{group} have been saved to 'mean_{signal}{group}.csv'")

# Now, let's create a dataset for deep learning with the mean values
# Assuming you've merged or created the dataset with `mean_{signal}{group}.csv` files.
mean_data = []

for i in range(1, 28 + 1):
    for group_number in range(1, 4 + 1):
        signal = f"S{i:02}"
        group = f"G{group_number}"
        file_path = f'{data_dir}mean_{signal}{group}.csv'

        # Read the mean values
        mean_values = pd.read_csv(file_path, index_col=0)

        # Add the mean values and label to the dataset
        mean_data.append(mean_values)

# Convert list to DataFrame
mean_df = pd.concat(mean_data, axis=1).T  # Transpose so signals are rows, features are columns
mean_df = mean_df.reset_index(drop=True)  # Reset index for clean dataframe

# Example: Assuming the last column is the label (mood), and the rest are features
X = mean_df.drop('Mood', axis=1).values  # Features (drop the label column)
y = mean_df['Mood'].values  # Labels (e.g., "happy", "sad", "normal")

# Encode the labels (mood categories) into numeric values
label_encoder = LabelEncoder()
y = label_encoder.fit_transform(y)

# Normalize features
scaler = StandardScaler()
X = scaler.fit_transform(X)

# Reshape the data for LSTM input (3D array: [samples, time_steps, features])
time_steps = 256  # Update this based on your data

# Check if the number of features is divisible by time_steps
if X.shape[1] % time_steps != 0:
    raise ValueError(f"Cannot reshape array with shape {X.shape} into (time_steps, features) because {X.shape[1]} is not divisible by {time_steps}")

features = X.shape[1] // time_steps  # Features per time step

# Reshape X into 3D array (samples, time_steps, features)
X = X.reshape(-1, time_steps, features)

# Split the dataset into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Build the LSTM model
model = Sequential()

# LSTM Layer
model.add(LSTM(units=128, return_sequences=False, input_shape=(time_steps, features)))

# Dropout to prevent overfitting
model.add(Dropout(0.2))

# Fully connected layer
model.add(Dense(units=64, activation='relu'))

# Output layer with softmax activation for multi-class classification
model.add(Dense(units=3, activation='softmax'))  # 3 classes: happy, sad, normal

# Compile the model
model.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])

# Train the model
history = model.fit(X_train, y_train, epochs=10, batch_size=32, validation_data=(X_test, y_test))

# Evaluate the model
test_loss, test_acc = model.evaluate(X_test, y_test)
print(f"Test accuracy: {test_acc * 100:.2f}%")

# Plot the training and validation accuracy
plt.plot(history.history['accuracy'], label='train accuracy')
plt.plot(history.history['val_accuracy'], label='val accuracy')
plt.xlabel('Epochs')
plt.ylabel('Accuracy')
plt.legend()
plt.show()

OSError: [Errno 22] Invalid argument: 'final_mean_C:/Users/ASUS/OneDrive/Desktop/GAMEEMO/Preprocessed EEG Data/.csv format/S01.csv'

Finding the mood using the csv

In [None]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, LabelEncoder
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense, Dropout
from tensorflow.keras.utils import to_categorical
import os
import glob
import matplotlib.pyplot as plt

# Step 1: Combine All CSV Files for Each Signal and Group
def combine_csv_files(base_path):
    final_data_frames = []
    labels = []  # To store labels for each file (mood categories: Happy, Sad, Normal)
    
    for i in range(1, 28 + 1):
        signal = f"S{i:02}"
        mean_data_frames = []
        
        for group_number in range(1, 4 + 1):  # Iterate over groups G1 to G4
            group = f"G{group_number}"
            mean_file_path = f'{base_path}({signal})/Preprocessed EEG Data/.csv format/mean_{signal}{group}.csv'
            
            try:
                # Read the mean CSV file
                mean_data = pd.read_csv(mean_file_path, index_col=0)
                mean_data_frames.append(mean_data)
            except FileNotFoundError:
                print(f"File not found: {mean_file_path}")
                continue

        if mean_data_frames:
            # Combine group means and calculate final mean for the signal
            combined_means = pd.concat(mean_data_frames, axis=1)  # Combine group means horizontally
            final_mean = combined_means.mean(axis=1)  # Calculate mean across groups
            
            # Add signal's final mean to the dataset
            final_data_frames.append(final_mean)

            # Manually assign mood labels for now (replace with actual mood mapping logic)
            # Example: 0 = Happy, 1 = Sad, 2 = Normal
            labels.append(np.random.randint(0, 3))  # Replace with your mood labels
    
    # Combine all signals' final means into a single DataFrame
    if final_data_frames:
        final_dataset = pd.concat(final_data_frames, axis=1).T.reset_index(drop=True)
        final_dataset['Mood'] = labels  # Add the mood labels as the target column
        return final_dataset
    else:
        raise Exception("No valid mean data found to process.")

# Step 2: Load and Prepare the Data
base_path = 'C:/Users/ASUS/OneDrive/Desktop/GAMEEMO/'  # Update to your base path
data = combine_csv_files(base_path)

# Split into features and labels
X = data.iloc[:, :-1].values  # Features (EEG mean values)
y = data.iloc[:, -1].values   # Labels (Mood categories)

# Encode labels (0 = Happy, 1 = Sad, 2 = Normal)
y = to_categorical(y)

# Normalize features
scaler = StandardScaler()
X = scaler.fit_transform(X)

# Reshape features for LSTM input (samples, timesteps, features)
time_steps = 256  # Update this based on your data
features = X.shape[1] // time_steps
X = X.reshape(-1, time_steps, features)

# Split into training and testing datasets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Step 3: Build the LSTM Model
model = Sequential([
    LSTM(128, return_sequences=True, input_shape=(X_train.shape[1], X_train.shape[2])),
    Dropout(0.3),
    LSTM(64),
    Dropout(0.2),
    Dense(32, activation='relu'),
    Dense(y.shape[1], activation='softmax')  # Output layer for 3 classes
])

# Compile the model
model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])

# Train the model
history = model.fit(X_train, y_train, validation_split=0.2, epochs=50, batch_size=32)

# Step 4: Evaluate the Model
test_loss, test_accuracy = model.evaluate(X_test, y_test)
print(f"Test Accuracy: {test_accuracy:.2f}")

# Save the model
model.save("eeg_mood_lstm_model.h5")

# Step 5: Visualize Training Performance
plt.plot(history.history['accuracy'], label='Train Accuracy')
plt.plot(history.history['val_accuracy'], label='Validation Accuracy')
plt.title('Model Accuracy')
plt.xlabel('Epochs')
plt.ylabel('Accuracy')
plt.legend()
plt.show()

plt.plot(history.history['loss'], label='Train Loss')
plt.plot(history.history['val_loss'], label='Validation Loss')
plt.title('Model Loss')
plt.xlabel('Epochs')
plt.ylabel('Loss')
plt.legend()
plt.show()

mam ka code 

In [None]:
# Step 1: Load and Preprocess EEG Data
import numpy as np
import pandas as pd
from sklearn.preprocessing import StandardScaler
from sklearn.decomposition import PCA
from sklearn.cluster import KMeans
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import classification_report
import matplotlib.pyplot as plt

# Load EEG dataset
try:
    for i in range(1, 28 + 1):
        for group_number in range(1, 4 + 1):
            signal = f"S{i:02}"
            group = f"G{group_number}"
            file_path = f'C:/Users/ASUS/OneDrive/Desktop/GAMEEMO/({signal})/Preprocessed EEG Data/.csv format/final_mean_{signal}.csv'
    data = pd.read_csv(file_path)  # Replace with your dataset path
except FileNotFoundError:
    print(f"Error: File {file_path} not found. Please provide the correct path.")
    exit()

# Ensure dataset has no missing values
if data.isnull().sum().sum() > 0:
    print("Error: Dataset contains missing values. Please handle them before proceeding.")
    exit()

# Ensure dataset is numeric
if not np.issubdtype(data.dtypes.values[0], np.number):
    print("Error: Non-numeric data detected. Ensure all features are numeric.")
    exit()

# Scale features
scaler = StandardScaler()
data_scaled = scaler.fit_transform(data)

# Optional: Reduce dimensions for visualization
pca = PCA(n_components=2)  # For 2D visualization
data_pca = pca.fit_transform(data_scaled)

# Visualize data distribution
plt.figure(figsize=(8, 6))
plt.scatter(data_pca[:, 0], data_pca[:, 1], alpha=0.7)
plt.title('EEG Data Distribution (PCA)')
plt.xlabel('Principal Component 1')
plt.ylabel('Principal Component 2')
plt.show()

# Step 2: Cluster Data into 3 Groups using K-Means Clustering

# Cluster into 3 groups
kmeans = KMeans(n_clusters=3, random_state=42)
labels = kmeans.fit_predict(data_scaled)

# Add cluster labels to the dataset
data['cluster_labels'] = labels

# Visualize clustering
plt.figure(figsize=(8, 6))
plt.scatter(data_pca[:, 0], data_pca[:, 1], c=labels, cmap='viridis', alpha=0.7)
plt.title('Clustering Results')
plt.xlabel('Principal Component 1')
plt.ylabel('Principal Component 2')
plt.colorbar(label='Cluster')
plt.show()

# Step 3: Map Clusters to Desired Labels
# Analyze the clusters and assign labels based on heuristics or domain knowledge.

# Example heuristic for mapping
# Assuming mean values of features determine emotional state:
# Cluster with the lowest mean -> -1 (Sad)
# Cluster with middle mean ->  0 (Neutral)
# Cluster with highest mean ->  1 (Happy)
cluster_means = data.drop(['cluster_labels'], axis=1).groupby(data['cluster_labels']).mean()
cluster_map = cluster_means.mean(axis=1).sort_values().index

# Map clusters to -1, 0, 1
cluster_to_emotion = {cluster_map[0]: -1, cluster_map[1]: 0, cluster_map[2]: 1}
data['emotion_labels'] = data['cluster_labels'].map(cluster_to_emotion)

print("Cluster to Emotion Mapping:", cluster_to_emotion)
print(data[['cluster_labels', 'emotion_labels']].head())

# Step 4: Train a Model Using New Labels
# Separate features and labels
X = data.drop(['cluster_labels', 'emotion_labels'], axis=1)
y = data['emotion_labels']

# Train/test split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Train a classifier
clf = RandomForestClassifier(n_estimators=100, random_state=42)
clf.fit(X_train, y_train)

# Predict and evaluate
y_pred = clf.predict(X_test)
print("Classification Report:")
print(classification_report(y_test, y_pred))


Error: Dataset contains missing values. Please handle them before proceeding.
Error: Non-numeric data detected. Ensure all features are numeric.


ValueError: could not convert string to float: 'AF3'

: 

In [1]:
# Import necessary libraries
import numpy as np
import pandas as pd
from sklearn.preprocessing import StandardScaler
from sklearn.decomposition import PCA
from sklearn.cluster import KMeans
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import classification_report
import matplotlib.pyplot as plt

# Step 1: Load and preprocess EEG data
# Load the dataset
for i in range(1, 28 + 1):
        for group_number in range(1, 4 + 1):
            signal = f"S{i:02}"
            group = f"G{group_number}"
            file_path = f'C:/Users/ASUS/OneDrive/Desktop/GAMEEMO/({signal})/Preprocessed EEG Data/.csv format/final_mean_{signal}.csv'
data = pd.read_csv(file_path)

# Inspect the dataset
print("Initial Dataset:")
print(data.head())

# Handle unnamed or empty columns
data = data.dropna(axis=1, how='all')  # Drop completely empty columns

# Select numeric column
if 'Final Mean' in data.columns:
    numeric_data = data['Final Mean']
else:
    raise ValueError("Expected column 'Final Mean' not found in the dataset.")

# Convert to 2D array for StandardScaler
numeric_data = numeric_data.values.reshape(-1, 1)

# Scale the numeric data
scaler = StandardScaler()
data_scaled = scaler.fit_transform(numeric_data)

# Optional: Reduce dimensions for visualization using PCA
pca = PCA(n_components=2)
data_pca = pca.fit_transform(data_scaled)

# Visualize data distribution
plt.figure(figsize=(8, 6))
plt.scatter(data_pca[:, 0], data_pca[:, 1], alpha=0.7)
plt.title('EEG Data Distribution (PCA)')
plt.xlabel('Principal Component 1')
plt.ylabel('Principal Component 2')
plt.show()

# Step 2: Cluster data into 3 groups using K-Means Clustering
kmeans = KMeans(n_clusters=3, random_state=42)
labels = kmeans.fit_predict(data_scaled)

# Add cluster labels to a DataFrame for further processing
clustered_data = pd.DataFrame(data_scaled, columns=['Scaled Final Mean'])
clustered_data['cluster_labels'] = labels

# Visualize clustering
plt.figure(figsize=(8, 6))
plt.scatter(data_pca[:, 0], data_pca[:, 1], c=labels, cmap='viridis', alpha=0.7)
plt.title('Clustering Results')
plt.xlabel('Principal Component 1')
plt.ylabel('Principal Component 2')
plt.colorbar(label='Cluster')
plt.show()

# Step 3: Map clusters to desired labels
# Analyze the clusters and assign labels based on their means
cluster_means = clustered_data.groupby('cluster_labels')['Scaled Final Mean'].mean()
cluster_map = cluster_means.sort_values().index

# Map clusters to emotions (-1: Sad, 0: Neutral, 1: Happy)
cluster_to_emotion = {cluster_map[0]: -1, cluster_map[1]: 0, cluster_map[2]: 1}
clustered_data['emotion_labels'] = clustered_data['cluster_labels'].map(cluster_to_emotion)

print("Cluster to Emotion Mapping:", cluster_to_emotion)
print(clustered_data[['cluster_labels', 'emotion_labels']].head())

# Step 4: Train a model using the new labels
# Prepare features and labels
X = clustered_data[['Scaled Final Mean']]
y = clustered_data['emotion_labels']

# Train/test split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Train a classifier
clf = RandomForestClassifier(n_estimators=100, random_state=42)
clf.fit(X_train, y_train)

# Predict and evaluate
y_pred = clf.predict(X_test)
print("Classification Report:")
print(classification_report(y_test, y_pred))


Initial Dataset:
  Unnamed: 0  Final Mean
0        AF3    0.005432
1        AF4    0.014438
2         F3   -0.002649
3         F4   -0.016780
4         F7    0.098770


ValueError: Input X contains NaN.
PCA does not accept missing values encoded as NaN natively. For supervised learning, you might want to consider sklearn.ensemble.HistGradientBoostingClassifier and Regressor which accept missing values encoded as NaNs natively. Alternatively, it is possible to preprocess the data, for instance by using an imputer transformer in a pipeline or drop samples with missing values. See https://scikit-learn.org/stable/modules/impute.html You can find a list of all estimators that handle NaN values at the following page: https://scikit-learn.org/stable/modules/impute.html#estimators-that-handle-nan-values