In [None]:
import pandas as pd
# Read the arm-movement dataset into `df` and print its first 20 rows
# as a quick sanity check of the file contents.
df = pd.read_csv(filepath_or_buffer="ArmMovementDetection_Dataset.csv")
preview_rows = df.head(n=20)
print(preview_rows)

In [None]:
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score

# Baseline experiment: an RBF-kernel SVM trained on standardized features and
# scored by accuracy on a held-out 20% split.

# Read the CSV and separate the 'label' target from the feature columns
data = pd.read_csv("ArmMovementDetection_Dataset.csv")
y = data['label']
X = data.drop(columns=['label'])

# Hold out 20% of the rows for testing (seeded so the partition is repeatable)
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

# Learn the scaling statistics from the training rows only, then apply them
# to both partitions so no test information reaches the scaler
scaler = StandardScaler().fit(X_train)
X_train = scaler.transform(X_train)
X_test = scaler.transform(X_test)

# Fit the classifier and predict labels for the held-out rows
clf = SVC(kernel='rbf', random_state=42).fit(X_train, y_train)
y_pred = clf.predict(X_test)

# Report the fraction of correctly classified test rows
accuracy = accuracy_score(y_test, y_pred)
print("Accuracy:", accuracy)


In [None]:
import pandas as pd
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.preprocessing import StandardScaler
from sklearn.svm import SVC
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score

# Read the dataset and pull the 'label' target out of the feature table
data = pd.read_csv("ArmMovementDetection_Dataset.csv")
y = data['label']
X = data.drop(columns=['label'])

# 80/20 train/test partition with a fixed seed
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

# Scale the features with statistics learned from the training partition only
scaler = StandardScaler().fit(X_train)
X_train = scaler.transform(X_train)
X_test = scaler.transform(X_test)

# Model Selection and Parameter Tuning
# Each entry describes one candidate: a display name, an unfitted estimator,
# and the hyper-parameter grid that GridSearchCV will search for it.
models = [
    {
        'name': 'SVM',
        'estimator': SVC(),
        'params': {
            'kernel': ['linear', 'rbf', 'poly'],
            'C': [0.1, 1, 10]
        }
    },
    {
        'name': 'Random Forest',
        # Seeded so the selected parameters and the reported accuracy are
        # repeatable across runs, matching the seeded train/test split.
        'estimator': RandomForestClassifier(random_state=42),
        'params': {
            'n_estimators': [50, 100, 200],
            'max_depth': [None, 5, 10]
        }
    }
]

# Tune each candidate with 5-fold cross-validated grid search on the training
# split, then score the refit best estimator on the held-out test split.
for model in models:
    grid_search = GridSearchCV(
        estimator=model['estimator'],
        param_grid=model['params'],
        scoring='accuracy',
        cv=5,
    )
    # fit() returns the search object itself, so the winner can be read off directly
    best_estimator = grid_search.fit(X_train, y_train).best_estimator_
    y_pred = best_estimator.predict(X_test)
    accuracy = accuracy_score(y_test, y_pred)
    print(
        f"{model['name']} Accuracy: {accuracy}",
        f"Best Parameters: {grid_search.best_params_}\n",
        sep="\n",
    )


In [None]:
import matplotlib.pyplot as plt

# Results
# NOTE(review): these figures are hard-coded rather than read from a fitted
# search object — presumably copied from an earlier grid-search run; confirm
# they are still current before relying on the chart.
models_results = {
    'SVM': {'accuracy': 0.508, 'best_params': {'C': 10, 'kernel': 'poly'}},
    'Random Forest': {'accuracy': 0.590, 'best_params': {'max_depth': 5, 'n_estimators': 50}}
}

# Plotting
fig, ax = plt.subplots(figsize=(10, 6))

# Accuracy bar plot (explicit lists instead of dict views for the categorical axis)
model_names = list(models_results.keys())
model_accuracies = [result['accuracy'] for result in models_results.values()]
ax.bar(model_names, model_accuracies, color=['blue', 'green'])

# Annotate every bar in a single pass
for i, (model, result) in enumerate(models_results.items()):
    # Accuracy value just above the top of the bar
    ax.text(i, result['accuracy'] + 0.01, f"{result['accuracy']:.3f}", ha='center', fontsize=12)

    # Best parameters centred at half the bar's height. A fixed y of 0.5 put the
    # box on top of the value label whenever a bar ended near 0.5 (e.g. 0.508).
    best_params = ", ".join(f"{k}: {v}" for k, v in result['best_params'].items())
    ax.text(i, result['accuracy'] / 2, f"Best Params:\n{best_params}", ha='center', va='center', fontsize=10, bbox=dict(facecolor='white', alpha=0.5))

# Title and labels
ax.set_title('Comparison of SVM and Random Forest Classifiers', fontsize=16)
ax.set_ylabel('Accuracy', fontsize=14)
ax.set_ylim(0, 1)

# Show plot
plt.tight_layout()
plt.show()


In [None]:
import pandas as pd
import seaborn as sns
import numpy as np  # Add this import statement
from sklearn.ensemble import RandomForestClassifier
from sklearn.inspection import permutation_importance
from sklearn.decomposition import PCA
import matplotlib.pyplot as plt

# Load the dataset
data = pd.read_csv("ArmMovementDetection_Dataset.csv")

# Data distribution of the TP9 column, one histogram + KDE curve per label value
plt.figure(figsize=(10, 6))
tp9_ax = sns.histplot(data=data, x='TP9', hue='label', kde=True, bins=20)
plt.title('Distribution of TP9 feature by Label')
plt.xlabel('TP9')
plt.ylabel('Frequency')
# seaborn has already drawn the hue legend. Calling plt.legend() here would
# replace it with an empty one, because the histogram artists carry no labels.
# Retitle the existing legend instead.
hue_legend = tp9_ax.get_legend()
if hue_legend is not None:
    hue_legend.set_title('Label')
plt.show()

# Correlation matrix of all columns (including 'label'), annotated with 2-decimal values
plt.figure(figsize=(10, 6))
sns.heatmap(data.corr(), annot=True, cmap='coolwarm', fmt=".2f")
plt.title('Correlation Matrix')
plt.show()

# Feature importance with Random Forest
# The forest is fitted on the full table (no train/test split) purely to rank
# the feature columns by impurity-based importance.
y = data['label']
X = data.drop(columns=['label'])
rf = RandomForestClassifier(n_estimators=100, random_state=42).fit(X, y)

importances = rf.feature_importances_
indices = importances.argsort()[::-1]  # most important feature first

plt.figure(figsize=(10, 6))
importance_ax = sns.barplot(x=importances[indices], y=X.columns[indices])
importance_ax.set(
    title="Feature Importance (Random Forest)",
    xlabel='Relative Importance',
    ylabel='Features',
)
plt.show()

# Decision boundaries using PCA
# The features are projected onto their first two principal components and
# coloured by label; no classifier is involved in this figure.
pca = PCA(n_components=2)
X_pca = pca.fit_transform(X)
pc1, pc2 = X_pca[:, 0], X_pca[:, 1]

plt.figure(figsize=(10, 6))
pca_ax = sns.scatterplot(x=pc1, y=pc2, hue=y, palette='viridis')
pca_ax.set(
    title='Decision Boundaries (PCA)',
    xlabel='Principal Component 1',
    ylabel='Principal Component 2',
)
plt.show()


In [None]:
import pandas as pd
import seaborn as sns
from sklearn.ensemble import RandomForestClassifier
from sklearn.inspection import permutation_importance
from sklearn.decomposition import PCA
import matplotlib.pyplot as plt

# Load the dataset
data = pd.read_csv("ArmMovementDetection_Dataset.csv")

# Data distribution for all electrodes: a 2x2 grid with one panel per column
plt.figure(figsize=(12, 8))
for i, electrode in enumerate(['TP9', 'AF7', 'AF8', 'TP10'], start=1):
    panel = plt.subplot(2, 2, i)
    sns.histplot(data=data, x=electrode, hue='label', kde=True, bins=20, ax=panel)
    plt.title(f'Distribution of {electrode} feature by Label')
    plt.xlabel(electrode)
    plt.ylabel('Frequency')
    # seaborn has already drawn the hue legend on this panel. plt.legend() would
    # replace it with an empty one because the histogram artists carry no labels,
    # so retitle the existing legend instead.
    hue_legend = panel.get_legend()
    if hue_legend is not None:
        hue_legend.set_title('Label')
plt.tight_layout()
plt.show()

# Correlation matrix of all columns (including 'label'), annotated with 2-decimal values
plt.figure(figsize=(10, 6))
sns.heatmap(data.corr(), annot=True, cmap='coolwarm', fmt=".2f")
plt.title('Correlation Matrix')
plt.show()

# Feature importance with Random Forest
# The forest is fitted on the full table (no train/test split) purely to rank
# the feature columns by impurity-based importance.
X = data.drop(columns=['label'])
y = data['label']
rf = RandomForestClassifier(n_estimators=100, random_state=42)
rf.fit(X, y)

plt.figure(figsize=(10, 6))
importances = rf.feature_importances_
# This cell does not import numpy, so sort through the array's own method
# rather than np.argsort; the result is the same descending-importance order.
indices = importances.argsort()[::-1]
sns.barplot(x=importances[indices], y=X.columns[indices])
plt.title("Feature Importance (Random Forest)")
plt.xlabel('Relative Importance')
plt.ylabel('Features')
plt.show()

# Decision boundaries using PCA
# The features are projected onto their first two principal components and
# coloured by label; no classifier is involved in this figure.
pca = PCA(n_components=2)
X_pca = pca.fit_transform(X)
pc1, pc2 = X_pca[:, 0], X_pca[:, 1]

plt.figure(figsize=(10, 6))
pca_ax = sns.scatterplot(x=pc1, y=pc2, hue=y, palette='viridis')
pca_ax.set(
    title='Decision Boundaries (PCA)',
    xlabel='Principal Component 1',
    ylabel='Principal Component 2',
)
plt.show()


In [None]:
import pandas as pd
import seaborn as sns
import numpy as np
from sklearn.ensemble import RandomForestClassifier, GradientBoostingClassifier
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.metrics import accuracy_score
from sklearn.preprocessing import StandardScaler
from sklearn.decomposition import PCA
import matplotlib.pyplot as plt

# Read the dataset and separate the 'label' target from the feature columns
data = pd.read_csv("ArmMovementDetection_Dataset.csv")
y = data['label']
X = data.drop(columns=['label'])

# 80/20 train/test partition with a fixed seed
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

# Scale the features with statistics learned from the training partition only
scaler = StandardScaler().fit(X_train)
X_train = scaler.transform(X_train)
X_test = scaler.transform(X_test)

# Fit a seeded gradient-boosting classifier and score it on the held-out rows
gbc = GradientBoostingClassifier(random_state=42).fit(X_train, y_train)
y_pred_gbc = gbc.predict(X_test)
accuracy_gbc = accuracy_score(y_test, y_pred_gbc)
print("Gradient Boosting Classifier Accuracy:", accuracy_gbc)

# Two-component PCA of the full, unscaled feature table, coloured by label.
# The fitted classifier above is not used to draw this figure.
pca = PCA(n_components=2)
X_pca = pca.fit_transform(X)
pc1, pc2 = X_pca[:, 0], X_pca[:, 1]

plt.figure(figsize=(10, 6))
gbc_pca_ax = sns.scatterplot(x=pc1, y=pc2, hue=y, palette='viridis')
gbc_pca_ax.set(
    title='Decision Boundaries (PCA) - Gradient Boosting Classifier',
    xlabel='Principal Component 1',
    ylabel='Principal Component 2',
)
plt.show()


In [None]:
from sklearn.model_selection import GridSearchCV, train_test_split
from sklearn.svm import SVC
from sklearn.ensemble import RandomForestClassifier, GradientBoostingClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score

from sklearn.preprocessing import StandardScaler

# Assuming you have the dataset loaded into X and y by an earlier cell
# For example:
# X = your feature data
# y = your labels (0 for no movement, 1 for movement)

# Split the data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Standardize features before tuning: SVM, logistic regression and k-NN are
# sensitive to feature scale. The scaler is fitted on the training rows only
# and then applied to both splits.
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

# Model Selection and Parameter Tuning
# Each entry describes one candidate: a display name, an unfitted estimator,
# and the hyper-parameter grid that GridSearchCV will search for it.
# The tree ensembles are seeded so repeated runs select the same parameters
# and report the same scores, matching the seeded train/test split.
models = [
    {
        'name': 'SVM',
        'estimator': SVC(),
        'params': {
            'kernel': ['linear', 'rbf', 'poly'],
            'C': [0.1, 1, 10]
        }
    },
    {
        'name': 'Random Forest',
        'estimator': RandomForestClassifier(random_state=42),
        'params': {
            'n_estimators': [50, 100, 200],
            'max_depth': [None, 5, 10]
        }
    },
    {
        'name': 'Gradient Boosting',
        'estimator': GradientBoostingClassifier(random_state=42),
        'params': {
            'n_estimators': [50, 100, 200],
            'learning_rate': [0.01, 0.1, 0.5]
        }
    },
    {
        'name': 'Logistic Regression',
        'estimator': LogisticRegression(),
        'params': {
            'C': [0.1, 1, 10],
            'solver': ['liblinear', 'lbfgs']
        }
    },
    {
        'name': 'K-Nearest Neighbors',
        'estimator': KNeighborsClassifier(),
        'params': {
            'n_neighbors': [3, 5, 7],
            'weights': ['uniform', 'distance']
        }
    }
]

# Tune each candidate with 5-fold cross-validated grid search on the training
# split, then report test-split metrics for the refit best estimator.
for model in models:
    grid_search = GridSearchCV(
        estimator=model['estimator'],
        param_grid=model['params'],
        scoring='accuracy',
        cv=5,
    )
    # fit() returns the search object itself, so the winner can be read off directly
    best_estimator = grid_search.fit(X_train, y_train).best_estimator_
    y_pred = best_estimator.predict(X_test)

    # Accuracy plus precision/recall/F1 with scikit-learn's default averaging
    accuracy = accuracy_score(y_test, y_pred)
    precision, recall, f1 = (
        metric(y_test, y_pred)
        for metric in (precision_score, recall_score, f1_score)
    )

    print(
        f"{model['name']} Performance:",
        f"Accuracy: {accuracy}",
        f"Precision: {precision}",
        f"Recall: {recall}",
        f"F1 Score: {f1}",
        f"Best Parameters: {grid_search.best_params_}\n",
        sep="\n",
    )


In [None]:
from sklearn.model_selection import GridSearchCV, train_test_split
from sklearn.svm import SVC
from sklearn.ensemble import RandomForestClassifier, GradientBoostingClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score

from sklearn.preprocessing import StandardScaler

# Assuming you have the dataset loaded into X and y by an earlier cell
# For example:
# X = your feature data
# y = your labels (0 for no movement, 1 for movement)

# Split the data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Standardize features before tuning: SVM, logistic regression and k-NN are
# sensitive to feature scale. The scaler is fitted on the training rows only
# and then applied to both splits.
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

# Model Selection and Parameter Tuning
# Each entry describes one candidate: a display name, an unfitted estimator,
# and the (widened) hyper-parameter grid that GridSearchCV will search for it.
# The tree ensembles are seeded so repeated runs select the same parameters
# and report the same scores, matching the seeded train/test split.
models = [
    {
        'name': 'SVM',
        'estimator': SVC(),
        'params': {
            'kernel': ['linear', 'rbf', 'poly'],
            'C': [0.01, 0.1, 1, 10, 100]  # Adjusting C for SVM regularization
        }
    },
    {
        'name': 'Random Forest',
        'estimator': RandomForestClassifier(random_state=42),
        'params': {
            'n_estimators': [50, 100, 200, 300],  # Trying more estimators
            'max_depth': [None, 5, 10, 20]  # Adjusting max depth
        }
    },
    {
        'name': 'Gradient Boosting',
        'estimator': GradientBoostingClassifier(random_state=42),
        'params': {
            'n_estimators': [50, 100, 200, 300],  # Trying more estimators
            'learning_rate': [0.01, 0.1, 0.5],  # Adjusting learning rate
            'max_depth': [3, 5, 10]  # Adjusting max depth
        }
    },
    {
        'name': 'Logistic Regression',
        'estimator': LogisticRegression(),
        'params': {
            'C': [0.01, 0.1, 1, 10, 100],  # Adjusting regularization strength
            'solver': ['liblinear', 'lbfgs']
        }
    },
    {
        'name': 'K-Nearest Neighbors',
        'estimator': KNeighborsClassifier(),
        'params': {
            'n_neighbors': [3, 5, 7, 9],  # Trying more neighbors
            'weights': ['uniform', 'distance']
        }
    }
]

# Run a 5-fold grid search per candidate on the training split and print the
# test-split metrics of each refit winner.
for model in models:
    grid_search = GridSearchCV(
        estimator=model['estimator'],
        param_grid=model['params'],
        scoring='accuracy',
        cv=5,
    )
    # fit() returns the search object itself, so the winner can be read off directly
    best_estimator = grid_search.fit(X_train, y_train).best_estimator_
    y_pred = best_estimator.predict(X_test)

    # Accuracy plus precision/recall/F1 with scikit-learn's default averaging
    accuracy = accuracy_score(y_test, y_pred)
    precision, recall, f1 = (
        metric(y_test, y_pred)
        for metric in (precision_score, recall_score, f1_score)
    )

    print(
        f"{model['name']} Performance:",
        f"Accuracy: {accuracy}",
        f"Precision: {precision}",
        f"Recall: {recall}",
        f"F1 Score: {f1}",
        f"Best Parameters: {grid_search.best_params_}\n",
        sep="\n",
    )


In [None]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, LabelEncoder
from sklearn.metrics import confusion_matrix, accuracy_score, precision_score, recall_score, f1_score
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout
import matplotlib.pyplot as plt

# Load your dataset
df = pd.read_csv('ArmMovementDetection_Dataset.csv')

# Check the number of samples in each class before balancing
print("Number of samples in each class before balancing:")
class_counts = df['label'].value_counts()
print(class_counts)

# Balance the dataset by downsampling every class to the size of the smallest one.
# GroupBy.sample keeps all columns (including 'label') and is seeded so the same
# rows are drawn on every run.
min_class = class_counts.idxmin()
min_class_size = int(class_counts[min_class])
df_balanced = (
    df.groupby('label')
    .sample(n=min_class_size, random_state=42)
    .reset_index(drop=True)
)

# Check the number of samples in each class after balancing
print("Number of samples in each class after balancing:")
print(df_balanced['label'].value_counts())

# Select the target and the features by column name rather than by position,
# so the result does not depend on where 'label' sits in the CSV.
X_balanced = df_balanced.drop(columns=['label']).values
y_balanced = df_balanced['label'].values

# 70/30 train/validation partition of the balanced data, seeded for repeatability
X_train, X_val, y_train, y_val = train_test_split(
    X_balanced, y_balanced, test_size=0.3, random_state=42
)

# Scale the features with statistics learned from the training rows only
scaler = StandardScaler().fit(X_train)
X_train = scaler.transform(X_train)
X_val = scaler.transform(X_val)

# Encode the labels as consecutive integers. The encoder is fitted on the
# training and validation labels together so both share one mapping.
n_train = len(y_train)
y_combined = np.concatenate((y_train, y_val), axis=0)
le = LabelEncoder()
y_combined_encoded = le.fit_transform(y_combined)

# Cut the encoded vector back into its training and validation parts
y_train_encoded, y_val_encoded = np.split(y_combined_encoded, [n_train])

# One output unit will be needed per distinct encoded class
num_classes = len(le.classes_)

# Assemble the network layer by layer: three ReLU hidden layers (128, 64, 32
# units), each followed by 50% dropout, then a softmax output with one unit
# per class.
n_features = X_train.shape[1]
model = Sequential()
model.add(Dense(128, activation='relu', input_shape=(n_features,)))
model.add(Dropout(0.5))
for units in (64, 32):
    model.add(Dense(units, activation='relu'))
    model.add(Dropout(0.5))
model.add(Dense(num_classes, activation='softmax'))

# Integer-encoded targets, hence the sparse categorical cross-entropy loss
model.compile(
    loss='sparse_categorical_crossentropy',
    optimizer='adam',
    metrics=['accuracy'],
)

# Train for 50 epochs in mini-batches of 32, tracking validation metrics each epoch
history = model.fit(
    x=X_train,
    y=y_train_encoded,
    batch_size=32,
    epochs=50,
    validation_data=(X_val, y_val_encoded),
)

# Predicted class = index of the largest softmax output for each validation row
y_pred = model.predict(X_val).argmax(axis=1)

# Accuracy plus support-weighted precision, recall and F1
accuracy = accuracy_score(y_val_encoded, y_pred)
precision, recall, f1 = (
    metric(y_val_encoded, y_pred, average='weighted')
    for metric in (precision_score, recall_score, f1_score)
)

# Confusion matrix: rows are true classes, columns are predicted classes
cm = confusion_matrix(y_val_encoded, y_pred)

# Read the four cells, treating encoded class 1 as the positive class
# (this indexing assumes a two-class problem)
tn, fp = cm[0, 0], cm[0, 1]
fn, tp = cm[1, 0], cm[1, 1]

# Print the results
print(
    f"Accuracy: {accuracy}",
    f"Precision: {precision}",
    f"Recall: {recall}",
    f"F1 Score: {f1}",
    f"Confusion Matrix:\n{cm}",
    f"True Positives: {tp}",
    f"True Negatives: {tn}",
    f"False Positives: {fp}",
    f"False Negatives: {fn}",
    sep="\n",
)

# Draw the per-epoch accuracy curves recorded during training
for history_key, curve_label in (
    ('accuracy', 'Training Accuracy'),
    ('val_accuracy', 'Validation Accuracy'),
):
    plt.plot(history.history[history_key], label=curve_label)
plt.title('Training and Validation Accuracy')
plt.xlabel('Epoch')
plt.ylabel('Accuracy')
plt.legend(loc='lower right')
plt.show()

# Persist the trained network to an HDF5 file in the working directory
model.save('eeg_movement_detection_model.h5')


In [None]:
# Count the rows per class in the unbalanced dataset.
# Relies on `data` having been loaded by an earlier cell.
y_original = data['label']
class_counts_before_balancing = y_original.value_counts()

print(
    "Number of samples in each class before balancing:",
    class_counts_before_balancing,
    sep="\n",
)

In [None]:
from imblearn.over_sampling import RandomOverSampler

# Separate target and features (relies on `data` from an earlier cell)
y = data['label']
X = data.drop(columns=['label'])

# Randomly duplicate minority-class rows until all classes have equal size
oversampler = RandomOverSampler(random_state=42)
X_balanced, y_balanced = oversampler.fit_resample(X, y)

print(
    "Number of samples in each class after balancing:",
    y_balanced.value_counts(),
    sep="\n",
)

In [None]:
import pandas as pd
from imblearn.over_sampling import RandomOverSampler
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, classification_report

# Read the dataset and show its first rows as a sanity check
data = pd.read_csv("ArmMovementDetection_Dataset.csv")
print("Head of the dataset:", data.head(), sep="\n")

# Target is the 'label' column; every other column is a feature
y = data['label']
X = data.drop(columns=['label'])

# Split the data into training and validation sets (70% training, 30% validation)
# BEFORE any resampling. Oversampling first would place copies of the same
# minority-class row on both sides of the split and inflate validation scores.
# The split is stratified so the validation set keeps the original class ratio.
X_train, X_val, y_train, y_val = train_test_split(
    X, y, test_size=0.3, random_state=42, stratify=y
)

# Oversampling to balance the classes, applied to the training rows only
oversampler = RandomOverSampler(random_state=42)
X_balanced, y_balanced = oversampler.fit_resample(X_train, y_train)
X_train, y_train = X_balanced, y_balanced

# Check the number of samples in each class after balancing (training set)
print("Number of samples in each class after balancing:")
print(y_balanced.value_counts())

# Standardize the data using statistics learned from the training rows only
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_val_scaled = scaler.transform(X_val)

# Fit a seeded random forest on the scaled training rows and predict the
# validation labels
rf_classifier = RandomForestClassifier(random_state=42).fit(X_train_scaled, y_train)
y_pred = rf_classifier.predict(X_val_scaled)

# Overall accuracy followed by the per-class precision/recall/F1 table
accuracy = accuracy_score(y_val, y_pred)
print("Validation Accuracy:", accuracy)
print("Classification Report:", classification_report(y_val, y_pred), sep="\n")


In [None]:
import pandas as pd
from imblearn.over_sampling import RandomOverSampler
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.preprocessing import StandardScaler
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, classification_report
from sklearn.svm import SVC
from sklearn.ensemble import GradientBoostingClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import precision_score, recall_score, f1_score

# Read the dataset and show its first rows as a sanity check
data = pd.read_csv("ArmMovementDetection_Dataset.csv")
print("Head of the dataset:", data.head(), sep="\n")

# Target is the 'label' column; every other column is a feature
y = data['label']
X = data.drop(columns=['label'])

# Split the data into training and validation sets (70% training, 30% validation)
# BEFORE any resampling. Oversampling first would place copies of the same
# minority-class row on both sides of the split and inflate validation scores.
# The split is stratified so the validation set keeps the original class ratio.
X_train, X_val, y_train, y_val = train_test_split(
    X, y, test_size=0.3, random_state=42, stratify=y
)

# Oversampling to balance the classes, applied to the training rows only
oversampler = RandomOverSampler(random_state=42)
X_balanced, y_balanced = oversampler.fit_resample(X_train, y_train)
X_train, y_train = X_balanced, y_balanced

# Check the number of samples in each class after balancing (training set)
print("Number of samples in each class after balancing:")
print(y_balanced.value_counts())

# Standardize the data using statistics learned from the training rows only
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_val_scaled = scaler.transform(X_val)

# Fit a seeded random forest on the scaled training rows and predict the
# validation labels
rf_classifier = RandomForestClassifier(random_state=42).fit(X_train_scaled, y_train)
y_pred = rf_classifier.predict(X_val_scaled)

# Overall accuracy followed by the per-class precision/recall/F1 table
accuracy = accuracy_score(y_val, y_pred)
print("Validation Accuracy:", accuracy)
print("Classification Report:", classification_report(y_val, y_pred), sep="\n")

# Model Selection and Parameter Tuning
# Each entry describes one candidate: a display name, an unfitted estimator,
# and the hyper-parameter grid that GridSearchCV will search for it.
# The tree ensembles are seeded so repeated runs select the same parameters
# and report the same scores, matching the seeded split and oversampler.
models = [
    {
        'name': 'SVM',
        'estimator': SVC(),
        'params': {
            'kernel': ['linear', 'rbf', 'poly'],
            'C': [0.01, 0.1, 1, 10, 100]  # Adjusting C for SVM regularization
        }
    },
    {
        'name': 'Random Forest',
        'estimator': RandomForestClassifier(random_state=42),
        'params': {
            'n_estimators': [50, 100, 200, 300],  # Trying more estimators
            'max_depth': [None, 5, 10, 20]  # Adjusting max depth
        }
    },
    {
        'name': 'Gradient Boosting',
        'estimator': GradientBoostingClassifier(random_state=42),
        'params': {
            'n_estimators': [50, 100, 200, 300],  # Trying more estimators
            'learning_rate': [0.01, 0.1, 0.5],  # Adjusting learning rate
            'max_depth': [3, 5, 10]  # Adjusting max depth
        }
    },
    {
        'name': 'Logistic Regression',
        'estimator': LogisticRegression(),
        'params': {
            'C': [0.01, 0.1, 1, 10, 100],  # Adjusting regularization strength
            'solver': ['liblinear', 'lbfgs']
        }
    },
    {
        'name': 'K-Nearest Neighbors',
        'estimator': KNeighborsClassifier(),
        'params': {
            'n_neighbors': [3, 5, 7, 9],  # Trying more neighbors
            'weights': ['uniform', 'distance']
        }
    }
]

# Tune each candidate with 5-fold cross-validated grid search, then report
# validation metrics for the refit best estimator.
for model in models:
    grid_search = GridSearchCV(model['estimator'], model['params'], cv=5, scoring='accuracy')
    # Train and evaluate on the standardized features computed earlier in this
    # cell. The raw X_train / X_val were used here before, which left the
    # scale-sensitive models (SVM, logistic regression, k-NN) on unscaled inputs
    # and made the scaling step dead code.
    grid_search.fit(X_train_scaled, y_train)
    best_estimator = grid_search.best_estimator_
    y_pred = best_estimator.predict(X_val_scaled)

    # Accuracy plus precision/recall/F1 with scikit-learn's default averaging
    accuracy = accuracy_score(y_val, y_pred)
    precision = precision_score(y_val, y_pred)
    recall = recall_score(y_val, y_pred)
    f1 = f1_score(y_val, y_pred)

    print(f"{model['name']} Performance:")
    print(f"Accuracy: {accuracy}")
    print(f"Precision: {precision}")
    print(f"Recall: {recall}")
    print(f"F1 Score: {f1}")
    print(f"Best Parameters: {grid_search.best_params_}\n")
