In [2]:
import pandas as pd
import numpy as np
from sklearn.preprocessing import StandardScaler
from sklearn.feature_selection import SelectKBest, f_classif, RFE
from sklearn.decomposition import PCA
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import cross_val_score, StratifiedKFold

data = pd.read_csv('/eeg data.csv')

X = data.iloc[:, 1:-1]
y = data.iloc[:, -1]

scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)


cv = StratifiedKFold(n_splits=5, shuffle=True, random_state=42)
classifier = RandomForestClassifier(random_state=42)

print("\n--- Univariate Feature Selection (UFS) ---")
selector_ufs = SelectKBest(f_classif, k=5)
X_ufs = selector_ufs.fit_transform(X_scaled, y)

selected_indices_ufs = selector_ufs.get_support(indices=True)
selected_features_ufs = X.columns[selected_indices_ufs].tolist()
print(f"Top 5 features selected by UFS: {selected_features_ufs}")

cv_scores_ufs = cross_val_score(classifier, X_ufs, y, cv=cv)
print(f"Mean accuracy with UFS: {cv_scores_ufs.mean():.4f}")

print("\n--- Recursive Feature Elimination (RFE) ---")
selector_rfe = RFE(estimator=RandomForestClassifier(random_state=42),
                  n_features_to_select=5)
X_rfe = selector_rfe.fit_transform(X_scaled, y)

selected_indices_rfe = np.where(selector_rfe.support_)[0]
selected_features_rfe = X.columns[selected_indices_rfe].tolist()
print(f"Top 5 features selected by RFE: {selected_features_rfe}")

cv_scores_rfe = cross_val_score(classifier, X_rfe, y, cv=cv)
print(f"Mean accuracy with RFE: {cv_scores_rfe.mean():.4f}")

print("\n--- Principal Component Analysis (PCA) ---")
pca = PCA(n_components=5)
X_pca = pca.fit_transform(X_scaled)

feature_importance = np.abs(pca.components_)
feature_importance_sum = np.sum(feature_importance, axis=0)
top_features_indices_pca = np.argsort(feature_importance_sum)[-5:]
selected_features_pca = X.columns[top_features_indices_pca].tolist()

print(f"Variance explained by 5 components: {sum(pca.explained_variance_ratio_):.4f}")
print(f"Top 5 original features with highest impact in PCA: {selected_features_pca}")

cv_scores_pca = cross_val_score(classifier, X_pca, y, cv=cv)
print(f"Mean accuracy with PCA: {cv_scores_pca.mean():.4f}")

print("\n--- Summary ---")
print(f"UFS Mean Accuracy: {cv_scores_ufs.mean():.4f}, Top 5 features: {selected_features_ufs}")
print(f"RFE Mean Accuracy: {cv_scores_rfe.mean():.4f}, Top 5 features: {selected_features_rfe}")
print(f"PCA Mean Accuracy: {cv_scores_pca.mean():.4f}, Top 5 features: {selected_features_pca}")

all_features = set(selected_features_ufs + selected_features_rfe + selected_features_pca)
print(f"\nTotal unique features across all methods: {len(all_features)}")

common_features = []
for feature in all_features:
    methods = []
    if feature in selected_features_ufs:
        methods.append("UFS")
    if feature in selected_features_rfe:
        methods.append("RFE")
    if feature in selected_features_pca:
        methods.append("PCA")

    if len(methods) > 1:
        common_features.append((feature, methods))

if common_features:
    print("\nFeatures appearing in multiple methods:")
    for feature, methods in common_features:
        print(f"- {feature}: Found in {', '.join(methods)}")
else:
    print("\nNo common features found across methods")


--- Univariate Feature Selection (UFS) ---
Top 5 features selected by UFS: ['beta23', 'delta23', 'delta41', 'delta51', 'theta23']
Mean accuracy with UFS: 0.2750

--- Recursive Feature Elimination (RFE) ---
Top 5 features selected by RFE: ['beta23', 'delta43', 'delta44', 'delta47', 'gamma5']
Mean accuracy with RFE: 0.5750

--- Principal Component Analysis (PCA) ---
Variance explained by 5 components: 0.8003
Top 5 original features with highest impact in PCA: ['delta14', 'delta24', 'delta27', 'alpha17', 'alpha18']
Mean accuracy with PCA: 0.3250

--- Summary ---
UFS Mean Accuracy: 0.2750, Top 5 features: ['beta23', 'delta23', 'delta41', 'delta51', 'theta23']
RFE Mean Accuracy: 0.5750, Top 5 features: ['beta23', 'delta43', 'delta44', 'delta47', 'gamma5']
PCA Mean Accuracy: 0.3250, Top 5 features: ['delta14', 'delta24', 'delta27', 'alpha17', 'alpha18']

Total unique features across all methods: 14

Features appearing in multiple methods:
- beta23: Found in UFS, RFE
