In [2]:
# Importing necessary libraries
import numpy as np
import pandas as pd
from sklearn.feature_selection import mutual_info_classif, RFE
from sklearn.linear_model import LogisticRegression, ElasticNet
from sklearn.svm import SVC
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, roc_auc_score
from sklearn.model_selection import train_test_split, cross_val_score
import matplotlib.pyplot as plt
%matplotlib inline

# Load the dataset from CSV file
data = pd.read_csv('C:/Users/United/downloads/hmmmm/trimmed_dataset3.csv')

# Separate features and target variable
X = data.drop('Outcome', axis=1)
y = data['Outcome']

# 3. Feature Selection
# Mutual Information for feature ranking
mutual_info = mutual_info_classif(X, y)
mutual_info = pd.Series(mutual_info)
mutual_info.index = X.columns
mutual_info.sort_values(ascending=False, inplace=True)
selected_features = mutual_info[mutual_info > 0.01].index  # Selecting features with mutual info > 0.01

print(f"Selected features using Mutual Information: {', '.join(selected_features)}")

# Recursive Feature Elimination
model = LogisticRegression()
rfe = RFE(model, n_features_to_select=10)  # Selecting 10 best features
rfe.fit(X, y)
selected_features = X.columns[rfe.support_]

print(f"Selected features using RFE: {', '.join(selected_features)}")

# 4. Classification
# Split data into train and test sets
X_train, X_test, y_train, y_test = train_test_split(X[selected_features], y, test_size=0.2, random_state=42)

# Logistic Regression
logreg = LogisticRegression()
logreg.fit(X_train, y_train)
y_pred = logreg.predict(X_test)

# Random Forest
rf = RandomForestClassifier(n_estimators=100, random_state=42)
rf.fit(X_train, y_train)
y_pred = rf.predict(X_test)

# Support Vector Machine
svm = SVC(kernel='rbf', random_state=42)
svm.fit(X_train, y_train)
y_pred = svm.predict(X_test)

# 5. Model Evaluation
# Classification metrics
accuracy = accuracy_score(y_test, y_pred)
precision = precision_score(y_test, y_pred)
recall = recall_score(y_test, y_pred)
f1 = f1_score(y_test, y_pred)
auc_roc = roc_auc_score(y_test, y_pred)

print(f"\nAccuracy: {accuracy:.2f}")
print(f"Precision: {precision:.2f}")
print(f"Recall: {recall:.2f}")
print(f"F1-score: {f1:.2f}")
print(f"AUC-ROC: {auc_roc:.2f}")

# Cross-validation
scores = cross_val_score(logreg, X[selected_features], y, cv=5, scoring='accuracy')
print(f"\nCross-validation scores: {scores}")
print(f"Mean cross-validation accuracy: {scores.mean():.2f}")

Selected features using Mutual Information: P7-T7, FT9-FT10, T7-P7, C4-P4, C3-P3, P7-O1, CZ-PZ, F7-T7, F3-C3, FZ-CZ, T7-FT9, P8-O2, P4-O2, P3-O1, T8-P8-1, F4-C4, T8-P8-0, # FP1-F7, FP2-F4, FP1-F3, FP2-F8, F8-T8, FT10-T8
Selected features using RFE: C3-P3, CZ-PZ, F4-C4, F7-T7, FP1-F3, FZ-CZ, P7-O1, P8-O2, T8-P8-0, T8-P8-1

Accuracy: 0.78
Precision: 0.79
Recall: 0.73
F1-score: 0.76
AUC-ROC: 0.78

Cross-validation scores: [0.501 0.501 0.501 0.501 0.502]
Mean cross-validation accuracy: 0.50
