In [None]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import mean_squared_error
from sklearn.metrics import accuracy_score
from sklearn.model_selection import cross_val_score
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import roc_auc_score
import csv
# Load the recording in long format. The code below reads the columns
# "Timestamp", "TrialNum", "Subject", "Condition" and one column per electrode.
dt = pd.read_csv("C:\\Users\\Gebruiker\\Python\\RWG\\CAPExp.csv")


# Electrode columns used as classifier features.
electrodes = ["Fp1", "Fp2", "F7", "F3", "Fz", "F4", "F8", "FC5", "FC1",
              "FC2", "FC6", "C3", "Cz", "C4", "CP5", "CP1", "CP2", "CP6",
              "P7", "P3", "Pz", "P4", "P8", "O1", "Oz", "O2"]


def _mean_by_trial(frame, start, stop, columns):
    """Average ``columns`` over the rows whose Timestamp lies in [start, stop).

    Returns a DataFrame with one row per (TrialNum, Subject, Condition)
    group and one column per entry of ``columns``.
    """
    # isin(range(...)) only matches integer-valued timestamps.
    in_window = frame["Timestamp"].isin(range(start, stop))
    grouped = frame[in_window].groupby(["TrialNum", "Subject", "Condition"])
    return grouped[columns].mean()


# Two time windows, named after the N400 and P600 components.
# NOTE(review): the window bounds are presumably milliseconds - confirm.
N400_by_trial = _mean_by_trial(dt, 300, 500, electrodes)
P600_by_trial = _mean_by_trial(dt, 600, 1000, electrodes)


# One feature row per (TrialNum, Subject, Condition) group, in index order.
N400_data = N400_by_trial.to_numpy()
P600_data = P600_by_trial.to_numpy()

N400_w = N400_by_trial.reset_index()
P600_w = P600_by_trial.reset_index()

# Class label of every group, in the same row order as the feature rows.
N400_z = N400_w["Condition"]
P600_z = P600_w["Condition"]

# The feature matrix must stay row-aligned with the labels. Averaging a second
# time over "TrialNum" alone would merge groups from different subjects or
# conditions that share a trial number and leave X with fewer rows than y.
N400_X = N400_data.copy()
P600_X = P600_data.copy()

N400_y = N400_z.values
P600_y = P600_z.values

def _split_train_test_val(features, labels):
    """Split into 75% train and two equal halves of the remaining 25%.

    Both splits use random_state=42. Returns
    (X_train, X_test, X_val, y_train, y_test, y_val).
    """
    seed = {"random_state": 42}
    X_fit, X_rest, y_fit, y_rest = train_test_split(
        features, labels, test_size=0.25, **seed
    )
    # The held-out quarter is halved: first half is test, second half is val.
    X_eval, X_tune, y_eval, y_tune = train_test_split(
        X_rest, y_rest, test_size=0.5, **seed
    )
    return X_fit, X_eval, X_tune, y_fit, y_eval, y_tune


(N400_X_train, N400_X_test, N400_X_val,
 N400_y_train, N400_y_test, N400_y_val) = _split_train_test_val(N400_X, N400_y)

(P600_X_train, P600_X_test, P600_X_val,
 P600_y_train, P600_y_test, P600_y_val) = _split_train_test_val(P600_X, P600_y)

# Encode the condition labels as integer class codes.
# Each encoder is fitted on the complete label vector of its dataset, so a
# label that happens to occur only in the validation or test split can still
# be transformed. Label encoding learns nothing from the features, so this
# does not leak information into training.
N400_le = LabelEncoder().fit(N400_y)
N400_y_train_encoded = N400_le.transform(N400_y_train)
N400_y_val_encoded = N400_le.transform(N400_y_val)
N400_y_test_encoded = N400_le.transform(N400_y_test)

# A separate encoder keeps the N400 mapping intact.
P600_le = LabelEncoder().fit(P600_y)
P600_y_train_encoded = P600_le.transform(P600_y_train)
P600_y_val_encoded = P600_le.transform(P600_y_val)
P600_y_test_encoded = P600_le.transform(P600_y_test)

# `le` stays bound to the P600 encoder, the object it referred to after the
# original refit, for any code that still uses that name.
le = P600_le

def _select_best_k(X_train, y_train, X_val, y_val, max_k=999):
    """Choose the KNN neighbour count with the highest validation accuracy.

    Every k from 1 up to ``max_k`` (capped at the number of training samples)
    is fitted on the training split and scored on both splits.

    Returns (k_values, train_accuracies, val_accuracies, best_k).
    """
    # A k-NN query fails when n_neighbors exceeds the number of fitted
    # samples, so the search range never goes past the training-set size.
    candidate_ks = range(1, min(max_k, len(X_train)) + 1)
    train_scores = []
    val_scores = []
    for k in candidate_ks:
        model = KNeighborsClassifier(n_neighbors=k)
        model.fit(X_train, y_train)
        # Accuracy, not squared error: the label codes are arbitrary
        # integers, so the numeric distance between two codes means nothing.
        train_scores.append(accuracy_score(y_train, model.predict(X_train)))
        val_scores.append(accuracy_score(y_val, model.predict(X_val)))
    # argmax returns the first maximum, so ties go to the smallest k.
    best_k = candidate_ks[int(np.argmax(val_scores))]
    return candidate_ks, train_scores, val_scores, best_k


(k_values, N400_train_accuracies,
 N400_val_accuracies, N400_best_k) = _select_best_k(
    N400_X_train, N400_y_train_encoded, N400_X_val, N400_y_val_encoded
)
print("Best k for N400 data:", N400_best_k)

(k_values_1, P600_train_accuracies,
 P600_val_accuracies, P600_best_k) = _select_best_k(
    P600_X_train, P600_y_train_encoded, P600_X_val, P600_y_val_encoded
)
print("Best k for P600 data:", P600_best_k)

def _evaluate_knn(n_neighbors, X_train, y_train, X_test, y_test):
    """Cross-validate and test a KNN classifier with the given k.

    Returns (model, cv_accuracies, test_accuracy, test_probabilities,
    roc_auc), where ``model`` is fitted on the whole training split and
    ``cv_accuracies`` holds the 5-fold accuracy scores on that split.
    """
    model = KNeighborsClassifier(n_neighbors=n_neighbors)
    cv_scores = cross_val_score(model, X_train, y_train, cv=5, scoring="accuracy")
    # One fit serves both the accuracy and the ROC-AUC computation.
    model.fit(X_train, y_train)
    test_accuracy = accuracy_score(y_test, model.predict(X_test))
    probabilities = model.predict_proba(X_test)
    # For a two-class target roc_auc_score expects the 1-D scores of the
    # class with the greater label; the full matrix is only valid for
    # more than two classes.
    if probabilities.shape[1] == 2:
        scores = probabilities[:, 1]
    else:
        scores = probabilities
    roc_auc = roc_auc_score(y_test, scores, multi_class='ovr')
    return model, cv_scores, test_accuracy, probabilities, roc_auc


(knn, N400_cv_accuracies, N400_test_accuracy,
 N400_test_probabilities, N400_roc_auc) = _evaluate_knn(
    N400_best_k, N400_X_train, N400_y_train_encoded,
    N400_X_test, N400_y_test_encoded
)

(knn, P600_cv_accuracies, P600_test_accuracy,
 P600_test_probabilities, P600_roc_auc) = _evaluate_knn(
    P600_best_k, P600_X_train, P600_y_train_encoded,
    P600_X_test, P600_y_test_encoded
)

print("Cross-Validation Accuracy Scores for N400 data:")
print(N400_cv_accuracies)
print("Test Accuracy for N400 data:", N400_test_accuracy)
print()
print("Cross-Validation Accuracy Scores for P600 data:")
print(P600_cv_accuracies)
print("Test Accuracy for P600 data:", P600_test_accuracy)

print("ROC-AUC Score for N400 data:", N400_roc_auc)
print("ROC-AUC Score for P600 data:", P600_roc_auc)

def _evaluate_logreg(X_train, y_train, X_test, y_test):
    """Fit a default LogisticRegression and score it on the test split.

    Returns (model, test_probabilities, roc_auc, cv_scores, test_accuracy),
    where ``cv_scores`` holds the 5-fold accuracy scores on the training
    split.
    """
    model = LogisticRegression()
    model.fit(X_train, y_train)
    probabilities = model.predict_proba(X_test)
    # For a two-class target roc_auc_score expects the 1-D scores of the
    # class with the greater label; the full matrix is only valid for
    # more than two classes.
    if probabilities.shape[1] == 2:
        scores = probabilities[:, 1]
    else:
        scores = probabilities
    roc_auc = roc_auc_score(y_test, scores, multi_class='ovr')
    cv_scores = cross_val_score(model, X_train, y_train, cv=5, scoring="accuracy")
    test_accuracy = model.score(X_test, y_test)
    return model, probabilities, roc_auc, cv_scores, test_accuracy


# The logistic-regression models are trained on the original (unencoded)
# condition labels.
(logreg4, N400_test_probabilities, LGN400_roc_auc,
 N400_cv_scores, LGN400_test_accuracy) = _evaluate_logreg(
    N400_X_train, N400_y_train, N400_X_test, N400_y_test
)

(logreg6, P600_test_probabilities, LGP600_roc_auc,
 P600_cv_scores, LGP600_test_accuracy) = _evaluate_logreg(
    P600_X_train, P600_y_train, P600_X_test, P600_y_test
)

print("ROC-AUC Score for N400 data:", LGN400_roc_auc)
print("Cross-Validation Accuracy Scores for N400 data:")
print(N400_cv_scores)
print("Test Accuracy for N400 data:", LGN400_test_accuracy)
print()
print("ROC-AUC Score for P600 data:", LGP600_roc_auc)
print("Cross-Validation Accuracy Scores for P600 data:")
print(P600_cv_scores)
print("Test Accuracy for P600 data:", LGP600_test_accuracy)

def _summary_row(algorithm, dataset, fold_scores, test_accuracy, roc_auc):
    """Build one table row: labels, per-fold scores, their mean, test metrics.

    All numeric cells are rounded to four decimals.
    """
    rounded_folds = [round(score, 4) for score in fold_scores]
    fold_average = round(sum(fold_scores) / len(fold_scores), 4)
    return [
        algorithm,
        dataset,
        *rounded_folds,
        fold_average,
        round(test_accuracy, 4),
        round(roc_auc, 4),
    ]


# Header row followed by one row per (algorithm, dataset) combination.
data = [
    ["Algorithm", "Data", "Fold 1", "Fold 2", "Fold 3", "Fold 4", "Fold 5", "Average", "Test Accuracy", "ROC-AUC"],
    _summary_row("KNN", "N400", N400_cv_accuracies, N400_test_accuracy, N400_roc_auc),
    _summary_row("KNN", "P600", P600_cv_accuracies, P600_test_accuracy, P600_roc_auc),
    _summary_row("Logistic Regression", "N400", N400_cv_scores, LGN400_test_accuracy, LGN400_roc_auc),
    _summary_row("Logistic Regression", "P600", P600_cv_scores, LGP600_test_accuracy, LGP600_roc_auc),
]

filename = "tablemodel.csv"
# newline="" lets the csv module control the line endings itself.
with open(filename, "w", newline="") as csv_file:
    csv.writer(csv_file).writerows(data)

print("Table saved as", filename)