# In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis
from sklearn.model_selection import LeaveOneOut, StratifiedKFold
from matplotlib.patches import Ellipse
from scipy.stats import chi2


# Path of the CSV file passed to pd.read_csv by the functions in this file.
# NOTE(review): the file name before ".csv" looks missing — confirm the path.
A = "data/.csv"
def plot_lda_with_ellipse(scale_bool):
    """Fit a two-component LDA on the rows flagged ``test == 1``.

    Reads the CSV at ``A``, keeps the rows whose ``test`` column equals 1,
    takes ``sample`` as the class label and every remaining column as a
    feature, fits the model and projects those same rows onto the
    discriminant axes.

    Args:
        scale_bool: Not used by the current implementation.

    Returns:
        None. The projected scores are computed but neither returned nor
        drawn. NOTE(review): the function name suggests a plot with
        ellipses was intended — confirm whether the rest is missing.
    """
    frame = pd.read_csv(A)
    selected = frame[frame["test"] == 1]
    labels = selected["sample"]
    features = selected.drop(["test", "sample"], axis=1)

    model = LinearDiscriminantAnalysis(n_components=2)
    model.fit(features, labels)
    x_scores = model.transform(features)

def lda_loocv():
    """Estimate LDA classification accuracy with leave-one-out cross-validation.

    Reads the CSV at ``A``, keeps the rows whose ``test`` column equals 1,
    uses ``sample`` as the class label and all remaining columns as features.
    Every row is held out once; an LDA model is fitted on the other rows and
    asked to predict the held-out one.

    Prints one line per misclassified row (in fold order) followed by the
    overall accuracy as a percentage. Returns nothing.
    """
    polymers = pd.read_csv(A)
    polymers_data = polymers[polymers["test"] == 1]
    y = polymers_data["sample"]
    x = polymers_data.drop(["test", "sample"], axis=1)

    actual_classes = []
    predicted_classes = []

    for train_index, test_index in LeaveOneOut().split(x):
        X_train, X_test = x.iloc[train_index], x.iloc[test_index]
        y_train, y_test = y.iloc[train_index], y.iloc[test_index]

        # n_components only limits the dimensionality of transform(); it has
        # no effect on predict(). Leaving it unset avoids a ValueError on
        # folds whose training rows contain fewer than three classes (e.g.
        # when the held-out row is the only member of its class).
        lda = LinearDiscriminantAnalysis().fit(X_train, y_train)

        # Each fold holds out exactly one row, so reduce both sides to
        # scalars instead of testing a one-element array inside ``if``.
        actual_classes.append(y_test.values[0])
        predicted_classes.append(lda.predict(X_test)[0])

    for actual, predicted in zip(actual_classes, predicted_classes):
        if actual != predicted:
            print(f"Actual Class: {actual}, Predicted Class: {predicted}")

    accuracy = np.mean(np.array(actual_classes) == np.array(predicted_classes))
    print(f"Leave-One-Out Cross-Validation Accuracy (LOOCV): {accuracy * 100:.2f}%")


def lda_kfold():
    """Score LDA with shuffled, stratified 5-fold cross-validation.

    Reads the CSV at ``A``, keeps the rows whose ``test`` column equals 1,
    uses ``sample`` as the class label and all remaining columns as features.
    The folds are shuffled with a fixed ``random_state`` of 2.

    Prints the score of each fold as it is computed, then the mean of the
    fold scores as a percentage. Returns nothing.
    """
    frame = pd.read_csv(A)
    subset = frame[frame["test"] == 1]
    labels = subset["sample"]
    features = subset.drop(["test", "sample"], axis=1)

    splitter = StratifiedKFold(n_splits=5, shuffle=True, random_state=2)
    fold_scores = []

    for fit_rows, holdout_rows in splitter.split(features, labels):
        model = LinearDiscriminantAnalysis(n_components=2)
        model.fit(features.iloc[fit_rows], labels.iloc[fit_rows])

        fold_score = model.score(
            features.iloc[holdout_rows], labels.iloc[holdout_rows]
        )
        fold_scores.append(fold_score)
        print('Test set score: {}'.format(fold_score))

    mean_score = np.mean(fold_scores)
    print('Overall Test Set Accuracy: {:.2f}%'.format(mean_score * 100))


def main():
    """Run the leave-one-out evaluation, then the stratified k-fold one."""
    for evaluate in (lda_loocv, lda_kfold):
        evaluate()


# Run the evaluations only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    main()