In [None]:
from itertools import combinations

import numpy as np
import pandas as pd
from sklearn.ensemble import RandomForestClassifier, AdaBoostClassifier, VotingClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score
from sklearn.model_selection import train_test_split, cross_val_score
from sklearn.neighbors import KNeighborsClassifier
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler
from sklearn.svm import SVC
from xverse.ensemble import VotingSelector

In [None]:
# Location of the TCGA glioma CSV and the seed shared by the shuffle and the split.
# NOTE(review): DATA_PATH is an absolute, machine-specific path; point it at your
# local copy of the file (ideally relative to a project data directory).
DATA_PATH = "/Users/ronnitrana/ACSEF/glioma-grading/data/UCI/TCGA_InfoWithGrade.csv"
RANDOM_STATE = 42

data = pd.read_csv(DATA_PATH)

# Shuffle all rows once with a fixed seed so the row order is reproducible.
data_randomized = data.sample(frac=1, random_state=RANDOM_STATE)

# 'Grade' is the prediction target; every other column is used as a feature.
X = data_randomized.drop('Grade', axis=1)
y = data_randomized['Grade']

# Hold out 20% of the rows. The split is seeded so that a fresh
# "Restart Kernel -> Run All" yields the same train/test partition every time.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=RANDOM_STATE
)

In [None]:
# Base learners. Each one is wrapped in its own Pipeline whose first step is a
# StandardScaler, so every model receives standardized features.
logreg_pipe = Pipeline([('scaler', StandardScaler()), ('logreg', LogisticRegression(random_state=42, max_iter=1000))])
svm_pipe = Pipeline([('scaler', StandardScaler()), ('svm', SVC(probability=True, random_state=42))])
knn_pipe = Pipeline([('scaler', StandardScaler()), ('knn', KNeighborsClassifier())])
rf_pipe = Pipeline([('scaler', StandardScaler()), ('rf', RandomForestClassifier(random_state=42))])
ada_pipe = Pipeline([('scaler', StandardScaler()), ('ada', AdaBoostClassifier(random_state=42))])

# (label, pipeline) pairs in the canonical order LR, SVM, KNN, RF, ADA.
# The order matters: it fixes both the dictionary keys and the order in which
# the ensembles are later enumerated and reported.
base_pipes = [
    ('lr', logreg_pipe),
    ('svm', svm_pipe),
    ('knn', knn_pipe),
    ('rf', rf_pipe),
    ('ada', ada_pipe),
]

# One soft-voting ensemble for every subset of 3, 4 and 5 base learners.
# combinations() emits subsets in lexicographic order of the input positions,
# so the keys come out as 'LR_SVM_KNN', 'LR_SVM_RF', ..., 'LR_SVM_KNN_RF_ADA'
# (10 triples, then 5 quadruples, then the full set: 16 ensembles in total).
ensembles = {
    '_'.join(label.upper() for label, pipe in members): VotingClassifier(
        estimators=list(members), voting='soft'
    )
    for size in range(3, len(base_pipes) + 1)
    for members in combinations(base_pipes, size)
}

In [None]:
def evaluate_ensembles(ensemble, x_data, y_data):
    """Cross-validate each estimator in a mapping and summarize the mean scores.

    Parameters
    ----------
    ensemble : mapping
        Mapping whose values are the estimators to evaluate (for example the
        notebook's name -> VotingClassifier dictionary). Only the values are
        used; they are processed in the mapping's iteration order.
    x_data
        Feature data forwarded unchanged to ``cross_val_score``.
    y_data
        Target data forwarded unchanged to ``cross_val_score``.

    Returns
    -------
    results_values : list
        Mean 5-fold cross-validation score of each estimator, in order.
    results_print : list of str
        One ``'Average score for ensemble <k>: <score>'`` line per estimator,
        where ``k`` counts from 1.
    """
    results_values = []
    results_print = []
    # Iterate over the mapping that was passed in, not a notebook-level global,
    # so the function evaluates exactly what the caller asked for.
    for position, estimator in enumerate(ensemble.values(), start=1):
        scores = cross_val_score(estimator, x_data, y_data, cv=5)
        avg_score = np.mean(scores)
        results_values.append(avg_score)
        results_print.append(f'Average score for ensemble {position}: {avg_score}')

    return results_values, results_print

In [None]:
# Trial 1: cross-validate every ensemble on the training split, keeping both the
# numeric means and the formatted summary lines, then echo the summaries.
trial1_results, trial1_print = evaluate_ensembles(
    ensembles,
    x_data=X_train,
    y_data=y_train,
)
for score_print in trial1_print:
    print(score_print)