In [33]:
import pandas as pd
from sklearn.ensemble import RandomForestClassifier
from sklearn.tree import DecisionTreeClassifier
from xgboost import XGBClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.svm import SVC
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import confusion_matrix
import numpy as np
import warnings 
# Silence every warning for the rest of the session.
# NOTE(review): a blanket 'ignore' also hides anything the estimators or NumPy
# would warn about further down — consider narrowing the filter.
warnings.filterwarnings('ignore')

# Load the training and test tables used by every later cell.
train_df, test_df = (
    pd.read_csv(csv_path)
    for csv_path in ('high_level/train.csv', 'high_level/test.csv')
)

In [34]:
# One instance of each classifier with library-default hyper-parameters.
# Every estimator that takes a seed gets the same one (42).
rf_model, dt_model, xgb_model, logreg_model, svm_model = (
    estimator_cls(random_state=42)
    for estimator_cls in (
        RandomForestClassifier,
        DecisionTreeClassifier,
        XGBClassifier,
        LogisticRegression,
        SVC,
    )
)
# Constructed without a seed argument.
knn_model = KNeighborsClassifier()

In [35]:
def compute_cm(X_train, y_train, X_test, y_test):
    """Fit each module-level classifier and collect its test confusion matrix.

    The six models (rf_model, dt_model, xgb_model, logreg_model, svm_model,
    knn_model) are refit in place on the training data, then used to predict
    ``X_test``.

    Args:
        X_train: training features, passed straight to ``model.fit``.
        y_train: training targets.
        X_test: test features, passed straight to ``model.predict``.
        y_test: true test targets.

    Returns:
        list: one confusion matrix per model, in the model order listed above.
        Every matrix is built over the same label set (the sorted unique
        values found in ``y_train`` and ``y_test``), so all matrices share one
        shape even when a class is missing from ``y_test`` or from a model's
        predictions. A class with no test samples produces an all-zero row.
    """
    # Pin the label set: without it, confusion_matrix infers the labels from
    # y_test and y_pred only, so a class absent from both silently shrinks the
    # matrix and breaks the column indexing done later in the notebook.
    class_labels = np.unique(
        np.concatenate([np.asarray(y_train), np.asarray(y_test)])
    )

    cm = []
    for model in (rf_model, dt_model, xgb_model, logreg_model, svm_model, knn_model):
        model.fit(X_train, y_train)
        y_pred = model.predict(X_test)
        cm.append(confusion_matrix(y_test, y_pred, labels=class_labels))
    return cm

In [36]:
def compute_probs(cm):
    """Row-normalise every confusion matrix in ``cm``.

    Args:
        cm: iterable of 2-D numpy arrays.

    Returns:
        list: one array per input matrix, each entry divided by the sum of its
        row, so every row with a non-zero total sums to 1. A row whose total
        is zero comes out as NaN (0 / 0).
    """
    return [
        matrix / matrix.sum(axis=1, keepdims=True)
        for matrix in cm
    ]

In [37]:
def bayesian_consensus(prior_prob, l_prob):
    """Posterior probability of hypothesis 0 after a single observation.

    Applies Bayes' rule for two competing hypotheses:

        posterior_0 = prior_0 * like_0 / (prior_0 * like_0 + prior_1 * like_1)

    Args:
        prior_prob: indexable holding the prior of hypothesis 0 at ``[0]`` and
            of hypothesis 1 at ``[1]``.
        l_prob: indexable holding the likelihood of the observation under
            hypothesis 0 at ``[0]`` and under hypothesis 1 at ``[1]``.

    Returns:
        The posterior of hypothesis 0. When the total evidence is zero (the
        observation is impossible under every hypothesis that still has prior
        mass) the prior ``prior_prob[0]`` is returned unchanged instead of
        dividing by zero.
    """
    joint_0 = prior_prob[0] * l_prob[0]
    joint_1 = prior_prob[1] * l_prob[1]
    evidence = joint_0 + joint_1

    # 0 / 0 would raise for plain floats and yield NaN for numpy floats; a NaN
    # here would poison every later update in a chain of observations.
    if evidence == 0:
        return prior_prob[0]
    return joint_0 / evidence

In [38]:
def get_result(prior_probs, labels, likelihood_probs):
    """Fold a sequence of yes/no observations into a two-element belief.

    For each ``(label, prob)`` pair the belief is updated through
    ``bayesian_consensus``: a label equal to ``"yes"`` uses column 0 of
    ``prob`` as the likelihoods, any other label uses column 1. After every
    step element 1 is reset to ``1 - element 0``.

    Args:
        prior_probs: two-element sequence ``[p0, p1]`` with the starting
            belief. It is copied, not modified.
        labels: iterable of strings, one per likelihood matrix.
        likelihood_probs: iterable of 2-D arrays supporting ``prob[:, j]``.

    Returns:
        list: the final ``[p0, p1]`` belief.

    Labels and matrices are paired with ``zip``, so surplus entries in the
    longer input are ignored.
    """
    # Work on a private copy so the caller's prior is not overwritten.
    belief = list(prior_probs)

    for label, prob in zip(labels, likelihood_probs):
        # NOTE(review): for the matrices built in this notebook, column 0 is
        # the "predicted label 0" column under sklearn's confusion-matrix
        # layout — confirm that is what a "yes" is meant to select.
        column = 0 if label == "yes" else 1
        belief[0] = bayesian_consensus(belief, prob[:, column])
        belief[1] = 1 - belief[0]
    return belief

In [39]:
def final_result(class_string, class_int):
    """Run the one-vs-rest pipeline for one class and print the final belief.

    Steps:
      1. Write a 0/1 column named ``class_string`` into the module-level
         ``train_df`` and ``test_df`` (1 where ``'class' == class_int``).
      2. Fit all models and build their confusion matrices via ``compute_cm``.
      3. Row-normalise them via ``compute_probs``.
      4. Combine them with a fixed list of yes/no labels via ``get_result``,
         starting from a 50/50 prior.

    Args:
        class_string: name of the binary target column to create/overwrite.
        class_int: value of the ``'class'`` column treated as the positive class.

    Returns:
        None. The result is printed as ``Class <class_int>: [p0, p1]``.
    """
    # Binary one-vs-rest target, stored on both global frames (train first).
    for frame in (train_df, test_df):
        frame[class_string] = (frame['class'] == class_int).astype(dtype=int)

    # Features are taken by position: everything except the first and the
    # last column.
    # NOTE(review): this assumes the freshly written target is the last column
    # and that the original 'class' column is not inside the slice — confirm
    # against the CSV layout.
    X_train = train_df.iloc[:, 1:-1]
    y_train = train_df[class_string]
    X_test = test_df.iloc[:, 1:-1]
    y_test = test_df[class_string]

    likelihood_probs = compute_probs(compute_cm(X_train, y_train, X_test, y_test))

    # One fixed yes/no entry per model, in compute_cm's model order.
    votes = ["yes", "no", "yes", "no", "yes", "yes"]
    belief = get_result([0.5, 0.5], votes, likelihood_probs)
    print(f'Class {class_int}: {belief}')


In [40]:
# Evaluate classes 0 through 6 in order, reusing the same target column name.
for class_id in range(7):
    final_result('class_new', class_id)

Class 0: [np.float64(0.27008684818406514), np.float64(0.7299131518159349)]
Class 1: [np.float64(0.2930009844766963), np.float64(0.7069990155233037)]
Class 2: [np.float64(0.7185667645386442), np.float64(0.2814332354613558)]
Class 3: [np.float64(0.0), np.float64(1.0)]
Class 4: [np.float64(0.39160902406174375), np.float64(0.6083909759382562)]
Class 5: [np.float64(0.1004642408826373), np.float64(0.8995357591173627)]
Class 6: [np.float64(1.0), np.float64(0.0)]
