In [269]:
import pandas as pd
from sklearn.ensemble import RandomForestClassifier
from sklearn.tree import DecisionTreeClassifier
from xgboost import XGBClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.svm import SVC
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import confusion_matrix
import numpy as np
import warnings 
# Suppress every warning for the rest of the session; note this also hides
# any warnings emitted by the libraries used in later cells.
warnings.filterwarnings('ignore')

# Read the train and test tables from the `high_level` directory.
train_df, test_df = (
    pd.read_csv('high_level/train.csv'),
    pd.read_csv('high_level/test.csv'),
)

In [270]:
# Binary one-vs-rest target: 1 where the original `class` equals 1, else 0.
# The same transformation is applied to both splits.
for frame in (train_df, test_df):
    frame['class_1'] = frame['class'].eq(1).astype(int)

In [271]:
# Features are selected by position: everything except the first and the last
# column. The binary `class_1` column is the target.
# NOTE(review): this assumes `class_1` is the last column and that the raw
# `class` column is not inside the 1:-1 range - confirm against the CSV layout,
# otherwise the label leaks into the features.
feature_cols = slice(1, -1)

X_train = train_df.iloc[:, feature_cols]
y_train = train_df['class_1']

X_test = test_df.iloc[:, feature_cols]
y_test = test_df['class_1']

In [272]:
# One instance of each candidate classifier. `random_state` is fixed for every
# estimator that is given one here; KNeighborsClassifier is built with defaults.
rf_model = RandomForestClassifier(random_state=42)
dt_model = DecisionTreeClassifier(random_state=42)
xgb_model = XGBClassifier(random_state=42)
logreg_model = LogisticRegression(random_state=42)
svm_model = SVC(random_state=42)
knn_model = KNeighborsClassifier()

# The order of this list defines the order of the matrices collected in `cm`.
models = [rf_model, dt_model, xgb_model, logreg_model, svm_model, knn_model]

# cm[k] is the test-set confusion matrix of models[k] (rows: true label,
# columns: predicted label). `labels=[0, 1]` pins every matrix to shape 2x2
# even when a class is absent from both y_test and the predictions, because
# later cells index columns 0 and 1 of each matrix directly.
cm = []
for model in models:
    model.fit(X_train, y_train)
    y_pred = model.predict(X_test)
    cm.append(confusion_matrix(y_test, y_pred, labels=[0, 1]))
cm

[array([[56,  2],
        [ 4, 11]]),
 array([[49,  9],
        [ 6,  9]]),
 array([[56,  2],
        [ 6,  9]]),
 array([[57,  1],
        [ 7,  8]]),
 array([[56,  2],
        [ 7,  8]]),
 array([[53,  5],
        [ 5, 10]])]

In [273]:
# Divide each confusion matrix by its row totals so every row sums to 1:
# entry [i, j] becomes the fraction of true-label-i samples predicted as j.
likelihood_probs = [
    counts / counts.sum(axis=1, keepdims=True)
    for counts in cm
]
likelihood_probs

[array([[0.96551724, 0.03448276],
        [0.26666667, 0.73333333]]),
 array([[0.84482759, 0.15517241],
        [0.4       , 0.6       ]]),
 array([[0.96551724, 0.03448276],
        [0.4       , 0.6       ]]),
 array([[0.98275862, 0.01724138],
        [0.46666667, 0.53333333]]),
 array([[0.96551724, 0.03448276],
        [0.46666667, 0.53333333]]),
 array([[0.9137931 , 0.0862069 ],
        [0.33333333, 0.66666667]])]

In [274]:
def bayesian_consensus(prior_prob, l_prob):
    """Return the posterior probability of hypothesis 0 using Bayes' rule.

    Parameters
    ----------
    prior_prob : indexable
        ``prior_prob[0]`` and ``prior_prob[1]`` are the prior probabilities
        of hypothesis 0 and hypothesis 1.
    l_prob : indexable
        ``l_prob[0]`` and ``l_prob[1]`` are the likelihoods of the observed
        evidence under hypothesis 0 and hypothesis 1.

    Returns
    -------
    The value ``prior_prob[0] * l_prob[0]`` divided by the sum of that term
    and ``prior_prob[1] * l_prob[1]``.
    """
    weight_h0 = prior_prob[0] * l_prob[0]
    weight_h1 = l_prob[1] * prior_prob[1]
    return weight_h0 / (weight_h0 + weight_h1)

In [275]:
def get_result(prior_probs, labels, likelihood_probs):
    """Fold a sequence of votes into a two-element belief, one vote at a time.

    For each ``(label, matrix)`` pair the current belief is updated with
    ``bayesian_consensus``, using column 0 of the matrix when the label is
    ``"yes"`` and column 1 for any other label. The belief after every step
    is printed.

    Parameters
    ----------
    prior_probs : sequence of two numbers
        Starting belief ``[p0, p1]``. It is copied and never modified.
    labels : iterable
        One vote per matrix; iteration stops at the shorter of ``labels``
        and ``likelihood_probs``.
    likelihood_probs : iterable of 2-D arrays
        Matrices supporting ``matrix[:, k]`` column selection.

    Returns
    -------
    list
        A new list ``[p0, p1]`` holding the belief after the last update
        (a copy of ``prior_probs`` when there is nothing to iterate over).
    """
    # Work on a copy: updating the argument in place would silently overwrite
    # the caller's prior and make repeated calls with the same list differ.
    belief = list(prior_probs)
    for label, prob in zip(labels, likelihood_probs):
        # The vote only decides which column of the matrix supplies the
        # likelihoods; the update itself is the same for both votes.
        column = 0 if label == "yes" else 1
        belief[0] = bayesian_consensus(belief, prob[:, column])
        belief[1] = 1 - belief[0]
        print(belief)
    return belief

In [276]:
# One vote per entry of `likelihood_probs`, paired by position.
labels = ["yes", "no", "yes", "no", "yes", "yes"]
# Start from an even 50/50 belief.
prior_probs = [0.5, 0.5]
get_result(
    prior_probs=prior_probs,
    labels=labels,
    likelihood_probs=likelihood_probs,
)

[np.float64(0.7835820895522387), np.float64(0.21641791044776126)]
[np.float64(0.4835738409579367), np.float64(0.5164261590420633)]
[np.float64(0.6932741404029479), np.float64(0.3067258595970521)]
[np.float64(0.06809270671216418), np.float64(0.9319072932878358)]
[np.float64(0.13132264991338433), np.float64(0.8686773500866156)]
[np.float64(0.2930009844766963), np.float64(0.7069990155233037)]


[np.float64(0.2930009844766963), np.float64(0.7069990155233037)]