In [1]:
import pandas as pd
import os
import random
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler


# Enter path to the data_fusion_guest_lecture file
image_folder_path = r'C:\Users\Lenovo\Documents\UM\Period 4\Data Fusion\asd\seedling_labels_with_features_and_predictions.csv'

# Loads labels
df = pd.read_csv(image_folder_path)

#df = df.drop(columns=['Pos','average_expert'])
#df = df.iloc[:, -8:]
decisions = df.loc[:,['Expert 1','Expert 2','Expert 3', 'Expert 4']]

In [5]:
# Define a function to convert the values to binary
def convert_to_binary(x):
    if x in [1, 2]:
        return 1
    else:
        return 0

# Apply the function to each element in the dataframe
decisions = decisions.applymap(convert_to_binary)
features = df.iloc[:,10:16]
voting = pd.concat([features,decisions],axis = 1)
voting.head()

Unnamed: 0,plant_width_side_view,plant_height_side_view,plant_area_side_view,plant_width_top_view,plant_height_top_view,plant_area_top_view,Expert 1,Expert 2,Expert 3,Expert 4
0,120,239,8577,263,277,18368,0,0,0,0
1,591,407,29037,585,548,85112,1,1,1,1
2,388,449,31946,450,692,86757,1,1,1,1
3,92,242,8694,281,276,23174,0,0,0,0
4,582,494,42052,670,534,102178,0,1,1,1


In [7]:
##MAJORITY VOTING

# Define the fusion rule (majority voting)
def majority_voting(expert_decisions):
    return int(expert_decisions.mean() > 0.5)

# Apply the fusion rule to the dataset
voting['majority'] = decisions.apply(majority_voting, axis=1)
# Print the final classifications
print(voting.head())


   plant_width_side_view  plant_height_side_view  plant_area_side_view  \
0                    120                     239                  8577   
1                    591                     407                 29037   
2                    388                     449                 31946   
3                     92                     242                  8694   
4                    582                     494                 42052   

   plant_width_top_view  plant_height_top_view  plant_area_top_view  Expert 1  \
0                   263                    277                18368         0   
1                   585                    548                85112         1   
2                   450                    692                86757         1   
3                   281                    276                23174         0   
4                   670                    534               102178         0   

   Expert 2  Expert 3  Expert 4  majority  
0         0         0   

In [8]:
from scipy.stats import beta
# Define the fusion function
def bayesian_fusion(expert_decisions):
    # Compute the beta distribution parameters based on expert decisions
    alpha = expert_decisions.sum()
    beta_ = expert_decisions.size - alpha
    
    # Compute the posterior probability distribution
    p = beta.cdf(0.5, alpha, beta_)
    
    return p

# Apply the fusion function to the dataset
voting['bayes_consensus'] = decisions.apply(bayesian_fusion, axis=1)

# Define the decision threshold
threshold = 0.5

# Make final classification decisions based on the threshold
voting['bayes_consensus'] = (voting['bayes_consensus'] > threshold).astype(int)
print(voting.head())

   plant_width_side_view  plant_height_side_view  plant_area_side_view  \
0                    120                     239                  8577   
1                    591                     407                 29037   
2                    388                     449                 31946   
3                     92                     242                  8694   
4                    582                     494                 42052   

   plant_width_top_view  plant_height_top_view  plant_area_top_view  Expert 1  \
0                   263                    277                18368         0   
1                   585                    548                85112         1   
2                   450                    692                86757         1   
3                   281                    276                23174         0   
4                   670                    534               102178         0   

   Expert 2  Expert 3  Expert 4  majority  bayes_consensus  
0      

In [85]:
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
from sklearn.ensemble import RandomForestClassifier, AdaBoostClassifier, VotingClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.naive_bayes import GaussianNB
from scipy.stats import mode
from scipy.stats import bayes_mvs

X = voting.iloc[:, 0:7]
y = voting['majority']

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=57)


In [71]:
# Train different classifiers
clf1 = RandomForestClassifier(random_state=57)
clf2 = AdaBoostClassifier(random_state=57)
clf3 = LogisticRegression(random_state=57)
clf4 = GaussianNB()

clf1.fit(X_train, y_train)
clf2.fit(X_train, y_train)
clf3.fit(X_train, y_train)
clf4.fit(X_train, y_train)


In [80]:
from scipy.stats import bayes_mvs

def decision_fusion(y_preds, method='hard'):
    if method == 'hard':
        return mode(y_preds, axis=0, keepdims=True)[0][0]
    elif method == 'soft':
        return np.round(np.mean(y_preds, axis=0))
    elif method == 'bayesian':
        means, _, _ = bayes_mvs(y_preds.T, alpha=0.95)
        return np.array([np.round(means.statistic) for _ in range(y_preds.shape[1])])
    else:
        raise ValueError("Invalid method")


In [81]:
y_pred1 = clf1.predict(X_test)
y_pred2 = clf2.predict(X_test)
y_pred3 = clf3.predict(X_test)
y_pred4 = clf4.predict(X_test)

y_preds = np.vstack((y_pred1, y_pred2, y_pred3, y_pred4))


In [82]:
y_pred_hard = decision_fusion(y_preds, method='hard')
y_pred_soft = decision_fusion(y_preds, method='soft')
y_pred_bayesian = decision_fusion(y_preds, method='bayesian')


In [83]:
print("Hard Voting Accuracy:", accuracy_score(y_test, y_pred_hard))
print("Soft Voting Accuracy:", accuracy_score(y_test, y_pred_soft))
print("Bayesian Consensus Accuracy:", accuracy_score(y_test, y_pred_bayesian))


Hard Voting Accuracy: 0.9547738693467337
Soft Voting Accuracy: 0.9547738693467337
Bayesian Consensus Accuracy: 0.6834170854271356
