In [1]:
import pandas as pd
import numpy as np

In [6]:
# Per-dataset results, keyed by dataset name.
# Each list is positional: [Size, Decision Tree, Bayes, SVM] (the column labels
# applied when this dict is turned into a DataFrame). The three classifier
# values are percentages (they are divided by 100 before the z-test);
# presumably classification accuracy -- confirm against the data source.
data = {
    'Anneal':       [898,  92.09, 79.62, 87.19],
    'Australia':    [690,  85.51, 76.81, 84.78],
    'Auto':         [205,  81.95, 58.05, 70.73],
    'Breast':       [699,  95.14, 95.99, 96.42],
    'Cleve':        [303,  76.24, 83.50, 84.49],
    'Credit':       [690,  85.80, 77.54, 85.07],
    'Diabetes':     [768,  72.40, 75.91, 76.82],
    'German':       [1000, 70.90, 74.70, 74.40],
    'Glass':        [214,  67.29, 48.59, 59.81],
    'Heart':        [270,  80.00, 84.07, 83.70],
    'Hepatitis':    [155,  81.94, 83.23, 87.10],
    'Horse':        [368,  85.33, 78.80, 82.61],
    'Ionosphere':   [351,  89.17, 82.34, 88.89],
    'Iris':         [150,  94.67, 95.33, 96.00],
    'Labor':        [57,   78.95, 94.74, 92.98],
    'Led7':         [3200, 73.34, 73.16, 73.56],
    'Lymphography': [148,  77.03, 83.11, 86.49],
    'Pima':         [768,  74.35, 76.04, 76.95],
    'Sonar':        [208,  78.85, 69.71, 76.92],
    'Tic-tac-toe':  [958,  83.72, 70.04, 98.33],
    'Vehicle':      [846,  71.04, 45.04, 74.94],
    'Wine':         [178,  94.38, 96.63, 98.88],
    'Zoo':          [101,  93.07, 93.07, 96.04],
}

In [8]:
# Build one row per dataset: the four list positions are labelled first
# (as the index of the dataset-per-column frame), then the frame is transposed
# so those labels become the columns and the dataset names become the index.
# NOTE: this rebinds `data` from the dict to a DataFrame, so this cell cannot be
# re-run on its own -- re-run the cell that defines the dict first.
data = pd.DataFrame(
    data,
    index=['Size', 'Decision Tree', 'Bayes', 'SVM'],
).T
data.head()

Unnamed: 0,Size,Decision Tree,Bayes,SVM
Anneal,898.0,92.09,79.62,87.19
Australia,690.0,85.51,76.81,84.78
Auto,205.0,81.95,58.05,70.73
Breast,699.0,95.14,95.99,96.42
Cleve,303.0,76.24,83.5,84.49


In [11]:
def calculate_and_compare_z_score(data, classifier_1, classifier_2, critical_value=1.96):
    """Label, row by row, which of two classifiers scores significantly higher.

    For every row a two-proportion z-test with a pooled proportion and equal
    sample sizes is computed:

        p  = (p_a + p_b) / 2
        se = sqrt(p * (1 - p) / n)
        z  = (p_a - p_b) / (se * sqrt(2))

    where ``p_a`` / ``p_b`` are the two classifier columns divided by 100 and
    ``n`` is the row's ``'Size'``.

    Parameters
    ----------
    data : pandas.DataFrame
        Must contain a ``'Size'`` column and the two classifier columns, with
        the classifier values expressed in percent (0-100).
    classifier_1, classifier_2 : str
        Names of the columns to compare.
    critical_value : float, default 1.96
        Threshold on ``|z|`` above which the difference counts as significant.
        The default, 1.96, is the two-sided 5% critical value of the standard
        normal distribution.

    Returns
    -------
    pandas.DataFrame
        The *same* object that was passed in. A column named
        ``'<classifier_1> vs <classifier_2>'`` is added to it in place, holding
        ``classifier_1`` where ``z > critical_value``, ``classifier_2`` where
        ``z < -critical_value`` and ``'Equal'`` everywhere else (including rows
        with identical scores, a zero standard error, or a NaN statistic).
    """
    p_a = data[classifier_1].to_numpy(dtype=float) / 100
    p_b = data[classifier_2].to_numpy(dtype=float) / 100
    n = data['Size'].to_numpy(dtype=float)

    pooled = (p_a + p_b) / 2
    # Degenerate rows (se == 0, n == 0, NaN inputs) may divide by zero here;
    # they are masked out or compare False below, so silence the warnings.
    with np.errstate(divide='ignore', invalid='ignore'):
        standard_error = np.sqrt(pooled * (1 - pooled) / n)
        z = (p_a - p_b) / (standard_error * np.sqrt(2))

    # Only rows with differing scores and a non-zero standard error are tested.
    testable = (p_a != p_b) & (standard_error != 0)

    comparison = np.full(len(data), 'Equal', dtype=object)
    # classifier_1 is written last so it wins if both conditions could hold
    # (only possible with a negative critical_value).
    comparison[testable & (z < -critical_value)] = classifier_2
    comparison[testable & (z > critical_value)] = classifier_1

    data[f'{classifier_1} vs {classifier_2}'] = comparison
    return data

# Run the z-test for every pair of classifiers; each call adds one
# '<first> vs <second>' column to `data`.
for first, second in [
    ('Decision Tree', 'Bayes'),
    ('Decision Tree', 'SVM'),
    ('Bayes', 'SVM'),
]:
    data = calculate_and_compare_z_score(data, first, second)

# Display the result ordered by dataset size (the sorted frame is not stored).
data.sort_values(by='Size')

Unnamed: 0,Size,Decision Tree,Bayes,SVM,Decision Tree vs Bayes,Decision Tree vs SVM,Bayes vs SVM
Labor,57.0,78.95,94.74,92.98,Bayes,SVM,Equal
Zoo,101.0,93.07,93.07,96.04,Equal,Equal,Equal
Lymphography,148.0,77.03,83.11,86.49,Equal,SVM,Equal
Iris,150.0,94.67,95.33,96.0,Equal,Equal,Equal
Hepatitis,155.0,81.94,83.23,87.1,Equal,Equal,Equal
Wine,178.0,94.38,96.63,98.88,Equal,SVM,Equal
Auto,205.0,81.95,58.05,70.73,Decision Tree,Decision Tree,SVM
Sonar,208.0,78.85,69.71,76.92,Decision Tree,Equal,Equal
Glass,214.0,67.29,48.59,59.81,Decision Tree,Equal,SVM
Heart,270.0,80.0,84.07,83.7,Equal,Equal,Equal


In [5]:
# For each pairwise comparison column, print how many datasets fell into each
# outcome, followed by a separator line.
for pairing in ('Decision Tree vs Bayes', 'Decision Tree vs SVM', 'Bayes vs SVM'):
    print(f'{pairing}:')
    print(data[pairing].value_counts())
    print('--------------------\n')

Decision Tree vs Bayes:
Decision Tree vs Bayes
Equal            11
Decision Tree    10
Bayes             2
Name: count, dtype: int64
--------------------

Decision Tree vs SVM:
Decision Tree vs SVM
Equal            15
SVM               6
Decision Tree     2
Name: count, dtype: int64
--------------------

Bayes vs SVM:
Bayes vs SVM
Equal    15
SVM       8
Name: count, dtype: int64
--------------------

