# Import libraries

In [None]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeClassifier
from sklearn.metrics import average_precision_score
from sklearn.metrics import accuracy_score
from matplotlib import pyplot as plt
from sklearn import tree

import lime
import lime.lime_tabular

import holoviews as hv
import plotly.graph_objects as go

from ipysankeywidget import SankeyWidget

import shap

from sklearn.metrics import confusion_matrix

# Import functions

In [None]:
import ipynb
from ipynb.fs.defs.ML_Classifier_Comparison_Functions_Only import *

# Read CSV file

In [None]:
# Load the diabetes dataset from the local Data folder.
# NOTE(review): `read_csv` is not imported by name in this notebook; presumably it
# arrives through the star-import of the helper notebook above — confirm.
diabetesData = read_csv('Data/diabetes.csv')
# Last expression of the cell: shows the first rows as a quick sanity check.
diabetesData.head()

In [None]:
# Evaluates to True if at least one cell anywhere in the table is null.
diabetesData.isnull().values.any()
# A result of False means there are no missing values in this dataset.

# Choose the most appropriate pivot
- Why choose 'Age' as the pivot? What is the relationship between Age and the Outcome?
    - If the average 'Age' is nearly the same for both outcomes (i.e. the gap is small), splitting on it is pointless
    - If the average 'Age' differs a lot between outcomes, it is likely to be a good predictor (the outcome is strongly affected by it)

In [None]:
# Compute the mean of every column separately for each Outcome value, to see
# which predictor differs most between the two outcome groups.
averages_for_each_outcome = diabetesData.groupby(['Outcome']).mean()
# Last expression of the cell: displays the per-outcome averages table.
averages_for_each_outcome

# Check the proportion of each outcome in this dataset

In [None]:
# Count how many rows carry each Outcome label (looked up below by label 0 and 1).
proportion = diabetesData['Outcome'].value_counts()
count_of_zeros, count_of_ones = proportion[0], proportion[1]

print('There are %d 0s and %d 1s in diabetes dataset' % (count_of_zeros, count_of_ones))
# Share of label-1 rows among all rows, expressed as a percentage.
print('Hence, %.1f%% percent of outcomes are 1' % (count_of_ones / len(diabetesData) * 100.0))

# Divide datasets into 2

In [None]:
# Median age of the dataset; displayed as the cell output.
# NOTE(review): presumably the source of the hard-coded split value 29 used below — confirm.
diabetesData['Age'].median()

In [None]:
# Partition the rows by age so that two classifiers can be trained on different data:
# rows strictly below the threshold are 'younger', all rows at or above it are 'older'.
age_threshold = 29
youngerDiabetes = diabetesData.loc[diabetesData['Age'] < age_threshold]
olderDiabetes = diabetesData.loc[diabetesData['Age'] >= age_threshold]

# Create and Train 2 decision tree classifiers

In [None]:
# Predictor columns: every column of the dataset except 'Outcome'.
features = [
    'Pregnancies',
    'Glucose',
    'BloodPressure',
    'SkinThickness',
    'Insulin',
    'BMI',
    'DiabetesPedigreeFunction',
    'Age',
]
# Target column, kept as a one-element list.
target = ['Outcome']

# X (predictors) for the younger and older subsets
younger_X, older_X = split_to_2_predictors(youngerDiabetes, olderDiabetes, features)
# y (target) for the younger and older subsets
younger_y, older_y = split_to_2_targets(youngerDiabetes, olderDiabetes, target)

In [None]:
# Split each age group into a training set and a validation (test) set.
# NOTE(review): the split is done by the helper `train_test_split_data`, which is not
# visible here. The notes below describe sklearn's `train_test_split` and presumably
# apply to the helper as well — confirm:
#   - when neither train_size nor test_size is given, test_size defaults to 0.25
#     (train_size = 0.75); 25:75 is a common rule of thumb
#   - 'stratify' makes the split keep the same proportion of values in each sample
#     as in the array passed to that parameter
younger_X_train, younger_X_test, younger_y_train, younger_y_test = train_test_split_data(younger_X, younger_y)
older_X_train, older_X_test, older_y_train, older_y_test = train_test_split_data(older_X, older_y)

In [None]:
# Create one decision tree model per age group via the helper `create_decision_tree`.
# TODO: explore later to justify the tree depth (the original note mentions depth=10;
# could be 5 or smaller). NOTE(review): the depth is set inside the helper, not here — confirm.
younger_decisionTree_model = create_decision_tree()
older_decisionTree_model = create_decision_tree()

# Train (fit) each classifier on the training split of its own age group.
fit_decision_tree(younger_X_train, younger_y_train, younger_decisionTree_model)
fit_decision_tree(older_X_train, older_y_train, older_decisionTree_model)

In [None]:
# Build one combined evaluation set (older rows first, then younger rows) to examine
# the performance of the classifiers.
# `DataFrame.append` / `Series.append` were removed in pandas 2.0; `pd.concat` keeps
# the same row order and the original index labels.
total_testcases = pd.DataFrame(pd.concat([older_X_test, younger_X_test]))
total_testoutcome = pd.DataFrame(pd.concat([older_y_test, younger_y_test]))

# Print accuracies and precisions of classifiers
## Improve these accuracies to make more realistic! (explore this later)

### Accuracy = a ratio of correctly predicted observation to the total observations
- A great measure but only when you have symmetric datasets where values of false positive and false negatives are almost same
- Accuracy = (TP + TN) / (TP + FP + FN + TN)

### Precision = a ratio of correctly predicted positive observations to the total predicted positive observations
- High precision relates to the low false positive rate
- Precision = TP / (TP + FP)

-- Source: https://blog.exsilio.com/all/accuracy-precision-recall-f1-score-interpretation-of-performance-measures/
           https://towardsdatascience.com/machine-learning-classifiers-comparison-with-python-33149aecdbca
           https://towardsdatascience.com/various-ways-to-evaluate-a-machine-learning-models-performance-230449055f15
           https://medium.com/@vaibhavpaliwal/comparing-machine-learning-algorithms-on-a-single-dataset-classification-46ffc5d3f278

In [None]:
# Predict the response for both test sets with both trees. The "_opposite" variables
# hold a tree's predictions on the *other* age group's test set.
older_y_pred = test_decision_tree(older_decisionTree_model, older_X_test)
older_y_pred_opposite = test_decision_tree(older_decisionTree_model, younger_X_test)

younger_y_pred = test_decision_tree(younger_decisionTree_model, younger_X_test)
# Use the same helper as the three predictions above instead of calling .predict directly.
younger_y_pred_opposite = test_decision_tree(younger_decisionTree_model, older_X_test)

print("Accuracy of 'younger' decision tree against 'younger' test:", calculate_accuracy(younger_y_test, younger_y_pred))
print("Accuracy of 'younger' decision tree against 'older' test:", calculate_accuracy(older_y_test, younger_y_pred_opposite))
print("Accuracy of 'older' decision tree against 'older' test:", calculate_accuracy(older_y_test, older_y_pred))
print("Accuracy of 'older' decision tree against 'younger' test:", calculate_accuracy(younger_y_test, older_y_pred_opposite))

print("\n")

# Compare the two trees' predictions on the same test set: this shows how similar
# the two classifiers are to each other.
younger_testcases_predictions_similarity = calculate_similarity(older_y_pred_opposite, younger_y_pred)
older_testcases_predictions_similarity = calculate_similarity(younger_y_pred_opposite, older_y_pred)

print("How similar the results of older tree and younger tree tested against 'younger' testcases are:", younger_testcases_predictions_similarity)
print("How similar the results of older tree and younger tree tested against 'older' testcases are:", older_testcases_predictions_similarity)
print("-- These tell me how similar the 2 trained decision classifiers are")

# print("-- There are %d test cases in younger, and %d test cases in older" %(len(younger_X_test), len(older_X_test)))

# NOTE(review): the messages assume calculate_similarity returns a percentage — confirm.
print("-- Given that the similarity was approximately %.2f%% percent over 'younger' testcases, around %d testcases were disagreed" %(younger_testcases_predictions_similarity, number_of_disagreed_cases(younger_X_test, younger_testcases_predictions_similarity)))
print("-- Given that the similarity was approximately %.2f%% percent over 'older' testcases, around %d testcases were disagreed" %(older_testcases_predictions_similarity, number_of_disagreed_cases(older_X_test, older_testcases_predictions_similarity)))
      
      

In [None]:
# Average precision summarises the precision-recall curve, so it needs a score for
# the positive class rather than hard 0/1 predictions. Column 1 of predict_proba is
# the probability of the second entry of the model's classes_ (Outcome == 1 here).
younger_y_score = younger_decisionTree_model.predict_proba(younger_X_test)[:, 1]
older_y_score = older_decisionTree_model.predict_proba(older_X_test)[:, 1]

# Label each line so the two otherwise identical outputs can be told apart.
print("Average precision of 'younger' decision tree against 'younger' test:", average_precision_score(younger_y_test, younger_y_score))
print("Average precision of 'older' decision tree against 'older' test:", average_precision_score(older_y_test, older_y_score))

# Optimising decision tree performance
### Decision Tree Criterion
- {“gini”, “entropy”}, default=”gini”
- Both gini and entropy options are measures of 'impurity' of a node. A node having multiple classes is impure while a node having only one class is pure (most leaves are pure unless the tree has no attributes to split further on)
### Decision Tree Splitter
- {“best”, “random”}, default=”best”
- "best" evaluates every candidate split using the criterion and then splits the node with the best one, while "random" draws candidate splits at random and chooses the best among those
### Decision Tree Max Depth
- int or None, optional (default = None) 

In [None]:
# Train one entropy-criterion tree per age group with identical hyper-parameters
# (fixed seed and a depth limit of 6), then predict on each group's own test set.
entropy_tree_settings = dict(criterion='entropy', random_state=1, max_depth=6)

younger_entropy_decisionTree_model = DecisionTreeClassifier(**entropy_tree_settings)
older_entropy_decisionTree_model = DecisionTreeClassifier(**entropy_tree_settings)

# fit() returns the estimator itself, so the call results can be ignored.
younger_entropy_decisionTree_model.fit(younger_X_train, younger_y_train)
older_entropy_decisionTree_model.fit(older_X_train, older_y_train)

younger_entropy_pred = younger_entropy_decisionTree_model.predict(younger_X_test)
older_entropy_pred = older_entropy_decisionTree_model.predict(older_X_test)

# Compare the earlier trees (labelled 'gini') against the entropy trees.
accuracy_rows = (
    ("Accuracy of 'younger gini' decision tree against 'younger' test:", younger_y_test, younger_y_pred),
    ("Accuracy of 'older gini' decision tree against 'older' test:", older_y_test, older_y_pred),
    ("Accuracy of 'younger entropy' decision tree against 'younger' test:", younger_y_test, younger_entropy_pred),
    ("Accuracy of 'older entropy' decision tree against 'older' test:", older_y_test, older_entropy_pred),
)
for row_label, expected_outcome, predicted_outcome in accuracy_rows:
    print(row_label, accuracy_score(expected_outcome, predicted_outcome))

# Why did they disagree on those cases?
- Use feature_importance
- Train a new classifier which has new labels e.g. train on the same dataset but use for comparison but the labels will be whether they disagree or not...
- The datasets we used for comparison was younger_X_test
- Whether they disagree or not (the labels = np.logical_xor(older_y_pred_opposite, younger_y_pred)) this is the TARGET
- Don't train/test split just use all of them for training!! 
- For this ML classifier, print feature_importance_ (high importance tells me why they disagreed)
- No decision tree? Use logistic regression...

# Print feature_importance_ of younger testcases classifier
- This tells me how strongly each feature contributed to the disagreement between the classifiers
- They are computed as the mean and standard deviation of accumulation of the impurity decrease within tree
- The importance of a feature is: how much this feature is used in tree. Formally, it is computed as the (normalized) total reduction of the criterion brought by that feature
-- source: https://scikit-learn.org/stable/auto_examples/ensemble/plot_forest_importances.html
-- source: https://inria.github.io/scikit-learn-mooc/python_scripts/dev_features_importance.html

In [None]:
# Split the 'younger' test cases into those where the two trees disagree and those
# where they agree, by comparing the older tree's and the younger tree's predictions.
younger_disagreed_testcases = return_disagreed_cases(younger_X_test, older_y_pred_opposite, younger_y_pred)
younger_agreed_testcases = return_agreed_cases(younger_X_test, older_y_pred_opposite, younger_y_pred)

# `DataFrame.append` was removed in pandas 2.0; `pd.concat` keeps the same row order
# (disagreed first, then agreed) and the original index labels.
all_younger_testcases = pd.concat([younger_disagreed_testcases, younger_agreed_testcases])

# This classifier is only inspected for its feature importances, so all rows are
# used for training and there is no test split.
# X (predictors)
younger_testcases_X_train = all_younger_testcases[features]

# y (target): the 'Agreed' column.
# NOTE(review): presumably added by the return_*_cases helpers — confirm.
younger_testcases_y_train = all_younger_testcases['Agreed']

younger_testcases_decisionTree_model = create_decision_tree() # explore this later to justify why depth=10 (could be 5? or smaller)

# Train (fit) the decision tree on agree/disagree labels.
fit_decision_tree(younger_testcases_X_train, younger_testcases_y_train, younger_testcases_decisionTree_model)

# High importance points at the features that drive the disagreement.
print_feature_importance(younger_testcases_decisionTree_model, features)


# Print feature_importance_ of older testcases classifier

In [None]:
# Split the 'older' test cases into those where the two trees disagree and those
# where they agree, by comparing the younger tree's and the older tree's predictions.
older_disagreed_testcases = return_disagreed_cases(older_X_test, younger_y_pred_opposite, older_y_pred)
older_agreed_testcases = return_agreed_cases(older_X_test, younger_y_pred_opposite, older_y_pred)

# `DataFrame.append` was removed in pandas 2.0; `pd.concat` keeps the same row order
# (disagreed first, then agreed) and the original index labels.
all_older_testcases = pd.concat([older_disagreed_testcases, older_agreed_testcases])

# This classifier is only inspected for its feature importances, so all rows are
# used for training and there is no test split.
# X (predictors)
older_testcases_X_train = all_older_testcases[features]

# y (target): the 'Agreed' column.
# NOTE(review): presumably added by the return_*_cases helpers — confirm.
older_testcases_y_train = all_older_testcases['Agreed']

older_testcases_decisionTree_model = create_decision_tree() # explore this later to justify why depth=10 (could be 5? or smaller)

# Train (fit) the decision tree on agree/disagree labels.
fit_decision_tree(older_testcases_X_train, older_testcases_y_train, older_testcases_decisionTree_model)

# High importance points at the features that drive the disagreement.
print_feature_importance(older_testcases_decisionTree_model, features)

# Which feature does each classifier rely on most

In [None]:
# Print the feature importances of each outcome classifier and keep the returned
# values so that the next cell can subtract one from the other.
younger_importance=print_feature_importance(younger_decisionTree_model, features)
print("\n")
older_importance=print_feature_importance(older_decisionTree_model, features)

In [None]:
# Difference in how much the two classifiers rely on the features
# (younger importance minus older importance).
importance_gap = younger_importance - older_importance
print(importance_gap)
# NOTE(review): the two summary lines below are hard-coded and are not derived from
# the difference printed above; re-check them whenever the models are retrained.
print("The output tells me Pregnancies, BloodPressure, Insulin and BMI features were more important in younger classifier")
print("While Glucose, DiabetesPedigreeFunction and Age features were more important in Older")

# Lime

-- Younger
- Left shows prediction probability of the two classes
- The middle chart shows the important features with their bounding values and the right table is the actual corresponding feature value in the observation row passed
-- source: https://towardsdatascience.com/a-guide-to-interpretable-machine-learning-2-fa3c4489fb53

# Explain for each classifier why they showed disagreement

In [None]:
# Build a single training set from all test cases, labelled 0 (the trees disagreed)
# or 1 (the trees agreed).
# `DataFrame.append` was removed in pandas 2.0; `pd.concat` keeps the same row order
# and the original index labels.
all_disagreed_testcases = pd.concat([younger_disagreed_testcases, older_disagreed_testcases])
all_disagreed_testcases['Agreed'] = 0
all_agreed_testcases = pd.concat([younger_agreed_testcases, older_agreed_testcases])
all_agreed_testcases['Agreed'] = 1

# Row order: younger_agreed + older_agreed + younger_disagreed + older_disagreed
all_testcases = pd.concat([all_agreed_testcases, all_disagreed_testcases])

# specify decision tree model
all_testcases_decisionTree_model = create_decision_tree() # explore this later to justify why depth=10 (could be 5? or smaller)

all_testcases_train = all_testcases[features]

# Train (fit) the classifier to predict agree/disagree from the feature values.
all_testcases_decisionTree_model.fit(all_testcases_train, all_testcases.Agreed)

### 1. Pick a random disagreed (all) testcases datapoint + Predict the outcome (Agreed/Disagreed) on each decision tree
- By testing on both younger_decisionTree_model and older_decisionTree_model, show with LIME that they give different results (one says diabetes while the other says no diabetes)

### Debugging for LIME explainer.as_list()
- exp.as_list takes as its parameter the label for which you want an explanation. Since you're calling explain_instance with top_labels=1, it only produces an explanation for the top prediction of new_clf. If that is label 1, exp.as_list() will work, as the default parameter is one. If it is not, you'll get a key error. Your options:

- If you're doing binary classification, don't use the top_labels parameter. You'll always get explanations for label 1
- If it's multilabel classification, call exp.as_list(new_clf.predict(samp.as_matrix())[0])
- If you want multiple explanations for different labels, call explain_instance with top_labels=num_labels
- Source: https://github.com/marcotcr/lime/issues/106

In [None]:
# Kept for any later cell that may still reference it.
classes=['Disagreed','Agreed']

# The two models explained below predict the diabetes Outcome (0 / 1), not
# agreement, so LIME must label the predict_proba columns with outcome names.
outcome_class_names = ['No Diabetes', 'Diabetes']

younger_X_train_numpy = younger_X_train.to_numpy()
older_X_train_numpy = older_X_train.to_numpy()
all_disagreed_testcases_numpy = all_disagreed_testcases[features].to_numpy()

# Pick one random test case on which the two trees disagreed; the same row is
# explained for both models so that the explanations are comparable.
i = np.random.randint(0, len(all_disagreed_testcases_numpy))

# Explanation of the younger model, with an explainer built on its own training data.
explainer = lime.lime_tabular.LimeTabularExplainer(younger_X_train_numpy,
                                                   feature_names=features,
                                                   class_names=outcome_class_names,
                                                   discretize_continuous=True)

exp1 = explainer.explain_instance(all_disagreed_testcases_numpy[i], younger_decisionTree_model.predict_proba, num_features=8)
exp1_list = exp1.as_list()
print(exp1.as_map())
print('\n')
print(exp1_list)
exp1.show_in_notebook()

# Explanation of the older model, with an explainer built on its own training data.
explainer = lime.lime_tabular.LimeTabularExplainer(older_X_train_numpy,
                                                   feature_names=features,
                                                   class_names=outcome_class_names,
                                                   discretize_continuous=True)

exp2 = explainer.explain_instance(all_disagreed_testcases_numpy[i], older_decisionTree_model.predict_proba, num_features=8)
exp2_list = exp2.as_list()
print(exp2.as_map())
print('\n')
print(exp2_list)
exp2.show_in_notebook()


In [None]:
# Predict the outcome of the selected disagreed test case with both trees.
# The row is reshaped to (1, n_features) because predict expects a 2-D input.
selected_case = all_disagreed_testcases_numpy[i].reshape(1, -1)
for outcome_model in (younger_decisionTree_model, older_decisionTree_model):
    print(outcome_model.predict(selected_case))

In [None]:
# LIME weights for label 1 as (feature_index, weight) pairs, ordered by feature index.
# sorted() builds a new list, so the explanation's own list is not reordered in place.
exp1_map = sorted(exp1.as_map()[1])
exp2_map = sorted(exp2.as_map()[1])
exp1_map_df = pd.DataFrame(exp1_map)
exp2_map_df = pd.DataFrame(exp2_map)
# Replace each feature index with its name by lookup, so the labels stay correct
# even if an explanation does not contain every feature.
exp1_map_df[0] = [features[feature_index] for feature_index, _ in exp1_map]
exp2_map_df[0] = [features[feature_index] for feature_index, _ in exp2_map]

# exp1_map_df
# ax = exp1_map_df.plot(x=0, y=1, kind='barh', figsize=(10,8), legend=False, xlabel='Features', ylabel='Values')
# exp2_map_df.plot(x=0, y=1, ax=ax, kind='barh', figsize=(10,8), legend=False, xlabel='Features', ylabel='Values')

fig = plt.figure()

# exp1 explains the younger model and exp2 the older model; both are drawn on the
# same axes, slightly transparent so overlapping bars stay visible.
for frame in [exp1_map_df, exp2_map_df]:
    plt.barh(frame[0], frame[1], alpha=0.6)
plt.legend(["Younger model", "Older model"])
plt.show()

In [None]:
# Draw both sets of LIME weights on one axes: exp1 (younger model) in red, then
# exp2 (older model) on top in the default colour.
ax = exp1_map_df.plot(x=0, y=1, kind='barh',xlabel='Features',color='r')
exp2_map_df.plot(x=0, y=1, ax=ax, kind='barh',xlabel='Features')
# Legend entries follow plotting order: exp1 is the younger model, exp2 the older one.
plt.legend(["Younger model", "Older model"])
plt.show()

In [None]:
# Horizontal bar chart of the younger model's LIME explanation (feature rule -> weight).
# The `sort_columns` argument was removed from DataFrame.plot in pandas 2.0, so it is
# no longer passed.
pd.DataFrame(exp1_list).plot(x=0, y=1, kind='barh', figsize=(10,8), legend=False, xlabel='Features', ylabel='Values')
plt.show()

In [None]:
# Show the older model's LIME explanation list as a table (cell output).
pd.DataFrame(exp2_list)

# Sankey diagram

In [None]:
# source: agreed and disagreed from younger - all_younger_testcases
# target: agreed and disagreed from older - all_older_testcases
# all_younger_testcases.head()

In [None]:
# Sankey diagram linking each model's test set to the number of test cases on
# which the two trees agreed or disagreed.
# Node order: 0 = younger model, 1 = older model, 2 = agreed, 3 = disagreed.
agreement_flows = [
    (0, 2, len(younger_agreed_testcases)),
    (1, 2, len(older_agreed_testcases)),
    (1, 3, len(older_disagreed_testcases)),
    (0, 3, len(younger_disagreed_testcases)),
]
flow_sources, flow_targets, flow_values = (list(column) for column in zip(*agreement_flows))

agreement_nodes = dict(
    pad=15,
    thickness=20,
    line=dict(color="black", width=0.5),
    label=["Younger Diabetes Model", "Older Diabetes Model", "Agreed", "Disagreed"],
    color="blue",
)
agreement_links = dict(source=flow_sources, target=flow_targets, value=flow_values)

fig = go.Figure(data=[go.Sankey(node=agreement_nodes, link=agreement_links)])

fig.update_layout(title_text="Sankey Diagram", font_size=10)
fig.show()

labels = [class1_v1, class2_v1, …classk_v1,
          class1, class2, …, classk, 
               class1_v2, class2_v2, …, classk_v2] // 3k labels <br>

Source = [0, 0, …, 0, 1,1,..., 1, … {k blocks}, 
	    k, k, …, k, k+1, k+1, …, k+1, k+2, … ] // k*k*2 <br>
        
Target = [k, k+1, …, 2k {each of the ground truth classes}, k, k+1, …,{k blocks of k length}
                2k, 2k+1, …, 3k-1, 2k, 2k+1, …, 3k-1, … {k blocks of k length} ] // k*k*2 <br>
                
Value = [num(predicted class1 by v1 that are of class1), 
              num(predicted class1 by v1 that are of class2), …,
              num(predicted class1 by v1 that are of classk),
	…,
              num(class 1 where v2 predicted class1), {source k to target 2k}
              num(class 1 where v2 predicted class2), {source k to target 2k+1}…,
              num(class 1 where v2 predicted classk),
              … ] // k*k*2


In [None]:
# Confusion matrix of the younger tree on its own test set.
output = confusion_matrix(younger_y_test, younger_y_pred)
print(len(younger_y_pred))
# Cells are printed row by row (row = true label, column = predicted label):
# true negatives, false positives, false negatives, true positives.
for true_label in (0, 1):
    for predicted_label in (0, 1):
        print(output[true_label][predicted_label])

In [None]:
# older_y_pred_opposite = test_decision_tree(older_decisionTree_model, younger_X_test)
# younger_y_pred = test_decision_tree(younger_decisionTree_model, younger_X_test)

# Confusion matrices of both trees on the 'younger' test set.
# sklearn convention: matrix[true_label][predicted_label].
# Each name now matches the predictions it is built from (they were swapped before).
younger_y_pred_confusion = confusion_matrix(younger_y_test, younger_y_pred)
older_y_pred_opposite_confusion = confusion_matrix(younger_y_test, older_y_pred_opposite)

# Nodes 0/1 and 4/5 are what each model predicted; nodes 2/3 are the ground truth.
labels = ['Younger Diabetes', 'Younger No Diabetes', 'Diabetes', 'No Diabetes', 'Older Diabetes', 'Older No Diabetes']
sources = [0, 0, 1, 1, 4, 4, 5, 5]
targets = [2, 3, 2, 3, 2, 3, 2, 3]

# One value per (source, target) pair above, i.e. per (predicted class, true class):
#   predicted diabetes    -> truly diabetes    : true positives  = matrix[1][1]
#   predicted diabetes    -> truly no diabetes : false positives = matrix[0][1]
#   predicted no diabetes -> truly diabetes    : false negatives = matrix[1][0]
#   predicted no diabetes -> truly no diabetes : true negatives  = matrix[0][0]
values = [younger_y_pred_confusion[1][1], younger_y_pred_confusion[0][1],
          younger_y_pred_confusion[1][0], younger_y_pred_confusion[0][0],
          older_y_pred_opposite_confusion[1][1], older_y_pred_opposite_confusion[0][1],
          older_y_pred_opposite_confusion[1][0], older_y_pred_opposite_confusion[0][0]]

fig = go.Figure(data=[go.Sankey(
    node = dict(
      pad = 15,
      thickness = 20,
      line = dict(color = "black", width = 0.5),
      label = labels,
      color = "blue"
    ),
    link = dict(
      source = sources,
      target = targets,
      value = values
  ))])

fig.update_layout(title_text="Sankey Diagram", font_size=10)
fig.show()

In [None]:
print(type(younger_y_test))

# A test case is incorrect when the true label and the prediction differ (XOR).
# ravel() flattens the target to 1-D first: if the target is a one-column DataFrame,
# to_numpy() has shape (n, 1) and would broadcast against the (n,) predictions into
# an (n, n) matrix instead of a per-row mask.
younger_incorrect_mask = np.logical_xor(younger_y_test.to_numpy().ravel(), younger_y_pred)
younger_incorrect_testcases = younger_X_test[younger_incorrect_mask]
# Correct cases are every remaining row, selected by index label.
younger_correct_testcases = younger_X_test.loc[younger_X_test.index.difference(younger_incorrect_testcases.index)]

older_incorrect_mask = np.logical_xor(older_y_test.to_numpy().ravel(), older_y_pred)
older_incorrect_testcases = older_X_test[older_incorrect_mask]
older_correct_testcases = older_X_test.loc[older_X_test.index.difference(older_incorrect_testcases.index)]

# SHAP

In [None]:
# Explain a single row (positional index 1) of the agree/disagree training data.
data_for_prediction = all_testcases_train.iloc[1]
# predict_proba expects a 2-D input, so the row is reshaped to (1, n_features).
data_for_prediction_array = data_for_prediction.values.reshape(1, -1)

print(all_testcases_decisionTree_model.predict_proba(data_for_prediction_array))

shap_explainer = shap.TreeExplainer(all_testcases_decisionTree_model)

# Calculate Shap values
shap_values = shap_explainer.shap_values(data_for_prediction)
shap.initjs()
# NOTE(review): indexing with [1] assumes shap returns one entry per class
# (older shap releases); newer releases may return a single array — confirm.
shap.force_plot(shap_explainer.expected_value[1], shap_values[1], data_for_prediction)

In [None]:
# Same explanation as a force plot over every row of the agree/disagree training data.
shap_explainer = shap.TreeExplainer(all_testcases_decisionTree_model)

# Calculate Shap values
shap_values = shap_explainer.shap_values(all_testcases_train)
shap.initjs()
# NOTE(review): indexing with [1] assumes shap returns one entry per class — confirm
# against the installed shap version.
shap.force_plot(shap_explainer.expected_value[1], shap_values[1], all_testcases_train)

In [None]:
shap_explainer = shap.TreeExplainer(all_testcases_decisionTree_model)

# Calculate Shap values on the feature matrix the model was trained on.
# (The target column 'Agreed' is the label, not a model input, so it must not be
# passed to the explainer.)
agreement_feature_rows = all_testcases[features]
shap_values = shap_explainer.shap_values(agreement_feature_rows)
shap.initjs()
# Force plot for class index 0 on the first row (by position).
# NOTE(review): indexing with [0] assumes shap returns one entry per class — confirm
# against the installed shap version.
# shap.force_plot(explainer.expected_value[0], shap_values[0][i], X.values[i], feature_names = X.columns)
shap.force_plot(shap_explainer.expected_value[0], shap_values[0][0], agreement_feature_rows.iloc[0], feature_names=features)

In [None]:
# Bar summary of SHAP values for the younger outcome model over all younger rows.
shap.initjs()
shap_explainer = shap.TreeExplainer(younger_decisionTree_model)
shap_values = shap_explainer.shap_values(younger_X)
shap.summary_plot(shap_values, younger_X, plot_type="bar") 

In [None]:
# Bar summary of SHAP values for the older outcome model over all older rows.
shap.initjs()
shap_explainer = shap.TreeExplainer(older_decisionTree_model)
shap_values = shap_explainer.shap_values(older_X)
shap.summary_plot(shap_values, older_X, plot_type="bar") 

In [None]:
shap.initjs()
# Rebuild the combined agree/disagree table (agreed rows first, then disagreed).
# `DataFrame.append` was removed in pandas 2.0; `pd.concat` keeps the same row order
# and the original index labels.
all_testcases = pd.concat([all_agreed_testcases, all_disagreed_testcases])

# Bar summary of SHAP values for the agreement classifier over all test cases.
shap_explainer = shap.TreeExplainer(all_testcases_decisionTree_model)
shap_values = shap_explainer.shap_values(all_testcases[features])
shap.summary_plot(shap_values, all_testcases[features], plot_type="bar")

# Visualise decision trees
-- Source: https://mljar.com/blog/visualize-decision-tree/

In [None]:
# Draw the younger decision tree on a large canvas so the node text stays readable.
fig = plt.figure(figsize=(40,20))
# filled=True colours the nodes; classes are labelled '0' and '1'.
youngTreeVisualisation = tree.plot_tree(younger_decisionTree_model, feature_names=features, class_names=['0','1'], filled=True)


In [None]:
# Draw the older decision tree on a large canvas so the node text stays readable.
fig = plt.figure(figsize=(40,20))
# class_names must be a list with one label per class. str(Series) produced one long
# string that would be indexed character by character, so the same '0'/'1' labels
# as in the younger tree plot are used instead.
oldTreeVisualisation = tree.plot_tree(older_decisionTree_model, feature_names=features, class_names=['0','1'], filled=True)
