# <center><b><h1>Cost Matrix Analysis (Evaluation)</h1></b></center>

In [1]:
import numpy as np
import pandas as pd

# `sklearn.externals.joblib` was deprecated in scikit-learn 0.21 and removed in 0.23;
# prefer the standalone package and fall back only on old installations.
try:
    import joblib
except ImportError:
    from sklearn.externals import joblib

## 1. Load the data

### 1.1 Confusion matrix of the best models

In [2]:
# Confusion-matrix counts (tp, fp, fn, tn) of the best model for all patients.
all_patients_cm = pd.read_csv(
    "../Data/confusion_matrix/neural_network/best_model_based/All_Patients.csv",
    index_col=0,  # first CSV column holds the model label
)
all_patients_cm

Unnamed: 0,tp,fp,fn,tn
All_Patients,41,4111,20,26605


In [3]:
# Confusion-matrix counts (tp, fp, fn, tn) of the best model for female patients without menopause.
female_patients_no_menopause_cm = pd.read_csv(
    "../Data/confusion_matrix/neural_network/best_model_based/female_patients_no_menopause.csv",
    index_col=0,  # first CSV column holds the model label
)
female_patients_no_menopause_cm

Unnamed: 0,tp,fp,fn,tn
Female_Patients_No_Menopause,3,1005,3,3888


In [4]:
# Confusion-matrix counts (tp, fp, fn, tn) of the best model for male patients.
male_patients_cm = pd.read_csv(
    "../Data/confusion_matrix/neural_network/best_model_based/male_Patients.csv",
    index_col=0,  # first CSV column holds the model label
)
male_patients_cm

Unnamed: 0,tp,fp,fn,tn
Male_Patients,12,2813,6,13005


In [5]:
# Confusion-matrix counts (tp, fp, fn, tn) of the best model for the "other patients" group.
other_patients_cm = pd.read_csv(
    "../Data/confusion_matrix/neural_network/best_model_based/other_Patients.csv",
    index_col=0,  # first CSV column holds the model label
)
other_patients_cm

Unnamed: 0,tp,fp,fn,tn
Other_Patients,28,2369,7,18330


In [6]:
# Confusion-matrix counts (tp, fp, fn, tn) of the best model for women in menopause.
women_menopause_cm = pd.read_csv(
    "../Data/confusion_matrix/neural_network/best_model_based/women_menopause.csv",
    index_col=0,  # first CSV column holds the model label
)
women_menopause_cm

Unnamed: 0,tp,fp,fn,tn
Women_Menopause,24,2248,15,7756


#### Unifying the confusion matrices into one table

In [7]:
# One single-row confusion-matrix frame per model, in the order they appear in the unified table.
dataframes = [
    all_patients_cm,
    female_patients_no_menopause_cm,
    male_patients_cm,
    other_patients_cm,
    women_menopause_cm,
]

In [8]:
# Stack the single-row confusion matrices into one table (one row per model).
# rename_axis returns a new frame with the column axis labelled 'model'; unlike
# assigning to `columns.names`, it never mutates an Index object in place, so the
# column Index of the source frames cannot be renamed as a side effect.
models_df = pd.concat(dataframes).rename_axis(columns='model')
models_df

model,tp,fp,fn,tn
All_Patients,41,4111,20,26605
Female_Patients_No_Menopause,3,1005,3,3888
Male_Patients,12,2813,6,13005
Other_Patients,28,2369,7,18330
Women_Menopause,24,2248,15,7756


### 1.2 Neural Network models

In [9]:
# Folder prefix of the persisted best neural-network models. It keeps its trailing
# slash because file names are appended to it by plain string concatenation.
model_folder_base = '../Models/Neural_Networks/best_models/'

#### Models trained with a Base Ratio of 1 between the classes

In [10]:
# Load one persisted model per patient group.
# NOTE: joblib.load unpickles arbitrary Python objects, so only load files from a trusted source.
# NOTE(review): 'Other_patients_Model.pkl' uses a lower-case "p", unlike the other
# file names — confirm it matches the file on disk (matters on case-sensitive file systems).
all_patients = joblib.load(model_folder_base+'All_Patients_Model.pkl')
female_patients_no_menopause = joblib.load(model_folder_base+'Female_Patients_No_Menopause_Model.pkl')
male_patients = joblib.load(model_folder_base+'Male_Patients_Model.pkl')
other_patients = joblib.load(model_folder_base+'Other_patients_Model.pkl')
women_menopause = joblib.load(model_folder_base+'Women_Menopause_Model.pkl')


## 2. Defining the Cost Matrix

### 2.1 Defining costs

In [11]:
# Unit costs used to build the cost matrix.
# NOTE(review): currency and the source of these figures are not stated in the notebook — confirm.
xray = 47  # presumably the cost of one X-ray examination
mri_and_consultant = 453  # presumably the cost of an MRI plus a consultant visit

In [12]:
# Cost assigned to each confusion-matrix outcome, expressed in the unit costs defined earlier.
tn_cost = 0  # true negative: no cost
fp_cost = xray  # false positive: one `xray`
fn_cost = xray + mri_and_consultant  # false negative: `xray` plus `mri_and_consultant`
tp_cost = xray  # true positive: one `xray`

### 2.2 Cost Matrix

In [13]:
# One-row lookup table (index label 'Costs') with the cost of each outcome.
# Built directly from a dict of scalars: the former list(pd.Series(...)) round
# trip and the in-place index rename added nothing to the result.
cost_matrix_df = pd.DataFrame(
    {
        'tn_cost': tn_cost,
        'fp_cost': fp_cost,
        'fn_cost': fn_cost,
        'tp_cost': tp_cost,
    },
    index=['Costs'],
)
cost_matrix_df

Unnamed: 0,tn_cost,fp_cost,fn_cost,tp_cost
Costs,0,47,500,47


## 3 Compute the total costs

In [14]:
# Total cost per model: every confusion-matrix count times the cost of that outcome.
# Vectorised column arithmetic replaces the iterrows() loop, which wrote into
# models_df while iterating over it.
models_df['total_cost'] = (
    models_df['tn'] * cost_matrix_df.loc['Costs', 'tn_cost']
    + models_df['fp'] * cost_matrix_df.loc['Costs', 'fp_cost']
    + models_df['fn'] * cost_matrix_df.loc['Costs', 'fn_cost']
    + models_df['tp'] * cost_matrix_df.loc['Costs', 'tp_cost']
).astype(float)  # the loop produced a float column; keep the same dtype

In [15]:
# Display the table, now including the `total_cost` column.
models_df

model,tp,fp,fn,tn,total_cost
All_Patients,41,4111,20,26605,205144.0
Female_Patients_No_Menopause,3,1005,3,3888,48876.0
Male_Patients,12,2813,6,13005,135775.0
Other_Patients,28,2369,7,18330,116159.0
Women_Menopause,24,2248,15,7756,114284.0


## 4 Compute the minimum costs for real fractures and non-fractures.

In [16]:
# Lowest cost reachable for each true class: a real non-fracture ends up as a
# TN or an FP, a real fracture as an FN or a TP.
min_non_frac_cost = cost_matrix_df.loc['Costs', ['tn_cost', 'fp_cost']].min()
min_fracture_cost = cost_matrix_df.loc['Costs', ['fn_cost', 'tp_cost']].min()

## 5 Compute the new Cost Matrix

In [17]:
# Cost matrix with the per-class minimum subtracted from each entry, so every
# value is the extra cost of an outcome over the cheapest outcome of its true class.
# Built directly from a dict of scalars: the former list(pd.Series(...)) round
# trip and the in-place index rename added nothing to the result.
min_cost_matrix_df = pd.DataFrame(
    {
        'tn_cost': tn_cost - min_non_frac_cost,
        'fp_cost': fp_cost - min_non_frac_cost,
        'fn_cost': fn_cost - min_fracture_cost,
        'tp_cost': tp_cost - min_fracture_cost,
    },
    index=['Minimum_Costs'],
)
min_cost_matrix_df

Unnamed: 0,tn_cost,fp_cost,fn_cost,tp_cost
Minimum_Costs,0,47,453,0


## 6 Compute the new Costs

In [18]:
# Cost per model under the reduced cost matrix (row 'Minimum_Costs').
# Vectorised column arithmetic replaces the iterrows() loop, which wrote into
# models_df while iterating over it.
models_df['minimum_cost'] = (
    models_df['tn'] * min_cost_matrix_df.loc['Minimum_Costs', 'tn_cost']
    + models_df['fp'] * min_cost_matrix_df.loc['Minimum_Costs', 'fp_cost']
    + models_df['fn'] * min_cost_matrix_df.loc['Minimum_Costs', 'fn_cost']
    + models_df['tp'] * min_cost_matrix_df.loc['Minimum_Costs', 'tp_cost']
).astype(float)  # the loop produced a float column; keep the same dtype

In [19]:
# Display the table, now including the `minimum_cost` column.
models_df

model,tp,fp,fn,tn,total_cost,minimum_cost
All_Patients,41,4111,20,26605,205144.0,202277.0
Female_Patients_No_Menopause,3,1005,3,3888,48876.0,48594.0
Male_Patients,12,2813,6,13005,135775.0,134929.0
Other_Patients,28,2369,7,18330,116159.0,114514.0
Women_Menopause,24,2248,15,7756,114284.0,112451.0


## 7 Compare the models

In [20]:
# Cost of each modelling strategy, taken from the `minimum_cost` column.
# Strategy 1: a single model for every patient.
all_patients_costs = models_df.loc['All_Patients', 'minimum_cost']

# Strategy 2: the two-model split (other patients + women in menopause).
tsne_division_costs = models_df.loc[
    ['Other_Patients', 'Women_Menopause'], 'minimum_cost'
].sum()

# Strategy 3: the three-model split (female without menopause + male + women in menopause).
complete_division_costs = models_df.loc[
    ['Female_Patients_No_Menopause', 'Male_Patients', 'Women_Menopause'], 'minimum_cost'
].sum()

In [21]:
# Report the cheapest strategy; on ties the earlier branch wins, as before.
_lowest_cost = min(all_patients_costs, tsne_division_costs, complete_division_costs)

if _lowest_cost == all_patients_costs:
    print("The best model by the Cost Matrix Analysis is : All_Patients")
elif _lowest_cost == tsne_division_costs:
    print("The best model by the Cost Matrix Analysis is the one suggesed from the t-SNE Analysis")
else:
    print("The best model by the Cost Matrix Analysis is the one composed by the three models full divided")

The best model by the Cost Matrix Analysis is : All_Patients


In [22]:
# Show the three strategy costs side by side: single model, two-model split, three-model split.
all_patients_costs, tsne_division_costs, complete_division_costs

(202277.0, 226965.0, 295974.0)