## Notebook to compute the precision and recall metrics for the training and test sets of the models

In [1]:
import pandas as pd
import numpy as np

from sklearn.preprocessing import MinMaxScaler, StandardScaler
from sklearn.model_selection import train_test_split, StratifiedKFold
from sklearn.linear_model import LogisticRegression
from sklearn.feature_selection import VarianceThreshold, SelectKBest, chi2, mutual_info_classif
from sklearn.model_selection import cross_val_score
from sklearn.metrics import confusion_matrix, classification_report

import seaborn as sn
import matplotlib.pyplot as plt

### Training set

In [42]:
# `ast` is used further down to parse the stringified metric dicts stored in the CSV.
import ast

# Per-fold cross-validation results (including the `other_metrics` column) for this run.
res_train = pd.read_csv(
    filepath_or_buffer="../results/run2_for_conf_matrix/VAE/100_hidden_20_emb/tcga_classifier_dropout_0.6_in_0.8_hidden_rec_loss_binary_crossentropy_classifier_cv_final_other_metrics.csv"
)

In [43]:
# Show the loaded results table: one row per fold, plus the hyper-parameter columns of the run.
res_train

Unnamed: 0.1,Unnamed: 0,Fold,accuracy,other_metrics,mean_accuracy,intermediate_dim,latent_dim,batch_size,epochs_vae,learning_rate,dropout_input,dropout_hidden,dropout_decoder,freeze_weights,classifier_use_z,classifier_loss,reconstruction_loss
0,0,1,0.824242,"{'Basal': {'precision': 0.9333333333333333, 'r...",0.8581,100,20,200,100,0.01,0.6,0.8,True,False,False,categorical_crossentropy,binary_crossentropy
1,1,2,0.858896,"{'Basal': {'precision': 1.0, 'recall': 0.96296...",0.8581,100,20,200,100,0.01,0.6,0.8,True,False,False,categorical_crossentropy,binary_crossentropy
2,2,3,0.920245,"{'Basal': {'precision': 0.9642857142857143, 'r...",0.8581,100,20,200,100,0.01,0.6,0.8,True,False,False,categorical_crossentropy,binary_crossentropy
3,3,4,0.834356,"{'Basal': {'precision': 0.9, 'recall': 1.0, 'f...",0.8581,100,20,200,100,0.01,0.6,0.8,True,False,False,categorical_crossentropy,binary_crossentropy
4,4,5,0.852761,"{'Basal': {'precision': 0.896551724137931, 're...",0.8581,100,20,200,100,0.01,0.6,0.8,True,False,False,categorical_crossentropy,binary_crossentropy


In [44]:
# Accuracy recorded for each fold.
res_train.loc[:, "accuracy"]

0    0.824242
1    0.858896
2    0.920245
3    0.834356
4    0.852761
Name: accuracy, dtype: float64

In [45]:
# Mean accuracy column; in this file the same value is repeated on every row.
res_train.loc[:, "mean_accuracy"]

0    0.8581
1    0.8581
2    0.8581
3    0.8581
4    0.8581
Name: mean_accuracy, dtype: float64

In [46]:
# Extra metrics per fold, stored as the string form of a nested dict (dtype object).
res_train.loc[:, "other_metrics"]

0    {'Basal': {'precision': 0.9333333333333333, 'r...
1    {'Basal': {'precision': 1.0, 'recall': 0.96296...
2    {'Basal': {'precision': 0.9642857142857143, 'r...
3    {'Basal': {'precision': 0.9, 'recall': 1.0, 'f...
4    {'Basal': {'precision': 0.896551724137931, 're...
Name: other_metrics, dtype: object

In [47]:
from statistics import stdev

# Class labels; kept for reference, they are not needed by this cell because it
# only reads the pre-computed 'weighted avg' entry of each report.
subtypes = ["Basal", "Her2", "LumA", "LumB", "Normal"]

# Every row of `other_metrics` holds the string form of a nested dict
# (presumably sklearn's classification_report(output_dict=True) -- confirm).
# Parse each row once, and iterate over however many folds the file contains
# instead of assuming exactly five.
fold_reports = [ast.literal_eval(raw_report) for raw_report in res_train["other_metrics"].values]

# Weighted-average precision / recall of each fold.
mean_precisions = [report['weighted avg']['precision'] for report in fold_reports]
mean_recalls = [report['weighted avg']['recall'] for report in fold_reports]

# Report the per-fold values followed by mean +- sample standard deviation.
# Note: statistics.stdev needs at least two folds and raises StatisticsError otherwise.
print(mean_precisions)
print('{}+-{}'.format(np.mean(mean_precisions), stdev(mean_precisions)))
print("----------------")
print(mean_recalls)
print('{}+-{}'.format(np.mean(mean_recalls), stdev(mean_recalls)))

[0.8362909950569526, 0.8619205505093344, 0.921243548544162, 0.816650399702547, 0.8679067565253308]
0.8608024500676654+-0.039544477117777754
----------------
[0.8242424242424242, 0.8588957055214724, 0.9202453987730062, 0.8343558282208589, 0.852760736196319]
0.8581000185908161+-0.03742095871150875


In [48]:
# Mean and sample standard deviation of the per-fold accuracies.
fold_accuracies = res_train["accuracy"].values
print(f"Average accuracy: {np.mean(fold_accuracies)}+-{stdev(fold_accuracies)}")

Average accuracy: 0.8581000208854676+-0.03742096524681958


### Test set

In [None]:
# Results of the final model on the held-out test set.
# NOTE(review): this path points to a different run/architecture (miRNA/VAE/300_hidden_100_emb,
# dropout_0.4) than the training-set file loaded earlier (run2_for_conf_matrix/VAE/100_hidden_20_emb,
# dropout_0.6) -- confirm that comparing these two files is intended.
res_test = pd.read_csv(
    filepath_or_buffer="../results/miRNA/VAE/300_hidden_100_emb/tcga_classifier_dropout_0.4_in_0.8_hidden_rec_loss_binary_crossentropy_classifier_final_test_other_metrics.csv"
)

In [None]:
# Weighted precision / recall on the test set, computed from the per-subtype metrics.
subtypes = ["Basal", "Her2", "LumA", "LumB", "Normal"]
# One weight per subtype, in the same order as `subtypes`.
# NOTE(review): presumably the number of test samples of each subtype -- confirm.
weights_test=[36,15,128,26,14]
mean_precisions = []
mean_recalls = []

# Fixed: the original read from `bacd_test`, a name that is never defined in this
# notebook (NameError on a fresh kernel); the test results are loaded into `res_test`.
# NOTE(review): the row with index label 1 is selected, as in the original -- confirm
# this is the intended row of the test-results file.
dict_aux = ast.literal_eval(res_test['other_metrics'][1])

# Collect the per-subtype precision and recall in the order of `subtypes`.
arr_pre = [dict_aux[sub]['precision'] for sub in subtypes]
arr_rec = [dict_aux[sub]['recall'] for sub in subtypes]

# Average across subtypes using `weights_test` as weights.
mean_precisions.append(np.average(arr_pre, weights=weights_test))
mean_recalls.append(np.average(arr_rec, weights=weights_test))

print(mean_precisions)
print(mean_recalls)

In [None]:
# Mean of the accuracy column of the test results.
test_accuracy = np.mean(res_test["accuracy"].values)
print(f"Average accuracy: {test_accuracy}")