In [1]:
import pandas as pd
from collections import Counter
import scipy.stats as stats
from sklearn.metrics import (
    accuracy_score, jaccard_score, cohen_kappa_score, precision_recall_fscore_support,
    classification_report, confusion_matrix
)
import numpy as np
import re

In [42]:
def metrics(prediction1, prediction2):
    # Making sure they have different column names
    prediction1.name = "sx"
    prediction2.name = "dx"
    
    # Joining into a single DataFrame
    prediction_data_frame = prediction1.to_frame().join(prediction2, how="inner").dropna()
    
    # Ensuring there are no spaces (for string labels)
    if prediction_data_frame.dtypes[0] == 'object':
        prediction1 = prediction_data_frame[prediction_data_frame.columns[0]].str.strip()
        prediction2 = prediction_data_frame[prediction_data_frame.columns[1]].str.strip()
    else:
        prediction1 = prediction_data_frame[prediction_data_frame.columns[0]]
        prediction2 = prediction_data_frame[prediction_data_frame.columns[1]]

    print("Unique classes in prediction1:", np.unique(prediction1))
    print("Unique classes in prediction2:", np.unique(prediction2))

    # Accuracy
    accuracy = accuracy_score(prediction1, prediction2)
    print("Accuracy:", accuracy)

    # Jaccard Score
    jaccard = jaccard_score(prediction1, prediction2, average='macro')
    print("Jaccard Score:", jaccard)

    # Cohen’s Kappa Score
    kappa = cohen_kappa_score(prediction1, prediction2)
    print("Cohen Kappa Score:", kappa)

    # Precision, Recall, F1-Score
    precision, recall, f1, support = precision_recall_fscore_support(prediction1, prediction2, average='macro')
    print(f"Precision (Macro): {precision:.4f}")
    print(f"Recall/Sensitivity (Macro): {recall:.4f}")
    print(f"F1 Score (Macro): {f1:.4f}")

    # Classification Report
    print("\nClassification Report:\n", classification_report(prediction1, prediction2))

    # Compute Specificity & Sensitivity (Recall) for each class
    cm = confusion_matrix(prediction1, prediction2)
    specificity_per_class = {}
    sensitivity_per_class = {}

    for i, label in enumerate(np.unique(prediction1)):
        tn = cm.sum() - (cm[i, :].sum() + cm[:, i].sum() - cm[i, i])
        fp = cm[:, i].sum() - cm[i, i]
        fn = cm[i, :].sum() - cm[i, i]
        tp = cm[i, i]

        specificity = tn / (tn + fp) if (tn + fp) > 0 else 0
        sensitivity = tp / (tp + fn) if (tp + fn) > 0 else 0  # This is Recall for the specific class

        specificity_per_class[label] = specificity
        sensitivity_per_class[label] = sensitivity

    print("\nSpecificity per class:", specificity_per_class)
    print("Sensitivity (Recall) per class:", sensitivity_per_class)

    return {
        "accuracy": accuracy,
        "jaccard": jaccard,
        "kappa": kappa,
        "precision_macro": precision,
        "recall_macro": recall,
        "f1_macro": f1,
        "specificity_per_class": specificity_per_class,
        "sensitivity_per_class": sensitivity_per_class
    }


In [3]:
def find_consensus(row):
    diseases = ["NV", "MEL", "AK", "BKL", "SCC", "BCC", "DF", "VASC", "Altro"]
    occs = []
    
    for disease in diseases:
        occ = row.vote.count(disease)
        occs.append(occ)
        row[disease] = occ
        
    row["consensus_vote"] = diseases[np.argmax(occs)]
    
    #vedo se c'è un'altra malattia con lo stesso numero di voti
    for disease, occ in zip(diseases, occs):
        if row["consensus_vote"] != disease and max(occs) == occ:
            row["consensus_vote"] = None
            
    return row

In [4]:
def findIndex(row):
    return int(re.sub("[^0-9]", "", row.difficult_img_image_name.strip()[:3]))

In [5]:
def calculateVdiCramer(dataset):
    X2 = stats.chi2_contingency(dataset, correction=False)[0]
    
    # calculate the minimum of the number of categories in the two variables
    min_categories = min(dataset.shape[0], dataset.shape[1])

    # calculate Cramer's V coefficient
    n = dataset.sum().sum()
    V = np.sqrt(X2 / (n * (min_categories - 1)))

    # Print the result
    return V

## Loading data

In [6]:
diagnosis = pd.read_csv("DataFrames/diagnosi.csv")

In [7]:
labels_dataframe = pd.read_csv("DataFrames/label.csv", index_col = "image")
labels_dataframe = labels_dataframe.T.idxmax()
labels_dataframe.name = "truth"
metadata = pd.read_csv("../../ISIC_2019_Training_Metadata.csv")

In [8]:
probabilities = pd.read_csv("DataFrames/probabilities.csv", header = [0, 1], index_col = 0)
predictions = probabilities.T.groupby("Model").idxmax().T.map(lambda x: x[1] if isinstance(x, tuple) else x)

In [9]:
#adding labels
predictions = labels_dataframe.to_frame().join(predictions)

In [10]:
#To make sure there are no spaces
diagnosis.difficult_img_image_name = diagnosis.difficult_img_image_name.str.strip()

In [11]:
# deleting tests
diagnosis = diagnosis[diagnosis.wp_users_ID != 1]

In [12]:
# 0 -> train test split
# 1 -> cross validation
# 2 -> control group

diagnosis.groupby(["difficult_img_version", "wp_users_ID"]).difficult_img_image_name.count()

difficult_img_version  wp_users_ID
0                      2              115
                       3              122
                       4              122
1                      2               67
                       3               67
                       4               68
2                      2               78
                       3               80
                       4               80
Name: difficult_img_image_name, dtype: int64

In [13]:
# setting all "difficult" image to the same version 
diagnosis.difficult_img_version = np.where(diagnosis.difficult_img_version == 2, 2, 1)

In [14]:
# 1 -> difficult images
# 2 -> control group

diagnosis.groupby(["difficult_img_version", "wp_users_ID"]).difficult_img_image_name.count()

difficult_img_version  wp_users_ID
1                      2              182
                       3              189
                       4              190
2                      2               78
                       3               80
                       4               80
Name: difficult_img_image_name, dtype: int64

In [15]:
prediction_diagnosis = diagnosis.merge(predictions, left_on="difficult_img_image_name", right_on="image")
prediction_diagnosis

Unnamed: 0,wp_users_ID,difficult_img_image_name,difficult_img_version,vote,text,timestamp,deleted,truth,EfficientNetB4,EfficientNetB5,EfficientNetB6,Resnet152,Resnext50
0,2,ISIC_0010441,1,NV,,2023-11-05 16:27:42,0,MEL,BCC,BCC,BCC,BCC,BCC
1,2,ISIC_0010476,1,MEL,,2023-10-05 10:43:27,0,MEL,BKL,MEL,NV,NV,NV
2,2,ISIC_0010576,1,NV,,2023-11-05 16:28:02,0,MEL,NV,NV,NV,NV,NV
3,2,ISIC_0010849,1,NV,,2023-10-05 10:43:27,0,NV,MEL,NV,NV,MEL,MEL
4,2,ISIC_0011084,1,MEL,,2023-11-05 16:27:53,0,NV,MEL,BKL,BKL,MEL,MEL
...,...,...,...,...,...,...,...,...,...,...,...,...,...
794,4,ISIC_0072286,1,BKL,,2023-11-10 23:32:02,0,MEL,BKL,BKL,BKL,BKL,BKL
795,4,ISIC_0072573,1,VASC,,2023-11-10 12:11:42,0,BCC,VASC,NV,VASC,MEL,BCC
796,4,ISIC_0072871,2,BCC,,2023-11-10 23:12:12,0,BCC,BCC,BCC,BCC,BCC,BCC
797,4,ISIC_0072887,1,BCC,,2023-11-10 23:40:31,0,MEL,BKL,BKL,BKL,MEL,AK


## Comparing expert diagnosis

In [16]:
diagnosis.groupby("wp_users_ID").count()

Unnamed: 0_level_0,difficult_img_image_name,difficult_img_version,vote,text,timestamp,deleted
wp_users_ID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
2,260,260,260,4,260,260
3,269,269,269,36,269,269
4,270,270,270,22,270,270


In [17]:
ids_expert = np.unique(diagnosis.wp_users_ID)

# Compare the diagnosis

for index_sx, id_expert_sx in enumerate(ids_expert):
    dataset_sx = diagnosis[diagnosis.wp_users_ID == id_expert_sx].set_index('difficult_img_image_name')
    for id_expert_dx in ids_expert[index_sx+1:]:
        dataset_dx = diagnosis[diagnosis.wp_users_ID == id_expert_dx].set_index('difficult_img_image_name')
        crosstab = pd.crosstab(dataset_sx.vote, dataset_dx.vote)
        display(crosstab)
        metrics(dataset_sx.vote, dataset_dx.vote)
        print(calculateVdiCramer(crosstab))

vote,AK,Altro,BCC,BKL,DF,MEL,NV,SCC,VASC
vote,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
AK,16,3,2,0,0,0,0,9,0
Altro,3,2,3,1,1,3,1,3,2
BCC,0,0,11,0,0,1,1,10,1
BKL,2,0,0,5,0,2,0,2,1
DF,0,0,0,0,5,0,0,0,0
MEL,8,3,3,1,0,45,7,3,2
NV,6,2,5,2,0,18,20,3,3
SCC,2,0,1,0,0,0,0,21,0
VASC,0,0,2,0,0,1,0,2,9


Unique classes in prediction1: ['AK' 'Altro' 'BCC' 'BKL' 'DF' 'MEL' 'NV' 'SCC' 'VASC']
Unique classes in prediction2: ['AK' 'Altro' 'BCC' 'BKL' 'DF' 'MEL' 'NV' 'SCC' 'VASC']
Accuracy: 0.5173745173745173
Jaccard Score: 0.3703302686957325
Cohen Kappa Score: 0.42921368124118475
Precision (Macro): 0.5175
Recall/Sensitivity (Macro): 0.5550
F1 Score (Macro): 0.5143

Classification Report:
               precision    recall  f1-score   support

          AK       0.43      0.53      0.48        30
       Altro       0.20      0.11      0.14        19
         BCC       0.41      0.46      0.43        24
         BKL       0.56      0.42      0.48        12
          DF       0.83      1.00      0.91         5
         MEL       0.64      0.62      0.63        72
          NV       0.69      0.34      0.45        59
         SCC       0.40      0.88      0.55        24
        VASC       0.50      0.64      0.56        14

    accuracy                           0.52       259
   macro avg     

  if prediction_data_frame.dtypes[0] == 'object':


vote,AK,Altro,BCC,BKL,DF,MEL,NV,SCC,VASC
sx,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
AK,17,3,0,2,0,5,1,2,0
Altro,2,1,0,3,0,4,1,3,5
BCC,4,2,11,0,0,2,0,5,0
BKL,2,0,1,3,0,5,1,1,0
DF,0,0,0,0,0,2,0,0,3
MEL,1,2,1,3,0,57,4,2,2
NV,1,3,5,5,2,27,15,1,0
SCC,5,1,1,0,0,0,0,17,0
VASC,2,0,1,0,0,1,0,1,9


Unique classes in prediction1: ['AK' 'Altro' 'BCC' 'BKL' 'DF' 'MEL' 'NV' 'SCC' 'VASC']
Unique classes in prediction2: ['AK' 'Altro' 'BCC' 'BKL' 'DF' 'MEL' 'NV' 'SCC' 'VASC']
Accuracy: 0.5
Jaccard Score: 0.262774935593183
Cohen Kappa Score: 0.39541373018996173
Precision (Macro): 0.3957
Recall/Sensitivity (Macro): 0.4117
F1 Score (Macro): 0.3863

Classification Report:
               precision    recall  f1-score   support

          AK       0.50      0.57      0.53        30
       Altro       0.08      0.05      0.06        19
         BCC       0.55      0.46      0.50        24
         BKL       0.19      0.23      0.21        13
          DF       0.00      0.00      0.00         5
         MEL       0.55      0.79      0.65        72
          NV       0.68      0.25      0.37        59
         SCC       0.53      0.71      0.61        24
        VASC       0.47      0.64      0.55        14

    accuracy                           0.50       260
   macro avg       0.40      0.41

  if prediction_data_frame.dtypes[0] == 'object':


vote,AK,Altro,BCC,BKL,DF,MEL,NV,SCC,VASC
vote,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
AK,14,2,0,6,0,14,0,2,0
Altro,3,3,0,0,0,4,1,0,0
BCC,3,0,11,2,0,7,1,2,2
BKL,1,0,0,4,0,1,1,2,0
DF,0,0,0,0,0,2,0,0,4
MEL,0,1,3,0,0,59,8,1,1
NV,0,2,0,3,0,11,16,0,1
SCC,13,4,6,0,0,4,1,24,1
VASC,0,0,0,1,2,4,0,1,10


Unique classes in prediction1: ['AK' 'Altro' 'BCC' 'BKL' 'DF' 'MEL' 'NV' 'SCC' 'VASC']
Unique classes in prediction2: ['AK' 'Altro' 'BCC' 'BKL' 'DF' 'MEL' 'NV' 'SCC' 'VASC']
Accuracy: 0.5241635687732342
Jaccard Score: 0.27668755707352527
Cohen Kappa Score: 0.4215150955125082
Precision (Macro): 0.4296
Recall/Sensitivity (Macro): 0.4200
F1 Score (Macro): 0.4130

Classification Report:
               precision    recall  f1-score   support

          AK       0.41      0.37      0.39        38
       Altro       0.25      0.27      0.26        11
         BCC       0.55      0.39      0.46        28
         BKL       0.25      0.44      0.32         9
          DF       0.00      0.00      0.00         6
         MEL       0.56      0.81      0.66        73
          NV       0.57      0.48      0.52        33
         SCC       0.75      0.45      0.56        53
        VASC       0.53      0.56      0.54        18

    accuracy                           0.52       269
   macro avg     

  if prediction_data_frame.dtypes[0] == 'object':


In [18]:
types_of_image = diagnosis.difficult_img_version.unique()

In [19]:
for index_sx, id_expert_sx in enumerate(ids_expert):
    dataset_sx = diagnosis[diagnosis.wp_users_ID == id_expert_sx].set_index('difficult_img_image_name')
    for id_expert_dx in ids_expert[index_sx+1:]:
        dataset_dx = diagnosis[diagnosis.wp_users_ID == id_expert_dx].set_index('difficult_img_image_name')
        for type_image in types_of_image:
            print("=="*20)
            print("Expert sx: ", id_expert_sx, "Expert dx: ", id_expert_dx)
            print("Image type: ", type_image)
            print("=="*20)
            dataset_sx_type = dataset_sx[dataset_sx.difficult_img_version == type_image]
            dataset_dx_type = dataset_dx[dataset_dx.difficult_img_version == type_image]
            crosstab = pd.crosstab(dataset_sx_type.vote, dataset_dx_type.vote )
            display(crosstab)
            metrics(dataset_sx_type.vote, dataset_dx_type.vote )
            print(calculateVdiCramer(crosstab))

Expert sx:  2 Expert dx:  3
Image type:  1


vote,AK,Altro,BCC,BKL,MEL,NV,SCC,VASC
vote,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
AK,12,3,2,0,0,0,8,0
Altro,2,1,2,0,2,1,2,1
BCC,0,0,5,0,1,1,8,0
BKL,0,0,0,1,0,0,2,0
MEL,8,3,3,1,35,7,3,1
NV,5,2,4,2,13,16,3,3
SCC,2,0,0,0,0,0,12,0
VASC,0,0,0,0,0,0,2,2


Unique classes in prediction1: ['AK' 'Altro' 'BCC' 'BKL' 'MEL' 'NV' 'SCC' 'VASC']
Unique classes in prediction2: ['AK' 'Altro' 'BCC' 'BKL' 'MEL' 'NV' 'SCC' 'VASC']
Accuracy: 0.46408839779005523
Jaccard Score: 0.24256299256299257
Cohen Kappa Score: 0.3445456581796461
Precision (Macro): 0.3749
Recall/Sensitivity (Macro): 0.4377
F1 Score (Macro): 0.3780

Classification Report:
               precision    recall  f1-score   support

          AK       0.41      0.48      0.44        25
       Altro       0.11      0.09      0.10        11
         BCC       0.31      0.33      0.32        15
         BKL       0.25      0.33      0.29         3
         MEL       0.69      0.57      0.62        61
          NV       0.64      0.33      0.44        48
         SCC       0.30      0.86      0.44        14
        VASC       0.29      0.50      0.36         4

    accuracy                           0.46       181
   macro avg       0.37      0.44      0.38       181
weighted avg       0.52   

  if prediction_data_frame.dtypes[0] == 'object':


vote,AK,Altro,BCC,BKL,DF,MEL,NV,SCC,VASC
vote,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
AK,4,0,0,0,0,0,0,1,0
Altro,1,1,1,1,1,1,0,1,1
BCC,0,0,6,0,0,0,0,2,1
BKL,2,0,0,4,0,2,0,0,1
DF,0,0,0,0,5,0,0,0,0
MEL,0,0,0,0,0,10,0,0,1
NV,1,0,1,0,0,5,4,0,0
SCC,0,0,1,0,0,0,0,9,0
VASC,0,0,2,0,0,1,0,0,7


Unique classes in prediction1: ['AK' 'Altro' 'BCC' 'BKL' 'DF' 'MEL' 'NV' 'SCC' 'VASC']
Unique classes in prediction2: ['AK' 'Altro' 'BCC' 'BKL' 'DF' 'MEL' 'NV' 'SCC' 'VASC']
Accuracy: 0.6410256410256411
Jaccard Score: 0.4708714125380793
Cohen Kappa Score: 0.5932203389830508
Precision (Macro): 0.7260
Recall/Sensitivity (Macro): 0.6565
F1 Score (Macro): 0.6186

Classification Report:
               precision    recall  f1-score   support

          AK       0.50      0.80      0.62         5
       Altro       1.00      0.12      0.22         8
         BCC       0.55      0.67      0.60         9
         BKL       0.80      0.44      0.57         9
          DF       0.83      1.00      0.91         5
         MEL       0.53      0.91      0.67        11
          NV       1.00      0.36      0.53        11
         SCC       0.69      0.90      0.78        10
        VASC       0.64      0.70      0.67        10

    accuracy                           0.64        78
   macro avg      

  if prediction_data_frame.dtypes[0] == 'object':


vote,AK,Altro,BCC,BKL,DF,MEL,NV,SCC,VASC
vote,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
AK,13,3,0,1,0,5,1,2,0
Altro,1,1,0,3,0,2,1,1,2
BCC,4,2,4,0,0,1,0,4,0
BKL,0,0,1,1,0,2,0,0,0
MEL,1,2,1,3,0,46,4,2,2
NV,1,3,3,5,2,22,11,1,0
SCC,4,1,0,0,0,0,0,9,0
VASC,1,0,0,0,0,0,0,0,3


Unique classes in prediction1: ['AK' 'Altro' 'BCC' 'BKL' 'MEL' 'NV' 'SCC' 'VASC']
Unique classes in prediction2: ['AK' 'Altro' 'BCC' 'BKL' 'DF' 'MEL' 'NV' 'SCC' 'VASC']
Accuracy: 0.4835164835164835
Jaccard Score: 0.23418147293595323
Cohen Kappa Score: 0.34980237154150196
Precision (Macro): 0.3626
Recall/Sensitivity (Macro): 0.3893
F1 Score (Macro): 0.3499

Classification Report:
               precision    recall  f1-score   support

          AK       0.52      0.52      0.52        25
       Altro       0.08      0.09      0.09        11
         BCC       0.44      0.27      0.33        15
         BKL       0.08      0.25      0.12         4
          DF       0.00      0.00      0.00         0
         MEL       0.59      0.75      0.66        61
          NV       0.65      0.23      0.34        48
         SCC       0.47      0.64      0.55        14
        VASC       0.43      0.75      0.55         4

    accuracy                           0.48       182
   macro avg       0.

  if prediction_data_frame.dtypes[0] == 'object':
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


vote,AK,BCC,BKL,MEL,NV,SCC,VASC
vote,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
AK,4,0,1,0,0,0,0
Altro,1,0,0,2,0,2,3
BCC,0,7,0,1,0,1,0
BKL,2,0,2,3,1,1,0
DF,0,0,0,2,0,0,3
MEL,0,0,0,11,0,0,0
NV,0,2,0,5,4,0,0
SCC,1,1,0,0,0,8,0
VASC,1,1,0,1,0,1,6


Unique classes in prediction1: ['AK' 'Altro' 'BCC' 'BKL' 'DF' 'MEL' 'NV' 'SCC' 'VASC']
Unique classes in prediction2: ['AK' 'BCC' 'BKL' 'MEL' 'NV' 'SCC' 'VASC']
Accuracy: 0.5384615384615384
Jaccard Score: 0.3133475783475783
Cohen Kappa Score: 0.47346709169323087
Precision (Macro): 0.4559
Recall/Sensitivity (Macro): 0.5071
F1 Score (Macro): 0.4397

Classification Report:
               precision    recall  f1-score   support

          AK       0.44      0.80      0.57         5
       Altro       0.00      0.00      0.00         8
         BCC       0.64      0.78      0.70         9
         BKL       0.67      0.22      0.33         9
          DF       0.00      0.00      0.00         5
         MEL       0.44      1.00      0.61        11
          NV       0.80      0.36      0.50        11
         SCC       0.62      0.80      0.70        10
        VASC       0.50      0.60      0.55        10

    accuracy                           0.54        78
   macro avg       0.46      0

  if prediction_data_frame.dtypes[0] == 'object':
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


vote,AK,Altro,BCC,BKL,DF,MEL,NV,SCC,VASC
vote,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
AK,9,2,0,4,0,13,0,2,0
Altro,3,3,0,0,0,3,1,0,0
BCC,2,0,3,2,0,6,1,1,1
BKL,0,0,0,3,0,1,0,0,0
MEL,0,1,2,0,0,43,6,1,1
NV,0,2,0,3,0,9,13,0,1
SCC,11,4,4,0,0,4,1,15,1
VASC,0,0,0,1,2,1,0,0,3


Unique classes in prediction1: ['AK' 'Altro' 'BCC' 'BKL' 'MEL' 'NV' 'SCC' 'VASC']
Unique classes in prediction2: ['AK' 'Altro' 'BCC' 'BKL' 'DF' 'MEL' 'NV' 'SCC' 'VASC']
Accuracy: 0.48677248677248675
Jaccard Score: 0.23796793876885414
Cohen Kappa Score: 0.3658595641646488
Precision (Macro): 0.3912
Recall/Sensitivity (Macro): 0.4002
F1 Score (Macro): 0.3658

Classification Report:
               precision    recall  f1-score   support

          AK       0.36      0.30      0.33        30
       Altro       0.25      0.30      0.27        10
         BCC       0.33      0.19      0.24        16
         BKL       0.23      0.75      0.35         4
          DF       0.00      0.00      0.00         0
         MEL       0.54      0.80      0.64        54
          NV       0.59      0.46      0.52        28
         SCC       0.79      0.38      0.51        40
        VASC       0.43      0.43      0.43         7

    accuracy                           0.49       189
   macro avg       0.

  if prediction_data_frame.dtypes[0] == 'object':
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


vote,AK,BCC,BKL,MEL,NV,SCC,VASC
vote,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
AK,5,0,2,1,0,0,0
Altro,0,0,0,1,0,0,0
BCC,1,8,0,1,0,1,1
BKL,1,0,1,0,1,2,0
DF,0,0,0,2,0,0,4
MEL,0,1,0,16,2,0,0
NV,0,0,0,2,3,0,0
SCC,2,2,0,0,0,9,0
VASC,0,0,0,3,0,1,7


Unique classes in prediction1: ['AK' 'Altro' 'BCC' 'BKL' 'DF' 'MEL' 'NV' 'SCC' 'VASC']
Unique classes in prediction2: ['AK' 'BCC' 'BKL' 'MEL' 'NV' 'SCC' 'VASC']
Accuracy: 0.6125
Jaccard Score: 0.3318325606104511
Cohen Kappa Score: 0.5369678864824496
Precision (Macro): 0.4452
Recall/Sensitivity (Macro): 0.4736
F1 Score (Macro): 0.4546

Classification Report:
               precision    recall  f1-score   support

          AK       0.56      0.62      0.59         8
       Altro       0.00      0.00      0.00         1
         BCC       0.73      0.67      0.70        12
         BKL       0.33      0.20      0.25         5
          DF       0.00      0.00      0.00         6
         MEL       0.62      0.84      0.71        19
          NV       0.50      0.60      0.55         5
         SCC       0.69      0.69      0.69        13
        VASC       0.58      0.64      0.61        11

    accuracy                           0.61        80
   macro avg       0.45      0.47      0.45

  if prediction_data_frame.dtypes[0] == 'object':
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


In [20]:
consensus_diagnosi = diagnosis.groupby("difficult_img_image_name", as_index = False).vote.apply(lambda x: ','.join(x))
diagnosis_version = diagnosis.groupby("difficult_img_image_name", as_index = False).difficult_img_version.first()
consensus_diagnosi = consensus_diagnosi.merge(diagnosis_version, on="difficult_img_image_name")

In [21]:
consensus_diagnosi = consensus_diagnosi.apply(find_consensus, axis = 1)
consensus_diagnosi

Unnamed: 0,difficult_img_image_name,vote,difficult_img_version,NV,MEL,AK,BKL,SCC,BCC,DF,VASC,Altro,consensus_vote
0,ISIC_0000072,"NV, NV",1,2,0,0,0,0,0,0,0,0,NV
1,ISIC_0000343,"Altro , MEL",1,0,1,0,0,0,0,0,0,1,
2,ISIC_0000465,"BCC , MEL",2,0,1,0,0,0,1,0,0,0,
3,ISIC_0000533,"NV, NV",1,2,0,0,0,0,0,0,0,0,NV
4,ISIC_0006193_downsampled,"NV, NV",2,2,0,0,0,0,0,0,0,0,NV
...,...,...,...,...,...,...,...,...,...,...,...,...,...
265,ISIC_0072286,"AK, AK, BKL",1,0,0,2,1,0,0,0,0,0,AK
266,ISIC_0072573,"VASC, VASC, VASC",1,0,0,0,0,0,0,0,3,0,VASC
267,ISIC_0072871,"BCC , BCC , BCC",2,0,0,0,0,0,3,0,0,0,BCC
268,ISIC_0072887,"BKL , SCC , BCC",1,0,0,0,1,1,1,0,0,0,


In [22]:
consensus_diagnosi.consensus_vote.value_counts(dropna=False)

consensus_vote
MEL      79
None     53
NV       33
SCC      30
AK       29
BCC      15
VASC     12
BKL       8
Altro     6
DF        5
Name: count, dtype: int64

In [23]:
print(len(consensus_diagnosi[consensus_diagnosi.consensus_vote.isna()]))
consensus_diagnosi[consensus_diagnosi.consensus_vote.isna()].groupby("difficult_img_version").size()

53


difficult_img_version
1    44
2     9
dtype: int64

In [24]:
consensus_diagnosi = consensus_diagnosi[consensus_diagnosi.consensus_vote.notna()].set_index('difficult_img_image_name')
consensus_diagnosi

Unnamed: 0_level_0,vote,difficult_img_version,NV,MEL,AK,BKL,SCC,BCC,DF,VASC,Altro,consensus_vote
difficult_img_image_name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1
ISIC_0000072,"NV, NV",1,2,0,0,0,0,0,0,0,0,NV
ISIC_0000533,"NV, NV",1,2,0,0,0,0,0,0,0,0,NV
ISIC_0006193_downsampled,"NV, NV",2,2,0,0,0,0,0,0,0,0,NV
ISIC_0010011,"NV, NV",1,2,0,0,0,0,0,0,0,0,NV
ISIC_0010265,"MEL , MEL",1,0,2,0,0,0,0,0,0,0,MEL
...,...,...,...,...,...,...,...,...,...,...,...,...
ISIC_0071772,"SCC , BCC , SCC",2,0,0,0,0,2,1,0,0,0,SCC
ISIC_0072286,"AK, AK, BKL",1,0,0,2,1,0,0,0,0,0,AK
ISIC_0072573,"VASC, VASC, VASC",1,0,0,0,0,0,0,0,3,0,VASC
ISIC_0072871,"BCC , BCC , BCC",2,0,0,0,0,0,3,0,0,0,BCC


# Comparing diagnosis 

In [25]:
# deleting "UNK" diagnosis
diagnosis = diagnosis[diagnosis.vote != "Altro"]
consensus_diagnosi = consensus_diagnosi[consensus_diagnosi.consensus_vote != "Altro"]

In [26]:
types_of_image = np.unique(diagnosis.difficult_img_version)

## Diagnosis vs label

In [27]:
truth = prediction_diagnosis.groupby("difficult_img_image_name").first()

In [28]:
prediction_diagnosis

Unnamed: 0,wp_users_ID,difficult_img_image_name,difficult_img_version,vote,text,timestamp,deleted,truth,EfficientNetB4,EfficientNetB5,EfficientNetB6,Resnet152,Resnext50
0,2,ISIC_0010441,1,NV,,2023-11-05 16:27:42,0,MEL,BCC,BCC,BCC,BCC,BCC
1,2,ISIC_0010476,1,MEL,,2023-10-05 10:43:27,0,MEL,BKL,MEL,NV,NV,NV
2,2,ISIC_0010576,1,NV,,2023-11-05 16:28:02,0,MEL,NV,NV,NV,NV,NV
3,2,ISIC_0010849,1,NV,,2023-10-05 10:43:27,0,NV,MEL,NV,NV,MEL,MEL
4,2,ISIC_0011084,1,MEL,,2023-11-05 16:27:53,0,NV,MEL,BKL,BKL,MEL,MEL
...,...,...,...,...,...,...,...,...,...,...,...,...,...
794,4,ISIC_0072286,1,BKL,,2023-11-10 23:32:02,0,MEL,BKL,BKL,BKL,BKL,BKL
795,4,ISIC_0072573,1,VASC,,2023-11-10 12:11:42,0,BCC,VASC,NV,VASC,MEL,BCC
796,4,ISIC_0072871,2,BCC,,2023-11-10 23:12:12,0,BCC,BCC,BCC,BCC,BCC,BCC
797,4,ISIC_0072887,1,BCC,,2023-11-10 23:40:31,0,MEL,BKL,BKL,BKL,MEL,AK


In [29]:
diagnosis_label = pd.crosstab(truth.truth, consensus_diagnosi.consensus_vote)
diagnosis_label

consensus_vote,AK,BCC,BKL,DF,MEL,NV,SCC,VASC
truth,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
AK,6,0,0,0,5,1,10,0
BCC,6,8,0,0,1,1,6,1
BKL,5,1,6,0,10,4,3,0
DF,0,0,0,5,3,1,0,1
MEL,7,0,1,0,40,14,1,1
NV,1,3,1,0,15,11,2,0
SCC,4,3,0,0,4,0,8,0
VASC,0,0,0,0,1,1,0,9


In [30]:
V_diagnosi_cnn_old= calculateVdiCramer(diagnosis_label)
V_diagnosi_cnn_old

0.5023547748014681

In [31]:
metrics(truth.truth, consensus_diagnosi.consensus_vote)

Unique classes in prediction1: ['AK' 'BCC' 'BKL' 'DF' 'MEL' 'NV' 'SCC' 'VASC']
Unique classes in prediction2: ['AK' 'BCC' 'BKL' 'DF' 'MEL' 'NV' 'SCC' 'VASC']
Accuracy: 0.44075829383886256
Jaccard Score: 0.31498462446706704
Cohen Kappa Score: 0.31615809277925777
Precision (Macro): 0.5433
Recall/Sensitivity (Macro): 0.4406
F1 Score (Macro): 0.4562

Classification Report:
               precision    recall  f1-score   support

          AK       0.21      0.27      0.24        22
         BCC       0.53      0.35      0.42        23
         BKL       0.75      0.21      0.32        29
          DF       1.00      0.50      0.67        10
         MEL       0.51      0.62      0.56        64
          NV       0.33      0.33      0.33        33
         SCC       0.27      0.42      0.33        19
        VASC       0.75      0.82      0.78        11

    accuracy                           0.44       211
   macro avg       0.54      0.44      0.46       211
weighted avg       0.50      0.

  if prediction_data_frame.dtypes[0] == 'object':


{'accuracy': 0.44075829383886256,
 'jaccard': 0.31498462446706704,
 'kappa': 0.31615809277925777,
 'precision_macro': 0.5433198748726902,
 'recall_macro': 0.44062721181275394,
 'f1_macro': 0.45615636761099526,
 'specificity_per_class': [0.8783068783068783,
  0.9627659574468085,
  0.989010989010989,
  1.0,
  0.7346938775510204,
  0.8764044943820225,
  0.8854166666666666,
  0.985]}

In [43]:
types_of_image = [1, 2]
for type_image in types_of_image:
    dataset = truth[truth.difficult_img_version == type_image]
    crosstab = pd.crosstab(dataset.truth, consensus_diagnosi.consensus_vote)
    display(crosstab)
    metrics(dataset.truth, consensus_diagnosi.consensus_vote)
    print(calculateVdiCramer(crosstab))

dx,AK,BCC,BKL,MEL,NV,SCC,VASC
truth,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
AK,2,0,0,5,0,6,0
BCC,6,0,0,1,1,4,1
BKL,3,1,1,9,4,2,0
DF,0,0,0,1,1,0,1
MEL,6,0,1,32,14,1,1
NV,1,3,1,12,5,2,0
SCC,4,2,0,2,0,3,0
VASC,0,0,0,0,1,0,1


Unique classes in prediction1: ['AK' 'BCC' 'BKL' 'DF' 'MEL' 'NV' 'SCC' 'VASC']
Unique classes in prediction2: ['AK' 'BCC' 'BKL' 'MEL' 'NV' 'SCC' 'VASC']
Accuracy: 0.3120567375886525
Jaccard Score: 0.11362836509895334
Cohen Kappa Score: 0.10120260235263179
Precision (Macro): 0.1937
Recall/Sensitivity (Macro): 0.2208
F1 Score (Macro): 0.1861

Classification Report:
               precision    recall  f1-score   support

          AK       0.09      0.15      0.11        13
         BCC       0.00      0.00      0.00        13
         BKL       0.33      0.05      0.09        20
          DF       0.00      0.00      0.00         3
         MEL       0.52      0.58      0.55        55
          NV       0.19      0.21      0.20        24
         SCC       0.17      0.27      0.21        11
        VASC       0.25      0.50      0.33         2

    accuracy                           0.31       141
   macro avg       0.19      0.22      0.19       141
weighted avg       0.31      0.31    

  if prediction_data_frame.dtypes[0] == 'object':
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


dx,AK,BCC,BKL,DF,MEL,NV,SCC,VASC
truth,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
AK,4,0,0,0,0,1,4,0
BCC,0,8,0,0,0,0,2,0
BKL,2,0,5,0,1,0,1,0
DF,0,0,0,5,2,0,0,0
MEL,1,0,0,0,8,0,0,0
NV,0,0,0,0,3,6,0,0
SCC,0,1,0,0,2,0,5,0
VASC,0,0,0,0,1,0,0,8


Unique classes in prediction1: ['AK' 'BCC' 'BKL' 'DF' 'MEL' 'NV' 'SCC' 'VASC']
Unique classes in prediction2: ['AK' 'BCC' 'BKL' 'DF' 'MEL' 'NV' 'SCC' 'VASC']
Accuracy: 0.7
Jaccard Score: 0.5746392496392496
Cohen Kappa Score: 0.6567826289983656
Precision (Macro): 0.7756
Recall/Sensitivity (Macro): 0.6980
F1 Score (Macro): 0.7120

Classification Report:
               precision    recall  f1-score   support

          AK       0.57      0.44      0.50         9
         BCC       0.89      0.80      0.84        10
         BKL       1.00      0.56      0.71         9
          DF       1.00      0.71      0.83         7
         MEL       0.47      0.89      0.62         9
          NV       0.86      0.67      0.75         9
         SCC       0.42      0.62      0.50         8
        VASC       1.00      0.89      0.94         9

    accuracy                           0.70        70
   macro avg       0.78      0.70      0.71        70
weighted avg       0.78      0.70      0.71      

  if prediction_data_frame.dtypes[0] == 'object':


## CNN cs Diagnosis

In [33]:
truth["vote_ensemble"] = truth[["EfficientNetB4","EfficientNetB5","EfficientNetB6","Resnet152","Resnext50"]].mode(axis=1)[0]

In [34]:
truth

Unnamed: 0_level_0,wp_users_ID,difficult_img_version,vote,text,timestamp,deleted,truth,EfficientNetB4,EfficientNetB5,EfficientNetB6,Resnet152,Resnext50,vote_ensemble
difficult_img_image_name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1
ISIC_0000072,3,1,NV,,2023-11-07 07:50:58,0,NV,MEL,MEL,MEL,MEL,MEL,MEL
ISIC_0000343,3,1,Altro,solar lentigo,2023-11-07 11:21:36,0,NV,MEL,MEL,MEL,MEL,NV,MEL
ISIC_0000465,3,2,BCC,,2023-11-07 08:40:33,0,NV,NV,NV,NV,NV,NV,NV
ISIC_0000533,3,1,NV,,2023-11-07 07:51:08,0,MEL,NV,NV,NV,NV,NV,NV
ISIC_0006193_downsampled,3,2,NV,,2023-11-07 08:40:46,0,NV,NV,NV,NV,NV,NV,NV
...,...,...,...,...,...,...,...,...,...,...,...,...,...
ISIC_0072286,2,1,AK,,2023-11-05 16:45:16,0,MEL,BKL,BKL,BKL,BKL,BKL,BKL
ISIC_0072573,2,1,VASC,,2023-10-05 10:43:27,0,BCC,VASC,NV,VASC,MEL,BCC,VASC
ISIC_0072871,2,2,BCC,,2023-11-05 16:56:31,0,BCC,BCC,BCC,BCC,BCC,BCC,BCC
ISIC_0072887,2,1,BKL,,2023-10-05 10:43:27,0,MEL,BKL,BKL,BKL,MEL,AK,BKL


In [35]:
diagnosi_cnn = pd.crosstab(truth["vote_ensemble"], consensus_diagnosi.consensus_vote)
diagnosi_cnn

dx,AK,BCC,BKL,DF,MEL,NV,SCC,VASC
vote_ensemble,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
AK,16,0,0,0,0,2,9,0
BCC,2,14,0,0,2,0,8,1
BKL,6,0,7,0,6,1,3,0
DF,0,0,0,5,4,0,0,0
MEL,5,0,0,0,21,4,1,0
NV,0,0,1,0,42,26,2,2
SCC,0,1,0,0,3,0,7,0
VASC,0,0,0,0,1,0,0,9


In [36]:
V_diagnosi_cnn = calculateVdiCramer(diagnosi_cnn)
V_diagnosi_cnn

0.6091380217051517

In [44]:
for type_image in types_of_image:
    dataset = consensus_diagnosi[consensus_diagnosi.difficult_img_version == type_image]
    crosstab = pd.crosstab(dataset.consensus_vote, truth["vote_ensemble"])
    display(crosstab)
    metrics(dataset.consensus_vote, truth["vote_ensemble"])
    print(calculateVdiCramer(crosstab))  

dx,AK,BCC,BKL,DF,MEL,NV,SCC,VASC
consensus_vote,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
AK,12,2,4,0,4,0,0,0
BCC,0,6,0,0,0,0,0,0
BKL,0,0,2,0,0,1,0,0
MEL,0,2,5,2,13,39,1,0
NV,1,0,1,0,4,20,0,0
SCC,5,6,2,0,1,2,2,0
VASC,0,1,0,0,0,2,0,1


Unique classes in prediction1: ['AK' 'BCC' 'BKL' 'MEL' 'NV' 'SCC' 'VASC']
Unique classes in prediction2: ['AK' 'BCC' 'BKL' 'DF' 'MEL' 'NV' 'SCC' 'VASC']
Accuracy: 0.3971631205673759
Jaccard Score: 0.21736524669170854
Cohen Kappa Score: 0.26268840356813294
Precision (Macro): 0.4666
Recall/Sensitivity (Macro): 0.4440
F1 Score (Macro): 0.3377

Classification Report:
               precision    recall  f1-score   support

          AK       0.67      0.55      0.60        22
         BCC       0.35      1.00      0.52         6
         BKL       0.14      0.67      0.24         3
          DF       0.00      0.00      0.00         0
         MEL       0.59      0.21      0.31        62
          NV       0.31      0.77      0.44        26
         SCC       0.67      0.11      0.19        18
        VASC       1.00      0.25      0.40         4

    accuracy                           0.40       141
   macro avg       0.47      0.44      0.34       141
weighted avg       0.55      0.40    

  if prediction_data_frame.dtypes[0] == 'object':
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


dx,AK,BCC,BKL,DF,MEL,NV,SCC,VASC
consensus_vote,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
AK,4,0,2,0,1,0,0,0
BCC,0,8,0,0,0,0,1,0
BKL,0,0,5,0,0,0,0,0
DF,0,0,0,5,0,0,0,0
MEL,0,0,1,2,8,3,2,1
NV,1,0,0,0,0,6,0,0
SCC,4,2,1,0,0,0,5,0
VASC,0,0,0,0,0,0,0,8


Unique classes in prediction1: ['AK' 'BCC' 'BKL' 'DF' 'MEL' 'NV' 'SCC' 'VASC']
Unique classes in prediction2: ['AK' 'BCC' 'BKL' 'DF' 'MEL' 'NV' 'SCC' 'VASC']
Accuracy: 0.7
Jaccard Score: 0.5746392496392496
Cohen Kappa Score: 0.6567826289983656
Precision (Macro): 0.6980
Recall/Sensitivity (Macro): 0.7756
F1 Score (Macro): 0.7120

Classification Report:
               precision    recall  f1-score   support

          AK       0.44      0.57      0.50         7
         BCC       0.80      0.89      0.84         9
         BKL       0.56      1.00      0.71         5
          DF       0.71      1.00      0.83         5
         MEL       0.89      0.47      0.62        17
          NV       0.67      0.86      0.75         7
         SCC       0.62      0.42      0.50        12
        VASC       0.89      1.00      0.94         8

    accuracy                           0.70        70
   macro avg       0.70      0.78      0.71        70
weighted avg       0.73      0.70      0.69      

  if prediction_data_frame.dtypes[0] == 'object':


## CNN vs single expert

In [38]:
for index_sx, id_expert_sx in enumerate(ids_expert):
    print("=="*10)
    print("Expert:", id_expert_sx)
    print("=="*10)
    dataset = diagnosis[diagnosis.wp_users_ID == id_expert_sx].set_index('difficult_img_image_name')
    for type_image in types_of_image:
        dataset_to_use = dataset[dataset.difficult_img_version == type_image]
        crosstab = pd.crosstab(dataset_to_use.vote, truth["vote_ensemble"] )
        display(crosstab)
        metrics(dataset_to_use.vote, truth["vote_ensemble"] )
        print(calculateVdiCramer(crosstab))

Expert: 2


dx,AK,BCC,BKL,DF,MEL,NV,SCC,VASC
vote,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
AK,13,1,7,0,4,0,0,0
Altro,2,5,2,0,1,1,0,0
BCC,0,12,0,0,2,1,0,0
BKL,1,0,3,0,0,0,0,0
MEL,0,5,6,2,12,33,3,0
NV,5,4,3,0,5,30,0,1
SCC,7,5,0,0,0,1,1,0
VASC,0,0,0,0,0,3,0,1


Unique classes in prediction1: ['AK' 'Altro' 'BCC' 'BKL' 'MEL' 'NV' 'SCC' 'VASC']
Unique classes in prediction2: ['AK' 'BCC' 'BKL' 'DF' 'MEL' 'NV' 'SCC' 'VASC']
Accuracy: 0.3956043956043956
Jaccard Score: 0.174695050720364
Cohen Kappa Score: 0.2590673575129533
Precision (Macro): 0.2963
Recall/Sensitivity (Macro): 0.3570
F1 Score (Macro): 0.2756

Classification Report:
               precision    recall  f1-score   support

          AK       0.46      0.52      0.49        25
       Altro       0.00      0.00      0.00        11
         BCC       0.38      0.80      0.51        15
         BKL       0.14      0.75      0.24         4
          DF       0.00      0.00      0.00         0
         MEL       0.50      0.20      0.28        61
          NV       0.43      0.62      0.51        48
         SCC       0.25      0.07      0.11        14
        VASC       0.50      0.25      0.33         4

    accuracy                           0.40       182
   macro avg       0.30      0.3

  if prediction_data_frame.dtypes[0] == 'object':
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


dx,AK,BCC,BKL,DF,MEL,NV,SCC,VASC
vote,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
AK,2,0,2,0,1,0,0,0
Altro,2,0,1,3,1,0,0,1
BCC,0,6,0,0,0,0,2,1
BKL,1,0,6,1,0,1,0,0
DF,0,0,0,5,0,0,0,0
MEL,0,0,0,1,8,0,2,0
NV,1,1,1,0,0,7,1,0
SCC,4,2,0,0,0,0,4,0
VASC,0,1,0,0,0,0,1,8


Unique classes in prediction1: ['AK' 'Altro' 'BCC' 'BKL' 'DF' 'MEL' 'NV' 'SCC' 'VASC']
Unique classes in prediction2: ['AK' 'BCC' 'BKL' 'DF' 'MEL' 'NV' 'SCC' 'VASC']
Accuracy: 0.5897435897435898
Jaccard Score: 0.4102564102564103
Cohen Kappa Score: 0.5382907880133185
Precision (Macro): 0.5306
Recall/Sensitivity (Macro): 0.5886
F1 Score (Macro): 0.5439

Classification Report:
               precision    recall  f1-score   support

          AK       0.20      0.40      0.27         5
       Altro       0.00      0.00      0.00         8
         BCC       0.60      0.67      0.63         9
         BKL       0.60      0.67      0.63         9
          DF       0.50      1.00      0.67         5
         MEL       0.80      0.73      0.76        11
          NV       0.88      0.64      0.74        11
         SCC       0.40      0.40      0.40        10
        VASC       0.80      0.80      0.80        10

    accuracy                           0.59        78
   macro avg       0.53   

  if prediction_data_frame.dtypes[0] == 'object':
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


dx,AK,BCC,BKL,DF,MEL,NV,SCC,VASC
vote,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
AK,10,3,7,0,6,3,1,0
Altro,3,1,1,0,4,1,0,0
BCC,1,9,2,0,1,3,0,0
BKL,0,0,3,0,0,1,0,0
MEL,0,2,3,1,9,39,0,0
NV,1,2,2,1,4,18,0,0
SCC,12,13,3,0,2,7,3,0
VASC,1,2,0,0,0,2,0,2


Unique classes in prediction1: ['AK' 'Altro' 'BCC' 'BKL' 'MEL' 'NV' 'SCC' 'VASC']
Unique classes in prediction2: ['AK' 'BCC' 'BKL' 'DF' 'MEL' 'NV' 'SCC' 'VASC']
Accuracy: 0.2857142857142857
Jaccard Score: 0.14171083283931102
Cohen Kappa Score: 0.16712910070181164
Precision (Macro): 0.3467
Recall/Sensitivity (Macro): 0.3129
F1 Score (Macro): 0.2354

Classification Report:
               precision    recall  f1-score   support

          AK       0.36      0.33      0.34        30
       Altro       0.00      0.00      0.00        10
         BCC       0.28      0.56      0.38        16
         BKL       0.14      0.75      0.24         4
          DF       0.00      0.00      0.00         0
         MEL       0.35      0.17      0.23        54
          NV       0.24      0.64      0.35        28
         SCC       0.75      0.07      0.14        40
        VASC       1.00      0.29      0.44         7

    accuracy                           0.29       189
   macro avg       0.35      

  if prediction_data_frame.dtypes[0] == 'object':
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


dx,AK,BCC,BKL,DF,MEL,NV,SCC,VASC
vote,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
AK,4,0,3,0,1,0,0,0
Altro,0,0,0,0,1,0,0,0
BCC,2,7,0,0,0,1,2,0
BKL,0,0,4,1,0,0,0,0
DF,0,0,0,6,0,0,0,0
MEL,0,0,1,1,8,5,3,1
NV,1,0,0,0,0,4,0,0
SCC,3,3,2,0,0,0,5,0
VASC,0,0,0,2,0,0,0,9


Unique classes in prediction1: ['AK' 'Altro' 'BCC' 'BKL' 'DF' 'MEL' 'NV' 'SCC' 'VASC']
Unique classes in prediction2: ['AK' 'BCC' 'BKL' 'DF' 'MEL' 'NV' 'SCC' 'VASC']
Accuracy: 0.5875
Jaccard Score: 0.38759820426487096
Cohen Kappa Score: 0.5294117647058824
Precision (Macro): 0.5222
Recall/Sensitivity (Macro): 0.5897
F1 Score (Macro): 0.5268

Classification Report:
               precision    recall  f1-score   support

          AK       0.40      0.50      0.44         8
       Altro       0.00      0.00      0.00         1
         BCC       0.70      0.58      0.64        12
         BKL       0.40      0.80      0.53         5
          DF       0.60      1.00      0.75         6
         MEL       0.80      0.42      0.55        19
          NV       0.40      0.80      0.53         5
         SCC       0.50      0.38      0.43        13
        VASC       0.90      0.82      0.86        11

    accuracy                           0.59        80
   macro avg       0.52      0.59    

  if prediction_data_frame.dtypes[0] == 'object':
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


dx,AK,BCC,BKL,DF,MEL,NV,SCC,VASC
vote,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
AK,13,7,2,0,2,1,0,0
Altro,6,3,0,1,0,2,0,0
BCC,0,6,2,0,0,1,0,0
BKL,0,4,4,0,0,4,1,0
DF,1,0,0,0,0,0,0,1
MEL,3,4,9,1,17,46,1,0
NV,1,0,3,0,3,15,0,0
SCC,4,6,2,0,3,2,2,0
VASC,0,2,0,0,1,3,0,1


Unique classes in prediction1: ['AK' 'Altro' 'BCC' 'BKL' 'DF' 'MEL' 'NV' 'SCC' 'VASC']
Unique classes in prediction2: ['AK' 'BCC' 'BKL' 'DF' 'MEL' 'NV' 'SCC' 'VASC']
Accuracy: 0.30526315789473685
Jaccard Score: 0.13553033320058408
Cohen Kappa Score: 0.1909155429382542
Precision (Macro): 0.2989
Recall/Sensitivity (Macro): 0.2927
F1 Score (Macro): 0.2265

Classification Report:
               precision    recall  f1-score   support

          AK       0.46      0.52      0.49        25
       Altro       0.00      0.00      0.00        12
         BCC       0.19      0.67      0.29         9
         BKL       0.18      0.31      0.23        13
          DF       0.00      0.00      0.00         2
         MEL       0.65      0.21      0.32        81
          NV       0.20      0.68      0.31        22
         SCC       0.50      0.11      0.17        19
        VASC       0.50      0.14      0.22         7

    accuracy                           0.31       190
   macro avg       0.30 

  if prediction_data_frame.dtypes[0] == 'object':
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


dx,AK,BCC,BKL,DF,MEL,NV,SCC,VASC
vote,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
AK,4,0,2,0,1,0,2,0
BCC,0,9,0,0,0,0,2,0
BKL,0,0,3,0,0,0,0,0
MEL,1,0,2,5,9,5,2,2
NV,0,0,1,0,0,5,0,0
SCC,4,1,2,1,0,0,4,1
VASC,1,0,0,4,0,0,0,7


Unique classes in prediction1: ['AK' 'BCC' 'BKL' 'MEL' 'NV' 'SCC' 'VASC']
Unique classes in prediction2: ['AK' 'BCC' 'BKL' 'DF' 'MEL' 'NV' 'SCC' 'VASC']
Accuracy: 0.5125
Jaccard Score: 0.34771730462519934
Cohen Kappa Score: 0.44285714285714284
Precision (Macro): 0.5125
Recall/Sensitivity (Macro): 0.5416
F1 Score (Macro): 0.4811

Classification Report:
               precision    recall  f1-score   support

          AK       0.40      0.44      0.42         9
         BCC       0.90      0.82      0.86        11
         BKL       0.30      1.00      0.46         3
          DF       0.00      0.00      0.00         0
         MEL       0.90      0.35      0.50        26
          NV       0.50      0.83      0.62         6
         SCC       0.40      0.31      0.35        13
        VASC       0.70      0.58      0.64        12

    accuracy                           0.51        80
   macro avg       0.51      0.54      0.48        80
weighted avg       0.68      0.51      0.54      

  if prediction_data_frame.dtypes[0] == 'object':
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


In [39]:
for index_sx, id_expert_sx in enumerate(ids_expert):
    print("=="*10)
    print("Expert:", id_expert_sx)
    print("=="*10)
    dataset = diagnosis[diagnosis.wp_users_ID == id_expert_sx].set_index('difficult_img_image_name')
    
    crosstab = pd.crosstab(dataset.vote, truth["vote_ensemble"] )
    display(crosstab)
    metrics(dataset.vote, truth["vote_ensemble"] )
    print(calculateVdiCramer(crosstab))

Expert: 2


dx,AK,BCC,BKL,DF,MEL,NV,SCC,VASC
vote,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
AK,15,1,9,0,5,0,0,0
Altro,4,5,3,3,2,1,0,1
BCC,0,18,0,0,2,1,2,1
BKL,2,0,9,1,0,1,0,0
DF,0,0,0,5,0,0,0,0
MEL,0,5,6,3,20,33,5,0
NV,6,5,4,0,5,37,1,1
SCC,11,7,0,0,0,1,5,0
VASC,0,1,0,0,0,3,1,9


Unique classes in prediction1: ['AK' 'Altro' 'BCC' 'BKL' 'DF' 'MEL' 'NV' 'SCC' 'VASC']
Unique classes in prediction2: ['AK' 'BCC' 'BKL' 'DF' 'MEL' 'NV' 'SCC' 'VASC']
Accuracy: 0.45384615384615384
Jaccard Score: 0.2910056468030382
Cohen Kappa Score: 0.3578460361081156
Precision (Macro): 0.4118
Recall/Sensitivity (Macro): 0.5220
F1 Score (Macro): 0.4290

Classification Report:
               precision    recall  f1-score   support

          AK       0.39      0.50      0.44        30
       Altro       0.00      0.00      0.00        19
         BCC       0.43      0.75      0.55        24
         BKL       0.29      0.69      0.41        13
          DF       0.42      1.00      0.59         5
         MEL       0.59      0.28      0.38        72
          NV       0.48      0.63      0.54        59
         SCC       0.36      0.21      0.26        24
        VASC       0.75      0.64      0.69        14

    accuracy                           0.45       260
   macro avg       0.41  

  if prediction_data_frame.dtypes[0] == 'object':
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


dx,AK,BCC,BKL,DF,MEL,NV,SCC,VASC
vote,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
AK,14,3,10,0,7,3,1,0
Altro,3,1,1,0,5,1,0,0
BCC,3,16,2,0,1,4,2,0
BKL,0,0,7,1,0,1,0,0
DF,0,0,0,6,0,0,0,0
MEL,0,2,4,2,17,44,3,1
NV,2,2,2,1,4,22,0,0
SCC,15,16,5,0,2,7,8,0
VASC,1,2,0,2,0,2,0,11


Unique classes in prediction1: ['AK' 'Altro' 'BCC' 'BKL' 'DF' 'MEL' 'NV' 'SCC' 'VASC']
Unique classes in prediction2: ['AK' 'BCC' 'BKL' 'DF' 'MEL' 'NV' 'SCC' 'VASC']
Accuracy: 0.3754646840148699
Jaccard Score: 0.2627917894282801
Cohen Kappa Score: 0.2830308414773448
Precision (Macro): 0.4108
Recall/Sensitivity (Macro): 0.4866
F1 Score (Macro): 0.3892

Classification Report:
               precision    recall  f1-score   support

          AK       0.37      0.37      0.37        38
       Altro       0.00      0.00      0.00        11
         BCC       0.38      0.57      0.46        28
         BKL       0.23      0.78      0.35         9
          DF       0.50      1.00      0.67         6
         MEL       0.47      0.23      0.31        73
          NV       0.26      0.67      0.38        33
         SCC       0.57      0.15      0.24        53
        VASC       0.92      0.61      0.73        18

    accuracy                           0.38       269
   macro avg       0.41   

  if prediction_data_frame.dtypes[0] == 'object':
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


dx,AK,BCC,BKL,DF,MEL,NV,SCC,VASC
vote,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
AK,17,7,4,0,3,1,2,0
Altro,6,3,0,1,0,2,0,0
BCC,0,15,2,0,0,1,2,0
BKL,0,4,7,0,0,4,1,0
DF,1,0,0,0,0,0,0,1
MEL,4,4,11,6,26,51,3,2
NV,1,0,4,0,3,20,0,0
SCC,8,7,4,1,3,2,6,1
VASC,1,2,0,4,1,3,0,8


  if prediction_data_frame.dtypes[0] == 'object':
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


Unique classes in prediction1: ['AK' 'Altro' 'BCC' 'BKL' 'DF' 'MEL' 'NV' 'SCC' 'VASC']
Unique classes in prediction2: ['AK' 'BCC' 'BKL' 'DF' 'MEL' 'NV' 'SCC' 'VASC']
Accuracy: 0.36666666666666664
Jaccard Score: 0.19293457401164057
Cohen Kappa Score: 0.27121479984846575
Precision (Macro): 0.3421
Recall/Sensitivity (Macro): 0.3615
F1 Score (Macro): 0.3051

Classification Report:
               precision    recall  f1-score   support

          AK       0.45      0.50      0.47        34
       Altro       0.00      0.00      0.00        12
         BCC       0.36      0.75      0.48        20
         BKL       0.22      0.44      0.29        16
          DF       0.00      0.00      0.00         2
         MEL       0.72      0.24      0.36       107
          NV       0.24      0.71      0.36        28
         SCC       0.43      0.19      0.26        32
        VASC       0.67      0.42      0.52        19

    accuracy                           0.37       270
   macro avg       0.34

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
