# Results Analysis

In [1]:
import pandas as pd
import numpy as np
from sklearn import preprocessing
from sklearn.metrics import accuracy_score
from sklearn.metrics import classification_report
from collections import Counter

## 1. Read Predictions Files


In [2]:
# Columns selected from the prediction CSVs: one 'prob_<disease>' column per
# disease (listed alphabetically), then the 'filename' and 'label' columns.
columns = ['prob_' + disease for disease in (
    'acne_comedos',
    'acne_cystic',
    'acne_excoriated',
    'acne_mixed',
    'acne_scars',
    'actinic_keratosis',
    'alopecia_androgenic',
    'alopecia_areata',
    'atopic_dermatitis',
    'cheilitis_eczematous',
    'chronic_hand_eczema',
    'dyshidrosis',
    'folliculitis',
    'genital_warts',
    'granuloma_annulare',
    'herpes_simplex',
    'intertrigo',
    'keratosis_pilaris',
    'lichen_simplex_chronicus',
    'melanonychia',
    'melasma',
    'molluscum',
    'nevus',
    'nummular_eczema',
    'peri_oral_dermatitis',
    'pityriasis_rosae',
    'plane_warts',
    'prurigo_nodularis',
    'psoriasis_guttate',
    'psoriasis_pustular_palmoplantar',
    'psoriasis_vulgar',
    'rosacea_erythemato_telangiectasique',
    'rosacea_inflammatory',
    'seborrheic_dermatitis',
    'seborrheic_keratosis',
    'shingles',
    'tinea_corporis',
    'tinea_versicolor',
    'urticaria',
    'vitiligo',
    'vulgar_warts',
)] + ['filename', 'label']

In [3]:
# Disease class names. 'prob_' + name gives the matching probability column,
# and the order of this list is the order in which those columns are ranked.
classes = [
    'rosacea_inflammatory',
    'atopic_dermatitis',
    'rosacea_erythemato_telangiectasique',
    'peri_oral_dermatitis',
    'seborrheic_keratosis',
    'psoriasis_vulgar',
    'seborrheic_dermatitis',
    'nummular_eczema',
    'tinea_versicolor',
    'chronic_hand_eczema',
    'vulgar_warts',
    'folliculitis',
    'alopecia_androgenic',
    'dyshidrosis',
    'nevus',
    'melasma',
    'alopecia_areata',
    'intertrigo',
    'urticaria',
    'vitiligo',
    'keratosis_pilaris',
    'molluscum',
    'cheilitis_eczematous',
    'tinea_corporis',
    'prurigo_nodularis',
    'actinic_keratosis',
    'genital_warts',
    'plane_warts',
    'pityriasis_rosae',
    'melanonychia',
    'psoriasis_pustular_palmoplantar',
    'granuloma_annulare',
    'psoriasis_guttate',
    'lichen_simplex_chronicus',
    'shingles',
    'herpes_simplex',
    'acne_cystic',
    'acne_scars',
    'acne_excoriated',
    'acne_comedos',
    'acne_mixed',
]

In [4]:
# Both prediction files sit in the same artifacts folder; only the file name
# differs. Each frame is narrowed to (and ordered by) `columns` after loading.
_predictions_dir = 'gs://oro-ds-test-bucket/sdd_acne_files/mlflow_prod/0296a7b8092b4dd8a05a2132c7147364/artifacts/files'
df_test = pd.read_csv(f'{_predictions_dir}/bit-2-test_predictions.csv')[columns]
df_train = pd.read_csv(f'{_predictions_dir}/bit-2-train_predictions.csv')[columns]

In [5]:
# df_test = pd.read_csv(f'gs://oro-ds-test-bucket/sdd_acne_files/mlflow_prod/888ad4dd968b4c28aeeaec131e3d8c41/artifacts/files/bit-3-test_predictions.csv')[columns]
# df_train = pd.read_csv(f'gs://oro-ds-test-bucket/sdd_acne_files/mlflow_prod/888ad4dd968b4c28aeeaec131e3d8c41/artifacts/files/bit-3-train_predictions.csv')[columns]

## 2. Check Intersection Between Files

In [6]:
# File names that appear in both the test and the train predictions.
shared_filenames = set(df_test['filename']) & set(df_train['filename'])
print(f"The intersection between test and train set is: {len(shared_filenames)}")

The intersection between test and train set is: 0


## 3. Compute Top-k Report

In [7]:
def get_top_k_classification_report(df_preds: pd.DataFrame, k: int, classes: list):
    """Generate a top-k classification report.

    A row counts as a hit when its true label is among the ``k`` classes with
    the highest predicted probability. For the report, the prediction of a hit
    row is replaced by its true label; every other row keeps its top-1
    prediction. The report is then computed with scikit-learn's
    ``classification_report``.

    Side effects: ``df_preds`` is modified in place. The columns ``Pred1`` ..
    ``Pred<k>`` (ranked class names), ``labels`` (copy of ``label``) and
    ``top<k>_prediction`` (boolean hit flag) are added or overwritten. The
    experiment name and the top-k accuracy are printed.

    Args:
        df_preds (pd.DataFrame): predictions; must contain one
            ``prob_<class>`` column for every entry of ``classes`` and a
            ``label`` column holding the true class name.
        k (int): number of highest-probability classes to consider,
            ``1 <= k <= len(classes)``.
        classes (list): list of class names (without the ``prob_`` prefix).

    Returns:
        pd.DataFrame: the ``classification_report`` dictionary as a frame,
        transposed so that each report entry is a row.

    Raises:
        ValueError: if ``k`` is not between 1 and ``len(classes)``.
    """
    if not 1 <= k <= len(classes):
        raise ValueError(f"k must be between 1 and {len(classes)}, got {k}")

    exp_name = 'top' + str(k) + '_prediction'
    print(exp_name)
    prob_columns = ['prob_' + disease for disease in classes]
    preds_list = ['Pred' + str(rank + 1) for rank in range(k)]

    # Rank every row once instead of re-sorting it for each of the k positions.
    # Negating gives descending order; the stable sort resolves ties in favour
    # of the class that comes first in `classes`.
    probabilities = df_preds[prob_columns].to_numpy(dtype=float)
    top_k_positions = np.argsort(-probabilities, axis=1, kind='stable')[:, :k]

    # Map positions straight back to class names; stripping 'prob_' from the
    # column name would also remove any later occurrence of that substring.
    class_names = np.asarray(classes, dtype=object)
    for rank, pred_column in enumerate(preds_list):
        df_preds[pred_column] = class_names[top_k_positions[:, rank]]

    df_preds['labels'] = df_preds['label']

    # Hit flag per row. It is assigned without resetting the index, so it stays
    # aligned with df_preds even when the frame has a non-default index.
    df_preds[exp_name] = df_preds[preds_list].eq(df_preds['labels'], axis=0).any(axis=1)

    # mean() of the boolean flags is the hit fraction and is safe when there is
    # no hit at all (value_counts()[True] would raise KeyError in that case).
    # NOTE: the printed value is a fraction in [0, 1]; the trailing '%' is kept
    # so the message matches previously recorded runs.
    print(f"{exp_name} Accuracy: {df_preds[exp_name].mean():0.3f}%.")

    y_true = df_preds['labels'].to_list()
    # Credit a hit with the true label, otherwise fall back to the top-1 guess.
    y_pred = df_preds['labels'].where(df_preds[exp_name], df_preds['Pred1']).to_list()
    report = classification_report(y_true, y_pred, digits=4, output_dict=True)
    return pd.DataFrame(report).transpose()

In [8]:
# Evaluate the same test predictions at k = 1, 2 and 3, in that order.
df_1, df_2, df_3 = [
    get_top_k_classification_report(df_test, k=top_k, classes=classes)
    for top_k in (1, 2, 3)
]

top1_prediction
top1_prediction Accuracy: 0.690%.
top2_prediction
top2_prediction Accuracy: 0.831%.
top3_prediction
top3_prediction Accuracy: 0.890%.


In [9]:
# Keep only the recall column of each top-k report, renamed per k; the support
# column is kept once, from the top-3 report.
# Rebinding instead of mutating in place makes the cell safe to re-run:
# rename() ignores a missing 'recall' column by default, so a second run is a
# no-op, whereas the in-place drop() raised KeyError on already-dropped columns.
df_1 = df_1.rename(columns={'recall': 'recall_top_1'})[['recall_top_1']]
df_2 = df_2.rename(columns={'recall': 'recall_top_2'})[['recall_top_2']]
df_3 = df_3.rename(columns={'recall': 'recall_top_3'})[['recall_top_3', 'support']]
print('dataset updates')

dataset updates


In [10]:
# prob_columns = ['prob_' + disease for disease in classes]
# df_test[prob_columns].apply(lambda x: x.sort_values(ascending=False).index[1].replace('prob_', ''), axis=1)

In [11]:
# Side-by-side table of the three reports, joined on their row index.
pd.concat([df_1, df_2, df_3], axis='columns')

Unnamed: 0,recall_top_1,recall_top_2,recall_top_3,support
acne_comedos,0.275862,0.62069,0.689655,29.0
acne_cystic,0.431818,0.681818,0.818182,44.0
acne_excoriated,0.421053,0.631579,0.789474,19.0
acne_mixed,0.733333,0.916667,0.961111,180.0
acne_scars,0.4,0.6,0.85,20.0
actinic_keratosis,0.764706,0.823529,0.941176,17.0
alopecia_androgenic,0.911765,0.970588,1.0,34.0
alopecia_areata,0.846154,0.923077,0.923077,13.0
atopic_dermatitis,0.592308,0.769231,0.838462,130.0
cheilitis_eczematous,0.590909,0.727273,0.909091,22.0


In [91]:
# df_test_1 = pd.read_csv('gs://oro-ds-test-bucket/sdd_acne_files/mlflow_prod/d08d37b42d5541d78ab6a195780a3b81/artifacts/files/bit-2-test_predictions.csv')
# df_test_2 = pd.read_csv('gs://oro-ds-test-bucket/sdd_acne_files/mlflow_prod/6ac899ff4578403894201dd7081ceaed/artifacts/files/bit-0-test_predictions.csv')
# df_train_1 = pd.read_csv('gs://oro-ds-test-bucket/sdd_acne_files/mlflow_prod/d08d37b42d5541d78ab6a195780a3b81/artifacts/files/bit-2-train_predictions.csv')
# df_train_2 = pd.read_csv('gs://oro-ds-test-bucket/sdd_acne_files/mlflow_prod/6ac899ff4578403894201dd7081ceaed/artifacts/files/bit-0-train_predictions.csv')

In [93]:
# masks = df_train_1['label_names'].apply(lambda x: True if len(eval(x)) ==1 else False)
# df_train_1 =df_train_1[masks].reset_index(drop=True)
# df_train_1['label'] = df_train_1['label_names'].apply(lambda x: eval(x)[0])
# masks = df_train_2['label_names'].apply(lambda x: True if len(eval(x)) ==1 else False)
# df_train_2 =df_train_2[masks].reset_index(drop=True)
# df_train_2['label'] = df_train_2['label_names'].apply(lambda x: eval(x)[0])
# masks = df_test_1['label_names'].apply(lambda x: True if len(eval(x)) ==1 else False)
# df_test_1 =df_test_1[masks].reset_index(drop=True)
# df_test_1['label'] = df_test_1['label_names'].apply(lambda x: eval(x)[0])
# masks = df_test_2['label_names'].apply(lambda x: True if len(eval(x)) ==1 else False)
# df_test_2 =df_test_2[masks].reset_index(drop=True)
# df_test_2['label'] = df_test_2['label_names'].apply(lambda x: eval(x)[0])
# get_top_k_classification_report(df_train_1, k=3, classes=classes)
# get_top_k_classification_report(df_train_2, k=3, classes=classes)
# get_top_k_classification_report(df_test_1, k=3, classes=classes)
# get_top_k_classification_report(df_test_2, k=3, classes=classes)

In [105]:
# df_test_1.rename(columns={'Pred1': 'bit1Pred1', 'Pred2': 'bit1Pred2', 'Pred3': 'bit1Pred3',
#                           'Prob1': 'bit1Prob1', 'Prob2': 'bit1Prob2', 'Prob3': 'bit1Prob3', }, inplace=True)
# df_test_2.rename(columns={'Pred1': 'bit2Pred1', 'Pred2': 'bit2Pred2', 'Pred3': 'bit2Pred3',
#                           'Prob1': 'bit2Prob1', 'Prob2': 'bit2Prob2', 'Prob3': 'bit2Prob3', }, inplace=True)
# df_test_1 = df_test_1[['bit1Pred1','bit1Pred2', 'bit1Pred3',
#                         'bit1Prob1','bit1Prob2', 'bit1Prob3', 'label', 'filename']]
# df_test_2 = df_test_2[['bit2Pred1','bit2Pred2', 'bit2Pred3',
#                         'bit2Prob1','bit2Prob2', 'bit2Prob3', 'label', 'filename']]

In [121]:
# df_test_1.merge(df_test_2, on=['filename', 'label'], how='inner').to_csv('results.csv')

In [123]:
# df_train_1.rename(columns={'Pred1': 'bit1prediction', 'Pred2': 'bit1_y_pred1', 'Pred3': 'bit1_y_pred2',
#                           'Prob1': 'bit1_proba', 'Prob2': 'bit1_y_prob1', 'Prob3': 'bit1_y_prob2', }, inplace=True)
# df_train_2.rename(columns={'Pred1': 'bit2prediction', 'Pred2': 'bit2_y_pred1', 'Pred3': 'bit2_y_pred2',
#                           'Prob1': 'bit2_proba', 'Prob2': 'bit2_y_prob1', 'Prob3': 'bit2_y_prob2', }, inplace=True)
# df_train_1 = df_train_1[[ 'bit1prediction', 'bit1_y_pred1',  'bit1_y_pred2',
#                           'bit1_proba', 'bit1_y_prob1', 'bit1_y_prob2',  'label', 'filename']]
# df_train_2 = df_train_2[[ 'bit2prediction', 'bit2_y_pred1',  'bit2_y_pred2',
#                           'bit2_proba', 'bit2_y_prob1', 'bit2_y_prob2',  'label', 'filename']]

In [124]:
# df_train_1.merge(df_train_2, on=['filename', 'label'], how='inner').to_csv('results.csv')