In [2]:
from efficientnet_pytorch import EfficientNet
from PIL import Image
import torch
from torchvision import transforms
import os
from tqdm import *
from sklearn.metrics import confusion_matrix
import numpy as np
import json


  from .autonotebook import tqdm as notebook_tqdm


In [17]:
from efficientnet_pytorch import EfficientNet
model = EfficientNet.from_pretrained('efficientnet-b0')

Loaded pretrained weights for efficientnet-b0


# Implementation naive 

In [28]:
directory= './dataset/'
nb_samples=10000
labels=json.load(open('labels.json'))

grayscale= []
actual=[]
predicted=[]

for filename in tqdm(os.listdir(directory)[:10000]):
    f = os.path.join(directory, filename)
    y_true= labels[filename]    
    tfms = transforms.Compose([transforms.Resize(224), transforms.ToTensor(),
    transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225]),])
    img=Image.open(f)    
    if img.mode !='RGB':
        grayscale.append(filename)
        img = tfms(img.convert('RGB')).unsqueeze(0)
    else:
        img = tfms(img).unsqueeze(0)
    with torch.no_grad():
        outputs = model(img)
        y_pred = torch.argmax(outputs).item()
        actual.append(y_true)
        predicted.append(y_pred)

with open('results.json', 'w') as fp:
    results = {actual[i]: predicted[i] for i in range(nb_samples)}
    json.dump(results, fp,  indent=4)

100%|██████████| 200/200 [01:10<00:00,  2.85it/s]


In [76]:

def confusion_values (y_true,y_pred):
    confusion = confusion_matrix(y_true,y_pred)
    FP = confusion.sum(axis=0) - np.diag(confusion)  
    FN = confusion.sum(axis=1) - np.diag(confusion)
    TP = np.diag(confusion)
    TN = confusion.sum() - (FP + FN + TP)
    return FP,FN,TP,TN

def get_metrics(FP,FN,TP,TN,nb_smpl):
    # Accuracy
    ACC=sum(TP)/nb_smpl
    # Specificity or True Negative Rate
    TNR=TN/(TN+FP)
    return {'ACC':ACC,'TNR':TNR}

# Metrics

Metrics on the whole dataset

In [29]:
FP,FN,TP,TN = confusion_values(actual,predicted)
metrics = get_metrics(FP,FN,TP,TN,nb_samples)
print(metrics.ACC)

NameError: name 'confusion_values' is not defined

# Class imbalance
We count the frequence of appearance of each class in order to compute metrics on the different parts of the dataset

## Frequence
We compute the metrics on the most present (true) classes 

In [78]:
actual_dict = {x:actual.count(x) for x in actual}
predicted_dict = {x:predicted.count(x) for x in predicted}
results=json.load(open('results.json'))

In [89]:
actual_filtered = [ key for (key,value) in actual_dict.items() if value >= 5 ]
results_filtered = {key:value for (key,value) in results.items() if key in actual_filtered }
fFP,fFN,fTP,fTN = confusion_values(list(results_filtered.keys()),list(results_filtered.values()))
fmetrics = get_metrics(fFP,fFN,fTP,fTN,len(actual_filtered))
print(fmetrics.ACC)

{'ACC': 0.7658450704225352, 'TNR': array([1.        , 1.        , 1.        , 1.        , 0.99823944,
       1.        , 0.99823944, 1.        , 1.        , 1.        ,
       1.        , 1.        , 1.        , 1.        , 1.        ,
       1.        , 1.        , 1.        , 1.        , 0.99823944,
       1.        , 0.99823944, 1.        , 1.        , 1.        ,
       0.99823944, 1.        , 1.        , 1.        , 1.        ,
       0.99823944, 0.99823633, 1.        , 1.        , 1.        ,
       1.        , 1.        , 0.99823944, 1.        , 1.        ,
       0.99470899, 0.99823633, 1.        , 1.        , 1.        ,
       1.        , 0.99823633, 1.        , 1.        , 1.        ,
       1.        , 1.        , 1.        , 1.        , 1.        ,
       1.        , 1.        , 1.        , 1.        , 1.        ,
       1.        , 1.        , 1.        , 1.        , 1.        ,
       1.        , 1.        , 1.        , 1.        , 1.        ,
       1.        , 1.      

# Grayscale
We compute the metrics on the grayscale images to see if the color have an effect on the inference

In [90]:
grayscale_labels = [labels[filename] for filename in grayscale  ]
results_gray = {key:value for (key,value) in results.items() if key in grayscale_labels }
gFP,gFN,gTP,gTN = confusion_values(list(results_gray.keys()),list(results_gray.values()))
gmetrics = get_metrics(gFP,gFN,gTP,gTN,len(grayscale_labels))
print(gmetrics.ACC)

{'ACC': 0.63, 'TNR': array([1.        , 1.        , 0.98947368, 1.        , 1.        ,
       1.        , 1.        , 1.        , 1.        , 1.        ,
       1.        , 1.        , 1.        , 1.        , 0.9893617 ,
       1.        , 1.        , 0.98947368, 0.98947368, 1.        ,
       1.        , 1.        , 1.        , 1.        , 1.        ,
       1.        , 1.        , 1.        , 1.        , 1.        ,
       1.        , 0.98947368, 1.        , 1.        , 1.        ,
       1.        , 1.        , 1.        , 1.        , 0.98947368,
       1.        , 1.        , 0.98947368, 1.        , 0.98947368,
       1.        , 0.9893617 , 1.        , 1.        , 0.98947368,
       1.        , 1.        , 0.98947368, 1.        , 1.        ,
       1.        , 1.        , 1.        , 0.98947368, 1.        ,
       1.        , 1.        , 1.        , 1.        , 1.        ,
       1.        , 0.98947368, 1.        , 0.9893617 , 1.        ,
       0.97894737, 1.        , 1.        