In [1]:
from efficientnet_pytorch import EfficientNet
from PIL import Image
import torch
from torchvision import transforms
import os
from tqdm import *
from sklearn.metrics import confusion_matrix
import numpy as np
import json


  from .autonotebook import tqdm as notebook_tqdm


In [2]:
from efficientnet_pytorch import EfficientNet
model = EfficientNet.from_pretrained('efficientnet-b0')

Loaded pretrained weights for efficientnet-b0


# Inference on a single image

In [4]:
# Preprocess image
tfms = transforms.Compose([transforms.Resize(224), transforms.ToTensor(),
    transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225]),])
test_image = tfms(Image.open('./dataset/ILSVRC2012_val_00000023.JPEG')).unsqueeze(0)

# Load ImageNet class names
labels_map = json.load(open('categories.json'))
labels_map = [labels_map[str(i)] for i in range(1000)]

# Classify
model.eval()
with torch.no_grad():
    outputs = model(test_image)

# Print predictions
for idx in torch.topk(outputs, k=5).indices.squeeze(0).tolist():
    prob = torch.softmax(outputs, dim=1)[0, idx].item()
    print('{label:<75} ({p:.2f}%)'.format(label=labels_map[idx], p=prob*100))

Granny Smith                                                                (96.73%)
lemon                                                                       (0.10%)
pomegranate                                                                 (0.08%)
fig                                                                         (0.06%)
piggy bank, penny bank                                                      (0.05%)


# Naive implementation on the whole dataset

In [7]:
directory= './dataset/'
nb_samples=5000 # test on a smaller part of the dataset
labels=json.load(open('labels.json'))

grayscale= []
actual=[]
predicted=[]

tfms = transforms.Compose([transforms.Resize(224), transforms.ToTensor(),
    transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225]),])

for filename in tqdm(os.listdir(directory)[:nb_samples]):
    f = os.path.join(directory, filename)
    y_true= labels[filename]    
    img=Image.open(f)    
    if img.mode !='RGB':
        grayscale.append(filename)
        img = tfms(img.convert('RGB')).unsqueeze(0)
    else:
        img = tfms(img).unsqueeze(0)
    model.eval()
    with torch.no_grad():
        outputs = model(img)
        y_pred = torch.argmax(outputs).item()
        actual.append(y_true)
        predicted.append(y_pred)

with open('partial_results.json', 'w') as fp:
    results = { i : (actual[i], predicted[i]) for i in range(nb_samples)}
    json.dump(results, fp,  indent=4)

100%|██████████| 5000/5000 [09:30<00:00,  8.76it/s]


# Metrics

In [17]:
results=list(json.load(open('results.json')).values())
actual=[sample[0] for sample in results]
predicted=[sample[1] for sample in results]

def confusion_values (y_true,y_pred):
    confusion = confusion_matrix(y_true,y_pred)
    FP = confusion.sum(axis=0) - np.diag(confusion)  
    FN = confusion.sum(axis=1) - np.diag(confusion)
    TP = np.diag(confusion)
    TN = confusion.sum() - (FP + FN + TP)
    return FP,FN,TP,TN

def get_metrics(FP,FN,TP,TN,nb_smpl):
    # Accuracy
    ACC=sum(TP)/nb_smpl
    # Specificity or True Negative Rate
    TNR=TN/(TN+FP)
    return {'ACC':ACC,'TNR':TNR}

# Global metrics

In [18]:
FP,FN,TP,TN = confusion_values(actual,predicted)
metrics = get_metrics(FP,FN,TP,TN,len(actual))
print(metrics)

{'ACC': 0.74278, 'TNR': array([0.99995996, 0.99991992, 0.99985986, 0.9996997 , 0.99971972,
       0.9995996 , 0.99983984, 0.99993994, 0.99975976, 0.99995996,
       0.99977978, 0.99993994, 0.99987988, 0.99987988, 0.9998999 ,
       0.99991992, 0.99997998, 0.99993994, 0.99997998, 0.99997998,
       0.9998999 , 0.99973974, 0.9998999 , 0.99983984, 1.        ,
       0.99997998, 0.9994995 , 0.99987988, 0.99983984, 0.9998999 ,
       0.9997998 , 0.99983984, 0.99933934, 0.99963964, 0.99983984,
       0.99957958, 0.99965966, 0.99975976, 0.99983984, 0.9997998 ,
       0.9994995 , 0.99967968, 0.99971972, 0.99985986, 0.99977978,
       0.99995996, 0.99967968, 0.99981982, 0.9998999 , 0.99971972,
       0.99995996, 0.99993994, 0.99953954, 0.9996997 , 0.99987988,
       0.99987988, 0.99995996, 0.99985986, 0.9995996 , 0.99967968,
       0.9994995 , 0.99985986, 0.99975976, 0.99971972, 0.99945946,
       0.99983984, 0.99927928, 0.9997998 , 0.99953954, 0.9997998 ,
       0.99973974, 0.99971972, 0.99965

# Class imbalance


## Frequence
The idea was to compute the metrics on the most and least present part of the dataset but they are all present in equal amount in the dataset

In [19]:
actual_dict = {x:actual.count(x) for x in actual}
predicted_dict = {x:predicted.count(x) for x in predicted}
results_dict = {actual[i]: predicted[i] for i in range(len(actual))}

In [31]:
actual_filtered = [ key for (key,value) in actual_dict.items() if value ==50  ]
results_filtered = {key:value for (key,value) in results_dict.items() if key in actual_filtered }
fFP,fFN,fTP,fTN = confusion_values(list(results_filtered.keys()),list(results_filtered.values()))
fmetrics = get_metrics(fFP,fFN,fTP,fTN,len(actual_filtered))
print(fmetrics['ACC'])

0.754


In [48]:
predicted_filtered_max = {key:value for (key,value) in predicted_dict.items() if value >=85  }
predicted_filtered_min = {key:value for (key,value) in predicted_dict.items() if value <=25  }

print('Most predicted classes:')
for key,value in predicted_filtered_max.items():
 print('{label} : {nb}'.format(label=labels_map[key], nb=value))
print('------------------')
print('Least predicted classes:')
for key,value in predicted_filtered_min.items():
 print('{label} : {nb}'.format(label=labels_map[key], nb=value))


Most predicted classes:
hot pot, hotpot : 92
tape player : 101
lakeside, lakeshore : 91
shoji : 86
desk : 93
library : 92
packet : 86
swing : 91
plate : 88
------------------
Least predicted classes:
soup bowl : 24
English foxhound : 21
Windsor tie : 23
sunglasses, dark glasses, shades : 25
car wheel : 23
ram, tup : 21
hair spray : 24
letter opener, paper knife, paperknife : 22
dock, dockage, docking facility : 22
projectile, missile : 21
typewriter keyboard : 25
velvet : 17
sunglass : 20
tiger cat : 16


# Grayscale
We compute the metrics on the grayscale images to see if the color have an effect on the inference. Even on small part of the dataset we can see that the accuracy is quite smaller on grayscale images

In [28]:
grayscale_labels = [labels[filename] for filename in grayscale  ]
results_gray = {key:value for (key,value) in results_dict.items() if key in grayscale_labels }
gFP,gFN,gTP,gTN = confusion_values(list(results_gray.keys()),list(results_gray.values()))
gmetrics = get_metrics(gFP,gFN,gTP,gTN,len(grayscale_labels))
print(gmetrics['ACC'])

0.62
