In [0]:
from __future__ import print_function, division

import torch
import torch.nn as nn
import torch.optim as optim
from torch.autograd import Variable
from torch.optim import lr_scheduler
import numpy as np
import torchvision
from torchvision import datasets, models, transforms
import matplotlib.pyplot as plt
import time
import os
import copy

# Switch matplotlib to interactive mode.
plt.ion()

# Use the first CUDA device when PyTorch reports one; otherwise stay on the CPU.
if torch.cuda.is_available():
    device = torch.device("cuda:0")
else:
    device = torch.device("cpu")

In [0]:
# Root data folder; passed to load_datasets() in the next cell.
data_dir = "data/"

In [3]:
from data.load_data import load_datasets
# Build the train / validation / test splits (plus the class list) from data_dir.
train_dataset, val_dataset, test_dataset, classes = load_datasets(data_dir)

# Class names as exposed by the training split.
class_names = train_dataset.classes
print(len(class_names))

# Number of samples per training phase; train_model divides by these to get
# per-epoch average loss and accuracy.
dataset_sizes = {
    phase: len(split)
    for phase, split in (("train", train_dataset), ("val", val_dataset))
}

Downloading http://vision.stanford.edu/aditya86/ImageNetDogs/images.tar to data/StanfordDogsData/images.tar


HBox(children=(IntProgress(value=1, bar_style='info', max=1), HTML(value='')))

Downloading http://vision.stanford.edu/aditya86/ImageNetDogs/annotation.tar to data/StanfordDogsData/annotation.tar


HBox(children=(IntProgress(value=1, bar_style='info', max=1), HTML(value='')))

Downloading http://vision.stanford.edu/aditya86/ImageNetDogs/lists.tar to data/StanfordDogsData/lists.tar


HBox(children=(IntProgress(value=1, bar_style='info', max=1), HTML(value='')))


Training set stats:
9600 samples spanning 120 classes (avg 80.000000 per class)
Validation set stats:
2400 samples spanning 120 classes (avg 20.000000 per class)
Testing set stats:
8580 samples spanning 120 classes (avg 71.500000 per class)
120


In [0]:
# Settings shared by every loader. Pinned host memory only helps when batches
# are copied to a CUDA device, so it follows CUDA availability instead of
# being forced on (which only triggers a warning on CPU-only machines).
kwargs = {'num_workers': 1, 'pin_memory': torch.cuda.is_available()}
batch_size = 32

# Only the training split is shuffled. Validation and test metrics do not
# depend on sample order, and a fixed order keeps per-sample outputs
# (predictions, label lists) reproducible between runs.
train_loader = torch.utils.data.DataLoader(train_dataset,
                 batch_size=batch_size, shuffle=True, **kwargs)
val_loader = torch.utils.data.DataLoader(val_dataset,
                 batch_size=batch_size, shuffle=False, **kwargs)
test_loader = torch.utils.data.DataLoader(test_dataset,
                 batch_size=batch_size, shuffle=False, **kwargs)

# Phase name -> loader, as consumed by train_model.
dataloaders = {"train": train_loader, "val": val_loader}

In [0]:
def evaluate(model, optimizer, criterion, loader=None, eval_device=None):
    """Run ``model`` over a data loader in inference mode and summarise the result.

    Args:
        model: Network to evaluate; it is switched to eval mode and left there.
        optimizer: Unused. Kept only so existing positional callers
            (``evaluate(model, optimizer, criterion)``) keep working; no
            gradients are produced here, so nothing needs zeroing.
        criterion: Loss callable invoked as ``criterion(outputs, labels)``; its
            result must support ``.item()``.
        loader: Iterable of ``(inputs, labels)`` batches. Defaults to the
            notebook-level ``test_loader``.
        eval_device: Device the batches are moved to. Defaults to the
            notebook-level ``device``.

    Returns:
        Tuple ``(avg_loss, n_correct, n_examples, accuracy)``:
        ``avg_loss`` is a float (sum of per-batch loss * batch size, divided by
        the number of examples seen); ``n_correct`` is a 0-dim integer tensor;
        ``n_examples`` is an int; ``accuracy`` is a 0-dim double tensor holding
        the percentage of correct predictions. For an empty loader the loss
        and accuracy are reported as zero instead of raising.
    """
    # Fall back to the notebook globals only when the caller did not override them.
    if loader is None:
        loader = test_loader
    if eval_device is None:
        eval_device = device

    model.eval()

    running_loss = 0.0
    # Kept as a tensor from the start so the code path is the same whether or
    # not the loader yields any batch.
    running_corrects = torch.tensor(0, dtype=torch.long, device=eval_device)
    n_examples = 0

    with torch.no_grad():
        for inputs, labels in loader:
            inputs = inputs.to(eval_device)
            labels = labels.to(eval_device)

            outputs = model(inputs)
            _, preds = torch.max(outputs, 1)
            loss = criterion(outputs, labels)

            # Weight the batch loss by the batch size so the final average is
            # per example even when the last batch is smaller.
            running_loss += loss.item() * inputs.size(0)
            running_corrects += torch.sum(preds == labels)
            n_examples += preds.size(0)

    # Loss and accuracy share one denominator: the examples actually seen.
    denominator = max(n_examples, 1)
    epoch_acc = 100. * running_corrects.double() / denominator
    running_loss /= denominator
    return running_loss, running_corrects, n_examples, epoch_acc

In [0]:
def train_model(model, model_name, criterion, optimizer, scheduler, hp_info, num_epochs=25):
    """Train ``model``, restore its best-validation weights, then score it on the test set.

    Each epoch runs a 'train' phase and a 'val' phase over the notebook-level
    ``dataloaders``; per-epoch averages are computed with ``dataset_sizes``.
    One line per epoch is printed and written to ``"<model_name>.log"``. After
    the last epoch the weights with the highest validation accuracy are loaded
    back into ``model`` and ``evaluate`` is called on it.

    Args:
        model: Network to train (modified in place).
        model_name: Used to build the log file name.
        criterion: Loss callable invoked as ``criterion(outputs, labels)``.
        optimizer: Optimizer stepped once per training batch.
        scheduler: LR scheduler stepped once per epoch, after the train phase.
        hp_info: Mapping with ``'lr'`` and ``'momentum'`` entries; only printed.
        num_epochs: Number of train/val epochs to run.

    Returns:
        Tuple ``(model, test_acc, best_acc)`` where ``test_acc`` is the accuracy
        value returned by ``evaluate`` and ``best_acc`` is the best validation
        accuracy seen (the float ``0.0`` if no epoch improved on it, otherwise
        a 0-dim double tensor).
    """
    best_model_wts = copy.deepcopy(model.state_dict())
    best_acc = 0.0

    file_name = "{model_name}.log".format(model_name=model_name)

    # The context manager guarantees the log is closed (and flushed) even when
    # training is interrupted or raises part-way through.
    with open(file_name, 'w') as log_file:
        print('-'*10)
        print('learning rate: {}, momentum: {}'.format(hp_info['lr'], hp_info['momentum']))

        for epoch in range(num_epochs):
            # Each epoch has a training and a validation phase; the train half
            # of the log line is completed by the val phase before being written.
            log_file_string = ""
            for phase in ['train', 'val']:
                is_train = phase == 'train'
                if is_train:
                    model.train()  # Set model to training mode
                else:
                    model.eval()   # Set model to evaluate mode

                running_loss = 0.0
                # Tensor accumulator so `.double()` below works even if the
                # loader yields no batches.
                running_corrects = torch.tensor(0, dtype=torch.long, device=device)

                for inputs, labels in dataloaders[phase]:
                    inputs = inputs.to(device)
                    labels = labels.to(device)

                    # Gradients only exist (and only need clearing) while training.
                    if is_train:
                        optimizer.zero_grad()

                    # Track autograd history only in the train phase.
                    with torch.set_grad_enabled(is_train):
                        outputs = model(inputs)
                        _, preds = torch.max(outputs, 1)
                        loss = criterion(outputs, labels)

                        if is_train:
                            loss.backward()
                            optimizer.step()

                    # Weight by batch size so the epoch average is per example.
                    running_loss += loss.item() * inputs.size(0)
                    running_corrects += torch.sum(preds == labels)

                if is_train:
                    scheduler.step()

                epoch_loss = running_loss / dataset_sizes[phase]
                epoch_acc = running_corrects.double() / dataset_sizes[phase]

                if is_train:
                    log_file_string = 'Train Epoch: {}\t Train Loss: {:.6f}\t Train Acc:{}\t '.format(epoch, epoch_loss, epoch_acc)
                else:
                    log_file_string += 'Val Loss: {}\t Val Acc: {}\n'.format(epoch_loss, epoch_acc)
                    log_file.write(log_file_string)
                    # Push the finished epoch line to disk right away so the
                    # log stays useful if the kernel dies mid-run.
                    log_file.flush()
                    print(log_file_string)

                    # Remember the weights of the best validation epoch so far.
                    if epoch_acc > best_acc:
                        best_acc = epoch_acc
                        best_model_wts = copy.deepcopy(model.state_dict())

        # Load best model weights before the final test-set evaluation.
        model.load_state_dict(best_model_wts)

        test_loss, test_correct, test_n_examples, test_acc = evaluate(model, optimizer, criterion)
        log_file_string = '\ntest set: Average loss: {:.4f}, Accuracy: {}/{} ({:.0f}%)\n'
        log_file_string = log_file_string.format(test_loss, test_correct, test_n_examples, test_acc)
        log_file.write(log_file_string)
        print(log_file_string)

    return model, test_acc, best_acc

In [7]:
# Hyper-parameters for the SGD optimizer built below.
lr = 0.01
mom = 0.9

# Passed to train_model as model_name, where it becomes the log file name.
model_type = "resnext"

criterion = nn.CrossEntropyLoss()

model_conv = models.resnext101_32x8d(pretrained=True)

# Turn off gradients for every parameter of the pretrained network.
for param in model_conv.parameters():
    param.requires_grad = False

# Parameters of newly constructed modules have requires_grad=True by default
num_ftrs = model_conv.fc.in_features

# Replace the classifier with a new stack of three Linear layers ending in 120 outputs.
# NOTE(review): there is no activation between the Linear layers, so the stack
# can only express a single affine map of the features — confirm this is intended.
# model_conv.fc = nn.Linear(num_ftrs, 120)
model_conv.fc = nn.Sequential(nn.Linear(num_ftrs, 512), nn.Linear(512, 512), nn.Linear(512, 120))

model_conv = model_conv.to(device)

# Only the parameters of the new head (model_conv.fc) are handed to the
# optimizer; the rest of the network had requires_grad switched off above.
params = model_conv.fc.parameters()
optimizer_conv = optim.SGD(params, lr=lr, momentum=mom)

# Decay LR by a factor of 0.1 every 7 scheduler steps (train_model steps the
# scheduler once per epoch).
exp_lr_scheduler = lr_scheduler.StepLR(optimizer_conv, step_size=7, gamma=0.1)
hp_info = {'lr': lr, 'momentum': mom}
# my_models.append({'model': model_conv, 'optimizer': optimizer_conv, 'exp_lr_scheduler': exp_lr_scheduler, 'hp_info': hp_info})

Downloading: "https://download.pytorch.org/models/resnext101_32x8d-8ba56ff5.pth" to /root/.cache/torch/checkpoints/resnext101_32x8d-8ba56ff5.pth


HBox(children=(IntProgress(value=0, max=356082095), HTML(value='')))




In [8]:
# Train the new head for 20 epochs; the returned model carries the weights of
# the best validation epoch.
model_conv, test_acc, val_acc = train_model(
    model=model_conv,
    model_name=model_type,
    criterion=criterion,
    optimizer=optimizer_conv,
    scheduler=exp_lr_scheduler,
    hp_info=hp_info,
    num_epochs=20,
)

----------
learning rate: 0.01, momentum: 0.9
Train Epoch: 0	 Train Loss: 2.079241	 Train Acc:0.535	 Val Loss: 0.8304375346501668	 Val Acc: 0.7612500000000001

Train Epoch: 1	 Train Loss: 0.852193	 Train Acc:0.7567708333333334	 Val Loss: 0.762613662481308	 Val Acc: 0.7837500000000001

Train Epoch: 2	 Train Loss: 0.791131	 Train Acc:0.7835416666666667	 Val Loss: 0.759389332930247	 Val Acc: 0.7908333333333334

Train Epoch: 3	 Train Loss: 0.753601	 Train Acc:0.7889583333333334	 Val Loss: 0.6980640695492426	 Val Acc: 0.8

Train Epoch: 4	 Train Loss: 0.729628	 Train Acc:0.7961458333333333	 Val Loss: 0.8308371857802073	 Val Acc: 0.7791666666666667

Train Epoch: 5	 Train Loss: 0.699910	 Train Acc:0.7973958333333334	 Val Loss: 0.7728929396470388	 Val Acc: 0.78625

Train Epoch: 6	 Train Loss: 0.669006	 Train Acc:0.8119791666666667	 Val Loss: 0.7057832290728887	 Val Acc: 0.81

Train Epoch: 7	 Train Loss: 0.533072	 Train Acc:0.8522916666666667	 Val Loss: 0.6373159343004227	 Val Acc: 0.82708333333

In [0]:
import model_evaluation_utils as meu

In [0]:
# Collect ground-truth labels and predicted labels for every test batch.
test_labels = []
test_predictions = []

# Put the model in inference mode explicitly instead of relying on whichever
# cell touched it last.
model_conv.eval()

# No gradients are needed for prediction; disabling autograd avoids building a
# graph through the trainable head on every batch.
with torch.no_grad():
  for inputs, labels in test_loader:
    test_labels.extend(labels.numpy())
    inputs = inputs.to(device)
    outputs = model_conv(inputs)
    _, preds = torch.max(outputs, 1)
    test_predictions.extend(preds.cpu().numpy())

# Labels and predictions are appended in the same loop, so they must line up 1:1.
assert len(test_labels) == len(test_predictions)

In [47]:
# Aggregate test-set metrics via the model_evaluation_utils helper (imported as meu).
meu.get_metrics(true_labels=test_labels, predicted_labels=test_predictions)

Accuracy: 0.8442
Precision: 0.8494
Recall: 0.8442
F1 Score: 0.8452


In [63]:
# Read top to bottom, no earlier cell brings `pd` into scope (the notebook's
# only other pandas import sits in a later cell), so import it here to keep
# this cell runnable on a fresh kernel.
import pandas as pd

# Load labels.csv and pull out its 'breed' column.
data_labels = pd.read_csv('labels.csv')
target_labels = data_labels['breed']
# pd.get_dummies(target_labels, sparse=True)
target_labels

0                     boston_bull
1                           dingo
2                        pekinese
3                        bluetick
4                golden_retriever
                   ...           
10217                      borzoi
10218              dandie_dinmont
10219                    airedale
10220          miniature_pinscher
10221    chesapeake_bay_retriever
Name: breed, Length: 10222, dtype: object

In [0]:
from data.dog_classes import classes
import pandas as pd
# class_labels = pd.DataFrame(classes)
# Use the imported `classes` sequence as-is; later cells index it by integer
# label and pass it to classification_report as target_names.
class_labels = classes

In [0]:
# Translate every integer test label into its class name.
test_labels_class_names = [class_labels[label_idx] for label_idx in test_labels]

In [71]:
# One-hot encode the test-set class names (one indicator column per class name).
pd.get_dummies(test_labels_class_names, sparse=True)

Unnamed: 0,Affenpinscher,Afghan Hound,African Hunting Dog,Airedale,American Staffordshire Terrier,Appenzeller,Australian Terrier,Basenji,Basset Hound,Beagle,Bedlington Terrier,Bernese Mountain Dog,Black-and-tan Coonhound,Blenheim Spaniel,Bloodhound,Bluetick,Border Collie,Border Terrier,Borzoi,Boston Bull,Bouvier des Flandres,Boxer,Brabancon Griffon,Briard,Brittany,Bull Mastiff,Cairn,Cardigan,Chesapeake Bay Retriever,Chihuaha,Chow,Clumber,Cocker Spaniel,Collie,Curly-coater Retriever,Dandi Dinmont,Dhole,Dingo,Doberman,English Foxhound,...,Norwegian Elkhound,Norwich Terrier,Old English Sheepdog,Otterhound,Papillon,Pekinese,Pembroke,Pomeranian,Pug,Redbone,Rhodesian Ridgeback,Rottweiler,Saint Bernard,Saluki,Samoyed,Schipperke,Scotch Terrier,Scottish Deerhound,Sealyham Terrier,Shetland Sheepdog,Shih-Tzu,Siberian Husky,Silky Terrier,Soft-coated Wheaten Terrier,Staffordshire Bullterrier,Standard Poodle,Standard Schnauzer,Sussex Spaniel,Tibetan Mastiff,Tibetan Terrier,Toy Poodle,Toy Terrier,Vizsla,Walker Hound,Weimaraner,Welsh Springer Spaniel,West Highland White Terrier,Whippet,Wirehaired Fox Terrier,Yorkshire Terrier
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
3,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
4,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
8575,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0
8576,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
8577,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
8578,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0


In [98]:
# print(test_labels)
# print(test_predictions)
# print(class_labels)
# meu.display_classification_report(true_labels=test_labels, 
#                                   predicted_labels=test_predictions, 
#                                   classes=class_labels)
from sklearn.metrics import confusion_matrix

# Confusion matrix of ground-truth labels (first argument) against predictions (second argument).
confusion_matrix(test_labels, test_predictions)

array([[ 45,   0,   0, ...,   0,   0,   0],
       [  1,  70,   0, ...,   0,   0,   0],
       [  0,   1, 129, ...,   0,   0,   0],
       ...,
       [  0,   0,   0, ...,  47,   1,   0],
       [  0,   0,   0, ...,   2,  45,   2],
       [  0,   0,   0, ...,   0,   1,  67]])

In [0]:
from sklearn.metrics import classification_report
# Per-class precision / recall / F1 / support as a formatted text report.
# NOTE(review): target_names=class_labels assumes class_labels is ordered by
# integer label — confirm against the dataset's label encoding.
report = classification_report(test_labels, test_predictions, target_names=class_labels)

In [90]:
# print() so the report's line breaks and column alignment are rendered.
print(report)

                                precision    recall  f1-score   support

                      Chihuaha       0.71      0.87      0.78        52
              Japanese Spaniel       0.88      0.82      0.85        85
                   Maltese Dog       0.96      0.85      0.90       152
                      Pekinese       0.88      0.86      0.87        49
                      Shih-Tzu       0.79      0.77      0.78       114
              Blenheim Spaniel       0.91      0.89      0.90        88
                      Papillon       0.91      0.93      0.92        96
                   Toy Terrier       0.78      0.83      0.81        72
           Rhodesian Ridgeback       0.84      0.74      0.79        72
                  Afghan Hound       0.98      0.90      0.94       139
                  Basset Hound       0.83      0.92      0.87        75
                        Beagle       0.88      0.80      0.84        95
                    Bloodhound       0.90      0.85      0.88  

In [0]:
# Save the text report to disk. The `with` block closes the file on exit, so
# no explicit close() call is needed.
with open("classification_report.txt", "w") as f:
  f.write(report)

In [0]:
# Same report as a nested dict (class name -> metric name -> value), requested via output_dict=True.
report_dict = classification_report(test_labels, test_predictions, target_names=class_labels, output_dict=True)

In [93]:
# Display the dict form of the report.
report_dict

{'Affenpinscher': {'f1-score': 0.9074074074074074,
  'precision': 0.8448275862068966,
  'recall': 0.98,
  'support': 50},
 'Afghan Hound': {'f1-score': 0.9398496240601504,
  'precision': 0.984251968503937,
  'recall': 0.8992805755395683,
  'support': 139},
 'African Hunting Dog': {'f1-score': 0.937062937062937,
  'precision': 0.9054054054054054,
  'recall': 0.9710144927536232,
  'support': 69},
 'Airedale': {'f1-score': 0.8571428571428571,
  'precision': 0.8936170212765957,
  'recall': 0.8235294117647058,
  'support': 102},
 'American Staffordshire Terrier': {'f1-score': 0.7213114754098361,
  'precision': 0.7586206896551724,
  'recall': 0.6875,
  'support': 64},
 'Appenzeller': {'f1-score': 0.7058823529411765,
  'precision': 0.7058823529411765,
  'recall': 0.7058823529411765,
  'support': 51},
 'Australian Terrier': {'f1-score': 0.802197802197802,
  'precision': 0.8488372093023255,
  'recall': 0.7604166666666666,
  'support': 96},
 'Basenji': {'f1-score': 0.8909090909090909,
  'precisi

In [96]:
# NOTE(review): "classification_report_short.txt" is not written by any visible
# cell (only "classification_report.txt" is) — presumably a hand-trimmed copy;
# confirm it exists before running this cell on a fresh checkout.
with open("classification_report_short.txt", "r") as f:
  text = f.read()

print(text)

                                precision    recall  f1-score   support

                      Chihuaha       0.71      0.87      0.78        52
              Japanese Spaniel       0.88      0.82      0.85        85
                   Maltese Dog       0.96      0.85      0.90       152
                      Pekinese       0.88      0.86      0.87        49
                      Shih-Tzu       0.79      0.77      0.78       114
                                     ..............
                                     ..............
                                     ..............
               Standard Poodle       0.76      0.80      0.78        59
              Mexican Hairless       0.91      0.95      0.93        55
                         Dingo       0.73      0.84      0.78        56
                         Dhole       0.90      0.90      0.90        50
           African Hunting Dog       0.91      0.97      0.94        69

                      accuracy                    