In [1]:
!pip install efficientnet_pytorch

Collecting efficientnet_pytorch
  Downloading efficientnet_pytorch-0.7.1.tar.gz (21 kB)
Building wheels for collected packages: efficientnet-pytorch
  Building wheel for efficientnet-pytorch (setup.py) ... [?25l- \ done
[?25h  Created wheel for efficientnet-pytorch: filename=efficientnet_pytorch-0.7.1-py3-none-any.whl size=16446 sha256=6c0a4e0543116d4225220cf27abf4bc124155570d214b6d1daddabbdfc1f7208
  Stored in directory: /root/.cache/pip/wheels/0e/cc/b2/49e74588263573ff778da58cc99b9c6349b496636a7e165be6
Successfully built efficientnet-pytorch
Installing collected packages: efficientnet-pytorch
Successfully installed efficientnet-pytorch-0.7.1


In [2]:
import os
import time
import copy
import pickle
import random
import numpy as np
import pandas as pd

import itertools
from PIL import Image
from tqdm import tqdm
from scipy.stats import entropy

import torch
import torch.nn as nn
import torch.optim as optim
from torch.optim.lr_scheduler import MultiStepLR

import torchvision
import torchvision.transforms as transforms
from efficientnet_pytorch import EfficientNet

from torch.utils.data import Dataset, DataLoader, Subset
# Seed every random number generator the notebook relies on, not only Python's
# `random`: PyTorch drives DataLoader shuffling and the initialisation of new
# layers, and NumPy is seeded as well for completeness.
SEED = 100
random.seed(SEED)
np.random.seed(SEED)
torch.manual_seed(SEED)

In [3]:
batch_size = 16

# Per-channel mean / std handed to Normalize by both pipelines.
_CHANNEL_MEAN = (0.5, 0.5, 0.5)
_CHANNEL_STD = (0.5, 0.5, 0.5)


def _to_normalized_tensor():
    """Return fresh ToTensor + Normalize steps, the tail shared by both pipelines."""
    return [
        transforms.ToTensor(),
        transforms.Normalize(_CHANNEL_MEAN, _CHANNEL_STD),
    ]


# Training pipeline: random flips and a random affine jitter, then the shared tail.
train_transforms = transforms.Compose(
    [
        transforms.RandomHorizontalFlip(p=0.25),
        transforms.RandomVerticalFlip(p=0.25),
        transforms.RandomAffine(degrees=20, translate=(0.25, 0.25), shear=(-0.25, 0.25)),
    ]
    + _to_normalized_tensor()
)

# Evaluation pipeline: no augmentation, only tensor conversion and normalisation.
test_transforms = transforms.Compose(_to_normalized_tensor())

In [4]:
def unpickle(file):
    """Load and return the object pickled at path ``file``.

    The file is opened in binary mode and decoded with ``encoding='latin1'``.
    Pickle can execute arbitrary code while loading, so only use this on
    trusted files.
    """
    with open(file, 'rb') as handle:
        return pickle.load(handle, encoding='latin1')

In [5]:
# Load the three CIFAR-100 pickles: train split, test split and label metadata.
data_train, data_test, data_meta = map(
    unpickle,
    ("../input/cifar100/train", "../input/cifar100/test", "../input/cifar100/meta"),
)

# One-column table of the fine-grained class names, plus a plain-dict copy of it.
fine_label_names = data_meta['fine_label_names']
subCategory = pd.DataFrame(fine_label_names, columns=['SubClass'])
subCategoryDict = subCategory.to_dict()

In [6]:
# Labelled pool: only the first 10000 training samples (and their fine labels) are kept.
train_pixels, train_labels = data_train['data'], data_train['fine_labels']
x_train = train_pixels[0:10000]
y_train = train_labels[0:10000]

# The whole test split serves as the validation set.
x_valid = data_test['data']
y_valid = data_test['fine_labels']

print(x_train.shape, x_valid.shape)
print(type(x_train), type(y_train))

(10000, 3072) (10000, 3072)
<class 'numpy.ndarray'> <class 'list'>


In [7]:
# Each row is a flat 3072-value image; view it channel-first as (N, 3, 32, 32).
# No transpose happens here: MyDataset moves the channel axis last when it
# builds the PIL image.
image_shape = (3, 32, 32)
x_train = np.asarray(x_train.reshape(len(x_train), *image_shape))
x_valid = np.asarray(x_valid.reshape(len(x_valid), *image_shape))

# The labels arrive as Python lists; turn them into arrays as well.
y_train = np.asarray(y_train)
y_valid = np.asarray(y_valid)

print(type(x_train), type(y_train), type(x_valid), type(y_valid))
print(y_train.shape, y_valid.shape)
print(np.max(y_train), np.max(y_valid))

<class 'numpy.ndarray'> <class 'numpy.ndarray'> <class 'numpy.ndarray'> <class 'numpy.ndarray'>
(10000,) (10000,)
99 99


In [8]:
class MyDataset(Dataset):
    """Dataset over an in-memory array of channel-first images and their labels.

    Args:
        data: indexable collection of images; each item is expected to be a
            (C, H, W) array when a transform is supplied.
        targets: labels, converted once to a ``torch.LongTensor``.
        transform: optional callable applied to the PIL image built from an item.
    """

    def __init__(self, data, targets, transform=None):
        self.data = data
        self.targets = torch.LongTensor(targets)
        self.transform = transform

    def __len__(self):
        return len(self.data)

    def __getitem__(self, index):
        sample, label = self.data[index], self.targets[index]
        if not self.transform:
            # Without a transform the stored item is handed back untouched.
            return sample, label
        # PIL wants height x width x channels, so move the channel axis last.
        channels_last = sample.astype(np.uint8).transpose(1, 2, 0)
        return self.transform(Image.fromarray(channels_last)), label

In [9]:
# Wrap the arrays: augmentation for training, plain normalisation for validation.
train_dataset = MyDataset(x_train, y_train, transform=train_transforms)
valid_dataset = MyDataset(x_valid, y_valid, transform=test_transforms)

# Settings common to both loaders; only the training loader shuffles and
# drops a trailing partial batch.
loader_kwargs = dict(batch_size=batch_size, num_workers=4)
train_dataloader = DataLoader(train_dataset, shuffle=True, drop_last=True, **loader_kwargs)
valid_dataloader = DataLoader(valid_dataset, **loader_kwargs)

dataloaders = dict(train=train_dataloader, val=valid_dataloader)
dataset_sizes = {
    split: len(dataset)
    for split, dataset in (('train', train_dataset), ('val', valid_dataset))
}

print(dataset_sizes['train'], dataset_sizes['val'])
print(len(train_dataloader), len(valid_dataloader))

10000 10000
625 625


In [10]:
def train(model, dataloader, optimizer, criterion, dataset_size, device):
    """Run one optimisation pass over ``dataloader`` and return epoch metrics.

    Args:
        model: network to optimise; it is switched to training mode.
        dataloader: iterable yielding ``(inputs, labels)`` batches.
        optimizer: optimiser stepped once per batch.
        criterion: loss called as ``criterion(outputs, labels)``; its value is
            weighted by the batch size, which assumes a per-batch mean loss.
        dataset_size: denominator for both metrics. It is taken as given, so
            samples skipped by the loader (e.g. via ``drop_last``) still count.
        device: device that each batch is moved to.

    Returns:
        ``(loss, accuracy)``: ``loss`` is a Python float and ``accuracy`` a
        0-dim float64 tensor on ``device``.
    """
    model = model.train()
    running_loss = 0.0
    # Start from a tensor rather than the int 0 so the final `.double()` call
    # also works when the loader yields no batches.
    running_corrects = torch.zeros((), dtype=torch.long, device=device)
    for inputs, labels in dataloader:
        inputs = inputs.to(device)
        labels = labels.to(device)
        optimizer.zero_grad()
        outputs = model(inputs)
        _, preds = torch.max(outputs, 1)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()
        running_loss += loss.item() * inputs.size(0)
        running_corrects += torch.sum(preds == labels)
    return running_loss / dataset_size, running_corrects.double() / dataset_size

In [11]:
def evaluate(model, dataloader, criterion, dataset_size, device):
    """Score ``model`` on ``dataloader`` without updating it.

    Args:
        model: network to score; it is switched to evaluation mode.
        dataloader: iterable yielding ``(inputs, labels)`` batches.
        criterion: loss called as ``criterion(outputs, labels)``; its value is
            weighted by the batch size, which assumes a per-batch mean loss.
        dataset_size: denominator for both metrics.
        device: device that each batch is moved to.

    Returns:
        ``(loss, accuracy)``: ``loss`` is a Python float and ``accuracy`` a
        0-dim float64 tensor on ``device``.
    """
    running_loss = 0.0
    # Start from a tensor rather than the int 0 so the final `.double()` call
    # also works when the loader yields no batches.
    running_corrects = torch.zeros((), dtype=torch.long, device=device)
    model = model.eval()
    with torch.no_grad():
        for inputs, labels in dataloader:
            inputs = inputs.to(device)
            labels = labels.to(device)
            outputs = model(inputs)
            _, preds = torch.max(outputs, 1)
            loss = criterion(outputs, labels)
            running_loss += loss.item() * inputs.size(0)
            running_corrects += torch.sum(preds == labels)
    return running_loss / dataset_size, running_corrects.double() / dataset_size

In [12]:
# All three models are trained on this sampled_dataset; the loader built here
# is the one used for wide-resnet.
# N is the number of samples drawn (without replacement) from the training set.
N = 8000

# Draw indices from the dataset's actual length rather than a hard-coded 10000,
# so the subset stays valid if the size of the labelled pool changes.
random_idx = random.sample(range(len(train_dataset)), N)
sampled_dataset = Subset(train_dataset, random_idx)
sampled_dataloader = DataLoader(sampled_dataset, batch_size=batch_size, shuffle=True, num_workers=4, drop_last=True)

print(len(sampled_dataset))

8000


In [13]:
class WideResNet(nn.Module):
    """Pretrained ``wide_resnet50_2`` whose final layer is replaced by a 100-way linear head."""

    def __init__(self):
        super().__init__()
        backbone = torchvision.models.wide_resnet50_2(pretrained=True)
        head = nn.Linear(in_features=backbone.fc.in_features, out_features=100)
        backbone.fc = head
        self.base_model = backbone
        # The same head object is also exposed as `self.linear`, so its weights
        # appear under both `base_model.fc.*` and `linear.*` in the state dict.
        self.linear = head

    def forward(self, x):
        return self.base_model(x)

In [14]:
# Use the first GPU when one is available, otherwise fall back to the CPU.
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
model_wide = WideResNet().to(device)

# Adam with a step schedule whose only milestone is epoch 10 (gamma 0.1).
optimizer = optim.Adam(model_wide.parameters(), lr=0.0001)
scheduler = MultiStepLR(optimizer, milestones=[10], gamma=0.1)
criterion = nn.CrossEntropyLoss()

Downloading: "https://download.pytorch.org/models/wide_resnet50_2-95faca4d.pth" to /root/.cache/torch/hub/checkpoints/wide_resnet50_2-95faca4d.pth


  0%|          | 0.00/132M [00:00<?, ?B/s]

In [15]:
# Early-stopping bookkeeping: `counter` is the number of consecutive epochs
# without a new best loss on valid_dataloader; training stops once it reaches 5.
counter = 0
num_epochs = 120
best_loss = 50000
best_loss_acc = 0.0
best_model = copy.deepcopy(model_wide.state_dict())

for epoch in range(num_epochs):
    print('Epoch {}/{}'.format(epoch+1, num_epochs))
    print('-' * 10)

    train_epoch_loss, train_epoch_acc = train(
        model=model_wide,
        dataloader=sampled_dataloader,
        criterion=criterion,
        optimizer=optimizer,
        dataset_size=len(sampled_dataset),
        device=device,
    )
    print('Train Loss: {:.4f}, Train Acc: {:.4f}'.format(train_epoch_loss, train_epoch_acc))

    test_epoch_loss, test_epoch_acc = evaluate(
        model=model_wide,
        dataloader=valid_dataloader,
        criterion=criterion,
        dataset_size=len(valid_dataset),
        device=device,
    )
    print('Test Loss: {:.4f}, Test Acc: {:.4f}'.format(test_epoch_loss, test_epoch_acc))

    scheduler.step()

    improved = test_epoch_loss < best_loss
    if not improved:
        # No improvement this epoch: count it and stop after five in a row.
        counter += 1
        print("value of counter right now: ", counter)
        if counter >= 5:
            break
        continue

    # New best loss: reset the counter and snapshot metrics and weights.
    counter = 0
    best_loss = copy.deepcopy(test_epoch_loss)
    best_loss_acc = copy.deepcopy(test_epoch_acc)
    print('Best loss till now {:.4f}, saving model'.format(best_loss))
    print('valid accuracy for this loss {:.4f}, saving model'.format(best_loss_acc))
    best_model = copy.deepcopy(model_wide.state_dict())

print(best_loss, best_loss_acc)
# Restoring best model
model_wide.load_state_dict(best_model)

Epoch 1/120
----------
Train Loss: 4.3667, Train Acc: 0.0566
Test Loss: 3.9203, Test Acc: 0.1208
Best loss till now 3.9203, saving model
valid accuracy for this loss 0.1208, saving model
Epoch 2/120
----------
Train Loss: 3.8745, Train Acc: 0.1096
Test Loss: 3.5267, Test Acc: 0.1797
Best loss till now 3.5267, saving model
valid accuracy for this loss 0.1797, saving model
Epoch 3/120
----------
Train Loss: 3.5515, Train Acc: 0.1551
Test Loss: 3.1792, Test Acc: 0.2356
Best loss till now 3.1792, saving model
valid accuracy for this loss 0.2356, saving model
Epoch 4/120
----------
Train Loss: 3.3433, Train Acc: 0.1915
Test Loss: 3.0880, Test Acc: 0.2595
Best loss till now 3.0880, saving model
valid accuracy for this loss 0.2595, saving model
Epoch 5/120
----------
Train Loss: 3.1895, Train Acc: 0.2225
Test Loss: 2.9480, Test Acc: 0.2992
Best loss till now 2.9480, saving model
valid accuracy for this loss 0.2992, saving model
Epoch 6/120
----------
Train Loss: 3.0865, Train Acc: 0.2401
Test

<All keys matched successfully>

In [16]:
# Write the weights currently held by model_wide (the restored best snapshot) to disk.
checkpoint_path = 'wide_resnet_cifar100_8000_samples_scheduler.pth'
torch.save(model_wide.state_dict(), checkpoint_path)

In [17]:
# Show the best loss recorded during training and the accuracy measured at that
# epoch; the accuracy is printed as whatever object evaluate() returned.
print(best_loss, best_loss_acc)

2.269265047073364 tensor(0.4440, device='cuda:0', dtype=torch.float64)


In [18]:
def calculate_mean_confidence(predictions, labels):
    """Summarise confidence against accuracy for a set of predictions.

    Args:
        predictions: 2-D array of per-class scores, one row per sample.
        labels: 1-D array of true class indices.

    Returns:
        ``(mean_confidence, gap, accuracy)``, where ``mean_confidence`` is the
        average of each row's maximum score, ``accuracy`` is the fraction of
        rows whose argmax equals the label, and ``gap`` is
        ``mean_confidence - accuracy``.
    """
    top_scores = np.max(predictions, 1)
    n_correct = (labels == np.argmax(predictions, 1)).sum()
    mean_confidence = np.average(top_scores)
    accuracy = n_correct / labels.shape[0]
    return mean_confidence, mean_confidence - accuracy, accuracy

In [19]:
# Per-batch softmax outputs and labels for the validation loader, as NumPy arrays.
predictions_wide, labels_wide = [], []

# Softmax over dim 1 of the model output.
softmax = nn.Softmax(dim=1)

model_wide = model_wide.eval()
with torch.no_grad():
    for images, labels in tqdm(valid_dataloader):
        images = images.to(device)
        labels = labels.to(device)
        output = softmax(model_wide(images))
        # Move results back to the host before storing them.
        predictions_wide.append(output.cpu().numpy())
        labels_wide.append(labels.cpu().numpy())

100%|██████████| 625/625 [00:18<00:00, 33.39it/s]


In [20]:
# Sanity check: both lists hold one entry per validation batch, and the first
# batch's prediction array and label vector have matching leading dimensions.
print(len(labels_wide), len(predictions_wide))
print(predictions_wide[0].shape, labels_wide[0].shape)

625 625
(16, 100) (16,)


In [21]:
# Stack the per-batch arrays along the first axis in a single call each.
# Growing the result one batch at a time re-copies everything accumulated so
# far on every iteration, which is quadratic in the number of batches.
predictions_wide_final = np.concatenate(predictions_wide)
labels_wide_final = np.concatenate(labels_wide)

In [22]:
# Mean top-class probability, its gap to accuracy (confidence minus accuracy),
# and the accuracy itself, computed over the stacked validation predictions.
conf_score, cal_err, acc = calculate_mean_confidence(predictions_wide_final, labels_wide_final)
print(conf_score, cal_err, acc)

0.59110266 0.14710265970230102 0.444
