In [None]:
!pip install efficientnet_pytorch

In [None]:
import os
import sys
import time
import copy
import random
import pickle
import itertools
import numpy as np
import pandas as pd
from PIL import Image
import matplotlib.pyplot as plt
from scipy.stats import entropy
from tqdm import tqdm

import torch
import torch.nn as nn
import torch.optim as optim
import torchvision

import torchvision.transforms as transforms
from torch.utils.data import Dataset,DataLoader, Subset
import PIL, PIL.ImageOps, PIL.ImageEnhance, PIL.ImageDraw
import torchvision.transforms.functional as F
from efficientnet_pytorch import EfficientNet

In [None]:
# Mini-batch size used by the labelled, test and sampled pseudo-labelled loaders.
batch_size = 16

# Training pipeline: random flips and an affine jitter on the PIL image, then
# tensor conversion and per-channel normalisation with mean 0.5 / std 0.5.
train_transforms = transforms.Compose(
    [
        transforms.RandomHorizontalFlip(p=0.25),
        transforms.RandomVerticalFlip(p=0.25),
        transforms.RandomAffine(degrees=20, translate=(0.25, 0.25), shear=(-0.25, 0.25)),
        transforms.ToTensor(),
        transforms.Normalize(mean=(0.5, 0.5, 0.5), std=(0.5, 0.5, 0.5)),
    ]
)

# Evaluation pipeline: no augmentation, same tensor conversion and normalisation.
test_transforms = transforms.Compose(
    [
        transforms.ToTensor(),
        transforms.Normalize(mean=(0.5, 0.5, 0.5), std=(0.5, 0.5, 0.5)),
    ]
)

In [None]:
def unpickle(file):
    """Return the object stored in the pickle file at path ``file``.

    The file is read in binary mode and decoded with ``encoding='latin1'``.
    NOTE(review): ``pickle.load`` can execute arbitrary code; only use this on
    trusted files.
    """
    handle = open(file, 'rb')
    try:
        return pickle.load(handle, encoding='latin1')
    finally:
        # Mirrors the context-manager behaviour: always release the file.
        handle.close()

In [None]:
# Load the three CIFAR-100 pickles (train split, test split, label metadata).
data_train, data_test, data_meta = map(
    unpickle,
    ("../input/cifar100/train", "../input/cifar100/test", "../input/cifar100/meta"),
)

In [None]:
# One-column table of the fine label names, plus its dict form for inspection.
subCategory = pd.DataFrame(data_meta['fine_label_names'], columns=['SubClass'])
subCategoryDict = subCategory.to_dict()
for column_mapping in subCategoryDict.values():
    print(column_mapping)

In [None]:
# Only the first 10000 training samples keep their labels; the full test split is used.
x_train = data_train['data'][:10000]
y_train = data_train['fine_labels'][:10000]
x_test = data_test['data']
y_test = data_test['fine_labels']
print(x_train.shape, x_test.shape)
print(type(x_train), type(y_train))

In [None]:
# Reshape each image row to (3, 32, 32) and make sure images and labels are ndarrays.
x_train = np.asarray(x_train.reshape((len(x_train), 3, 32, 32)))
x_test = np.asarray(x_test.reshape((len(x_test), 3, 32, 32)))
y_train = np.asarray(y_train)
y_test = np.asarray(y_test)

print(type(x_train), type(y_train), type(x_test), type(y_test))
print(y_train.shape, y_test.shape)
print(np.max(y_train), np.max(y_test))

In [None]:
class MyDataset(Dataset):
    """Labelled image dataset yielding ``{'images': ..., 'labels': ...}`` dicts.

    Args:
        data: indexable collection of images; when a transform is given each
            item is cast to uint8 and its axes permuted with ``(1, 2, 0)``
            before being turned into a PIL image.
        targets: integer class labels, stored as a ``torch.LongTensor``.
        transform: optional callable applied to the PIL image.
    """

    def __init__(self, data, targets, transform=None):
        self.data = data
        self.targets = torch.LongTensor(targets)
        self.transform = transform

    def __len__(self):
        """Number of samples, taken from ``data``."""
        return len(self.data)

    def __getitem__(self, index):
        """Return the sample at ``index``; the raw item is returned when no transform is set."""
        raw = self.data[index]
        label = self.targets[index]
        if not self.transform:
            return {'images': raw, 'labels': label}
        # Build a PIL image from the uint8, axis-permuted array before transforming.
        pil_image = Image.fromarray(raw.astype(np.uint8).transpose(1, 2, 0))
        return {'images': self.transform(pil_image), 'labels': label}

In [None]:
# Labelled training set (augmented) and test set (plain normalisation).
train_dataset = MyDataset(x_train, y_train, transform=train_transforms)
test_dataset = MyDataset(x_test, y_test, transform=test_transforms)

# The training loader shuffles and drops the last incomplete batch.
train_dataloader = DataLoader(
    train_dataset,
    batch_size=batch_size,
    shuffle=True,
    num_workers=4,
    drop_last=True,
)
test_dataloader = DataLoader(test_dataset, batch_size=batch_size, num_workers=4)

dataloaders = {'train': train_dataloader, 'test': test_dataloader}
dataset_sizes = {'train': len(train_dataset), 'test': len(test_dataset)}

print(dataset_sizes['train'], dataset_sizes['test'])
print(len(train_dataloader), len(test_dataloader))

In [None]:
# Training samples 10000..29999 are treated as unlabelled (their labels are never read).
x_unlabelled = np.asarray(data_train['data'][10000:30000])
x_unlabelled = x_unlabelled.reshape((len(x_unlabelled), 3, 32, 32))
print(x_unlabelled.shape, type(x_unlabelled))

In [None]:
class UnlabelledDataset(Dataset):
    """Image-only dataset yielding ``{'image_names': index, 'images': image}``.

    Args:
        data: indexable collection of images; each item is cast to uint8 and
            its axes permuted with ``(1, 2, 0)`` before becoming a PIL image.
        m: accepted for call compatibility; not used by this class.
        transforms: optional callable applied to the PIL image.
    """

    def __init__(self, data, m, transforms=None):
        self.transforms = transforms
        self.data = data
        # 1-based running ids, one per sample.
        self.images_list = list(np.arange(1, len(data) + 1))

    def __len__(self):
        """Number of samples, taken from ``data``."""
        return len(self.data)

    def __getitem__(self, index):
        """Return the (optionally transformed) image at ``index`` with the index as its name."""
        pil_image = Image.fromarray(self.data[index].astype(np.uint8).transpose(1, 2, 0))
        image = pil_image if self.transforms is None else self.transforms(pil_image)
        return {'image_names': index, 'images': image}

In [None]:
# Unlabelled images go through the evaluation transforms and are read in order
# (shuffle=False).
unlabelled_dataset = UnlabelledDataset(x_unlabelled, m=4, transforms=test_transforms)
unlabelled_dataloader = DataLoader(
    unlabelled_dataset,
    batch_size=256,
    num_workers=8,
    shuffle=False,
    pin_memory=True,
)

print(len(unlabelled_dataset))
print(len(unlabelled_dataloader))

In [None]:
class Baseline_Efficient(nn.Module):
    """EfficientNet-b0 (built with ``EfficientNet.from_name``) with a 100-way linear head.

    The head is registered both as ``self.linear`` and as the backbone's ``_fc``.
    """

    def __init__(self):
        super().__init__()
        self.base_model = EfficientNet.from_name('efficientnet-b0')
        feature_dim = self.base_model._fc.in_features
        self.linear = nn.Linear(in_features=feature_dim, out_features=100)
        # Swap the backbone's classifier for the new head.
        self.base_model._fc = self.linear

    def forward(self, x):
        """Return the backbone's output (100 logits per sample) for ``x``."""
        return self.base_model(x)


class Baseline_WideResNet(nn.Module):
    """torchvision ``wide_resnet50_2`` (``pretrained=False``) with a 100-way linear head.

    The head is registered both as ``self.linear`` and as the backbone's ``fc``.
    """

    def __init__(self):
        super().__init__()
        self.base_model = torchvision.models.wide_resnet50_2(pretrained=False)
        feature_dim = self.base_model.fc.in_features
        self.linear = nn.Linear(in_features=feature_dim, out_features=100)
        # Swap the backbone's classifier for the new head.
        self.base_model.fc = self.linear

    def forward(self, x):
        """Return the backbone's output (100 logits per sample) for ``x``."""
        return self.base_model(x)


class Baseline_ResNet(nn.Module):
    """torchvision ``resnet18`` (``pretrained=False``) with a 100-way linear head.

    The head is registered both as ``self.linear`` and as the backbone's ``fc``,
    so checkpoints keep the same parameter names as before.
    """

    def __init__(self):
        super().__init__()
        self.base_model = torchvision.models.resnet18(pretrained=False)
        # Read the feature width from the backbone instead of hard-coding 512,
        # matching how the other baseline wrappers size their heads.
        self.linear = nn.Linear(in_features=self.base_model.fc.in_features, out_features=100)
        self.base_model.fc = self.linear

    def forward(self, x):
        """Return the backbone's output (100 logits per sample) for ``x``."""
        x = self.base_model(x)
        return x

In [None]:
# Use the GPU when PyTorch can see one, otherwise the CPU.
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")

# Checkpoint files for the three ensemble members.
res_path = '../input/resnet-model-cifar100-8000-samples-with-scheduler/resnet_cifar100_8000_samples_scheduler.pth'
eff_path = '../input/eff-net-model-cifar100-8000-samples/efficientNet_cifar100_8000_samples_scheduler.pth'
wide_path = '../input/wide-net-model-cifar100-8000-samples/wide_resnet_cifar100_8000_samples_scheduler.pth'

In [None]:
# Baseline_model_res = Baseline_ResNet()
# Baseline_model_eff = Baseline_Efficient()
# Baseline_model_wide = Baseline_WideResNet()


# Baseline_model_res.load_state_dict(torch.load('../input/resnet-model-cifar100-8000-samples-with-scheduler/resnet_cifar100_8000_samples_scheduler.pth'))
# Baseline_model_eff.load_state_dict(torch.load('../input/eff-net-model-cifar100-8000-samples/efficientNet_cifar100_8000_samples_scheduler.pth'))
# Baseline_model_wide.load_state_dict(torch.load('../input/wide-net-model-cifar100-8000-samples/wide_resnet_cifar100_8000_samples_scheduler.pth'))

# Baseline_model_eff = Baseline_model_eff.to(device)
# Baseline_model_res = Baseline_model_res.to(device)
# Baseline_model_wide = Baseline_model_wide.to(device)

In [None]:
def sharpen(vector, t=0.3):
    """Temperature-sharpen a vector and renormalise it to sum to one.

    Each entry is raised to the power ``1 / t`` and the result is divided by
    its sum, so ``t < 1`` concentrates mass on the largest entries.

    Args:
        vector: array-like of non-negative values (e.g. class probabilities).
        t: temperature; must be non-zero.

    Returns:
        numpy array of the same shape as ``vector``.
    """
    # Compute the power once and reuse it for numerator and denominator.
    powered = np.power(vector, 1 / t)
    return powered / np.sum(powered)

In [None]:
# softmax = nn.Softmax(dim=1)
# predictions = []
# image_names = []

# with torch.no_grad():
#     Baseline_model_res = Baseline_model_res.eval()
#     for sample in tqdm(unlabelled_dataloader):
#         images = sample['images'].to(device)
#         batch_size = images.size(0)
#         num_ensemble = images.size(1)
#         images = images.view(images.size(0) * images.size(1), images.size(2), images.size(3), images.size(4))
#         output = softmax(Baseline_model_res(images))
#         output = output.view(batch_size, num_ensemble, output.size(1))
#         output = torch.mean(output, 1)
#         predictions.append(output.cpu().numpy())
#         image_names.append(sample['image_names'])

# predictions_a_np = predictions[0]
# for i in range(1, len(predictions)):
#     predictions_a_np = np.concatenate((predictions_a_np, predictions[i]))

In [None]:
# predictions = []
# with torch.no_grad():
#     Baseline_model_eff = Baseline_model_eff.eval()
#     for sample in tqdm(unlabelled_dataloader):
#         images = sample['images'].to(device)
#         batch_size = images.size(0)
#         num_ensemble = images.size(1)
#         images = images.view(images.size(0) * images.size(1), images.size(2), images.size(3), images.size(4))
#         output = softmax(Baseline_model_eff(images))
#         output = output.view(batch_size, num_ensemble, output.size(1))
#         output = torch.mean(output, 1)
#         predictions.append(output.cpu().numpy())

# predictions_b_np = predictions[0]
# for i in range(1, len(predictions)):
#     predictions_b_np = np.concatenate((predictions_b_np, predictions[i]))

In [None]:
# predictions = []
# with torch.no_grad():
#     Baseline_model_wide = Baseline_model_wide.eval()
#     for sample in tqdm(unlabelled_dataloader):
#         images = sample['images'].to(device)
#         batch_size = images.size(0)
#         num_ensemble = images.size(1)
#         images = images.view(images.size(0) * images.size(1), images.size(2), images.size(3), images.size(4))
#         output = softmax(Baseline_model_wide(images))
#         output = output.view(batch_size, num_ensemble, output.size(1))
#         output = torch.mean(output, 1)
#         predictions.append(output.cpu().numpy())

# predictions_c_np = predictions[0]
# for i in range(1, len(predictions)):
#     predictions_c_np = np.concatenate((predictions_c_np, predictions[i]))

In [None]:
def _predict_probabilities(model, dataloader, device):
    """Run ``model`` in eval mode over ``dataloader``; return (softmax outputs, per-batch image names)."""
    softmax = nn.Softmax(dim=1)
    batch_probabilities = []
    batch_names = []
    model = model.eval()
    with torch.no_grad():
        for sample in tqdm(dataloader):
            images = sample['images'].to(device)
            batch_probabilities.append(softmax(model(images)).cpu().numpy())
            batch_names.append(sample['image_names'])
    # One concatenate call instead of growing the array batch by batch.
    return np.concatenate(batch_probabilities), batch_names


def pseudo_label(model_res_path, model_eff_path, model_wide_path, unlabelled_dataloader, device):
    """Produce sharpened ensemble pseudo-labels for every unlabelled image.

    The three baseline networks (ResNet, EfficientNet, WideResNet) are restored
    from their checkpoints one at a time, each predicts softmax probabilities
    over ``unlabelled_dataloader``, the three predictions are averaged, and
    every row is passed through ``sharpen``.

    Args:
        model_res_path: checkpoint path for ``Baseline_ResNet``.
        model_eff_path: checkpoint path for ``Baseline_Efficient``.
        model_wide_path: checkpoint path for ``Baseline_WideResNet``.
        unlabelled_dataloader: loader yielding dicts with ``'images'`` and
            ``'image_names'``; it must iterate in the same order on every pass
            so the three predictions line up row by row.
        device: torch device used for inference.

    Returns:
        ``(sharpened_arr, image_names)``: the sharpened ensemble probabilities
        (one row per image) and the flattened list of image names collected
        during the first pass.
    """
    members = (
        (Baseline_ResNet, model_res_path),
        (Baseline_Efficient, model_eff_path),
        (Baseline_WideResNet, model_wide_path),
    )

    member_predictions = []
    image_names = None
    for model_cls, checkpoint_path in members:
        model = model_cls()
        # map_location lets checkpoints saved on a GPU load on a CPU-only host.
        model.load_state_dict(torch.load(checkpoint_path, map_location=device))
        model = model.to(device)
        probabilities, names = _predict_probabilities(model, unlabelled_dataloader, device)
        member_predictions.append(probabilities)
        if image_names is None:
            image_names = names
        # Drop the reference so only one network occupies device memory at a time.
        del model

    ensemble_arr = np.mean(member_predictions, 0)
    sharpened_arr = np.apply_along_axis(sharpen, 1, ensemble_arr)
    image_names = list(itertools.chain.from_iterable(image_names))

    return sharpened_arr, image_names

In [None]:
# ensemble_arr = np.mean((predictions_a_np, predictions_b_np, predictions_c_np), 0)
# sharpened_arr = np.apply_along_axis(sharpen, 1, ensemble_arr)
# image_names = list(itertools.chain.from_iterable(image_names))

In [None]:
def entropy_fn(vector):
    """Return ``scipy.stats.entropy(vector)`` for a single probability vector."""
    result = entropy(vector)
    return result

In [None]:
class PseudoLabelledDataset(Dataset):
    """Unlabelled images paired with soft pseudo-labels, ordered by prediction entropy.

    Rows are sorted by ascending entropy of their prediction and truncated to
    the ``topn`` lowest-entropy entries. Each row remembers the name (index
    into ``data``) of the image it belongs to.

    Args:
        data: indexable collection of images; each item is cast to uint8 and
            its axes permuted with ``(1, 2, 0)`` before becoming a PIL image.
        predictions: 2-D numpy array, one probability row per image.
        image_names: for each prediction row, the integer index of the
            corresponding image in ``data``.
        topn: maximum number of lowest-entropy samples to keep.
        transforms: optional callable applied to the PIL image.
    """

    def __init__(self, data, predictions, image_names, topn, transforms=None):
        super(PseudoLabelledDataset, self).__init__()
        self.topn = topn
        self.data = data
        self.transforms = transforms
        self.predictions = predictions
        self.image_names = image_names
        # Keep only the ``topn`` lowest-entropy rows.
        self.sorted_dataframe = self.sort_by_entropy()[:topn]

    def sort_by_entropy(self):
        """Return a frame of (image_names, predictions, entropy) sorted by ascending entropy."""
        sorted_dataframe = pd.DataFrame()
        sorted_dataframe['image_names'] = self.image_names
        sorted_dataframe['predictions'] = self.predictions.tolist()
        sorted_dataframe['entropy'] = np.apply_along_axis(entropy_fn, 1, self.predictions)
        sorted_dataframe = sorted_dataframe.sort_values(by='entropy', ascending=True).reset_index(drop=True)
        return sorted_dataframe

    def __len__(self):
        # The frame can hold fewer than ``topn`` rows when ``topn`` exceeds the
        # number of predictions, so report the real row count.
        return len(self.sorted_dataframe)

    def __getitem__(self, idx):
        """Return the ``idx``-th lowest-entropy sample as ``{'images', 'labels'}``."""
        image_name = self.sorted_dataframe.iloc[idx, 0]
        image_prediction = self.sorted_dataframe.iloc[idx, 1]
        # After sorting, row ``idx`` no longer corresponds to ``data[idx]``;
        # look the image up by its stored name so image and label stay paired.
        image = self.data[int(image_name)]
        image = Image.fromarray(image.astype(np.uint8).transpose(1, 2, 0))
        if self.transforms:
            image = self.transforms(image)
        return {'images': image, 'labels': np.array(image_prediction, dtype=np.float32)}

In [None]:
# Pseudo-label every unlabelled image with the three-model ensemble.
sharpened_arr, image_names = pseudo_label(res_path, eff_path, wide_path, unlabelled_dataloader, device)
# Keep all unlabelled samples; the size is derived from the data so it cannot
# drift from the slice that produced x_unlabelled.
pseudo_labelled_dataset = PseudoLabelledDataset(x_unlabelled, predictions=sharpened_arr,
                                                image_names=image_names, topn=len(x_unlabelled),
                                                transforms=train_transforms)

# Train on a random subset of N pseudo-labelled samples.
N = 16000
random_idx = random.sample(range(len(pseudo_labelled_dataset)), N)
sampled_pseudo_dataset = Subset(pseudo_labelled_dataset, random_idx)
sampled_pseudo_dataloader = DataLoader(sampled_pseudo_dataset, batch_size=batch_size, num_workers=4, pin_memory=True, shuffle=True)
print('sampled_pseudo_dataset Size', len(sampled_pseudo_dataset))

In [None]:
def accuracy(y_true, y_pred):
    """Fraction of positions where ``y_pred`` equals ``y_true``.

    Args:
        y_true: tensor of reference class indices.
        y_pred: tensor of predicted class indices, same shape as ``y_true``.

    Returns:
        0-dim float tensor: number of matches divided by ``y_true.size(0)``.
    """
    # Cast the match count to float before dividing: dividing an integer
    # tensor with ``/`` floor-divides (or raises) on older PyTorch releases.
    return torch.sum(y_true == y_pred).float() / y_true.size(0)

In [None]:
class AverageMeter(object):
    """Tracks a weighted running sum, the total weight and their ratio."""

    def __init__(self):
        self.count = self.sum = self.avg = 0

    def update(self, val, n):
        """Add ``val`` with weight ``n`` and refresh the running average."""
        weighted = val * n
        self.sum += weighted
        self.count += n
        self.avg = self.sum / self.count

In [None]:
def evaluate(model, dataloader, criterion, device, epoch, dataloader_size):
    """Compute the sample-weighted mean loss and accuracy of ``model`` on ``dataloader``.

    Args:
        model: network put into eval mode for the pass.
        dataloader: yields dicts with ``'images'`` and ``'labels'``.
        criterion: loss called as ``criterion(outputs, labels)``.
        device: device the batches are moved to.
        epoch: unused; kept for call compatibility.
        dataloader_size: unused; kept for call compatibility.

    Returns:
        ``(mean_loss, mean_accuracy)`` as accumulated by two ``AverageMeter``s.
    """
    loss_meter = AverageMeter()
    accuracy_meter = AverageMeter()
    model = model.eval()
    with torch.no_grad():
        for batch in dataloader:
            inputs = batch['images'].to(device)
            labels = batch['labels'].to(device)
            outputs = model(inputs)
            # Index of the largest output per row is the predicted class.
            predictions = torch.max(outputs, 1)[1]
            batch_loss = criterion(outputs, labels)
            batch_count = inputs.size(0)
            loss_meter.update(batch_loss.item(), batch_count)
            accuracy_meter.update(accuracy(y_true=labels, y_pred=predictions), batch_count)
    return loss_meter.avg, accuracy_meter.avg

In [None]:
class ResNet(nn.Module):
    """torchvision ``resnet18`` (``pretrained=True``) with a 100-way linear head.

    The head is registered both as ``self.linear`` and as the backbone's ``fc``.
    """

    def __init__(self):
        super().__init__()
        self.base_model = torchvision.models.resnet18(pretrained=True)
        # Read the feature width from the backbone instead of hard-coding 512.
        self.linear = nn.Linear(in_features=self.base_model.fc.in_features, out_features=100)
        self.base_model.fc = self.linear

    def forward(self, x):
        """Return the backbone's output (100 logits per sample) for ``x``."""
        x = self.base_model(x)
        return x

In [None]:
# Optimiser hyper-parameters.
lr = 0.0001
weight_decay = 0.00001

model_res = ResNet()
model_res.to(device)
optimizer = optim.Adam(params=model_res.parameters(), lr=lr, weight_decay=weight_decay)

# criterion1: loss on labelled batches; criterion2: loss on pseudo-labelled batches.
criterion1 = nn.CrossEntropyLoss()
criterion2 = nn.MSELoss()

# Multiply the learning rate by 0.1 at scheduler steps 20, 30 and 40.
lr_scheduler = optim.lr_scheduler.MultiStepLR(optimizer, milestones=[20, 30, 40], gamma=0.1)

In [None]:
def train(model, labelled_dataloader, pseudo_labelled_dataloader, optimizer, criterion1, criterion2, device, epoch, lambda_u, dataloader_size):
    """Run one semi-supervised training epoch and print its average metrics.

    Each of the ``dataloader_size`` steps draws one labelled batch and one
    pseudo-labelled batch and minimises
    ``criterion1(outputs, labels) + lambda_u * criterion2(softmax(outputs_u), soft_labels)``.
    A loader that runs out is restarted from its beginning.

    Args:
        model: network to optimise (put into train mode).
        labelled_dataloader: yields dicts with ``'images'`` and integer ``'labels'``.
        pseudo_labelled_dataloader: yields dicts with ``'images'`` and soft
            ``'labels'`` (one probability row per image).
        optimizer: optimiser stepping ``model``'s parameters.
        criterion1: loss applied to the labelled batch.
        criterion2: loss applied to the pseudo-labelled batch.
        device: device the batches are moved to.
        epoch: unused; kept for call compatibility.
        lambda_u: weight of the pseudo-label loss.
        dataloader_size: number of optimisation steps in this epoch.

    Returns:
        None. Epoch averages are printed.
    """
    def _next_batch(iterator, dataloader):
        """Return (batch, iterator, restarted), re-creating the iterator when exhausted."""
        try:
            return next(iterator), iterator, False
        except StopIteration:
            iterator = iter(dataloader)
            return next(iterator), iterator, True

    softmax = nn.Softmax(dim=1)
    model = model.train()
    running_l2_loss = AverageMeter()
    running_total_loss = AverageMeter()
    running_cross_entropy_loss = AverageMeter()
    running_accuracy = AverageMeter()

    labelled_itr = iter(labelled_dataloader)
    pseudo_itr = iter(pseudo_labelled_dataloader)
    for _step in range(dataloader_size):
        # The built-in next() is used because DataLoader iterators no longer
        # expose a .next() method on current PyTorch.
        sample, labelled_itr, _restarted = _next_batch(labelled_itr, labelled_dataloader)
        labelled_inputs, labelled_labels = sample['images'].to(device), sample['labels'].to(device)

        sample, pseudo_itr, pseudo_restarted = _next_batch(pseudo_itr, pseudo_labelled_dataloader)
        if pseudo_restarted:
            # dataloader_size is expected to fit within one pass of the pseudo loader.
            print('Something wrong')
        pseudo_inputs, pseudo_labels = sample['images'].to(device), sample['labels'].to(device)

        labelled_outputs = model(labelled_inputs)
        pseudo_outputs = softmax(model(pseudo_inputs))
        _, labelled_predictions = torch.max(labelled_outputs, 1)
        _, pseudo_predictions = torch.max(pseudo_outputs, 1)
        # Hard class implied by each soft pseudo-label, used for accuracy only.
        _, pseudo_one_hot_labels = torch.max(pseudo_labels, 1)
        loss1 = criterion1(labelled_outputs, labelled_labels)
        loss2 = criterion2(pseudo_outputs.float(), pseudo_labels.float())
        loss = loss1 + lambda_u * loss2
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        # Weight every metric by the size of the batch it was computed on.
        n_labelled = labelled_inputs.size(0)
        n_pseudo = pseudo_inputs.size(0)
        running_cross_entropy_loss.update(loss1.item(), n_labelled)
        running_l2_loss.update(loss2.item(), n_pseudo)
        running_total_loss.update(loss.item(), n_labelled + n_pseudo)
        running_accuracy.update(accuracy(y_true=labelled_labels, y_pred=labelled_predictions), n_labelled)
        running_accuracy.update(accuracy(y_true=pseudo_one_hot_labels, y_pred=pseudo_predictions), n_pseudo)
    print('Training Cross Entorpy Loss {}'.format(running_cross_entropy_loss.avg))
    print('Training L2 Loss {}'.format(running_l2_loss.avg))
    print('Training accuracy {}'.format(running_accuracy.avg))
    print('Training Total Loss {}'.format(running_total_loss.avg))

In [None]:
t_start = time.time()
counter = 0            # consecutive epochs without a new best test loss
best_loss = 100000
best_loss_acc = 0.0
epochs = 120
best_model = copy.deepcopy(model_res.state_dict())
for epoch in range(epochs):
    print("\n")
    print('Training Epoch {} __________'.format(epoch + 1))
    train(
        model=model_res,
        labelled_dataloader=train_dataloader,
        pseudo_labelled_dataloader=sampled_pseudo_dataloader,
        optimizer=optimizer,
        criterion1=criterion1,
        criterion2=criterion2,
        device=device,
        epoch=epoch,
        lambda_u=5,
        dataloader_size=len(sampled_pseudo_dataloader),
    )

    test_epoch_loss, test_epoch_acc = evaluate(
        model=model_res,
        dataloader=test_dataloader,
        criterion=criterion1,
        device=device,
        epoch=epoch,
        dataloader_size=len(test_dataloader),
    )
    lr_scheduler.step()

    if not test_epoch_loss < best_loss:
        # No improvement: stop after 7 such epochs in a row.
        counter = counter + 1
        print("value of counter right now: ", counter)
        if counter >= 7:
            break
        continue

    # New best test loss: reset patience and snapshot the weights.
    counter = 0
    best_loss = copy.deepcopy(test_epoch_loss)
    best_loss_acc = copy.deepcopy(test_epoch_acc)
    print('Best loss till now {:.4f}'.format(best_loss))
    print('valid accuracy for this loss {:.4f}'.format(best_loss_acc))
    print("saving the model")
    best_model = copy.deepcopy(model_res.state_dict())

# Put the weights of the best epoch back into the model.
model_res.load_state_dict(best_model)

In [None]:
# Lowest test loss seen during training and the accuracy recorded at that same epoch.
print(best_loss, best_loss_acc)

In [None]:
# Write model_res's current weights (its state_dict) to a file in the working directory.
torch.save(model_res.state_dict(), 'res_model_no_aug_cifar100_10k_20k_lambda_5_sharp_03.pth')

In [None]:
def calculate_mean_confidence(predictions, labels):
    """Summarise confidence versus accuracy for a batch of predictions.

    Args:
        predictions: 2-D array with one row of class probabilities per sample.
        labels: 1-D array of true class indices.

    Returns:
        ``(avg_prob, avg_prob - accuracy, accuracy)`` where ``avg_prob`` is the
        mean of each row's largest probability and ``accuracy`` is the fraction
        of rows whose argmax equals the label.
    """
    top_probabilities = np.max(predictions, 1)
    predicted_classes = np.argmax(predictions, 1)
    acc = (labels == predicted_classes).sum()
    avg_prob = np.average(top_probabilities)
    accuracy_fraction = acc / labels.shape[0]
    return avg_prob, avg_prob - accuracy_fraction, accuracy_fraction

In [None]:
# Collect per-batch softmax outputs and labels of the trained model on the test set.
softmax = nn.Softmax(dim=1)
predictions_res, labels_res = [], []

model_res = model_res.eval()
with torch.no_grad():
    for sample in tqdm(test_dataloader):
        images, labels = sample['images'].to(device), sample['labels'].to(device)
        output = softmax(model_res(images))
        predictions_res.append(output.cpu().numpy())
        labels_res.append(labels.cpu().numpy())

print(len(labels_res), len(predictions_res))
print(predictions_res[0].shape, labels_res[0].shape)

In [None]:
# Join the per-batch arrays with a single concatenate call each; growing the
# array inside a loop re-copies everything accumulated so far on every step.
predictions_res_final = np.concatenate(predictions_res)
labels_res_final = np.concatenate(labels_res)

In [None]:
# Unpack (mean top-1 probability, mean probability minus accuracy, accuracy) for the test-set predictions.
conf_score, cal_err, acc = calculate_mean_confidence(predictions_res_final, labels_res_final)
print(conf_score, cal_err, acc)