In [1]:
import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
import torchvision
import torchvision.transforms as transforms
from torchvision.utils import save_image
from torchvision.datasets import MNIST, FashionMNIST, CIFAR10, STL10, CIFAR100
import os
import pickle
import zipfile
import datetime
import torch.utils.data as tud

In [2]:
torch.manual_seed(4)
np.random.seed(4)

# Data Preparation:

In [3]:
mean,std = (0.5071, 0.4867, 0.4408), (0.2675, 0.2565, 0.2761)
data_transform = transforms.Compose([ transforms.RandomHorizontalFlip(),
    transforms.ToTensor(),
    transforms.Normalize(mean,std),
                transforms.Resize((96,96))
        ])




In [4]:
train = CIFAR100(root='./data', train=True,download=True, transform=data_transform)
test = torchvision.datasets.CIFAR100(root='./data', train=False,download=True, transform=data_transform)

Files already downloaded and verified
Files already downloaded and verified


In [5]:
test

Dataset CIFAR100
    Number of datapoints: 10000
    Root location: ./data
    Split: Test
    StandardTransform
Transform: Compose(
               RandomHorizontalFlip(p=0.5)
               ToTensor()
               Normalize(mean=(0.5071, 0.4867, 0.4408), std=(0.2675, 0.2565, 0.2761))
               Resize(size=(96, 96), interpolation=bilinear, max_size=None, antialias=None)
           )

In [6]:
train_loader = torch.utils.data.DataLoader(train, batch_size=50000, shuffle=False, num_workers=0)

In [7]:
data, labels= next(iter(train_loader))

## Using 500 labled data for 1% and 5000 labeled data for 10% case

In [8]:
#keep 1500 as labeled data
np.random.seed(5)
labeled_ind = np.random.choice(50000,7000, replace = False)

In [9]:
unlabeled_ind = np.setdiff1d(list(range(50000)), labeled_ind)

In [10]:
unlabeled_ind.shape

(43000,)

In [11]:
labels = labels.numpy()

In [12]:
#unlabeled data coded as 10
np.put(labels,list(unlabeled_ind),100)

In [13]:
#make 0.3 of the labeled data dev set, dev set is made sure to have balanced labels
np.random.seed(5)
dev_ind = labeled_ind[np.random.choice(7000,2000, replace = False)]

In [14]:
train_ind = np.setdiff1d(list(range(50000)), dev_ind)

In [15]:
#450 labeled data for dev set, 1050 labeled data + 6500 unlabeled data for training set
len(dev_ind), len(train_ind)

(2000, 48000)

In [16]:
#prepare dataloader for pytorch
class TorchInputData(tud.Dataset):
    """
    A simple inheretance of torch.DataSet to enable using our customized DogBreed dataset in torch
    """
    def __init__(self, X, Y, transform=None):
        """
        X: a list of numpy images 
        Y: a list of labels coded using 0-9 
        """        
        self.X = X
        self.Y = Y 

    def __len__(self):
        return len(self.X)

    def __getitem__(self, idx):
        x = self.X[idx]
        y = self.Y[idx]

        return x, y

In [17]:
images_train = [data[i] for i in train_ind]
trainset = TorchInputData(images_train, labels[train_ind])
train_loader = tud.DataLoader(trainset, batch_size=128, shuffle=True)

In [18]:
len(trainset)

48000

In [19]:
images_dev = [data[i] for i in dev_ind]
devset = TorchInputData(images_dev, labels[dev_ind])
dev_loader = tud.DataLoader(devset, batch_size=128, shuffle=True)

In [20]:
len(devset)

2000

# M2 Model:

In [21]:
#import nn.module for M2 and CNN classifier
from m2_cifar100_cuda import M2,Classifier

In [22]:

from torch.nn.utils import weight_norm

class Discriminator(nn.Module):
        """docstring for Discriminator"""
        def __init__(self,num_classes):
            super(Discriminator, self).__init__()
            self.net = nn.Sequential(
                    weight_norm(nn.Conv2d(3,3,3,stride=3,padding=1)),
                    nn.Dropout(.2),
                    weight_norm(nn.Conv2d(3,96,3,stride=1,padding=1)),
                    nn.LeakyReLU(),
                    weight_norm(nn.Conv2d(96,96,3,stride=1,padding=1)),
                    nn.LeakyReLU(),
                    weight_norm(nn.Conv2d(96,96,3,stride=2,padding=1)),
                    nn.LeakyReLU(),

                    nn.Dropout(.5),
                    weight_norm(nn.Conv2d(96,192,3,stride=1,padding=1)),
                    nn.LeakyReLU(),
                    weight_norm(nn.Conv2d(192,192,3,stride=1,padding=1)),
                    nn.LeakyReLU(),
                    weight_norm(nn.Conv2d(192,192,3,stride=2,padding=1)),
                    nn.LeakyReLU(),
                    
                    nn.Dropout(.5),
                    weight_norm(nn.Conv2d(192,192,3,stride=1,padding=0)),
                    nn.LeakyReLU(),
                    weight_norm(nn.Conv2d(192,192,1,stride=1,padding=0)),
                    nn.LeakyReLU(),
                    weight_norm(nn.Conv2d(192,192,1,stride=1,padding=0)),
                    nn.LeakyReLU(),

                    # nn.AvgPool2d(6,stride=1),
                    nn.AdaptiveAvgPool2d(1),
                    nn.Flatten()
                )

            self.fc = weight_norm(nn.Linear(192,num_classes))
            
        def forward(self,x):
            inter_layer = self.net(x)
            logits = F.log_softmax(self.fc(inter_layer),dim=1)
            return logits
lr = 0.01
num_epochs = 50

classifier = Discriminator(100)


In [23]:
#from torchvision import models

In [24]:
'''
classifier = models.resnet50(pretrained=False)
classifier.fc = nn.Linear(2048, 100)

gpu_boole = torch.cuda.is_available()
if gpu_boole:
  model.cuda()
  '''

'\nclassifier = models.resnet50(pretrained=False)\nclassifier.fc = nn.Linear(2048, 100)\n\ngpu_boole = torch.cuda.is_available()\nif gpu_boole:\n  model.cuda()\n  '

In [25]:
#classifier = Classifier(image_reso = 96, filter_size = 5, dropout_rate = 0.2)

In [26]:
m2 = M2(latent_features = 512, classifier = classifier, path = "m2_stl10_0.1_50epoch_5.pth")

### Structure of the M2 model: a convolutional variational autoencoder and a CNN classifier

In [27]:
m2.model

M2_base(
  (encoder): Encoder(
    (bottle): EncoderModule(
      (conv): Conv2d(3, 32, kernel_size=(1, 1), stride=(1, 1))
      (bn): BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace=True)
    )
    (m1): EncoderModule(
      (conv): Conv2d(32, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
      (bn): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace=True)
    )
    (m2): EncoderModule(
      (conv): Conv2d(64, 128, kernel_size=(3, 3), stride=(4, 4), padding=(1, 1))
      (bn): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace=True)
    )
    (m3): EncoderModule(
      (conv): Conv2d(128, 256, kernel_size=(3, 3), stride=(4, 4), padding=(1, 1))
      (bn): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace=True)
    )
  )
  (fc1): Linear(in_features=9

### Training the M2 model for 50 epochs:

In [28]:
#set alpha, hyperparameter for weighing the classifier loss
alpha = 0.1*len(train_loader.dataset)

In [29]:
#fit M2 model
#labeled_data_len is the number of labeled data in train+dev set: 450+1050
m2.fit(train_loader,dev_loader,100,alpha,labeled_data_len = 7000)

0 375 Loss: -64158.160156
50 375 Loss: -91637.438496
100 375 Loss: -99846.367536
150 375 Loss: -104441.251992
200 375 Loss: -107884.501419
250 375 Loss: -110820.401628
300 375 Loss: -113254.212093
350 375 Loss: -115500.060820
Epoch: 1, train loss: -116512.4474, training accuracy 0.0122, dev set accuracy 0.0090
0 375 Loss: -131515.000000
50 375 Loss: -133964.892463
100 375 Loss: -136234.912593
150 375 Loss: -138396.559706
200 375 Loss: -141431.287469
250 375 Loss: -146281.195219
300 375 Loss: -151316.829111
350 375 Loss: -156592.872730
Epoch: 2, train loss: -158393.7778, training accuracy 0.0216, dev set accuracy 0.0180
0 375 Loss: -183224.843750
50 375 Loss: -195566.134498
100 375 Loss: -197889.176361
150 375 Loss: -201114.724441
200 375 Loss: -203684.087142
250 375 Loss: -206042.022535
300 375 Loss: -208781.096397
350 375 Loss: -211061.767094
Epoch: 3, train loss: -212000.8136, training accuracy 0.0282, dev set accuracy 0.0310
0 375 Loss: -247946.125000
50 375 Loss: -229445.970895
100

In [30]:
#best dev set accuracy 
m2.model.best_dev_accuracy

0.1885

# Baseline Model:
### Only using the labeled data for supervised learning

In [31]:
#use the same dev set as M2
dev_ind_b = dev_ind
#training data is the same 1050 labeled data as M2
train_ind_b = (np.setdiff1d(labeled_ind, dev_ind))

In [32]:
len(dev_ind_b),len(train_ind_b)

(2000, 5000)

In [33]:
#prepare dataloader for pytroch
images_train_b = [data[i] for i in train_ind_b]
trainset_b = TorchInputData(images_train_b, labels[train_ind_b])
train_loader_b = tud.DataLoader(trainset_b, batch_size=50, shuffle=True)

In [34]:
images_dev_b = [data[i] for i in dev_ind_b]
devset_b = TorchInputData(images_dev_b, labels[dev_ind_b])
dev_loader_b = tud.DataLoader(devset_b, batch_size=50, shuffle=True)

In [35]:
from dcganbaseline_cnn_cifar100_cuda import BaselineConvNetdc

In [36]:
baseline = BaselineConvNetdc(96, path = "baseline_stl10_100epoch_5.pth")

In [37]:
from baseline_cnn_cifar100_cuda import BaselineConvNet

In [38]:
baseline2 = BaselineConvNet(96, path = "baseline2_stl10_100epoch_5.pth")

### Structure of the baseline model: same as the classifier in the M2 model

In [39]:
baseline.model

TwoLayerConvNet(
  (net): Sequential(
    (0): Conv2d(3, 3, kernel_size=(3, 3), stride=(3, 3), padding=(1, 1))
    (1): Dropout(p=0.2, inplace=False)
    (2): Conv2d(3, 96, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (3): LeakyReLU(negative_slope=0.01)
    (4): Conv2d(96, 96, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (5): LeakyReLU(negative_slope=0.01)
    (6): Conv2d(96, 96, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1))
    (7): LeakyReLU(negative_slope=0.01)
    (8): Dropout(p=0.5, inplace=False)
    (9): Conv2d(96, 192, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (10): LeakyReLU(negative_slope=0.01)
    (11): Conv2d(192, 192, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (12): LeakyReLU(negative_slope=0.01)
    (13): Conv2d(192, 192, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1))
    (14): LeakyReLU(negative_slope=0.01)
    (15): Dropout(p=0.5, inplace=False)
    (16): Conv2d(192, 192, kernel_size=(3, 3), stride=(1, 1))
    (17): 

In [40]:
baseline2.model

TwoLayerConvNet(
  (conv1_drop): Dropout2d(p=0.2, inplace=False)
  (conv1): Conv2d(3, 10, kernel_size=(5, 5), stride=(1, 1), padding=(2, 2))
  (bn1): BatchNorm2d(10, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (pool1): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  (conv2_drop): Dropout2d(p=0.2, inplace=False)
  (conv2): Conv2d(10, 20, kernel_size=(5, 5), stride=(1, 1), padding=(2, 2))
  (bn2): BatchNorm2d(20, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (avgpool): AvgPool2d(kernel_size=4, stride=4, padding=0)
  (fc1): Linear(in_features=2880, out_features=64, bias=True)
  (fc2): Linear(in_features=64, out_features=100, bias=True)
)

In [41]:
baseline.fit(train_loader_b,dev_loader_b)
baseline.train(50)






Dev set: Average loss: 4.5218, Accuracy: 39/2000 (2%)


Dev set: Average loss: 4.3668, Accuracy: 51/2000 (3%)


Dev set: Average loss: 4.3289, Accuracy: 52/2000 (3%)


Dev set: Average loss: 4.2520, Accuracy: 61/2000 (3%)


Dev set: Average loss: 4.2114, Accuracy: 65/2000 (3%)


Dev set: Average loss: 4.1517, Accuracy: 86/2000 (4%)


Dev set: Average loss: 4.1676, Accuracy: 83/2000 (4%)


Dev set: Average loss: 4.0850, Accuracy: 111/2000 (6%)


Dev set: Average loss: 4.0910, Accuracy: 111/2000 (6%)


Dev set: Average loss: 4.0543, Accuracy: 120/2000 (6%)


Dev set: Average loss: 3.9685, Accuracy: 141/2000 (7%)


Dev set: Average loss: 3.9521, Accuracy: 147/2000 (7%)


Dev set: Average loss: 3.9226, Accuracy: 157/2000 (8%)


Dev set: Average loss: 3.9248, Accuracy: 155/2000 (8%)


Dev set: Average loss: 3.8859, Accuracy: 188/2000 (9%)


Dev set: Average loss: 3.8958, Accuracy: 137/2000 (7%)


Dev set: Average loss: 3.8295, Accuracy: 190/2000 (10%)


Dev set: Average loss: 3.7950, Accur

In [42]:
baseline.model.best_dev_accuracy

0.1555

In [43]:
baseline2.fit(train_loader_b,dev_loader_b)
baseline2.train(50)






Dev set: Average loss: 4.5833, Accuracy: 32/2000 (2%)


Dev set: Average loss: 4.5237, Accuracy: 54/2000 (3%)


Dev set: Average loss: 4.4608, Accuracy: 75/2000 (4%)


Dev set: Average loss: 4.3779, Accuracy: 89/2000 (4%)


Dev set: Average loss: 4.3202, Accuracy: 130/2000 (6%)


Dev set: Average loss: 4.2249, Accuracy: 148/2000 (7%)


Dev set: Average loss: 4.1647, Accuracy: 171/2000 (9%)


Dev set: Average loss: 4.0813, Accuracy: 185/2000 (9%)


Dev set: Average loss: 4.0554, Accuracy: 214/2000 (11%)


Dev set: Average loss: 4.0003, Accuracy: 213/2000 (11%)


Dev set: Average loss: 3.9937, Accuracy: 222/2000 (11%)


Dev set: Average loss: 4.0333, Accuracy: 214/2000 (11%)


Dev set: Average loss: 3.9935, Accuracy: 240/2000 (12%)


Dev set: Average loss: 4.0163, Accuracy: 256/2000 (13%)


Dev set: Average loss: 3.9799, Accuracy: 250/2000 (12%)


Dev set: Average loss: 4.0277, Accuracy: 235/2000 (12%)


Dev set: Average loss: 4.2108, Accuracy: 229/2000 (11%)


Dev set: Average loss: 4.

In [44]:
#best dev set accuracy 
baseline2.model.best_dev_accuracy

0.1385

# Test Set Performance: 
### The M2 model successfully increase the accuracy of the classifier

In [45]:
#The testset dataloader
testset_loader = torch.utils.data.DataLoader(test, batch_size=1000, shuffle=True, num_workers=0)

In [46]:
conf_b, acc_b = baseline2.test(testset_loader,path = "baseline2_stl10_100epoch_5.pth",return_confusion_matrix = True)


Test set: Accuracy: 1354/10000 (14%)



In [47]:
conf_b, acc_b = baseline.test(testset_loader,path = "baseline_stl10_100epoch_5.pth",return_confusion_matrix = True)


Test set: Accuracy: 1576/10000 (16%)



In [48]:
conf, acc = m2.test(testset_loader,path = "m2_stl10_0.1_50epoch_5.pth",return_confusion_matrix = True)


Test set: Accuracy: 1894/10000 (19%)

