In [1]:
import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
import torchvision
import torchvision.transforms as transforms
from torchvision.utils import save_image
from torchvision.datasets import MNIST, FashionMNIST, CIFAR10, STL10
import os
import pickle
import zipfile
import datetime
import torch.utils.data as tud

In [2]:
torch.manual_seed(0)
np.random.seed(0)

# Data Preparation:

In [3]:
mean,std = (0.5071, 0.4867, 0.4408), (0.2675, 0.2565, 0.2761)
data_transform = transforms.Compose([ transforms.RandomHorizontalFlip(),
    transforms.ToTensor(),
    transforms.Normalize(mean,std),
                transforms.Resize((96,96))
        ])

In [4]:
train = CIFAR10(root='./data', train=True,download=True, transform=data_transform)
test = torchvision.datasets.CIFAR10(root='./data', train=False,download=True, transform=data_transform)

Files already downloaded and verified
Files already downloaded and verified


In [5]:
test

Dataset CIFAR10
    Number of datapoints: 10000
    Root location: ./data
    Split: Test
    StandardTransform
Transform: Compose(
               RandomHorizontalFlip(p=0.5)
               ToTensor()
               Normalize(mean=(0.5071, 0.4867, 0.4408), std=(0.2675, 0.2565, 0.2761))
               Resize(size=(96, 96), interpolation=bilinear, max_size=None, antialias=None)
           )

In [6]:
train_loader = torch.utils.data.DataLoader(train, batch_size=50000, shuffle=False, num_workers=0)

In [7]:
data, labels= next(iter(train_loader))

## Using 500 labled data for 1% and 5000 labeled data for 10% case

In [8]:
#keep 2500 as labeled data
np.random.seed(5)
labeled_ind = np.random.choice(50000,7000, replace = False)

In [9]:
unlabeled_ind = np.setdiff1d(list(range(50000)), labeled_ind)

In [10]:
unlabeled_ind.shape

(43000,)

In [11]:
labels = labels.numpy()

In [12]:
#unlabeled data coded as 10
np.put(labels,list(unlabeled_ind),10)

In [13]:
#make 0.3 of the labeled data dev set, dev set is made sure to have balanced labels
np.random.seed(5)
dev_ind = labeled_ind[np.random.choice(7000,2000, replace = False)]

In [14]:
train_ind = np.setdiff1d(list(range(50000)), dev_ind)

In [15]:
#450 labeled data for dev set, 1050 labeled data + 6500 unlabeled data for training set
len(dev_ind), len(train_ind)

(2000, 48000)

In [16]:
#prepare dataloader for pytorch
class TorchInputData(tud.Dataset):
    """
    A simple inheretance of torch.DataSet to enable using our customized DogBreed dataset in torch
    """
    def __init__(self, X, Y, transform=None):
        """
        X: a list of numpy images 
        Y: a list of labels coded using 0-9 
        """        
        self.X = X
        self.Y = Y 

    def __len__(self):
        return len(self.X)

    def __getitem__(self, idx):
        x = self.X[idx]
        y = self.Y[idx]

        return x, y

In [17]:
images_train = [data[i] for i in train_ind]
trainset = TorchInputData(images_train, labels[train_ind])
train_loader = tud.DataLoader(trainset, batch_size=100, shuffle=True)

In [18]:
len(trainset)

48000

In [19]:
images_dev = [data[i] for i in dev_ind]
devset = TorchInputData(images_dev, labels[dev_ind])
dev_loader = tud.DataLoader(devset, batch_size=100, shuffle=True)

In [20]:
len(devset)

2000

# M2 Model:

In [21]:
#import nn.module for M2 and CNN classifier
from m2_stl10_cuda_clipped_logvar import M2,Classifier

In [22]:

from torch.nn.utils import weight_norm

class Discriminator(nn.Module):
        """docstring for Discriminator"""
        def __init__(self,num_classes):
            super(Discriminator, self).__init__()
            self.net = nn.Sequential(
                    weight_norm(nn.Conv2d(3,3,3,stride=3,padding=1)),
                    nn.Dropout(.2),
                    weight_norm(nn.Conv2d(3,96,3,stride=1,padding=1)),
                    nn.LeakyReLU(),
                    weight_norm(nn.Conv2d(96,96,3,stride=1,padding=1)),
                    nn.LeakyReLU(),
                    weight_norm(nn.Conv2d(96,96,3,stride=2,padding=1)),
                    nn.LeakyReLU(),

                    nn.Dropout(.5),
                    weight_norm(nn.Conv2d(96,192,3,stride=1,padding=1)),
                    nn.LeakyReLU(),
                    weight_norm(nn.Conv2d(192,192,3,stride=1,padding=1)),
                    nn.LeakyReLU(),
                    weight_norm(nn.Conv2d(192,192,3,stride=2,padding=1)),
                    nn.LeakyReLU(),
                    
                    nn.Dropout(.5),
                    weight_norm(nn.Conv2d(192,192,3,stride=1,padding=0)),
                    nn.LeakyReLU(),
                    weight_norm(nn.Conv2d(192,192,1,stride=1,padding=0)),
                    nn.LeakyReLU(),
                    weight_norm(nn.Conv2d(192,192,1,stride=1,padding=0)),
                    nn.LeakyReLU(),

                    # nn.AvgPool2d(6,stride=1),
                    nn.AdaptiveAvgPool2d(1),
                    nn.Flatten()
                )

            self.fc = weight_norm(nn.Linear(192,num_classes))
            
        def forward(self,x):
            inter_layer = self.net(x)
            logits = F.log_softmax(self.fc(inter_layer),dim=1)
            return logits
lr = 0.01
num_epochs = 50

classifier = Discriminator(10)


In [23]:
'''
from torchvision import models
classifier = models.resnet50(pretrained=False)
classifier.fc = nn.Linear(2048, 10)

gpu_boole = torch.cuda.is_available()
if gpu_boole:
  classifier.cuda()
  '''

'\nfrom torchvision import models\nclassifier = models.resnet50(pretrained=False)\nclassifier.fc = nn.Linear(2048, 10)\n\ngpu_boole = torch.cuda.is_available()\nif gpu_boole:\n  classifier.cuda()\n  '

In [24]:
#for param in classifier.parameters():
#    param.requires_grad = False

In [25]:
#classifier.fc.requires_grad=True

In [26]:
#classifier = Classifier(image_reso = 96, filter_size = 5, dropout_rate = 0.2)

In [27]:
m2 = M2(latent_features = 128, classifier = classifier, path = "m2_stl10_0.1_50epoch_5.pth")

### Structure of the M2 model: a convolutional variational autoencoder and a CNN classifier

In [28]:
m2.model

M2_base(
  (encoder): Encoder(
    (bottle): EncoderModule(
      (conv): Conv2d(3, 32, kernel_size=(1, 1), stride=(1, 1))
      (bn): BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace=True)
    )
    (m1): EncoderModule(
      (conv): Conv2d(32, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
      (bn): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace=True)
    )
    (m2): EncoderModule(
      (conv): Conv2d(64, 128, kernel_size=(3, 3), stride=(4, 4), padding=(1, 1))
      (bn): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace=True)
    )
    (m3): EncoderModule(
      (conv): Conv2d(128, 256, kernel_size=(3, 3), stride=(4, 4), padding=(1, 1))
      (bn): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace=True)
    )
  )
  (fc1): Linear(in_features=9

### Training the M2 model for 50 epochs:

In [29]:
#set alpha, hyperparameter for weighing the classifier loss
alpha = 0.1*len(train_loader.dataset)

In [30]:
#fit M2 model
#labeled_data_len is the number of labeled data in train+dev set: 450+1050
m2.fit(train_loader,dev_loader,100,alpha,labeled_data_len = 7000)

0 480 Loss: 47512.171875
50 480 Loss: 19478.032686
100 480 Loss: 12723.287719
150 480 Loss: 8947.018377
200 480 Loss: 6147.081917
250 480 Loss: 3009.350109
300 480 Loss: -919.160390
350 480 Loss: -5604.619782
400 480 Loss: -10392.329972
450 480 Loss: -14760.652102
Epoch: 1, train loss: -17088.9885, training accuracy 0.0972, dev set accuracy 0.0975
0 480 Loss: -54450.218750
50 480 Loss: -58579.954082
100 480 Loss: -62021.118019
150 480 Loss: -63137.539127
200 480 Loss: -65725.385465
250 480 Loss: -67540.162203
300 480 Loss: -70006.230008
350 480 Loss: -72063.844045
400 480 Loss: -74535.582523
450 480 Loss: -76866.551894
Epoch: 2, train loss: -78273.5599, training accuracy 0.1076, dev set accuracy 0.1730
0 480 Loss: -70897.210938
50 480 Loss: -105931.836320
100 480 Loss: -106395.759708
150 480 Loss: -106113.709204
200 480 Loss: -107258.172944
250 480 Loss: -109365.369164
300 480 Loss: -111597.745081
350 480 Loss: -113801.050069
400 480 Loss: -117369.156143
450 480 Loss: -119134.012273
Ep

In [31]:
#best dev set accuracy 
m2.model.best_dev_accuracy

0.5465

# Baseline Model:
### Only using the labeled data for supervised learning

In [32]:
#use the same dev set as M2
dev_ind_b = dev_ind
#training data is the same 1050 labeled data as M2
train_ind_b = (np.setdiff1d(labeled_ind, dev_ind))

In [33]:
len(dev_ind_b),len(train_ind_b)

(2000, 5000)

In [34]:
#prepare dataloader for pytroch
images_train_b = [data[i] for i in train_ind_b]
trainset_b = TorchInputData(images_train_b, labels[train_ind_b])
train_loader_b = tud.DataLoader(trainset_b, batch_size=50, shuffle=True)

In [35]:
images_dev_b = [data[i] for i in dev_ind_b]
devset_b = TorchInputData(images_dev_b, labels[dev_ind_b])
dev_loader_b = tud.DataLoader(devset_b, batch_size=50, shuffle=True)

In [36]:
from dcganbaseline_cnn_stl10_cuda import BaselineConvNetdc

In [37]:
baseline = BaselineConvNetdc(96, path = "baseline_stl10_100epoch_5.pth")

In [38]:
from baseline_cnn_stl10_cuda import BaselineConvNet

In [39]:
baseline2 = BaselineConvNet(96, path = "baseline2_stl10_100epoch_5.pth")

### Structure of the baseline model: same as the classifier in the M2 model

In [40]:
baseline.model

TwoLayerConvNet(
  (net): Sequential(
    (0): Conv2d(3, 3, kernel_size=(3, 3), stride=(3, 3), padding=(1, 1))
    (1): Dropout(p=0.2, inplace=False)
    (2): Conv2d(3, 96, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (3): LeakyReLU(negative_slope=0.01)
    (4): Conv2d(96, 96, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (5): LeakyReLU(negative_slope=0.01)
    (6): Conv2d(96, 96, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1))
    (7): LeakyReLU(negative_slope=0.01)
    (8): Dropout(p=0.5, inplace=False)
    (9): Conv2d(96, 192, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (10): LeakyReLU(negative_slope=0.01)
    (11): Conv2d(192, 192, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (12): LeakyReLU(negative_slope=0.01)
    (13): Conv2d(192, 192, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1))
    (14): LeakyReLU(negative_slope=0.01)
    (15): Dropout(p=0.5, inplace=False)
    (16): Conv2d(192, 192, kernel_size=(3, 3), stride=(1, 1))
    (17): 

In [41]:
baseline2.model

TwoLayerConvNet(
  (conv1_drop): Dropout2d(p=0.2, inplace=False)
  (conv1): Conv2d(3, 10, kernel_size=(5, 5), stride=(1, 1), padding=(2, 2))
  (bn1): BatchNorm2d(10, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (pool1): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  (conv2_drop): Dropout2d(p=0.2, inplace=False)
  (conv2): Conv2d(10, 20, kernel_size=(5, 5), stride=(1, 1), padding=(2, 2))
  (bn2): BatchNorm2d(20, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (avgpool): AvgPool2d(kernel_size=4, stride=4, padding=0)
  (fc1): Linear(in_features=2880, out_features=64, bias=True)
  (fc2): Linear(in_features=64, out_features=10, bias=True)
)

In [42]:
baseline2.fit(train_loader_b,dev_loader_b)
baseline2.train(50)






Dev set: Average loss: 2.0497, Accuracy: 535/2000 (27%)


Dev set: Average loss: 1.8191, Accuracy: 704/2000 (35%)


Dev set: Average loss: 1.7482, Accuracy: 742/2000 (37%)


Dev set: Average loss: 1.6751, Accuracy: 766/2000 (38%)


Dev set: Average loss: 1.6519, Accuracy: 780/2000 (39%)


Dev set: Average loss: 1.6095, Accuracy: 851/2000 (43%)


Dev set: Average loss: 1.6232, Accuracy: 848/2000 (42%)


Dev set: Average loss: 1.5718, Accuracy: 880/2000 (44%)


Dev set: Average loss: 1.5771, Accuracy: 875/2000 (44%)


Dev set: Average loss: 1.6059, Accuracy: 861/2000 (43%)


Dev set: Average loss: 1.5735, Accuracy: 916/2000 (46%)


Dev set: Average loss: 1.5908, Accuracy: 897/2000 (45%)


Dev set: Average loss: 1.6606, Accuracy: 893/2000 (45%)


Dev set: Average loss: 1.7173, Accuracy: 875/2000 (44%)


Dev set: Average loss: 1.7769, Accuracy: 890/2000 (44%)


Dev set: Average loss: 1.7280, Accuracy: 862/2000 (43%)


Dev set: Average loss: 1.8194, Accuracy: 905/2000 (45%)


Dev set: Aver

In [43]:
baseline.fit(train_loader_b,dev_loader_b)
baseline.train(60)






Dev set: Average loss: 2.0842, Accuracy: 374/2000 (19%)


Dev set: Average loss: 1.9530, Accuracy: 505/2000 (25%)


Dev set: Average loss: 1.8792, Accuracy: 545/2000 (27%)


Dev set: Average loss: 1.8460, Accuracy: 537/2000 (27%)


Dev set: Average loss: 1.8270, Accuracy: 557/2000 (28%)


Dev set: Average loss: 1.7820, Accuracy: 595/2000 (30%)


Dev set: Average loss: 1.7918, Accuracy: 582/2000 (29%)


Dev set: Average loss: 1.6904, Accuracy: 647/2000 (32%)


Dev set: Average loss: 1.6774, Accuracy: 724/2000 (36%)


Dev set: Average loss: 1.6501, Accuracy: 720/2000 (36%)


Dev set: Average loss: 1.6817, Accuracy: 689/2000 (34%)


Dev set: Average loss: 1.6451, Accuracy: 701/2000 (35%)


Dev set: Average loss: 1.6180, Accuracy: 754/2000 (38%)


Dev set: Average loss: 1.6286, Accuracy: 781/2000 (39%)


Dev set: Average loss: 1.6160, Accuracy: 747/2000 (37%)


Dev set: Average loss: 1.5602, Accuracy: 810/2000 (40%)


Dev set: Average loss: 1.5709, Accuracy: 820/2000 (41%)


Dev set: Aver

In [44]:
#best dev set accuracy 
baseline.model.best_dev_accuracy

0.5605

In [45]:
baseline2.model.best_dev_accuracy

0.466

# Test Set Performance: 
### The M2 model successfully increase the accuracy of the classifier

In [46]:
#The testset dataloader
testset_loader = torch.utils.data.DataLoader(test, batch_size=500, shuffle=True, num_workers=0)

In [47]:
conf_b, acc_b = baseline.test(testset_loader,path = "baseline_stl10_100epoch_5.pth",return_confusion_matrix = True)


Test set: Accuracy: 5486/10000 (55%)



In [48]:
conf_b2, acc_b2 = baseline2.test(testset_loader,path = "baseline2_stl10_100epoch_5.pth",return_confusion_matrix = True)


Test set: Accuracy: 4641/10000 (46%)



In [49]:
conf, acc = m2.test(testset_loader,path = "m2_stl10_0.1_50epoch_5.pth",return_confusion_matrix = True)


Test set: Accuracy: 5405/10000 (54%)

