In [3]:
%matplotlib inline
# from MulticoreTSNE import MulticoreTSNE as TSNE
from matplotlib import pyplot as plt
import torch
from torchvision import datasets, transforms
from torch import nn
import torch.nn.functional as F
import numpy as np

torch.manual_seed(42)
np.random.seed(42)
torch.backends.cudnn.deterministic = True
torch.backends.cudnn.benchmark = False

In [4]:
import pandas as pd 

# UNLABELED_BS = 256
# TRAIN_BS = 32
# TEST_BS = 1024

UNLABELED_BS = 32
TRAIN_BS = 32
TEST_BS = 32

num_train_samples = 1000
samples_per_class = int(num_train_samples/9)

x = pd.read_csv('data/mnist_train.csv')
y = x['label']
x.drop(['label'], inplace = True, axis = 1)

x_test = pd.read_csv('data/mnist_test.csv')
y_test = x_test['label']
x_test.drop(['label'], inplace = True, axis = 1)

Now, lets divide the dataset into train and unlabeled sets. For the train set we'll make sure that we have equal samples for all the 10 classes. (class-balancing)

We wont use the labels for the unlabeled set.

In [5]:
x_train, x_unlabeled = x[y.values == 0].values[:samples_per_class], \
                            x[y.values == 0].values[samples_per_class:]
y_train = y[y.values == 0].values[:samples_per_class]

for i in range(1,10):
    x_train = np.concatenate([x_train, x[y.values == i].values[:samples_per_class]], axis = 0)
    y_train = np.concatenate([y_train, y[y.values == i].values[:samples_per_class]], axis = 0)
    
    x_unlabeled = np.concatenate([x_unlabeled, x[y.values == i].values[samples_per_class:]], axis = 0)

In [6]:
# check that awe have balance

from collections import Counter
count = Counter()
count.update(y_train)
count

Counter({0: 111,
         1: 111,
         2: 111,
         3: 111,
         4: 111,
         5: 111,
         6: 111,
         7: 111,
         8: 111,
         9: 111})

In [7]:
x_train.shape

(1110, 784)

In [8]:
x_unlabeled.shape

(58890, 784)

Next, we'll normalize the data, convert it into tensors and create the dataloaders for train, unlabeled and test sets. 

In [9]:
from sklearn.preprocessing import Normalizer

normalizer = Normalizer()
x_train = normalizer.fit_transform(x_train)
x_unlabeled = normalizer.transform(x_unlabeled)
x_test = normalizer.transform(x_test.values)

In [10]:
x_train = torch.from_numpy(x_train).type(torch.FloatTensor)
y_train = torch.from_numpy(y_train).type(torch.LongTensor) 

x_test = torch.from_numpy(x_test).type(torch.FloatTensor)
y_test = torch.from_numpy(y_test.values).type(torch.LongTensor) 

In [14]:
train = torch.utils.data.TensorDataset(x_train, y_train)
test = torch.utils.data.TensorDataset(x_test, y_test)

train_loader = torch.utils.data.DataLoader(train, batch_size = TRAIN_BS, shuffle = True, num_workers = 8)

unlabeled_train = torch.from_numpy(x_unlabeled).type(torch.FloatTensor)

unlabeled = torch.utils.data.TensorDataset(unlabeled_train)

unlabeled_loader = torch.utils.data.DataLoader(unlabeled, batch_size = UNLABELED_BS, shuffle = True, num_workers = 8)

test_loader = torch.utils.data.DataLoader(test, batch_size = TEST_BS, shuffle = True, num_workers = 8)

### Network Architecture

We'll use a simple 2 layer Conv + 2 FC layer network with dropouts.

In [27]:
# Architecture from : https://github.com/peimengsui/semi_supervised_mnist
class Net(nn.Module):
        def __init__(self):
            super(Net, self).__init__()
            self.conv1 = nn.Conv2d(1, 20, kernel_size=5)
            self.conv2 = nn.Conv2d(20, 40, kernel_size=5)
            self.conv2_drop = nn.Dropout2d()
            self.fc1 = nn.Linear(640, 150)
            self.fc2 = nn.Linear(150, 10)
            self.log_softmax = nn.LogSoftmax(dim = 1)

        def forward(self, x):
            x = x.view(-1,1,28,28)
            x = F.relu(F.max_pool2d(self.conv1(x), 2))
            x = F.relu(F.max_pool2d(self.conv2_drop(self.conv2(x)), 2))
            x = x.view(-1, 640)
            x = F.relu(self.fc1(x))
            x = F.dropout(x, training=self.training)
            x = F.relu(self.fc2(x))
            x = self.log_softmax(x)
            return x

device = 'cuda' if torch.cuda.is_available() else 'cpu'

net = Net().to(device)

Now let's define a function to evaluate the network and get loss and accuracy values. 

In [28]:
def evaluate(model, test_loader):
    model.eval()
    correct = 0 
    loss = 0
    with torch.no_grad():
        for data, labels in test_loader:
            data = data.to(device)
            labels = labels.to(device)
            
            output = model(data)
            predicted = torch.max(output,1)[1]
            correct += (predicted == labels).sum()
            loss += F.nll_loss(output, labels).item()

    return (float(correct)/len(test)) *100, (loss/len(test_loader))

First, let's train the model on the labeled set for 300 epochs

In [29]:
# from tqdm import tqdm.notebook.tqdm
from tqdm.notebook import tqdm

def train_supervised(model, train_loader, test_loader):
    optimizer = torch.optim.SGD( model.parameters(), lr = 0.1)
    EPOCHS = 100
    model.train()
    for epoch in tqdm(range(EPOCHS)):
        correct = 0
        running_loss = 0
        for batch_idx, (X_batch, y_batch) in enumerate(train_loader):
            X_batch, y_batch = X_batch.to(device), y_batch.to(device)
            
            output = model(X_batch)
            labeled_loss = F.nll_loss(output, y_batch)
                       
            optimizer.zero_grad()
            labeled_loss.backward()
            optimizer.step()
            running_loss += labeled_loss.item()
        
        if epoch %10 == 0:
            test_acc, test_loss = evaluate(model, test_loader)
            print('Epoch: {} : Train Loss : {:.5f} | Test Acc : {:.5f} | Test Loss : {:.3f} '.format(epoch, running_loss/(10 * len(train)), test_acc, test_loss))
            model.train()
    

In [30]:
train_supervised(net, train_loader, test_loader)

HBox(children=(FloatProgress(value=0.0), HTML(value='')))

Epoch: 0 : Train Loss : 0.00726 | Test Acc : 9.80000 | Test Loss : 2.303 
Epoch: 10 : Train Loss : 0.00725 | Test Acc : 10.33000 | Test Loss : 2.297 
Epoch: 20 : Train Loss : 0.00500 | Test Acc : 64.78000 | Test Loss : 1.277 
Epoch: 30 : Train Loss : 0.00171 | Test Acc : 87.85000 | Test Loss : 0.378 
Epoch: 40 : Train Loss : 0.00103 | Test Acc : 91.62000 | Test Loss : 0.253 
Epoch: 50 : Train Loss : 0.00070 | Test Acc : 93.25000 | Test Loss : 0.209 
Epoch: 60 : Train Loss : 0.00055 | Test Acc : 94.15000 | Test Loss : 0.190 
Epoch: 70 : Train Loss : 0.00044 | Test Acc : 94.16000 | Test Loss : 0.191 
Epoch: 80 : Train Loss : 0.00038 | Test Acc : 94.45000 | Test Loss : 0.183 
Epoch: 90 : Train Loss : 0.00031 | Test Acc : 94.61000 | Test Loss : 0.189 



In [31]:
test_acc, test_loss = evaluate(net, test_loader)
print('Test Acc : {:.5f} | Test Loss : {:.3f} '.format(test_acc, test_loss))
torch.save(net.state_dict(), 'supervised_weights')

Test Acc : 94.56000 | Test Loss : 0.198 


In [32]:
net.load_state_dict(torch.load('supervised_weights'))

<All keys matched successfully>

[Lee in 2013](http://deeplearning.net/wp-content/uploads/2013/03/pseudo_label_final.pdf) [1]
Lee proposes using the following equation for alpha (t) 

where alpha_f = 3, T1 = 100 and T2 = 600. All of these are hyperparameters that change based on the model and the dataset.

In [33]:
T1 = 100
T2 = 700
af = 3

def alpha_weight(epoch):
    if epoch < T1:
        return 0.0
    elif epoch > T2:
        return af
    else:
         return ((epoch-T1) / (T2-T1))*af

In [34]:
# Concept from : https://github.com/peimengsui/semi_supervised_mnist

from tqdm.notebook import tqdm

acc_scores = []
unlabel = []
pseudo_label = []

alpha_log = []
test_acc_log = []
test_loss_log = []

def semisup_train(model, train_loader, unlabeled_loader, test_loader):
    optimizer = torch.optim.SGD(model.parameters(), lr = 0.1)
    EPOCHS = 30 # 150
    
    # Instead of using current epoch we use a "step" variable to calculate alpha_weight
    # This helps the model converge faster
    step = 100 
    
    model.train()
    for epoch in tqdm(range(EPOCHS)):
        for batch_idx, x_unlabeled in enumerate(unlabeled_loader):
            
            
            # Forward Pass to get the pseudo labels
            x_unlabeled = x_unlabeled[0].to(device)
            model.eval()
            output_unlabeled = model(x_unlabeled)
            _, pseudo_labeled = torch.max(output_unlabeled, 1)
            model.train()
            
            
            """ ONLY FOR VISUALIZATION"""
            if (batch_idx < 3) and (epoch % 10 == 0):
                unlabel.append(x_unlabeled.cpu())
                pseudo_label.append(pseudo_labeled.cpu())
            """ ********************** """
            
            # Now calculate the unlabeled loss using the pseudo label
            output = model(x_unlabeled)
            unlabeled_loss = alpha_weight(step) * F.nll_loss(output, pseudo_labeled)   
            
            # Backpropogate
            optimizer.zero_grad()
            unlabeled_loss.backward()
            optimizer.step()
            
            
            # For every 50 batches train one epoch on labeled data 
            if batch_idx % 50 == 0:
                
                # Normal training procedure
                for batch_idx, (X_batch, y_batch) in enumerate(train_loader):
                    X_batch = X_batch.to(device)
                    y_batch = y_batch.to(device)
                    output = model(X_batch)
                    labeled_loss = F.nll_loss(output, y_batch)

                    optimizer.zero_grad()
                    labeled_loss.backward()
                    optimizer.step()
                
                # Now we increment step by 1
                step += 1
                

        test_acc, test_loss =evaluate(model, test_loader)
        print('Epoch: {} : Alpha Weight : {:.5f} | Test Acc : {:.5f} | Test Loss : {:.3f} '.format(epoch, alpha_weight(step), test_acc, test_loss))
        
        """ LOGGING VALUES """
        alpha_log.append(alpha_weight(step))
        test_acc_log.append(test_acc/100)
        test_loss_log.append(test_loss)
        """ ************** """
        model.train()
 

In [35]:
semisup_train(net, train_loader, unlabeled_loader, test_loader)

HBox(children=(FloatProgress(value=0.0, max=30.0), HTML(value='')))

Epoch: 0 : Alpha Weight : 0.18500 | Test Acc : 95.62000 | Test Loss : 0.162 
Epoch: 1 : Alpha Weight : 0.37000 | Test Acc : 95.98000 | Test Loss : 0.164 
Epoch: 2 : Alpha Weight : 0.55500 | Test Acc : 96.20000 | Test Loss : 0.147 
Epoch: 3 : Alpha Weight : 0.74000 | Test Acc : 96.31000 | Test Loss : 0.136 
Epoch: 4 : Alpha Weight : 0.92500 | Test Acc : 96.48000 | Test Loss : 0.137 
Epoch: 5 : Alpha Weight : 1.11000 | Test Acc : 96.99000 | Test Loss : 0.123 
Epoch: 6 : Alpha Weight : 1.29500 | Test Acc : 97.09000 | Test Loss : 0.104 
Epoch: 7 : Alpha Weight : 1.48000 | Test Acc : 97.26000 | Test Loss : 0.113 
Epoch: 8 : Alpha Weight : 1.66500 | Test Acc : 97.30000 | Test Loss : 0.111 
Epoch: 9 : Alpha Weight : 1.85000 | Test Acc : 97.41000 | Test Loss : 0.106 
Epoch: 10 : Alpha Weight : 2.03500 | Test Acc : 96.01000 | Test Loss : 0.150 
Epoch: 11 : Alpha Weight : 2.22000 | Test Acc : 97.73000 | Test Loss : 0.089 
Epoch: 12 : Alpha Weight : 2.40500 | Test Acc : 97.75000 | Test Loss : 0.0

In [37]:
test_acc, test_loss = evaluate(net, test_loader)
print('Test Acc : {:.5f} | Test Loss : {:.3f} '.format(test_acc, test_loss))
torch.save(net.state_dict(), 'semi_supervised_weights')

Test Acc : 97.47000 | Test Loss : 0.132 


## Visualizations

In [15]:
unlabel = np.concatenate([u.cpu().numpy() for u in unlabel])
pseudo_label = np.concatenate([u.cpu().numpy() for u in pseudo_label])

In [17]:
x = pd.read_csv('data/mnist_train.csv')
y = x['label']
x.drop(['label'], inplace = True, axis = 1)

x = normalizer.transform(x.values)

tsne_x = np.concatenate([x, x_train, unlabel])
tsne_y = np.concatenate([y.values, y_train, pseudo_label])

embeddings = TSNE(perplexity = 30, n_jobs=-1, verbose = 1, n_iter = 500).fit_transform(tsne_x)

In [21]:
from tqdm import tqdm_notebook
%matplotlib
plt.figure(figsize=(15,10))

step_size = UNLABELED_BS * 3
base_index = x.shape[0]
epoch = 0
for i in tqdm_notebook(range(0,unlabel.shape[0], step_size)):
    plt.scatter(embeddings[:base_index, 0], embeddings[:base_index, 1], c=tsne_y[:base_index], cmap=plt.cm.get_cmap("jet", 10), marker='s', alpha = 0.002, s = 14**2)
    a = base_index
    b = base_index + num_train_samples
    plt.scatter(embeddings[a:b, 0], embeddings[a:b, 1], c=tsne_y[a:b], cmap=plt.cm.get_cmap("jet", 10), marker='o', alpha = 0.3, s = 90**1)
    a = base_index + num_train_samples + i
    b = base_index + num_train_samples + i + step_size
    plt.scatter(embeddings[a:b, 0], embeddings[a:b, 1], c=tsne_y[a:b], cmap=plt.cm.get_cmap("jet", 10), marker='*', s = 150**1)
    plt.colorbar(ticks=range(10))
    plt.clim(-0.5, 9.5)
    plt.title('Epoch : ' + str(epoch) +'  Test Acc : {:.2f}%'.format(test_acc_log[epoch]*100), fontsize = 20)
    plt.savefig('imgs/tsne' + str(i) + '.png')
    plt.draw()
    plt.pause(5)
    plt.clf()
    epoch += 10


Using matplotlib backend: GTK3Agg


HBox(children=(IntProgress(value=0, max=10), HTML(value='')))


