In [2]:
import random
import time
import numpy as np
import matplotlib.pyplot as plt

import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
import torchvision.transforms as transforms
from torchvision.datasets import MNIST
from torch.utils.data import DataLoader, random_split

# check if CUDA is available
train_on_gpu = torch.cuda.is_available()

if not train_on_gpu:
    print('CUDA is not available.  Training on CPU ...')
else:
    print('CUDA is available!  Training on GPU ...')

CUDA is not available.  Training on CPU ...


In [41]:
# Step 1: Load and Prepare the MNIST Dataset
transform = transforms.Compose([transforms.ToTensor(),
                                transforms.Normalize((0.5,), (0.5,))])

# Load MNIST dataset
full_train_data = MNIST(root='data', train=True, download=True, transform=transform)
test_data = MNIST(root='data', train=False, download=True, transform=transform)

# Split full training data into training and validation
train_size = int(0.2 * len(full_train_data))
valid_size = len(full_train_data) - train_size
train_data, valid_data = random_split(full_train_data, [train_size, valid_size])

# Create data loaders
train_loader = DataLoader(train_data, batch_size=32, shuffle=True)
valid_loader = DataLoader(valid_data, batch_size=32, shuffle=True)
test_loader = DataLoader(test_data, batch_size=32, shuffle=False)


In [61]:
# Step 2: Define the Deep Neural Network Model
class DeepNN(nn.Module):
    def __init__(self,**kwargs):
        super(DeepNN, self).__init__()
        self.fc1 = nn.Linear(28*28, 1024)
        self.fc2 = nn.Linear(1024, 1024)
        self.fc3 = nn.Linear(1024, 256)
        self.fc4 = nn.Linear(256, 128)
        self.fc5 = nn.Linear(128, 10)
        self.dropout = nn.Dropout(kwargs['dropout_rate'])

    def forward(self, x):
        x = x.view(-1, 28*28)  # Flatten the images
        x = self.dropout(F.relu(self.fc1(x)))
        x = self.dropout(F.relu(self.fc2(x)))
        x = self.dropout(F.relu(self.fc3(x)))
        x = self.dropout(F.relu(self.fc4(x)))
        x = self.fc5(x)
        return x



In [94]:
#Sector-wise dropout layer
class SWDropOut(torch.nn.Module):
    def __init__(self,**kwargs):
        super().__init__()
        self.group_size = int(kwargs['group_size'])
        self.drop_per_group = int(kwargs['drop_per_group'])
        self.scalar = 1/((self.group_size-self.drop_per_group)/self.group_size)
     
    # def forward(self, x):
    #     if self.training:
    #         scalar_mask = torch.full(x.size(),self.scalar)
    #         batch,feature = x.size()
    #         for item in range(batch):
    #             for i in range(0,feature,self.group_size):
    #                 masked_count=0
    #                 while masked_count < self.drop_per_group:
    #                     index = random.randint(0, self.group_size-1) 
    #                     if scalar_mask[item][i+index] !=0:
    #                         scalar_mask[item][i+index]=0
    #                         masked_count+=1
    #         x = torch.multiply(x,scalar_mask)
    #         # print(x)
    #         return x
    #     else:
    #         return x
    def forward(self, x):
        if self.training:
            batch,feature = x.size()
            scalar_mask = torch.full((batch*feature,),self.scalar)
            for i in range(0,batch*feature,self.group_size):
                masked_count=0
                while masked_count < self.drop_per_group:
                    index = random.randint(0, self.group_size-1) 
                    if scalar_mask[i+index] !=0:
                        scalar_mask[i+index]=0
                        masked_count+=1
            scalar_mask = torch.reshape(scalar_mask,(batch,feature))
            x = torch.multiply(x,scalar_mask)
            # print(x)
            del scalar_mask
            return x
        else:
            return x


In [83]:
# Step 2: Define the Deep Neural Network Model
class DeepNN_SW(nn.Module):
    def __init__(self,**kwargs):
        super(DeepNN_SW, self).__init__()
        self.fc1 = nn.Linear(28*28, 1024)
        self.fc2 = nn.Linear(1024, 1024)
        self.fc3 = nn.Linear(1024, 256)
        self.fc4 = nn.Linear(256, 128)
        self.fc5 = nn.Linear(128, 10)
        self.dropout = SWDropOut(group_size=kwargs['group_size'], drop_per_group=kwargs['drop_per_group'])

    def forward(self, x):
        x = x.view(-1, 28*28)  # Flatten the images
        x = self.dropout(F.relu(self.fc1(x)))
        x = self.dropout(F.relu(self.fc2(x)))
        x = self.dropout(F.relu(self.fc3(x)))
        x = self.dropout(F.relu(self.fc4(x)))
        x = self.fc5(x)
        return x


In [84]:
def train(model,optim,criterion,train_loader):
    train_loss=0
    model.train()
    for data, target in train_loader:
        if train_on_gpu:
            data,target = data.cuda(), target.cuda()
        
        optim.zero_grad()
        output= model(data)
        loss = criterion(output,target)
        loss.backward()
        optim.step()
        train_loss+=loss.item()
        # print(train_loss)
        del data
        del target
    
    return train_loss/len(train_loader)

@torch.no_grad()
def validate(model,criterion,valid_loader):
    model.eval()
    valid_loss=0
    
    for data, target in valid_loader:
        if train_on_gpu:
            data, target = data.cuda(), target.cuda()
        output = model(data)
        loss = criterion(output, target)
        valid_loss += loss.item()
        
    valid_loss = valid_loss/len(valid_loader)
    
    return valid_loss

@torch.no_grad()
def predict(model,criterion,test_loader):
    model.eval()
    class_correct = list(0. for i in range(10))
    class_total = list(0. for i in range(10))
    test_loss=0
    for data, target in test_loader:
        if train_on_gpu:
            data,target = data.cuda(), target.cuda()
        
        prediction = model(data)
        test_loss += criterion(prediction, target).item()
    
        _, pred = torch.max(prediction, 1)
        correct_tensor = pred.eq(target.data.view_as(pred))
        
        correct = np.squeeze(correct_tensor.numpy()) if not train_on_gpu else np.squeeze(correct_tensor.cpu().numpy())
        
        for i in range(len(data)):
            label = target.data[i]
            class_correct[label] += correct[i].item()
            class_total[label] += 1
    
    test_loss = test_loss/len(test_loader.dataset)
    print(len(test_loader.dataset),len(test_loader))
    print('Test Loss: {:.6f}\n'.format(test_loss))
    
    for i in range(10):
        if class_total[i] > 0:
            print('Test Accuracy of %5s: %2d%% (%2d/%2d)' % (
                i, 100 * class_correct[i] / class_total[i],
                np.sum(class_correct[i]), np.sum(class_total[i])))
        else:
            print('Test Accuracy of %5s: N/A (no training examples)' % (i))
    
    print('\nTest Accuracy (Overall): %2d%% (%2d/%2d)' % (
        100. * np.sum(class_correct) / np.sum(class_total),
        np.sum(class_correct), np.sum(class_total)))
    
    return class_correct,test_loss

In [85]:
def train_and_test_dropout(model,training,validation,testing,epochs, model_filename):
    optimizer = optim.Adamax(model.parameters(), lr=.01)
    loss_fn = nn.CrossEntropyLoss()
    train_loss =[]
    val_loss=[]
    valid_loss_min = 100
    start_time=0
    for i in epochs:
        # if i == 10:
        #     model.update_group_details(4,2)
        # elif i == 20:
        #     model.update_group_details(4,3)
        print(i)
        start_time=time.time()
        loss = train(model,optimizer,loss_fn,training)
        valid_loss = validate(model,loss_fn,validation)
        if valid_loss < valid_loss_min:
            torch.save(model.state_dict(),model_filename)
        
        train_loss.append(loss)
        val_loss.append(valid_loss)
        print("duration: ",time.time()-start_time)

    y_pred_totals,test_loss = predict(model,loss_fn,testing)
    
    return train_loss,val_loss,test_loss,y_pred_totals

In [86]:
def makeGraph(trn_loss, val_loss,y_hat,epochs,type):
    fig, (ax11,ax12) = plt.subplots(2,1)
    fig.set_figwidth(6)
    fig.set_figheight(12)
    
    ax11.set_title(type + ' DropOut')
    ax11.plot(epochs,trn_loss,'bo',label='Training Loss')
    ax11.plot(epochs,val_loss,'r',label='Validation Loss')
    ax11.set_ylim([0.0,1.5])
    ax11.set(xlabel='Epochs',ylabel='Loss')
    ax11.legend()
    
    ax12.bar(range(10),y_hat)
    ax12.set(xlabel='Actual',ylabel='Predicted')
    
    
    plt.show()

In [87]:
def overlayGraphs(do_loss, do_val_loss,sw_loss, sw_val_loss,epochs):
    fig = plt.figure(constrained_layout=True)
    ax = fig.subplot_mosaic([['topL','topR'],['bottomL','bottomR']])
    # fig, ((ax11,ax21),(ax12,ax22)) = plt.subplots(2,2)
    fig.set_figwidth(12)
    fig.set_figheight(12)
    
    ax['bottomL'].set_title('DO & SW Validation Loss')
    ax['bottomL'].plot(epochs,do_val_loss,'b',label='Regular')
    ax['bottomL'].set_ylim([0.0,1.5])
    ax['bottomL'].set(xlabel='Epochs',ylabel='Loss')
    ax['bottomL'].plot(epochs,sw_val_loss,'g',label='Sector-Wise')
    ax['bottomL'].legend()
    
    ax['bottomR'].set_title('DO & SW Training Loss')
    ax['bottomR'].plot(epochs,do_loss,'bo',label='Regular')
    ax['bottomR'].set_ylim([0.0,1.5])
    ax['bottomR'].set(xlabel='Epochs',ylabel='Loss')
    ax['bottomR'].plot(epochs,sw_loss,'go',label='Sector-Wise')
    ax['bottomR'].legend()
    
    ax['topL'].set_title('Regular DropOut')
    ax['topL'].plot(epochs,do_loss,'bo',label='Training Loss')
    ax['topL'].plot(epochs,do_val_loss,'r',label='Validation Loss')
    ax['topL'].set_ylim([0.0,1.5])
    ax['topL'].set(xlabel='Epochs',ylabel='Loss')
    ax['topL'].legend()
    
    ax['topR'].set_title('Sector-Wise DropOut')
    ax['topR'].plot(epochs,sw_loss,'bo',label='Training Loss')
    ax['topR'].plot(epochs,sw_val_loss,'r',label='Validation Loss')
    ax['topR'].set_ylim([0.0,1.5])
    ax['topR'].set(xlabel='Epochs',ylabel='Loss')
    ax['topR'].legend()
   
    plt.show()

In [88]:
epochs=range(100)
torch.set_printoptions(threshold=10_000)

dropout_rate=0.25
group_size=4
drop_per_group=1

In [None]:
SW_model = DeepNN_SW(group_size=group_size,drop_per_group=drop_per_group)

sw_t_loss,sw_v_loss,sw_tst_loss,sw_correct_class_totals = train_and_test_dropout(SW_model,train_loader,valid_loader,test_loader,epochs,'sw-model-cifar.pt')

0
duration:  122.64493298530579
1
duration:  122.83762192726135
2


In [None]:
makeGraph(sw_t_loss,sw_v_loss,sw_correct_class_totals, epochs, 'Sector-Wise')

In [None]:
#STANDARD DROPOUT MODEL
do_model = DeepNN(dropout_rate=dropout_rate)

do_t_loss,do_v_loss,do_tst_loss,do_correct_class_totals = train_and_test_dropout(do_model,train_loader,valid_loader,test_loader,epochs,'do-model-cifar.pt')

In [None]:
makeGraph(do_t_loss,do_v_loss,do_correct_class_totals, epochs, 'Regular')

In [None]:
overlayGraphs(do_t_loss,do_v_loss,sw_t_loss,sw_v_loss, epochs)