In [None]:
import random
import time
import numpy as np
import matplotlib.pyplot as plt

import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
import torchvision.transforms as transforms
from torchvision.datasets import MNIST
from torch.utils.data import DataLoader, random_split

# Detect once whether a CUDA device can be used; the flag is read by the
# training helpers further down.
train_on_gpu = torch.cuda.is_available()

print('CUDA is available!  Training on GPU ...' if train_on_gpu
      else 'CUDA is not available.  Training on CPU ...')

In [None]:
# Step 1: Load and prepare the MNIST dataset.
# Images become tensors and are then normalised with mean 0.5 / std 0.5.
transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize((0.5,), (0.5,)),
])

# Official train / test partitions (downloaded into ./data on first use).
full_train_data, test_data = (
    MNIST(root='data', train=is_train, download=True, transform=transform)
    for is_train in (True, False)
)

# Carve a validation set out of the official training partition.
# NOTE(review): only 20% of the samples are used for training and the
# remaining 80% for validation - confirm this ratio is intentional.
# NOTE(review): the split is unseeded, so it differs between kernel runs.
train_size = int(0.2 * len(full_train_data))
valid_size = len(full_train_data) - train_size
train_data, valid_data = random_split(full_train_data, [train_size, valid_size])

# Mini-batch loaders; only the test loader keeps a fixed sample order.
train_loader = DataLoader(train_data, shuffle=True, batch_size=32)
valid_loader = DataLoader(valid_data, shuffle=True, batch_size=32)
test_loader = DataLoader(test_data, shuffle=False, batch_size=32)


In [None]:
# Step 2: Define the Deep Neural Network Model
class DeepNN(nn.Module):
    """Fully connected classifier for 28x28 inputs using standard dropout.

    The network has four hidden linear layers followed by a 10-way output
    layer. ReLU and the same ``nn.Dropout`` module are applied after each
    hidden layer.

    Keyword Args:
        dropout_rate: Probability handed to ``nn.Dropout`` (required;
            a missing key raises ``KeyError``).
        hidden_sizes: Optional sequence of exactly four hidden-layer widths.
            When omitted, the module-level globals ``l1``, ``l2``, ``l3``
            and ``l4`` are read at construction time, so existing callers
            that rely on those globals keep working.
    """

    def __init__(self,**kwargs):
        super(DeepNN, self).__init__()
        hidden_sizes = kwargs.get('hidden_sizes')
        if hidden_sizes is None:
            # Legacy path: widths come from notebook globals, which must be
            # defined before the first instance is created.
            hidden_sizes = (l1, l2, l3, l4)
        h1, h2, h3, h4 = hidden_sizes

        # Attribute names are kept so saved state_dicts stay loadable.
        self.fc1 = nn.Linear(28*28, h1)
        self.fc2 = nn.Linear(h1, h2)
        self.fc3 = nn.Linear(h2, h3)
        self.fc4 = nn.Linear(h3, h4)
        self.fc5 = nn.Linear(h4, 10)
        self.dropout = nn.Dropout(kwargs['dropout_rate'])

    def forward(self, x):
        """Return the 10 class logits for a batch of images."""
        x = x.view(-1, 28*28)  # Flatten the images
        x = self.dropout(F.relu(self.fc1(x)))
        x = self.dropout(F.relu(self.fc2(x)))
        x = self.dropout(F.relu(self.fc3(x)))
        x = self.dropout(F.relu(self.fc4(x)))
        x = self.fc5(x)  # raw logits, no activation
        return x



In [None]:
#Sector-wise dropout layer
class SWDropOut(torch.nn.Module):
    """Dropout that zeroes a fixed number of activations in every group.

    The input is treated as a flat, row-major sequence that is cut into
    consecutive groups of ``group_size`` elements. During training exactly
    ``drop_per_group`` randomly chosen elements of each full group are set
    to zero and the survivors are multiplied by
    ``group_size / (group_size - drop_per_group)``. In eval mode the input
    is returned unchanged.

    If the number of elements is not a multiple of ``group_size`` the final,
    shorter group is masked with the leading part of a full-size group mask,
    so it may lose fewer than ``drop_per_group`` elements.

    Randomness comes from PyTorch's RNG (``torch.rand``), so it is
    controlled by ``torch.manual_seed``.

    Keyword Args:
        group_size: Number of consecutive activations per group (> 0).
        drop_per_group: Activations zeroed per group
            (``0 <= drop_per_group < group_size``).

    Raises:
        KeyError: If either keyword argument is missing.
        ValueError: If the two sizes are inconsistent.
    """

    def __init__(self,**kwargs):
        super().__init__()
        self.group_size = int(kwargs['group_size'])
        self.drop_per_group = int(kwargs['drop_per_group'])
        if self.group_size <= 0:
            raise ValueError('group_size must be a positive integer')
        if not 0 <= self.drop_per_group < self.group_size:
            # Dropping every element would make the rescale factor infinite.
            raise ValueError('drop_per_group must satisfy 0 <= drop_per_group < group_size')
        # Rescale factor applied to the surviving activations.
        self.scalar = self.group_size / (self.group_size - self.drop_per_group)

    def forward(self, x):
        """Apply the group-wise mask in training mode; identity otherwise."""
        if not self.training or self.drop_per_group == 0:
            return x

        total = x.numel()
        # Ceil division so a trailing partial group still receives a mask.
        num_groups = -(-total // self.group_size)

        # Rank i.i.d. uniform scores inside each group: the ranks form a
        # uniformly random permutation, and the drop_per_group lowest ranks
        # are the dropped positions. Built on x's device to avoid CPU/GPU
        # mismatches.
        scores = torch.rand(num_groups, self.group_size, device=x.device)
        ranks = scores.argsort(dim=1).argsort(dim=1)
        keep = ranks >= self.drop_per_group

        mask = keep.reshape(-1)[:total].reshape(x.shape).to(x.dtype) * self.scalar
        return x * mask


In [None]:
# Step 2: Define the Deep Neural Network Model
class DeepNN_SW(nn.Module):
    """Fully connected classifier for 28x28 inputs using sector-wise dropout.

    Same layer layout as ``DeepNN`` (four hidden linear layers and a 10-way
    output layer), but ``SWDropOut`` is applied after each hidden ReLU
    instead of ``nn.Dropout``.

    Keyword Args:
        group_size: Forwarded to ``SWDropOut`` (required).
        drop_per_group: Forwarded to ``SWDropOut`` (required).
        hidden_sizes: Optional sequence of exactly four hidden-layer widths.
            When omitted, the module-level globals ``l1``, ``l2``, ``l3``
            and ``l4`` are read at construction time, so existing callers
            that rely on those globals keep working.
    """

    def __init__(self,**kwargs):
        super(DeepNN_SW, self).__init__()
        hidden_sizes = kwargs.get('hidden_sizes')
        if hidden_sizes is None:
            # Legacy path: widths come from notebook globals, which must be
            # defined before the first instance is created.
            hidden_sizes = (l1, l2, l3, l4)
        h1, h2, h3, h4 = hidden_sizes

        # Attribute names are kept so saved state_dicts stay loadable.
        self.fc1 = nn.Linear(28*28, h1)
        self.fc2 = nn.Linear(h1, h2)
        self.fc3 = nn.Linear(h2, h3)
        self.fc4 = nn.Linear(h3, h4)
        self.fc5 = nn.Linear(h4, 10)
        self.dropout = SWDropOut(group_size=kwargs['group_size'], drop_per_group=kwargs['drop_per_group'])

    def forward(self, x):
        """Return the 10 class logits for a batch of images."""
        x = x.view(-1, 28*28)  # Flatten the images
        x = self.dropout(F.relu(self.fc1(x)))
        x = self.dropout(F.relu(self.fc2(x)))
        x = self.dropout(F.relu(self.fc3(x)))
        x = self.dropout(F.relu(self.fc4(x)))
        x = self.fc5(x)  # raw logits, no activation
        return x


In [None]:
def train(model,optim,criterion,train_loader):
    """Run one training epoch and return the mean per-batch loss.

    Args:
        model: Module to optimise; it is switched to training mode.
        optim: Optimizer bound to ``model``'s parameters. (The name shadows
            the ``torch.optim`` module inside this function only.)
        criterion: Callable ``criterion(output, target)`` returning a scalar
            loss tensor.
        train_loader: Sized iterable of ``(data, target)`` batches.

    Returns:
        Sum of the per-batch loss values divided by ``len(train_loader)``.

    Raises:
        ZeroDivisionError: If ``train_loader`` is empty.
    """
    model.train()

    # Move each batch to wherever the model's parameters live instead of
    # relying on a global CUDA flag; this avoids device-mismatch errors when
    # CUDA is available but the model was left on the CPU (or vice versa).
    first_param = next(model.parameters(), None)
    device = first_param.device if first_param is not None else None

    train_loss = 0.0
    for data, target in train_loader:
        if device is not None:
            data, target = data.to(device), target.to(device)

        optim.zero_grad()
        output = model(data)
        loss = criterion(output, target)
        loss.backward()
        optim.step()
        train_loss += loss.item()

    return train_loss/len(train_loader)

@torch.no_grad()
def validate(model,criterion,valid_loader):
    """Evaluate ``model`` on a validation loader and return the mean batch loss.

    Gradients are disabled by the decorator and the model is switched to
    eval mode (it is not switched back afterwards).

    Args:
        model: Module to evaluate.
        criterion: Callable ``criterion(output, target)`` returning a scalar
            loss tensor.
        valid_loader: Sized iterable of ``(data, target)`` batches.

    Returns:
        Sum of the per-batch loss values divided by ``len(valid_loader)``.

    Raises:
        ZeroDivisionError: If ``valid_loader`` is empty.
    """
    model.eval()

    # Follow the model's own device rather than a global CUDA flag so data
    # and parameters can never end up on different devices.
    first_param = next(model.parameters(), None)
    device = first_param.device if first_param is not None else None

    valid_loss = 0.0
    for data, target in valid_loader:
        if device is not None:
            data, target = data.to(device), target.to(device)
        output = model(data)
        loss = criterion(output, target)
        valid_loss += loss.item()

    valid_loss = valid_loss/len(valid_loader)

    return valid_loss

@torch.no_grad()
def predict(model,criterion,test_loader):
    """Evaluate ``model`` on a test loader and print per-class accuracy.

    Gradients are disabled by the decorator and the model is switched to
    eval mode. Ten classes (labels 0-9) are assumed.

    Args:
        model: Module producing one score per class for every sample.
        criterion: Callable ``criterion(prediction, target)`` returning a
            scalar loss tensor.
        test_loader: Sized iterable of ``(data, target)`` batches.

    Returns:
        ``(class_correct, test_loss)`` where ``class_correct`` is a list of
        ten floats (correct predictions per true label) and ``test_loss`` is
        the sum of per-batch losses divided by the number of batches, i.e.
        the same normalisation ``train`` and ``validate`` use.

    Raises:
        ZeroDivisionError: If ``test_loader`` is empty.
    """
    model.eval()

    # Follow the model's own device rather than a global CUDA flag.
    first_param = next(model.parameters(), None)
    device = first_param.device if first_param is not None else None

    class_correct = [0.0] * 10
    class_total = [0.0] * 10
    test_loss = 0.0
    for data, target in test_loader:
        if device is not None:
            data, target = data.to(device), target.to(device)

        prediction = model(data)
        test_loss += criterion(prediction, target).item()

        # Index of the highest score is the predicted class.
        _, pred = torch.max(prediction, 1)
        hits = pred.eq(target.view_as(pred))

        # Plain Python lists keep this correct for any batch size,
        # including a final batch that holds a single sample.
        for label, hit in zip(target.view(-1).tolist(), hits.view(-1).tolist()):
            class_correct[label] += float(hit)
            class_total[label] += 1

    # Each batch loss was accumulated as one value per batch, so average
    # over the number of batches rather than the number of samples.
    test_loss = test_loss/len(test_loader)
    print('Test Loss: {:.6f}\n'.format(test_loss))

    for i in range(10):
        if class_total[i] > 0:
            print('Test Accuracy of %5s: %2d%% (%2d/%2d)' % (
                i, 100 * class_correct[i] / class_total[i],
                class_correct[i], class_total[i]))
        else:
            print('Test Accuracy of %5s: N/A (no training examples)' % (i))

    correct_sum = sum(class_correct)
    total_sum = sum(class_total)
    print('\nTest Accuracy (Overall): %2d%% (%2d/%2d)' % (
        100. * correct_sum / total_sum,
        correct_sum, total_sum))

    return class_correct,test_loss

In [36]:
def train_and_test_dropout(training,validation,testing,epochs, model_filename,dropout_rate,learning_rate):
    """Train a ``DeepNN`` with standard dropout, checkpoint it, and test it.

    Args:
        training: Loader passed to ``train`` every epoch.
        validation: Loader passed to ``validate`` every epoch.
        testing: Loader passed to ``predict`` after the last epoch.
        epochs: Iterable of epoch identifiers; one training pass per item.
        model_filename: Path the best-so-far ``state_dict`` is saved to.
        dropout_rate: Dropout probability handed to ``DeepNN``.
        learning_rate: Learning rate for the Adamax optimizer.

    Returns:
        ``(train_loss, val_loss, test_loss, y_pred_totals)``: per-epoch
        training losses, per-epoch validation losses, the test loss and the
        per-class correct counts reported by ``predict``. The test metrics
        come from the weights after the final epoch, not from the saved
        checkpoint.
    """
    do_model = DeepNN(dropout_rate=dropout_rate)
    if train_on_gpu:
        # Batches are sent to the GPU when CUDA is available, so the
        # parameters have to live there too (moved before the optimizer is
        # created so it tracks the GPU tensors).
        do_model = do_model.cuda()
    optimizer = optim.Adamax(do_model.parameters(), lr=learning_rate)
    loss_fn = nn.CrossEntropyLoss()
    train_loss =[]
    val_loss=[]
    valid_loss_min = float('inf')
    print('Dropout')
    for i in epochs:
        start_time=time.time()
        loss = train(do_model,optimizer,loss_fn,training)
        valid_loss = validate(do_model,loss_fn,validation)
        if valid_loss < valid_loss_min:
            # Remember the new best so only genuine improvements overwrite
            # the checkpoint on disk.
            valid_loss_min = valid_loss
            torch.save(do_model.state_dict(),model_filename)

        train_loss.append(loss)
        val_loss.append(valid_loss)
        print(f'Epoch: {i}  Duration: {time.time()-start_time}')

    y_pred_totals,test_loss = predict(do_model,loss_fn,testing)
    return train_loss,val_loss,test_loss,y_pred_totals

In [37]:
def train_and_test_sector_wise(training,validation,testing,epochs, model_filename,learning_rate,group_size,drop_per_group):
    """Train a ``DeepNN_SW`` with sector-wise dropout, checkpoint it, and test it.

    Args:
        training: Loader passed to ``train`` every epoch.
        validation: Loader passed to ``validate`` every epoch.
        testing: Loader passed to ``predict`` after the last epoch.
        epochs: Iterable of epoch identifiers; one training pass per item.
        model_filename: Path the best-so-far ``state_dict`` is saved to.
        learning_rate: Learning rate for the Adamax optimizer.
        group_size: Group size handed to ``DeepNN_SW``.
        drop_per_group: Units dropped per group, handed to ``DeepNN_SW``.

    Returns:
        ``(train_loss, val_loss, test_loss, y_pred_totals)``: per-epoch
        training losses, per-epoch validation losses, the test loss and the
        per-class correct counts reported by ``predict``. The test metrics
        come from the weights after the final epoch, not from the saved
        checkpoint.
    """
    SW_model = DeepNN_SW(group_size=group_size,drop_per_group=drop_per_group)
    if train_on_gpu:
        # Batches are sent to the GPU when CUDA is available, so the
        # parameters have to live there too (moved before the optimizer is
        # created so it tracks the GPU tensors).
        SW_model = SW_model.cuda()
    optimizer = optim.Adamax(SW_model.parameters(), lr=learning_rate)
    loss_fn = nn.CrossEntropyLoss()
    train_loss =[]
    val_loss=[]
    valid_loss_min = float('inf')
    print('Sector-Wise')
    for i in epochs:
        start_time=time.time()
        loss = train(SW_model,optimizer,loss_fn,training)
        valid_loss = validate(SW_model,loss_fn,validation)
        if valid_loss < valid_loss_min:
            # Remember the new best so only genuine improvements overwrite
            # the checkpoint on disk.
            valid_loss_min = valid_loss
            torch.save(SW_model.state_dict(),model_filename)

        train_loss.append(loss)
        val_loss.append(valid_loss)
        print(f'Epoch: {i}  Duration: {time.time()-start_time}')

    y_pred_totals,test_loss = predict(SW_model,loss_fn,testing)
    return train_loss,val_loss,test_loss,y_pred_totals

In [38]:
def makeGraph(trn_loss, val_loss,y_hat,epochs,type):
    """Show loss curves and a per-class bar chart for a single run.

    Args:
        trn_loss: Training loss per epoch.
        val_loss: Validation loss per epoch.
        y_hat: Ten values, one bar per class index 0-9.
        epochs: X coordinates for the loss curves.
        type: Text prefix for the title (``type + ' DropOut'``).
    """
    fig, (loss_ax, class_ax) = plt.subplots(2,1)
    fig.set_size_inches(6, 12)

    # Upper panel: training points and validation line on a fixed y range.
    loss_ax.plot(epochs, trn_loss, 'bo', label='Training Loss')
    loss_ax.plot(epochs, val_loss, 'r', label='Validation Loss')
    loss_ax.set_title(type + ' DropOut')
    loss_ax.set_xlabel('Epochs')
    loss_ax.set_ylabel('Loss')
    loss_ax.set_ylim([0.0,1.5])
    loss_ax.legend()

    # Lower panel: one bar per class.
    class_ax.bar(range(10), y_hat)
    class_ax.set_xlabel('Actual')
    class_ax.set_ylabel('Predicted')

    plt.show()

In [46]:
def overlayGraphs(do_loss, do_val_loss,sw_loss, sw_val_loss,epochs,dropout_rate):
    """Show a 2x2 comparison of regular vs. sector-wise dropout losses.

    Top row: training and validation loss of each method on its own.
    Bottom row: both methods overlaid, validation on the left and training
    on the right. Every panel uses a y range of 0.0-1.0.

    Args:
        do_loss: Regular-dropout training loss per epoch.
        do_val_loss: Regular-dropout validation loss per epoch.
        sw_loss: Sector-wise training loss per epoch.
        sw_val_loss: Sector-wise validation loss per epoch.
        epochs: X coordinates shared by all curves.
        dropout_rate: Value interpolated into the panel titles.
    """
    fig = plt.figure(constrained_layout=True)
    ax = fig.subplot_mosaic([['topL','topR'],['bottomL','bottomR']])
    fig.set_size_inches(12, 12)

    # (mosaic key, title, curves) with curves given as (values, format, label).
    panels = (
        ('bottomL', f'DO & SW Validation Loss p={dropout_rate}',
         ((do_val_loss, 'b', 'Regular'), (sw_val_loss, 'g', 'Sector-Wise'))),
        ('bottomR', f'DO & SW Training Loss p={dropout_rate}',
         ((do_loss, 'bo', 'Regular'), (sw_loss, 'go', 'Sector-Wise'))),
        ('topL', f'Regular DropOut p={dropout_rate}',
         ((do_loss, 'bo', 'Training Loss'), (do_val_loss, 'r', 'Validation Loss'))),
        ('topR', f'Sector-Wise DropOut p={dropout_rate}',
         ((sw_loss, 'bo', 'Training Loss'), (sw_val_loss, 'r', 'Validation Loss'))),
    )

    for key, title, curves in panels:
        panel = ax[key]
        panel.set_title(title)
        for values, fmt, label in curves:
            panel.plot(epochs, values, fmt, label=label)
        panel.set_ylim([0.0,1.0])
        panel.set(xlabel='Epochs',ylabel='Loss')
        panel.legend()

    plt.show()

In [40]:
# makeGraph(sw_t_loss,sw_v_loss,sw_correct_class_totals, epochs, 'Sector-Wise')

In [41]:
# Hidden-layer widths used when the networks are constructed
# (alternative values noted earlier: 1024, 1024, 256, 128).
l1, l2, l3, l4 = 1200, 1200, 240, 120

# Training schedule shared by every experiment.
epochs = range(100)
learning_rate = 0.01

In [None]:
# Experiment sweep: for every drop fraction a sector-wise model and a
# regular-dropout model are trained on the same loaders.
# Naming convention: the numeric suffix of the result variables and of the
# checkpoint files is the percentage of units KEPT (e.g. suffix 25 pairs
# with dropout_rate = 0.75).
# The runs below that are commented out are currently disabled; the plotting
# cell still references their result variables (suffixes 100 through 33), so
# they must be re-enabled - or still be alive in the kernel - for those
# plots to work.
# dropout_rate, group_size and drop_per_group are module-level names; keep
# each assignment ahead of the calls that belong to it.
# dropout_rate = 0.0
# group_size=100
# drop_per_group=0
# sw_t_loss100,   sw_v_loss100,   sw_tst_loss100, sw_correct_class_totals100 = train_and_test_sector_wise(train_loader,valid_loader,test_loader,epochs,'sw-model-mnist-100.pt',learning_rate,group_size,drop_per_group)
# do_t_loss100,   do_v_loss100,   do_tst_loss100, do_correct_class_totals100 = train_and_test_dropout(train_loader,valid_loader,test_loader,epochs,'do-model-mnist-100.pt',dropout_rate,learning_rate)
# 
# dropout_rate = 0.10
# group_size=10
# drop_per_group=1
# sw_t_loss90,   sw_v_loss90,   sw_tst_loss90, sw_correct_class_totals90 = train_and_test_sector_wise(train_loader,valid_loader,test_loader,epochs,'sw-model-mnist-90.pt',learning_rate,group_size,drop_per_group)
# do_t_loss90,   do_v_loss90,   do_tst_loss90, do_correct_class_totals90 = train_and_test_dropout(train_loader,valid_loader,test_loader,epochs,'do-model-mnist-90.pt',dropout_rate,learning_rate)
# 
# dropout_rate = 0.20
# group_size=5
# drop_per_group=1
# sw_t_loss80,   sw_v_loss80,   sw_tst_loss80, sw_correct_class_totals80 = train_and_test_sector_wise(train_loader,valid_loader,test_loader,epochs,'sw-model-mnist-80.pt',learning_rate,group_size,drop_per_group)
# do_t_loss80,   do_v_loss80,   do_tst_loss80, do_correct_class_totals80 = train_and_test_dropout(train_loader,valid_loader,test_loader,epochs,'do-model-mnist-80.pt',dropout_rate,learning_rate)
# 
# dropout_rate = 0.25
# group_size=4
# drop_per_group=1
# sw_t_loss75,   sw_v_loss75,   sw_tst_loss75, sw_correct_class_totals75 = train_and_test_sector_wise(train_loader,valid_loader,test_loader,epochs,'sw-model-mnist-75.pt',learning_rate,group_size,drop_per_group)
# do_t_loss75,   do_v_loss75,   do_tst_loss75, do_correct_class_totals75 = train_and_test_dropout(train_loader,valid_loader,test_loader,epochs,'do-model-mnist-75.pt',dropout_rate,learning_rate)
# 
# dropout_rate = 0.33
# group_size=3
# drop_per_group=1
# sw_t_loss66,   sw_v_loss66,   sw_tst_loss66, sw_correct_class_totals66 = train_and_test_sector_wise(train_loader,valid_loader,test_loader,epochs,'sw-model-mnist-66.pt',learning_rate,group_size,drop_per_group)
# do_t_loss66,   do_v_loss66,   do_tst_loss66, do_correct_class_totals66 = train_and_test_dropout(train_loader,valid_loader,test_loader,epochs,'do-model-mnist-66.pt',dropout_rate,learning_rate)
# 
# dropout_rate = 0.5
# group_size=2
# drop_per_group=1
# sw_t_loss50,   sw_v_loss50,   sw_tst_loss50, sw_correct_class_totals50 = train_and_test_sector_wise(train_loader,valid_loader,test_loader,epochs,'sw-model-mnist-50.pt',learning_rate,group_size,drop_per_group)
# do_t_loss50,   do_v_loss50,   do_tst_loss50, do_correct_class_totals50 = train_and_test_dropout(train_loader,valid_loader,test_loader,epochs,'do-model-mnist-50.pt',dropout_rate,learning_rate)
# 
# dropout_rate = 0.66
# group_size=3
# drop_per_group=2
# sw_t_loss33,   sw_v_loss33,   sw_tst_loss33, sw_correct_class_totals33 = train_and_test_sector_wise(train_loader,valid_loader,test_loader,epochs,'sw-model-mnist-33.pt',learning_rate,group_size,drop_per_group)
# do_t_loss33,   do_v_loss33,   do_tst_loss33, do_correct_class_totals33 = train_and_test_dropout(train_loader,valid_loader,test_loader,epochs,'do-model-mnist-33.pt',dropout_rate,learning_rate)

# Active run: drop 3 of every 4 units (75% dropped, 25% kept).
dropout_rate = 0.75
group_size=4
drop_per_group=3
sw_t_loss25,   sw_v_loss25,   sw_tst_loss25, sw_correct_class_totals25 = train_and_test_sector_wise(train_loader,valid_loader,test_loader,epochs,'sw-model-mnist-25.pt',learning_rate,group_size,drop_per_group)
do_t_loss25,   do_v_loss25,   do_tst_loss25, do_correct_class_totals25 = train_and_test_dropout(train_loader,valid_loader,test_loader,epochs,'do-model-mnist-25.pt',dropout_rate,learning_rate)

# Active run: drop 4 of every 5 units (80% dropped, 20% kept).
dropout_rate = 0.80
group_size=5
drop_per_group=4
sw_t_loss20,   sw_v_loss20,   sw_tst_loss20, sw_correct_class_totals20 = train_and_test_sector_wise(train_loader,valid_loader,test_loader,epochs,'sw-model-mnist-20.pt',learning_rate,group_size,drop_per_group)
do_t_loss20,   do_v_loss20,   do_tst_loss20, do_correct_class_totals20 = train_and_test_dropout(train_loader,valid_loader,test_loader,epochs,'do-model-mnist-20.pt',dropout_rate,learning_rate)

# Active run: drop 9 of every 10 units (90% dropped, 10% kept).
dropout_rate = 0.90
group_size=10
drop_per_group=9
sw_t_loss10,   sw_v_loss10,   sw_tst_loss10, sw_correct_class_totals10 = train_and_test_sector_wise(train_loader,valid_loader,test_loader,epochs,'sw-model-mnist-10.pt',learning_rate,group_size,drop_per_group)
do_t_loss10,   do_v_loss10,   do_tst_loss10, do_correct_class_totals10 = train_and_test_dropout(train_loader,valid_loader,test_loader,epochs,'do-model-mnist-10.pt',dropout_rate,learning_rate)


Sector-Wise


In [None]:
# Plot every configuration whose results exist in this kernel session.
# Result variables are suffixed with the percentage of units KEPT, while the
# value shown in the plot titles is the dropout probability that was used
# for the run (e.g. suffix 90 <-> dropout_rate 0.10), so each suffix is
# paired with its matching rate explicitly.
for kept_pct, rate in [(100, 0.0), (90, 0.10), (80, 0.20), (75, 0.25), (66, 0.33),
                       (50, 0.50), (33, 0.66), (25, 0.75), (20, 0.80), (10, 0.90)]:
    names = [f'{prefix}{kept_pct}'
             for prefix in ('do_t_loss', 'do_v_loss', 'sw_t_loss', 'sw_v_loss')]
    missing = [name for name in names if name not in globals()]
    if missing:
        # The corresponding run has not been executed in this session.
        print(f'Skipping p={rate}: results not available ({", ".join(missing)})')
        continue
    overlayGraphs(*(globals()[name] for name in names), epochs, rate)
