<a href="https://colab.research.google.com/github/AnuragQ/python_apps/blob/master/PytorchDLMNIST.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [0]:
import torch.nn as nn
from torch import optim
from torch.utils.data import DataLoader
import torch
import numpy as np
from torchvision import datasets, transforms

In [0]:
class Net(nn.Module):
    """Fully connected ReLU network built from a list of layer widths.

    ``layers[0]`` is the size each sample is flattened to before the first
    ``Linear`` layer and ``layers[-1]`` is the number of outputs. Every
    consecutive pair of widths before the last one becomes a ``Linear``
    followed by an in-place ``ReLU``; the final ``Linear`` has no activation,
    so ``forward`` returns raw scores.

    Args:
        layers: Indexable sequence of at least two layer widths,
            e.g. ``[784, 1000, 10]``.
    """

    def __init__(self, layers):
        super().__init__()
        widths = list(layers)

        # Remember the input width so forward() can flatten to it instead of
        # assuming 784 features.
        self.in_features = widths[0]

        n_layers = []
        # Hidden blocks: every transition except the last gets a ReLU.
        for fan_in, fan_out in zip(widths[:-2], widths[1:-1]):
            n_layers.append(nn.Linear(fan_in, fan_out))
            n_layers.append(nn.ReLU(inplace=True))

        # Output layer: no activation.
        n_layers.append(nn.Linear(widths[-2], widths[-1]))

        self.model = nn.ModuleList(n_layers)

    def forward(self, x):
        """Flatten ``x`` to ``(-1, layers[0])`` and apply every module in order."""
        x = x.view(-1, self.in_features)
        for layer in self.model:
            x = layer(x)
        return x
#     def set_masks(self, masks,layers):
#         # Should be a less manual way to set masks
#         # Leave it for the future
#         for i in range(len(layers)-2):
            
#             self.n_layers[i].set_mask(masks[i])
        


In [0]:
# Prefer the GPU when CUDA is present; otherwise everything stays on the CPU.
if torch.cuda.is_available():
    device = torch.device('cuda')
else:
    device = torch.device('cpu')

# 784 inputs, four hidden layers (1000, 1000, 500, 200), 10 outputs.
net = Net([784, 1000, 1000, 500, 200, 10])
net = net.to(device)

# Loss and optimizer used by the training cell below.
critic = nn.CrossEntropyLoss()
optimizer = optim.RMSprop(net.parameters(), lr=1e-3)

# Bare expression so the notebook displays the module structure.
net

Net(
  (model): ModuleList(
    (0): Linear(in_features=784, out_features=1000, bias=True)
    (1): ReLU(inplace)
    (2): Linear(in_features=1000, out_features=1000, bias=True)
    (3): ReLU(inplace)
    (4): Linear(in_features=1000, out_features=500, bias=True)
    (5): ReLU(inplace)
    (6): Linear(in_features=500, out_features=200, bias=True)
    (7): ReLU(inplace)
    (8): Linear(in_features=200, out_features=10, bias=True)
  )
)

In [0]:
# Both splits get the same preprocessing: convert to a tensor, then normalize
# with mean 0.1307 and std 0.3081.
train_transforms = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize(mean=(0.1307,), std=(0.3081,)),
])

test_transforms = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize(mean=(0.1307,), std=(0.3081,)),
])


# Training batches are shuffled; MNIST is downloaded into the current directory.
train_loader = DataLoader(
    dataset=datasets.MNIST(root='.', train=True, download=True, transform=train_transforms),
    batch_size=64,
    shuffle=True,
)

# Evaluation batches keep the dataset order.
test_loader = DataLoader(
    dataset=datasets.MNIST(root='.', train=False, download=True, transform=test_transforms),
    batch_size=64,
    shuffle=False,
)

In [0]:
def train(epochs, model, optimizer, critic, train_dl):
    """Train ``model`` in place for ``epochs`` passes over ``train_dl``.

    Every 100th batch (including the first of each epoch) the loss and the
    accuracy on that batch are printed.

    Args:
        epochs: Number of full passes over ``train_dl``.
        model: ``nn.Module`` to optimize; it is switched to training mode.
        optimizer: Optimizer already bound to ``model``'s parameters.
        critic: Loss callable invoked as ``critic(predictions, targets)``.
        train_dl: Iterable yielding ``(inputs, targets)`` batches.
    """
    # Send batches to wherever the model's parameters live rather than relying
    # on a notebook-level `device` variable being defined and in sync.
    first_param = next(model.parameters(), None)
    target_device = first_param.device if first_param is not None else torch.device('cpu')

    model.train()

    for ep in range(epochs):
        print(f'Epoch: {ep+1}')
        for batch_i, (x_batch, y_batch) in enumerate(train_dl):
            x_batch = x_batch.to(target_device)
            y_batch = y_batch.to(target_device)

            preds = model(x_batch)
            loss = critic(preds, y_batch)

            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

            if batch_i % 100 == 0:
                # Accuracy of the predictions made before this step's update.
                pred_labels = torch.argmax(preds, dim=1)
                acc = (pred_labels == y_batch).sum().float()
                print(f'\tLoss: {loss.item():.4f} \t Accuracy: {acc/x_batch.shape[0]:.2f}')

In [0]:
# Two passes over the training set with the objects built in the cells above.
train(epochs=2, model=net, optimizer=optimizer, critic=critic, train_dl=train_loader)

Epoch: 1
	Loss: 2.3002 	 Accuracy: 0.11
	Loss: 0.5648 	 Accuracy: 0.91
	Loss: 0.1975 	 Accuracy: 0.92
	Loss: 0.1967 	 Accuracy: 0.94
	Loss: 0.1762 	 Accuracy: 0.95
	Loss: 0.1260 	 Accuracy: 0.97
	Loss: 0.1769 	 Accuracy: 0.95
	Loss: 0.0994 	 Accuracy: 0.95
	Loss: 0.0882 	 Accuracy: 0.97
	Loss: 0.1000 	 Accuracy: 0.97
Epoch: 2
	Loss: 0.1601 	 Accuracy: 0.97
	Loss: 0.1944 	 Accuracy: 0.92
	Loss: 0.1700 	 Accuracy: 0.94
	Loss: 0.1246 	 Accuracy: 0.97
	Loss: 0.0297 	 Accuracy: 1.00
	Loss: 0.1931 	 Accuracy: 0.95
	Loss: 0.1375 	 Accuracy: 0.97
	Loss: 0.0616 	 Accuracy: 0.97
	Loss: 0.1417 	 Accuracy: 0.97
	Loss: 0.1367 	 Accuracy: 0.97


In [0]:
# def weight_prune(model, pruning_perc):
#     '''
#     Prune pruning_perc% weights globally (not layer-wise)
#     arXiv: 1606.09274
#     '''    
#     all_weights = []
#     for p in model.parameters():
#         if len(p.data.size()) != 1:
#             all_weights += list(p.cpu().data.abs().numpy().flatten())
#     threshold = np.percentile(np.array(all_weights), pruning_perc)

#     # generate mask
#     masks = []
#     for p in model.parameters():
#         if len(p.data.size()) != 1:
#             pruned_inds = p.data.abs() > threshold
#             masks.append(pruned_inds.float())
#     return masks

In [0]:
def test(model):
    correct = 0
    total = 0
    model.eval()

    with torch.no_grad():
        for data in test_loader:
            images, labels = data
            outputs = model(images.to(device))
            outputs.data=outputs.data.to(device)
            _, predicted = torch.max(outputs.data, 1)
            total += labels.size(0)
            correct += (predicted == labels.to(device)).sum().item()

    print('Accuracy of the network on test images: %d %%' % (
        100 * correct / total))

Accuracy of the network on test images: 97 %


In [0]:
# print(threshold)

0.05216322652995588


In [0]:
def weight_prune(model, percentage):
    """Zero out small-magnitude weights of the hidden ``Linear`` layers in place.

    The cut-off is a single global threshold: the ``percentage``-th percentile
    of the absolute values of every non-1-D parameter of ``model`` (so biases
    are ignored). Every ``nn.Linear`` in ``model.model`` except the last module
    then has its weights with magnitude below that threshold set to zero. The
    last module contributes to the threshold but is not pruned.

    Args:
        model: Module exposing its layers as the sequence ``model.model``.
        percentage: Percentile in ``[0, 100]`` passed to ``np.percentile``.

    Returns:
        The magnitude threshold that was applied, as a float.

    Raises:
        ValueError: If ``model`` has no non-1-D parameters to rank.
    """
    # Collect magnitudes as flat NumPy arrays; concatenating them avoids
    # building a Python list with one entry per weight.
    magnitudes = [
        p.detach().abs().cpu().numpy().ravel()
        for p in model.parameters()
        if p.dim() != 1
    ]
    if not magnitudes:
        raise ValueError('model has no weight matrices to prune')

    threshold = float(np.percentile(np.concatenate(magnitudes), percentage))

    with torch.no_grad():
        # Skip the final module so the output layer keeps all its weights.
        for layer in model.model[:-1]:
            if isinstance(layer, nn.Linear):
                layer.weight.masked_fill_(layer.weight.abs() < threshold, 0.0)

    return threshold

        

In [0]:
# Display the weight tensor of the module at index 2 of net.model.
net.model[2].weight.data

tensor([[ 0.0000,  0.0000,  0.0000,  ...,  0.0000, -0.1154,  0.0000],
        [ 0.0000,  0.0000,  0.0000,  ...,  0.0000, -0.0621,  0.0000],
        [ 0.0000,  0.0000,  0.0000,  ...,  0.0000,  0.0000,  0.0000],
        ...,
        [ 0.0000,  0.0000,  0.0000,  ...,  0.0000, -0.0904,  0.0000],
        [ 0.0000,  0.0000,  0.0000,  ...,  0.0000,  0.0000,  0.0000],
        [-0.0756,  0.0000,  0.0000,  ...,  0.0000,  0.0000,  0.0000]],
       device='cuda:0')

In [0]:
# Report test accuracy, then prune the trained network in place.
test(net)
# NOTE(review): the original line called an undefined `weightPrune()` with no
# arguments. 90 is an assumed pruning percentile; confirm the intended value.
weight_prune(net, 90)

Accuracy of the network on test images: 96 %
