# GitHub resources related to the original paper:


1.   https://github.com/facebookresearch/SparseConvNet
2.   https://github.com/btgraham/SparseConvNet-archived
3.   https://github.com/btgraham/SparseConvNet-CPU-archived
4.   https://github.com/traveller59/spconv



# 1. Reproduce MNIST results on DeepCNet(5,10)

---




*   Data preprocessing: shift images by up to ±2 pixels as data augmentation
*   CNN architecture: 6 conv layers, 5 max pooling layers (2×2), ReLU for hidden layers, softmax for the output layer (applied inside the cross-entropy loss); input spatial size 96×96 (obtained by zero-padding 34 pixels on each side of both spatial axes)

## 1.1 Download the MNIST dataset


In [None]:
# Always write to the same file name: without -O, a repeated run saves the new
# download as MNIST.tar.gz.1, .2, ... and tar keeps unpacking the old (possibly
# truncated) MNIST.tar.gz.
!wget -O MNIST.tar.gz www.di.ens.fr/~lelarge/MNIST.tar.gz
!tar -zxvf MNIST.tar.gz

--2021-04-14 02:45:54--  http://www.di.ens.fr/~lelarge/MNIST.tar.gz
Resolving www.di.ens.fr (www.di.ens.fr)... 129.199.99.14
Connecting to www.di.ens.fr (www.di.ens.fr)|129.199.99.14|:80... connected.
HTTP request sent, awaiting response... 302 Found
Location: https://www.di.ens.fr/~lelarge/MNIST.tar.gz [following]
--2021-04-14 02:45:55--  https://www.di.ens.fr/~lelarge/MNIST.tar.gz
Connecting to www.di.ens.fr (www.di.ens.fr)|129.199.99.14|:443... connected.
HTTP request sent, awaiting response... 200 OK
Length: unspecified [application/x-gzip]
Saving to: ‘MNIST.tar.gz.3’

MNIST.tar.gz.3          [        <=>         ]  33.20M  6.82MB/s    in 14s     

2021-04-14 02:46:09 (2.40 MB/s) - ‘MNIST.tar.gz.3’ saved [34813078]

MNIST/
MNIST/raw/
MNIST/raw/train-labels-idx1-ubyte
MNIST/raw/t10k-labels-idx1-ubyte.gz
MNIST/raw/t10k-labels-idx1-ubyte
MNIST/raw/t10k-images-idx3-ubyte.gz
MNIST/raw/train-images-idx3-ubyte

gzip: stdin: unexpected end of file
tar: Unexpected EOF in archive
tar: Unexpe

## 1.2 Prepare the training set and the test set

In [None]:
import os
import torch
import torch.nn as nn
import torch.utils.data as Data
from torchvision import datasets
from torchvision.datasets import MNIST
import torchvision
import torchvision.transforms as transforms
import matplotlib.pyplot as plt
from scipy.ndimage.interpolation import shift
import numpy as np
import time

torch.manual_seed(1)    # seed PyTorch's global RNG so repeated runs are reproducible


<torch._C.Generator at 0x7f2fdbb8c910>

## 1.3 Define functions and DeepCNet architecture

In [None]:
def try_gpu():
    """
    Pick the device to run on.

    Returns:
        torch.device: 'cuda:0' when CUDA is available, 'cpu' otherwise.
    """
    name = 'cuda:0' if torch.cuda.is_available() else 'cpu'
    return torch.device(name)

In [None]:

class Net(nn.Module):
    """
    DeepCNet-style classifier.

    Six Conv2d + ReLU stages, the first five each followed by 2x2 max
    pooling, then a single fully connected layer. conv1 uses a 3x3 kernel,
    conv2..conv6 use 2x2 kernels; every convolution has stride 1 and
    padding 1.

    forward() returns the raw output of the linear layer; no softmax is
    applied inside the module.

    Args:
        in_channels: number of channels of the input image
        hidden_channels: output channels of the six conv stages (indices 0..5)
        out_features: size of the output of the linear layer
        output_resize_para: side length of the square feature map produced by
            conv6, used to size the linear layer
    """

    @staticmethod
    def _stage(n_in, n_out, kernel_size, pool=True):
        """Build Conv2d(stride 1, padding 1) -> ReLU [-> MaxPool2d(2)]."""
        layers = [
            nn.Conv2d(n_in, n_out, kernel_size=kernel_size, stride=1, padding=1),
            nn.ReLU(),
        ]
        if pool:
            layers.append(nn.MaxPool2d(kernel_size=2))
        return nn.Sequential(*layers)

    def __init__(self, in_channels, hidden_channels, out_features, output_resize_para):
        super().__init__()
        h = hidden_channels

        # Stages are created in network order so parameter initialisation
        # consumes the RNG in the same sequence as the layers appear.
        self.conv1 = self._stage(in_channels, h[0], kernel_size=3)
        self.conv2 = self._stage(h[0], h[1], kernel_size=2)
        self.conv3 = self._stage(h[1], h[2], kernel_size=2)
        self.conv4 = self._stage(h[2], h[3], kernel_size=2)
        self.conv5 = self._stage(h[3], h[4], kernel_size=2)
        self.conv6 = self._stage(h[4], h[5], kernel_size=2, pool=False)

        flat_features = output_resize_para * output_resize_para * h[5]
        self.fc = nn.Linear(flat_features, out_features)

    def forward(self, x):
        stages = (self.conv1, self.conv2, self.conv3,
                  self.conv4, self.conv5, self.conv6)
        for stage in stages:
            x = stage(x)
        flat = x.view(x.size(0), -1)  # (batch, features) for the linear layer
        return self.fc(flat)

In [None]:
class NetDropout(nn.Module):
    """
    DeepCNet-style classifier with dropout.

    Six Conv2d + ReLU stages, the first five each followed by 2x2 max
    pooling, then a single fully connected layer. conv1 uses a 3x3 kernel,
    conv2..conv6 use 2x2 kernels; every convolution has stride 1 and
    padding 1.

    Dropout with rates 0.1, 0.2, 0.3 and 0.4 is applied to the outputs of
    conv3, conv4, conv5 and conv6, and dropout with rate 0.5 is applied to
    the flattened features that feed the linear layer.

    forward() returns the raw output of the linear layer; no softmax is
    applied inside the module.

    Args:
        in_channels: number of channels of the input image
        hidden_channels: output channels of the six conv stages (indices 0..5)
        out_features: size of the output of the linear layer
        output_resize_para: side length of the square feature map produced by
            conv6, used to size the linear layer
    """

    @staticmethod
    def _stage(n_in, n_out, kernel_size, pool=True):
        """Build Conv2d(stride 1, padding 1) -> ReLU [-> MaxPool2d(2)]."""
        layers = [
            nn.Conv2d(n_in, n_out, kernel_size=kernel_size, stride=1, padding=1),
            nn.ReLU(),
        ]
        if pool:
            layers.append(nn.MaxPool2d(kernel_size=2))
        return nn.Sequential(*layers)

    def __init__(self, in_channels, hidden_channels, out_features, output_resize_para):
        super(NetDropout, self).__init__()
        h = hidden_channels

        self.conv1 = self._stage(in_channels, h[0], kernel_size=3)
        self.conv2 = self._stage(h[0], h[1], kernel_size=2)
        self.conv3 = self._stage(h[1], h[2], kernel_size=2)
        self.conv4 = self._stage(h[2], h[3], kernel_size=2)
        self.conv5 = self._stage(h[3], h[4], kernel_size=2)
        self.conv6 = self._stage(h[4], h[5], kernel_size=2, pool=False)

        self.fc = nn.Linear(output_resize_para * output_resize_para * h[5], out_features)

        # Dropout rate grows with depth.
        self.dropout1 = nn.Dropout(0.1)
        self.dropout2 = nn.Dropout(0.2)
        self.dropout3 = nn.Dropout(0.3)
        self.dropout4 = nn.Dropout(0.4)
        self.dropout5 = nn.Dropout(0.5)

    def forward(self, x):
        x = self.conv1(x)
        x = self.conv2(x)
        x = self.dropout1(self.conv3(x))
        x = self.dropout2(self.conv4(x))
        x = self.dropout3(self.conv5(x))
        x = self.dropout4(self.conv6(x))
        x = x.view(x.size(0), -1)
        # Dropout must act on the features entering the classifier, not on
        # its output: dropping logits zeroes random class scores (and doubles
        # the rest) before the loss is computed.
        x = self.fc(self.dropout5(x))
        return x

In [None]:
def evaluate_accuracy(data_loader, net, device=torch.device('cpu')):
    """
    Return the fraction of samples in ``data_loader`` that ``net`` labels correctly.

    Each batch is moved to ``device`` and zero-padded by 34 pixels on both
    sides of its last two axes before being passed to ``net``. ``net`` is
    switched to evaluation mode and is not moved by this function.
    """
    net.eval()

    correct = torch.zeros(1, dtype=torch.float32, device=device)
    seen = 0
    border = (34, 34, 34, 34, 0, 0, 0, 0)  # last two axes only

    for images, targets in data_loader:
        images = images.to(device)
        targets = targets.to(device)
        images = nn.functional.pad(images, border, 'constant', 0)
        with torch.no_grad():
            targets = targets.long()
            predicted = torch.argmax(net(images), dim=1)
            correct += torch.sum(predicted == targets)
            seen += targets.shape[0]  # samples in this batch

    return correct.item() / seen

def train(train_loader, device, model, criterion, optimizer):
    """
    Run one pass over ``train_loader`` and update ``model`` after every batch.

    Each batch is moved to ``device``, zero-padded by 34 pixels on both sides
    of its last two axes, and tripled by adding copies rolled by +2 and -2
    positions along dimension 2 (labels are repeated to match).

    Args:
        train_loader: iterable of (inputs, labels) batches
        device: torch.device the batches are moved to (the model must already
            be on it)
        model: network to train; switched to training mode
        criterion: loss function called as criterion(predictions, labels)
        optimizer: optimizer holding the model parameters

    Returns:
        list of float: the loss value of every batch, in order
    """
    model.train()
    train_losses = []

    for x_batch, y_batch in train_loader:
        # Set to same device
        x_batch, y_batch = x_batch.to(device), y_batch.to(device)
        x_batch = nn.functional.pad(x_batch, (34, 34, 34, 34, 0, 0, 0, 0), 'constant', 0)

        # Augmentation: the rolled copies wrap around, but only rows of the
        # 34-pixel zero border cross the edge.
        x_batch = torch.cat([x_batch, torch.roll(x_batch, 2, 2), torch.roll(x_batch, -2, 2)])
        y_batch = torch.cat([y_batch, y_batch, y_batch])

        # Set the gradients to zero
        optimizer.zero_grad()

        # Perform forward pass
        y_pred = model(x_batch)

        # Compute the loss
        loss = criterion(y_pred, y_batch)

        # Backward computation and update
        loss.backward()
        optimizer.step()

        # Keep a plain number, not the tensor: the tensor would stay on the
        # device with its autograd metadata and cannot be plotted directly.
        train_losses.append(loss.item())

    return train_losses

## 1.4 MNIST training process

In [None]:
BATCH_SIZE = 256

# ToTensor is the only preprocessing applied here; padding and augmentation
# are done per batch inside train() / evaluate_accuracy().
mnist_transform = torchvision.transforms.Compose([torchvision.transforms.ToTensor()])

# Fetch the MNIST train and test splits (downloaded to ./ when missing)
train_data = MNIST(root='./', train=True, download=True, transform=mnist_transform)
test_data = MNIST(root='./', train=False, download=True, transform=mnist_transform)

# Mini-batch loaders; only the training split is shuffled
train_loader = Data.DataLoader(dataset=train_data, batch_size=BATCH_SIZE, shuffle=True)
test_loader = Data.DataLoader(dataset=test_data, batch_size=BATCH_SIZE)

In [None]:
# training process
in_channels = 1 # Black-white images in MNIST digits
# NOTE(review): these widths are 60*n per layer, whereas the section heading
# says DeepCNet(5,10) -- confirm which configuration is intended.
hidden_channels = [60, 120, 180, 240, 300, 360]
out_features = 10

# Training parameters
learning_rate = 0.001
epochs = 100

# Try using gpu instead of cpu
device = try_gpu()

# Initialize network on the target device once; training and evaluation both
# run there, so the model is never shuttled back to the CPU between epochs.
# The last argument (4) is the side of the conv6 feature map for 96x96 inputs.
net = NetDropout(in_channels, hidden_channels, out_features, 4).to(device)
optimizer = torch.optim.Adam(net.parameters(), lr=learning_rate)
criterion = nn.CrossEntropyLoss()

# Histories: one accuracy per epoch, one loss per training iteration
train_accs = []
test_accs = []
train_losses = []

train_start = time.time()
for epoch in range(epochs):

    # float() accepts both plain numbers and 0-dim tensors
    epoch_losses = [float(loss) for loss in train(train_loader, device, net, criterion, optimizer)]

    # Compute train and test accuracy on the same device as the model
    train_acc = 100*evaluate_accuracy(train_loader, net, device)
    test_acc = 100*evaluate_accuracy(test_loader, net, device)

    # Development of performance
    train_accs.append(train_acc)
    test_accs.append(test_acc)
    train_losses.extend(epoch_losses)  # flat list so it can be plotted per iteration

    # Print performance of this epoch only (the full histories stay in the lists)
    print('Epoch: {:.0f}'.format(epoch+1))
    print('Mean training loss: {:.4f}'.format(sum(epoch_losses) / max(len(epoch_losses), 1)))
    print('Accuracy of train set: {:.2f}%'.format(train_acc))
    print('Accuracy of test set: {:.2f}%'.format(test_acc))
    print('Elapsed training time: {:.2f}'.format(time.time()-train_start))
    print('')


## 1.5 Evaluate and plot the curves

In [None]:
# Plot training curves

# train_losses may hold one list of losses per epoch and/or tensor entries;
# flatten it to plain floats so matplotlib draws a single per-iteration curve.
loss_curve = []
for entry in train_losses:
    if isinstance(entry, (list, tuple)):
        loss_curve.extend(float(value) for value in entry)
    else:
        loss_curve.append(float(entry))

plt.figure(figsize=(9,4))
plt.subplot(1,2,1)
plt.xlabel('Iterations')
plt.ylabel('Loss')
plt.plot(loss_curve, label="train")
plt.legend()
plt.grid()

plt.subplot(1,2,2)
plt.xlabel('Epochs')
plt.ylabel('Accuracy (%)')
plt.plot(train_accs, label = 'train')
plt.plot(test_accs, label = 'test')
plt.legend()
plt.grid()

(Supplementary: due to resource constraints, we could not obtain reasonable results on CIFAR-10 and CIFAR-100.)

In [None]:
# Constants passed to Normalize as per-channel mean and std
cifar_mean = (0.4914, 0.4822, 0.4465)
cifar_std = (0.2023, 0.1994, 0.2010)

# Training pipeline: random flip and random affine augmentation, then normalise
transform_train = transforms.Compose([
    transforms.RandomHorizontalFlip(),
    transforms.RandomAffine(degrees=40, translate=(0.2, 0.5)),
    transforms.ToTensor(),
    transforms.Normalize(cifar_mean, cifar_std),
])

# Test pipeline: no augmentation
transform_test = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize(cifar_mean, cifar_std),
])

batch_size = 128

cifar_trainset = datasets.CIFAR10(
    root='./data', train=True, download=True, transform=transform_train)
cifar_testset = datasets.CIFAR10(
    root='./data', train=False, download=True, transform=transform_test)

trainset_loader = torch.utils.data.DataLoader(
    cifar_trainset, batch_size=batch_size, shuffle=True, num_workers=4)
testset_loader = torch.utils.data.DataLoader(
    cifar_testset, batch_size=batch_size, shuffle=False, num_workers=4)

Downloading https://www.cs.toronto.edu/~kriz/cifar-10-python.tar.gz to ./data/cifar-10-python.tar.gz


HBox(children=(FloatProgress(value=0.0, max=170498071.0), HTML(value='')))


Extracting ./data/cifar-10-python.tar.gz to ./data


  cpuset_checked))


Files already downloaded and verified


In [None]:
# training process
in_channels = 3
hidden_channels = [300, 600, 900, 1200, 1500, 1800]
out_features = 10

# Side length of the conv6 feature map fed to the linear layer.  train() and
# evaluate_accuracy() zero-pad every image by 34 pixels per side, so a 32x32
# CIFAR image enters the network as 100x100.  Each 2x2 max pooling halves the
# size (rounding down) and each 2x2 convolution with padding 1 adds one:
# 100 -> 50 -> 51 -> 25 -> 26 -> 13 -> 14 -> 7 -> 8 -> 4 -> 5.
# (The value 4 used for MNIST only matches 96x96 inputs.)
output_resize_para = 5

# Training parameters
learning_rate = 0.001
epochs = 100

# Try using gpu instead of cpu
device = try_gpu()

# Initialize network on the target device once; training and evaluation both
# run there, so the model is never moved back to the CPU between epochs.
net = NetDropout(in_channels, hidden_channels, out_features, output_resize_para).to(device)
optimizer = torch.optim.Adam(net.parameters(), lr=learning_rate)
criterion = nn.CrossEntropyLoss()

# Histories: one loss per training iteration, one accuracy per epoch
train_losses = []
train_accs = []
test_accs = []

train_start = time.time()
for epoch in range(epochs):
    epoch_start = time.time()

    # float() accepts both plain numbers and 0-dim tensors
    epoch_losses = [float(loss) for loss in train(trainset_loader, device, net, criterion, optimizer)]

    # Compute train and test accuracy on the same device as the model
    train_acc = 100*evaluate_accuracy(trainset_loader, net, device)
    test_acc = 100*evaluate_accuracy(testset_loader, net, device)

    # Development of performance
    train_accs.append(train_acc)
    test_accs.append(test_acc)
    train_losses.extend(epoch_losses)

    # Print performance
    print('Epoch: {:.0f}'.format(epoch+1))
    print('Accuracy of train set: {:.2f}%'.format(train_acc))
    print('Accuracy of test set: {:.2f}%'.format(test_acc))
    # Time spent on this epoch alone (the total is time.time() - train_start)
    print('Epoch training time: {:.2f}'.format(time.time()-epoch_start))
    print('')


  cpuset_checked))


Epoch: 1
Accuracy of train set: 22.55%
Accuracy of test set: 23.93%
Epoch training time: 8950.56



# CNN

In [None]:
# -nc: skip the download when the archive is already present, instead of
# saving a second copy under a ".1" suffix that unrar would not pick up.
!wget -nc "https://archive.ics.uci.edu/ml/machine-learning-databases/00208/Online%20Handwritten%20Assamese%20Characters%20Dataset.rar"
!unrar e -idq -cl -y "Online Handwritten Assamese Characters Dataset.rar"
# -p: do not fail when the cell is re-run and input/ already exists
!mkdir -p input
!mv *.txt input/
# One "CHARACTER_NAME" line per sample file; AssameseData reads this file
!grep "CHARACTER_NAME" input/*.txt > labels

In [None]:
from torch import nn
import torch

class Net(nn.Module):
    """
    Small CNN: three Conv2d -> ReLU -> MaxPool2d(2) stages and a linear layer.

    The convolutions use kernel sizes 3, 5, 5 with paddings 1, 2, 2.
    forward() returns the raw output of the linear layer.

    Args:
        in_channels: number of channels of the input image
        hidden_channels: output channels of the three conv stages (indices 0..2)
        out_features: size of the output of the linear layer
        output_resize: side length of the square feature map after the last
            pooling, used to size the linear layer
    """

    def __init__(self, in_channels, hidden_channels, out_features,output_resize = 3):
        super().__init__()
        c1, c2, c3 = hidden_channels[0], hidden_channels[1], hidden_channels[2]

        # (input channels, output channels, kernel size) for each stage
        plan = ((in_channels, c1, 3), (c1, c2, 5), (c2, c3, 5))
        layers = []
        for n_in, n_out, kernel in plan:
            layers.append(nn.Conv2d(n_in, n_out, kernel_size=kernel, padding=kernel // 2))
            layers.append(nn.ReLU())
            layers.append(nn.MaxPool2d(2))

        self.net = nn.Sequential(*layers)
        self.fn = nn.Linear(output_resize * output_resize * c3, out_features)

    def forward(self, x):
        features = self.net(x)
        flat = features.view(features.size(0), -1)
        return self.fn(flat)




def evaluate_accuracy(data_loader, net, device=torch.device('cpu')):
    """
    Return the fraction of samples in ``data_loader`` that ``net`` labels correctly.

    Batches are moved to ``device`` and passed to ``net`` unchanged. ``net``
    is switched to evaluation mode and is not moved by this function.
    """
    net.eval()

    hits = torch.zeros(1, dtype=torch.float32, device=device)
    total = 0

    for inputs, labels in data_loader:
        inputs = inputs.to(device)
        labels = labels.to(device)
        with torch.no_grad():
            labels = labels.long()
            guesses = torch.argmax(net(inputs), dim=1)
            hits += torch.sum(guesses == labels)
            total += labels.shape[0]

    return hits.item() / total




In [None]:
import numpy as np
from torch.utils.data import Dataset
import torch
import glob

class AssameseData(Dataset):
    """
    Dataset of pen-stroke character files rasterised to small square images.

    Every ``*.txt`` file in ``input_dir`` is one sample.  The class names are
    collected from a separate labels file (the last whitespace-separated token
    of each line) and mapped to integer ids in sorted order, so the mapping is
    the same on every run.

    Args:
        input_dir: directory containing the ``*.txt`` sample files
        transform: optional callable applied to the rasterised numpy image;
            when omitted the image is converted to a (1, H, W) tensor
        labels_path: path of the labels file (default: ``'labels'`` in the
            current working directory)
    """

    def _read_img(self, path, scaling=173):
        """
        Parse one sample file into (image, label).

        The label is the second token of the first line; the next two lines
        are skipped.  Every remaining line that is not a PEN_DOWN / PEN_UP /
        END_CHARACTER marker is read as ``x y`` and marks pixel
        ``[x // scaling, y // scaling]`` with the value 254.
        """
        size = 4868 // scaling
        img = np.zeros((size, size))
        markers = ("PEN_DOWN", "PEN_UP", "END_CHARACTER")

        with open(path, 'r') as f:
            # NOTE(review): the label here is token [1] of the line, while the
            # labels file is read with token [-1]; they only agree for
            # single-word names -- confirm against the data.
            label = f.readline().split()[1].strip()
            f.readline()  # second header line (stroke information), unused
            f.readline()  # third header line, unused

            for line in f:
                if any(marker in line for marker in markers):
                    continue
                fields = line.split()
                if len(fields) < 2:
                    continue  # blank or incomplete line
                x, y = int(fields[0]), int(fields[1])
                # 4868 is not a multiple of the default scaling, so a
                # coordinate close to 4868 would index one past the last
                # row/column; clamp it onto the border instead.
                row = min(x // scaling, size - 1)
                col = min(y // scaling, size - 1)
                img[row, col] = 254

        return img, label

    def __init__(self, input_dir, transform=None, labels_path='labels'):
        self.input_dir = input_dir
        self.transform = transform
        # Sorted so that sample indices are stable across runs and machines
        self.texts = sorted(glob.glob(self.input_dir + "/*.txt"))
        self.length = len(self.texts)

        with open(labels_path, 'r') as f:
            names = {line.split()[-1] for line in f if line.strip()}

        # Sorted: iterating a set of strings directly gives a different
        # order (and therefore different class ids) from run to run.
        self.label_mapping = {name: num for num, name in enumerate(sorted(names))}
        print(self.label_mapping)
        print(len(self.label_mapping))

    def __len__(self):
        return self.length

    def __getitem__(self, idx):
        image, label = self._read_img(self.texts[idx])

        if self.transform:
            image = self.transform(image)
        else:
            # Without a transform, add the channel axis ourselves
            image = torch.from_numpy(image).unsqueeze(0)

        image = image.float()
        label = torch.tensor(self.label_mapping[label])

        return image, label



In [None]:
from torchvision import datasets, transforms
import matplotlib.pyplot as plt
from torchsummary import summary
import time



from torch.utils.data import DataLoader  # used below; not imported by the earlier cells

# Convert to tensor, then normalise the single channel
transform = transforms.Compose([transforms.ToTensor(),
                                transforms.Normalize((0.1201,), (0.3071,))])


train_data = datasets.MNIST('./data', train=True, download=True, transform=transform)
test_data = datasets.MNIST('./data', train=False, download=True, transform=transform)

train_loader = DataLoader(train_data, batch_size=64, shuffle=True)
test_loader = DataLoader(test_data, batch_size=64)


# To train on the Assamese characters instead, replace the MNIST loaders with
# (needs `from torch.utils.data import random_split`):
#
# data = AssameseData('input', transform)
# split = int(len(data) * 0.8)
# train, test = random_split(data, [split, len(data) - split])
# train_loader = DataLoader(train, batch_size=64, shuffle=True)
# test_loader = DataLoader(test, batch_size=64)
# train_data, test_data = train, test


in_channels = 1
hidden_channels = [4, 5, 6]
out_features = 10 #len(data.label_mapping)
learning_rate = 0.001
epochs = 80

device = try_gpu()

# Put the model on the target device once; training and evaluation both run
# there, so it is never moved back to the CPU between epochs.
net = Net(in_channels, hidden_channels, out_features).to(device)
optimizer = torch.optim.Adam(net.parameters(),lr=learning_rate)#torch.optim.SGD(net.parameters(), lr=learning_rate)
criterion = nn.CrossEntropyLoss()

train_losses = []  # one float per training iteration
train_accs = []    # one value per epoch
test_accs = []     # one value per epoch

for epoch in range(epochs):
    start = time.time()
    net.train()  # evaluate_accuracy() leaves the model in eval mode

    for x_batch, y_batch in train_loader:
        x_batch, y_batch = x_batch.to(device), y_batch.to(device)

        optimizer.zero_grad()

        y_pred = net(x_batch.float())

        loss = criterion(y_pred, y_batch)
        # Store a plain number, not the tensor: the tensor would stay on the
        # device together with its autograd metadata.
        train_losses.append(loss.item())

        loss.backward()
        optimizer.step()

    train_acc = 100 * evaluate_accuracy(train_loader, net, device)
    test_acc = 100 * evaluate_accuracy(test_loader, net, device)

    train_accs.append(train_acc)
    test_accs.append(test_acc)

    end = time.time()

    print('Epoch: {:.0f}'.format(epoch + 1))
    print('Accuracy of train set: '+str(train_acc))
    print('Accuracy of test set: '+str(test_acc))
    print('Training Time Of Epoch: ' + str(end - start))
    print('')
