In [1]:
import os
import io
import time
import pickle
import pandas as pd
import numpy as np
from numba import njit

import cv2
from skimage import transform
import matplotlib.pyplot as plt

import torch
from torch.utils.data import Dataset, DataLoader
from torchvision import transforms, utils
import torchvision
from torch import nn, optim
from torch.autograd import Variable
from torch.nn import functional
import torch.distributed as dist
import torch.multiprocessing as mp
from torch.nn.parallel import DistributedDataParallel as DDP

from pthflops import count_ops
#MNIST 1x28x28

In [2]:
MNIST_root = r"C:\Users\Leo's PC\PycharmProjects\PD\MNIST"
data_transform = torchvision.transforms.Compose([torchvision.transforms.ToTensor()])

MNIST_train = torchvision.datasets.MNIST(MNIST_root, train=True, transform=data_transform, target_transform=None, download=True)
MNIST_val = torchvision.datasets.MNIST(MNIST_root, train=False, transform=data_transform, target_transform=None, download=True)

In [3]:
CIFAR10_root = r"C:\Users\Leo's PC\PycharmProjects\PD\CIFAR10"

CIFAR10_train = torchvision.datasets.CIFAR10(CIFAR10_root, train=True, transform=data_transform, target_transform=None, download=True)
CIFAR10_val = torchvision.datasets.CIFAR10(CIFAR10_root, train=False, transform=data_transform, target_transform=None, download=True)

Files already downloaded and verified
Files already downloaded and verified


In [4]:
train_loader = DataLoader(dataset=CIFAR10_train, batch_size=100,shuffle=True) # 3000 batches
val_loader = DataLoader(dataset=CIFAR10_val, batch_size=100, shuffle=False) # 500 batches

In [5]:
@njit
def avg_combine(a, b):
    #if a.shape[2] < b.shape[2]:
        #a = functional.interpolate(a, scale_factor=(b.shape[2] // a.shape[2]))
    #elif a.shape[2] > b.shape[2]:
    b = functional.interpolate(b, scale_factor=2.333333333333333333333333333333333)
        
    idx0 = 0
    for img in a:
        idx1 = 0
        for channel_a in img:
            channel_a = channel_a.add(b[idx0][idx1]) // 2
            idx1 += 1
        idx0 += 1
    
    return a

In [6]:
class BCNN(nn.Module):
    def __init__(self):
        super(BCNN, self).__init__()
        
        self.conv1 = nn.Conv2d(in_channels=3, out_channels=32, kernel_size=[3,3], stride=1, padding=1)
        self.activation1 = nn.ReLU()
        
        self.pool1 = nn.MaxPool2d(kernel_size = [2, 2], stride=2, padding=0)
        
        self.conv2 = nn.Conv2d(in_channels=32, out_channels=64, kernel_size=[3,3], stride=1)
        self.activation2 = nn.ReLU()
        
        self.pool2 = nn.MaxPool2d(kernel_size = [2, 2], stride=2, padding=0)
        
        self.conv3 = nn.Conv2d(in_channels=64, out_channels=128, kernel_size=[3,3], stride=1, padding=1)
        self.activation3 = nn.ReLU()
        
        self.fc1 = nn.Linear(6272, 128)
        #self.fc2 = nn.Linear(1024, 128)
        self.fc3 = nn.Linear(128, 10)
        
        self.Sigmoid = nn.Sigmoid()
        self.Softmax = nn.Softmax(dim=0)
        
    def forward(self,x):
        x = self.conv1(x)
        x = self.activation1(x)
        
        x = self.pool1(x)
        
        x = self.conv2(x)
        x = self.activation2(x)
        
        x = self.pool2(x)
        
        x = self.conv3(x)
        x = self.activation3(x)
          
        x = x.view(x.size()[0], -1)
        
        x = self.fc1(x)
        x = self.Sigmoid(x)
        #x = self.fc2(x)
        #x = self.Sigmoid(x)
        x = self.fc3(x)
        x = self.Softmax(x)
        
        return x

In [7]:
class SCNN(nn.Module):
    def __init__(self):
        super(SCNN, self).__init__()
        
        self.conv1 = nn.Conv2d(in_channels=3, out_channels=32, kernel_size=[3,3], stride=1, padding=1)
        self.activation1 = nn.ReLU()
        
        self.pool1 = nn.MaxPool2d(kernel_size = [4, 4], stride=4, padding=0)
        
        self.conv2 = nn.Conv2d(in_channels=32, out_channels=64, kernel_size=[3,3], stride=1)
        self.activation2 = nn.ReLU()
        
        self.pool2 = nn.MaxPool2d(kernel_size = [4, 4], stride=4, padding=0)
        
        self.conv3 = nn.Conv2d(in_channels=64, out_channels=128, kernel_size=[3,3], stride=1, padding=1)
        self.activation3 = nn.ReLU()
        
        self.fc1 = nn.Linear(128, 128)
        #self.fc2 = nn.Linear(1024, 128)
        self.fc3 = nn.Linear(128, 10)
        
        self.Sigmoid = nn.Sigmoid()
        self.Softmax = nn.Softmax(dim=0)
        
    def forward(self,x):
        x = self.conv1(x)
        x = self.activation1(x)
        
        x = self.pool1(x)
        
        x = self.conv2(x)
        x = self.activation2(x)
        
        x = self.pool2(x)
        
        x = self.conv3(x)
        x = self.activation3(x)
          
        x = x.view(x.size()[0], -1)
        
        x = self.fc1(x)
        x = self.Sigmoid(x)
        #x = self.fc2(x)
        #x = self.Sigmoid(x)
        x = self.fc3(x)
        x = self.Softmax(x)
        
        return x

In [8]:
class TCNN(nn.Module):
    def __init__(self):
        super(TCNN, self).__init__()
        
        self.conv1_1 = nn.Conv2d(in_channels=3, out_channels=32, kernel_size=[3,3], stride=1, padding=1)
        self.activation1 = nn.ReLU()
        
        self.pool2_B = nn.MaxPool2d(kernel_size = [2, 2], stride=2, padding=0)
        self.pool2_S = nn.MaxPool2d(kernel_size = [4, 4], stride=4, padding=0)
        
        self.conv2_B = nn.Conv2d(in_channels=8, out_channels=64, kernel_size=[3,3], stride=1)
        self.conv2_S = nn.Conv2d(in_channels=24, out_channels=64, kernel_size=[3,3], stride=1)
        self.activation2 = nn.ReLU()
        
        self.pool3_B = nn.MaxPool2d(kernel_size = [2, 2], stride=2, padding=0)
        self.pool3_S = nn.MaxPool2d(kernel_size = [4, 4], stride=4, padding=0)
        
        self.conv3_B = nn.Conv2d(in_channels=16, out_channels=128, kernel_size=[3,3], stride=1, padding=1)
        self.conv3_S = nn.Conv2d(in_channels=48, out_channels=128, kernel_size=[3,3], stride=1, padding=1)
        self.activation3 = nn.ReLU()
        
        # fc1 has 2048 inputs
        #self.fc1 = nn.Linear(6272, 1024)
        #self.fc2 = nn.Linear(1024, 128)
        self.fc3 = nn.Linear(6272, 10)
        
        self.Sigmoid = nn.Sigmoid()
        self.Softmax = nn.Softmax(dim=0)
        
    def forward(self,x):
        x = self.conv1_1(x)
        x = self.activation1(x)
          
        x_0, x_1 = torch.split(x, [8, 24], dim=1)
        x_0 = self.pool2_B(x_0)
        x_1 = self.pool2_S(x_1)
        
        x_0, x_1 = self.conv2_B(x_0), self.conv2_S(x_1)
        x = avg_combine(x_0, x_1) # combine 64 feature maps on both sides into one (after upsampling the small one)
        x = self.activation2(x)

        
        x_0, x_1 = torch.split(x, [16, 48], dim=1)
        x_0 = self.pool3_B(x_0)
        x_1 = self.pool3_S(x_1) 
        
        x_0, x_1 = self.conv3_B(x_0), self.conv3_S(x_1)
        x = avg_combine(x_0, x_1)
        x = self.activation3(x)
        x = x.view(x.size()[0], -1)
        
        #x = self.fc1(x)
        #x = self.Sigmoid(x)
        #x = self.fc2(x)
        #x = self.Sigmoid(x)
        x = self.fc3(x)
        x = self.Softmax(x)
        
        return x

In [9]:
def one_hot_embedding(labels, num_classes):
    """Embedding labels to one-hot form.

    Args:
      labels: (LongTensor) class labels, sized [N,].
      num_classes: (int) number of classes.

    Returns:
      (tensor) encoded labels, sized [N, #classes].
    """
    y = torch.eye(num_classes) 
    return y[labels]

model = TCNN()

model = model.cuda()
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
model.to(device)

TCNN(
  (conv1_1): Conv2d(3, 32, kernel_size=[3, 3], stride=(1, 1), padding=(1, 1))
  (activation1): ReLU()
  (pool2_B): MaxPool2d(kernel_size=[2, 2], stride=2, padding=0, dilation=1, ceil_mode=False)
  (pool2_S): MaxPool2d(kernel_size=[4, 4], stride=4, padding=0, dilation=1, ceil_mode=False)
  (conv2_B): Conv2d(8, 64, kernel_size=[3, 3], stride=(1, 1))
  (conv2_S): Conv2d(24, 64, kernel_size=[3, 3], stride=(1, 1))
  (activation2): ReLU()
  (pool3_B): MaxPool2d(kernel_size=[2, 2], stride=2, padding=0, dilation=1, ceil_mode=False)
  (pool3_S): MaxPool2d(kernel_size=[4, 4], stride=4, padding=0, dilation=1, ceil_mode=False)
  (conv3_B): Conv2d(16, 128, kernel_size=[3, 3], stride=(1, 1), padding=(1, 1))
  (conv3_S): Conv2d(48, 128, kernel_size=[3, 3], stride=(1, 1), padding=(1, 1))
  (activation3): ReLU()
  (fc3): Linear(in_features=6272, out_features=10, bias=True)
  (Sigmoid): Sigmoid()
  (Softmax): Softmax(dim=0)
)

In [10]:
optimizer = optim.Adam(params=model.parameters(), lr=0.001, betas=(0.9, 0.999))
criterion = nn.CrossEntropyLoss()

In [11]:
def init_weights(m):
    if type(m) == nn.Linear or type(m) == nn.Conv2d:
        torch.nn.init.xavier_uniform_(m.weight)
        m.bias.data.fill_(0.01)

model.apply(init_weights)

TCNN(
  (conv1_1): Conv2d(3, 32, kernel_size=[3, 3], stride=(1, 1), padding=(1, 1))
  (activation1): ReLU()
  (pool2_B): MaxPool2d(kernel_size=[2, 2], stride=2, padding=0, dilation=1, ceil_mode=False)
  (pool2_S): MaxPool2d(kernel_size=[4, 4], stride=4, padding=0, dilation=1, ceil_mode=False)
  (conv2_B): Conv2d(8, 64, kernel_size=[3, 3], stride=(1, 1))
  (conv2_S): Conv2d(24, 64, kernel_size=[3, 3], stride=(1, 1))
  (activation2): ReLU()
  (pool3_B): MaxPool2d(kernel_size=[2, 2], stride=2, padding=0, dilation=1, ceil_mode=False)
  (pool3_S): MaxPool2d(kernel_size=[4, 4], stride=4, padding=0, dilation=1, ceil_mode=False)
  (conv3_B): Conv2d(16, 128, kernel_size=[3, 3], stride=(1, 1), padding=(1, 1))
  (conv3_S): Conv2d(48, 128, kernel_size=[3, 3], stride=(1, 1), padding=(1, 1))
  (activation3): ReLU()
  (fc3): Linear(in_features=6272, out_features=10, bias=True)
  (Sigmoid): Sigmoid()
  (Softmax): Softmax(dim=0)
)

In [177]:
sample = torch.Tensor(np.zeros(784).reshape(1, 1, 28, 28))
sample = sample.cuda()
sample = sample.to(device)
prediction = model(sample)

RuntimeError: Given groups=1, weight of size [32, 3, 3, 3], expected input[1, 1, 28, 28] to have 3 channels, but got 1 channels instead

In [12]:
global epoch #declear epoch global, to be used later by torch.save() 

for epoch in range(100):


    ave_loss = 0
    global loss #declear loss global, to be used later by torch.save() 
    for batch_idx, (data, target) in enumerate(train_loader):
        model.train() #set model to traning mode
        optimizer.zero_grad()
        data, target = data.float(), target.float() #set datatype
        data, target = data.to(device), target.to(device) #transfer to GPU
        data, target = Variable(data), Variable(target) #set to pytorch datatype: variable
        out = model(data) #forward pass
        loss = criterion(out, target.long()) #calculate loss
        ave_loss = ave_loss * 0.9 + loss.item() * 0.1 
        loss.backward() #back propagation with calculated loss
        optimizer.step() #calculate gradient and step
        if (batch_idx + 1) % 100 == 0 or (batch_idx + 1) == len(train_loader):
            print('==>>> epoch: {}, batch index: {}, train loss: {:.6f}'.format(epoch, batch_idx + 1, loss))

    correct, ave_loss = 0, 0
    total_cnt = 0

    for batch_idx, (data, target) in enumerate(val_loader):
        model.eval() #set model to evaluation mode
        data, target = data.float(), target.float() #set datatype
        data, target = data.to(device), target.to(device) #transfer to GPU
        data, target = Variable(data), Variable(target) #set to pytorch datatype: variable
       
        out = model(data)
        loss = criterion(out, target.long()) #calculate loss

        pred_label = out.data
        pred_label = pred_label.argmax(dim=1, keepdim=True)
        
        target = target.long()
        target_onehot = one_hot_embedding(target.data, 10)
        target_onehot = target_onehot.to(device)
        
        total_cnt += data.data.size()[0]
        ave_loss = ave_loss * 0.9 + loss.item() * 0.1 #smooth average
        correct += pred_label.eq(target.view_as(pred_label)).sum().item()
        
        if (batch_idx + 1) % 100 == 0 or (batch_idx + 1) == len(val_loader):
            print(
            '==>>> epoch: {}, batch index: {}, test loss: {:.6f}, acc: {:.3f}'.format(
                epoch, batch_idx + 1, ave_loss, correct * 1.0 / total_cnt))

TypingError: Failed in nopython mode pipeline (step: nopython frontend)
[1m[1mnon-precise type pyobject[0m
[0m[1mDuring: typing of argument at <ipython-input-5-d7a38bde558b> (6)[0m
[1m
File "<ipython-input-5-d7a38bde558b>", line 6:[0m
[1mdef avg_combine(a, b):
    <source elided>
    #elif a.shape[2] > b.shape[2]:
[1m    b = functional.interpolate(b, scale_factor=2.333333333333333333333333333333333)
[0m    [1m^[0m[0m

This error may have been caused by the following argument(s):
- argument 0: [1mcannot determine Numba type of <class 'torch.Tensor'>[0m
- argument 1: [1mcannot determine Numba type of <class 'torch.Tensor'>[0m


In [184]:
model_parameters = filter(lambda p: p.requires_grad, model.parameters())
params = sum([np.prod(p.size()) for p in model_parameters])
print(params)

156170


In [185]:
inp, _ = next(iter(train_loader))
inp = torch.Tensor(np.zeros(307200).reshape(100, 3, 32, 32))

inp = inp.cuda()
inp = inp.to(device)

count_ops(model, inp)

Operation        OPS        
---------------  ---------  
/onnx::Conv      91750400   
/onnx::Relu      6553600    
/onnx::MaxPool   614400     
/onnx::Conv      90316800   
/onnx::Relu      2508800    
/onnx::MaxPool   235200     
/onnx::Conv      90316800   
/onnx::Relu      1254400    
/onnx::Gemm      6272000    
--------------   --------   
Input size: (100, 3, 32, 32)
289,822,400 FLOPs or approx. 0.29 GFLOPs


(289822400,
 [['/onnx::Conv', 91750400],
  ['/onnx::Relu', 6553600],
  ['/onnx::MaxPool', 614400],
  ['/onnx::Conv', 90316800],
  ['/onnx::Relu', 2508800],
  ['/onnx::MaxPool', 235200],
  ['/onnx::Conv', 90316800],
  ['/onnx::Relu', 1254400],
  ['/onnx::Gemm', 6272000]])