In [146]:
from torchvision import datasets, transforms, models
from torch import nn, optim, utils, device as device_, cuda
import torch
import torch.nn as nn
import numpy as np
from sklearn import metrics
import time
import sparseconvnet as scn
import math

In [2]:
dataset_train = datasets.MNIST(
    './data', 
    train=True, 
    download=True, 
    transform=transforms.ToTensor())
dataset_valid = datasets.MNIST(
    './data', 
    train=False, 
    download=True, 
    transform=transforms.ToTensor())

In [None]:
dataloader_train = utils.data.DataLoader(dataset_train,
                                          batch_size=1000,
                                          shuffle=True,
                                          num_workers=4)
dataloader_valid = utils.data.DataLoader(dataset_valid,
                                          batch_size=1000,
                                          shuffle=True,
                                          num_workers=4)

In [25]:
dataloader_train = utils.data.DataLoader(dataset_train,
                                         collate_fn=train_merge,
                                          batch_size=1,
                                          shuffle=True)

In [113]:
class Data(torch.utils.data.Dataset):
    def __init__(self,file):
        torch.utils.data.Dataset.__init__(self)
        
        N_DATA = 100
        
        data = torch.load(file)
        self.data = []
        # self.data = data[0] # array of img tensors
        self.labels = data[1] # array of labels
        
        for i, img in enumerate(data[0]): # for each data
            
            if i >= N_DATA: break
                
            coords = []
            features = []
            for y in range(len(img)): # for y coordinates
                for x in range(len(img[y])): # for x coordinates
                    if img[y][x] != 0:
                        coords.append(torch.LongTensor([y, x]))
                        features.append(img[y][x].float())
            
            single_data = {}
            single_data['coords'] = torch.stack(coords)
            single_data['features'] = torch.stack(features)
            self.data.append(single_data)
        
        for i,x in enumerate(self.data):
            x['idx'] = i
        
        print('Loaded ', len(self.data), ' points')
                
    
    def __getitem__(self, n):
        return self.data[n], self.labels[n]

    def __len__(self):
        return len(self.data)
        

In [114]:
dataset = Data('data/MNIST/processed/training.pt')

Loaded  100  points


In [219]:
def TrainMergeFn(tbl, spatial_size=28):
    
    # tbl has batch_size data
    
    locations = []
    features = []
    targets = []
    
    for data, labels in tbl:
        """
        coords = 
            tensor([[ x1, y1,  0],
            [ x2, y2,  0]])
        x y coordinates plus data index number
        """
        coords = torch.cat([data['coords'].long(), torch.LongTensor([data['idx']]).expand([data['coords'].size(0), 1])], 1)
        locations.append(coords)
        features.append(data['features'])
        targets.append(labels)

    # return {'input': scn.InputLayerInput(torch.cat(locations, 0), torch.cat(features, 0)), 'target':torch.LongTensor(targets)}
    # return scn.batch_location_tensors(locations), torch.cat(features, 0), torch.LongTensor(targets)
    return torch.cat(locations, 0), torch.cat(features, 0), torch.LongTensor(targets)


In [220]:
dataloader_train = torch.utils.data.DataLoader(dataset, collate_fn=TrainMergeFn, batch_size=10, shuffle=False)

In [221]:
for i in dataloader_train:
    print(i)
    break

(tensor([[ 5, 12,  0],
        [ 5, 13,  0],
        [ 5, 14,  0],
        ...,
        [23, 10,  9],
        [23, 11,  9],
        [23, 12,  9]]), tensor([  3.,  18.,  18.,  ..., 122., 252.,  82.]), tensor([5, 0, 4, 1, 9, 2, 1, 3, 1, 4]))


In [137]:
device = device_("cuda" if cuda.is_available() else "cpu")
device

device(type='cuda')

In [227]:
class Model(nn.Module):
    
    def __init__(self):
        nn.Module.__init__(self)
        data_dimension = 0
        data_fullscale = 28*28
        m = 4
        self.input = scn.InputLayer(data_dimension,data_fullscale, mode=0)
        self.conv = scn.SubmanifoldConvolution(data_dimension, 3, m, 3, False)
        self.out = scn.OutputLayer(10)
        
#         self.sparseModel = scn.Sequential().add(
#            scn.InputLayer(data_dimension,data_fullscale, mode=0)).add(
#            scn.SubmanifoldConvolution(data_dimension, 3, m, 3, False)).add(
#            scn.BatchNormReLU(m)).add(
#            scn.OutputLayer(1))
        self.linear = nn.Linear(m, 10)
        
    def forward(self,x):
        # x=self.sparseModel(x)
        x=self.input(x)
        x=self.conv(x)
        x=self.out(x)
        x=self.linear(x)
        return x

In [228]:
model = Model()


p={}
p['n_epochs'] = 20
p['initial_lr'] = 1e-1
p['lr_decay'] = 0.02
p['weight_decay'] = 1e-4
p['momentum'] = 0.9
p['check_point'] = False
p['epoch']=1
device = 'cuda:0'
model.to(device)

optimizer = torch.optim.SGD(model.parameters(),
    lr=p['initial_lr'],
    momentum = p['momentum'],
    weight_decay = p['weight_decay'],
    nesterov=True)

print(p)
print('#parameters', sum([x.nelement() for x in model.parameters() ]))

{'n_epochs': 20, 'initial_lr': 0.1, 'lr_decay': 0.02, 'weight_decay': 0.0001, 'momentum': 0.9, 'check_point': False, 'epoch': 1}
#parameters 62


In [229]:
for epoch in range(p['epoch'], p['n_epochs'] + 1):
    model.train()
    stats = {'n': 0, 'c': 0, 'loss': 0}
    for param_group in optimizer.param_groups:
        param_group['lr'] = p['initial_lr'] * \
        math.exp((1 - epoch) * p['lr_decay'])
        
    scn.forward_pass_multiplyAdd_count=0
    scn.forward_pass_hidden_states=0
    start = time.time()
    
    for locations, features, targets in dataloader_train:
        optimizer.zero_grad()
        print(locations)
        print(features)
        print(targets)
        predictions=model([locations,features.to(device)])
        targets=targets.to(device)
        loss = F.cross_entropy(predictions,targets)
        loss.backward()
        optimizer.step()
        
        with torch.no_grad():
            predictions=predictions[targets>=0]
            targets=targets[targets>=0]
            stats['n']+=predictions.size(0)
            stats['c']+=(predictions.max(1)[1]==targets).long().sum().item()
            stats['loss']+=loss*predictions.size(0)
        if epoch<=1:
            print('train',loss.item(),stats['c']/stats['n'],stats['loss']/stats['n'])
    print('train epoch',epoch,stats['c']/stats['n'],
        'MegaMulAdd=',scn.forward_pass_multiplyAdd_count/795/1e6, 'MegaHidden',scn.forward_pass_hidden_states/795/1e6,'time=',time.time() - start,'s')


tensor([[ 5, 12,  0],
        [ 5, 13,  0],
        [ 5, 14,  0],
        ...,
        [23, 10,  9],
        [23, 11,  9],
        [23, 12,  9]])
tensor([  3.,  18.,  18.,  ..., 122., 252.,  82.])
tensor([5, 0, 4, 1, 9, 2, 1, 3, 1, 4])


AttributeError: module 'sparseconvnet.SCN' has no attribute 'Metadata_0'

In [8]:
# Playgrond

In [7]:
# Playground