In [None]:
import numpy as np
from probreg import gmmtree
import tonic
from torch.utils.data.sampler import Sampler
from pathlib import Path
import time
import coreset

### parameters

In [None]:
# --- dataset selection ---
# one of: POKERDVS -- NMNIST -- NCARS -- DVSGesture
dataset_name = 'POKERDVS'
# forwarded as `download=` to the tonic dataset constructors
download_dataset = True
# only forwarded to the N-MNIST constructor (3 saccades 100ms each)
first_saccade_only = False
# percentage of each split to keep; a subset is only taken for values
# strictly between 0 and 100
subsample = 100

# --- hierarchical GMM ---
# max number of levels for the GMM hierarchy
tree_level = 3
# level of the tree at which inference is done
inference_level = 3

# --- feature extraction ---
# True -> create_spatial_histograms, False -> create_histograms
spatial_histograms = True
# side length of the square grid cells used by the spatial histograms
K = 10

# --- coreset ---
# when True, X_train is replaced by the output of coreset.generate(X_train, Np)
coresets = False
# second argument of coreset.generate; presumably the coreset size -- TODO confirm
Np = 1 << 16

### load dataset

In [None]:
start_time = time.time()

# Instantiate the train and test splits of the selected tonic dataset.
# The branches form one mutually exclusive chain, and an unrecognised name
# fails right here with a clear message instead of surfacing later as a
# NameError on `train_set`.
if dataset_name == 'NCARS': # 304 x 240
    train_set = tonic.datasets.NCARS(save_to='./data', train=True, download=download_dataset)
    test_set = tonic.datasets.NCARS(save_to='./data', train=False, download=download_dataset)
elif dataset_name == 'POKERDVS': # 35 x 35
    train_set = tonic.datasets.POKERDVS(save_to='./data', train=True, download=download_dataset)
    test_set = tonic.datasets.POKERDVS(save_to='./data', train=False, download=download_dataset)
elif dataset_name == "DVSGesture": # 128 x 128
    train_set = tonic.datasets.DVSGesture(save_to='./data', train=True, download=download_dataset)
    test_set = tonic.datasets.DVSGesture(save_to='./data', train=False, download=download_dataset)
elif dataset_name == 'NMNIST': # 34 x 34
    train_set = tonic.datasets.NMNIST(save_to='./data/nmnist', train=True, download=download_dataset, first_saccade_only=first_saccade_only)
    test_set = tonic.datasets.NMNIST(save_to='./data/nmnist', train=False, download=download_dataset, first_saccade_only=first_saccade_only)
else:
    raise ValueError(
        "Unknown dataset_name %r; expected one of: POKERDVS, NMNIST, NCARS, DVSGesture" % dataset_name
    )

# Column positions of the x, y and t fields inside an event row, looked up
# in the dataset's `ordering` string.
x_index = train_set.ordering.find('x')
y_index = train_set.ordering.find('y')
t_index = train_set.ordering.find('t')

In [None]:
# Randomly ordered sample indices for both splits. The training indices are
# shuffled first and the test indices second.
train_index, test_index = np.arange(len(train_set)), np.arange(len(test_set))
np.random.shuffle(train_index)
np.random.shuffle(test_index)

# Only a percentage strictly between 0 and 100 triggers subsetting;
# any other value keeps every (shuffled) index.
if 0 < subsample < 100:
    print("Taking %s%% of the dataset" % subsample)

    # number of samples to keep per split, rounded up
    train_samples = np.ceil(len(train_set) * subsample / 100).astype(int)
    test_samples = np.ceil(len(test_set) * subsample / 100).astype(int)

    # the subset is the leading part of each shuffled index array
    train_index = train_index[:train_samples]
    test_index = test_index[:test_samples]

In [None]:
# custom sampler for torch dataloader
class custom_sampler(Sampler):
    """Sampler that walks through a fixed sequence of dataset indices.

    Arguments:
        indices (list): the indices to yield, in the order they are given
    """

    def __init__(self, indices):
        self.indices = indices
        # length is recorded once, at construction time
        self.num_samples = len(self.indices)

    def __len__(self):
        """Return the number of indices recorded at construction time."""
        return self.num_samples

    def __iter__(self):
        """Return an iterator over the stored indices, in stored order."""
        return iter(self.indices)

In [None]:
trainloader = tonic.datasets.DataLoader(train_set, sampler=custom_sampler(train_index), shuffle=False)
testloader = tonic.datasets.DataLoader(test_set, sampler=custom_sampler(test_index), shuffle=False)

# preparing training dataset
# X_train: the (t, x, y) columns of every event of every training recording,
#          stacked row-wise into a single array
# Y_train: first entry of each recording's target
X_train = []
Y_train = np.zeros(len(trainloader))
for i, (events, target) in enumerate(trainloader):
    events = events.numpy()
    # Flatten to (n_events, n_fields) explicitly. A bare squeeze() would also
    # drop the event axis of a recording that holds a single event and break
    # the column indexing below.
    # NOTE(review): assumes the last axis holds the event fields -- confirm.
    events = events.reshape(-1, events.shape[-1])
    data = events[:, [t_index, x_index, y_index]]
    X_train.append(data)
    Y_train[i] = target.numpy()[0]
X_train = np.vstack(X_train)

In [None]:
# Optionally replace the stacked training events by the output of
# coreset.generate; the second returned value is kept in `weights`.
if coresets:
    X_train, weights = coreset.generate(X_train, Np)

# time elapsed since `start_time` was last set
elapsed = time.time() - start_time
print('--- %s seconds ---' % elapsed)

### running hGMM model

In [None]:
# Fit the GMM hierarchy on the training events and report the wall-clock
# time of the fit.
start_time = time.time()
fit_result = gmmtree.fit(X_train, tree_level=tree_level)
model, params, n_nodes = fit_result
elapsed = time.time() - start_time
print('--- %s seconds ---' % elapsed)

### preparing extracted features for classification

In [None]:
def create_histograms(dataloader, model, n_nodes, inference_level, dataset):
    """Build one histogram of predicted GMM node labels per recording.

    Arguments:
        dataloader: iterable of (events, target) pairs that supports len();
            both items must expose .numpy()
        model: fitted model, passed through to gmmtree.predict
        n_nodes: indexable; n_nodes[inference_level-1] is the number of
            histogram bins
        inference_level (int): tree level passed to gmmtree.predict
        dataset: object whose `ordering` string gives the column positions
            of 'x', 'y' and 't' in an event row

    Returns:
        X (list): one array of n_nodes[inference_level-1] counts per recording
        Y (ndarray): first entry of each recording's target
    """
    x_index = dataset.ordering.find("x")
    y_index = dataset.ordering.find("y")
    t_index = dataset.ordering.find("t")

    n_bins = n_nodes[inference_level-1]
    # n_bins + 1 edges give n_bins unit-wide bins, i.e. one bin per label
    # 0 .. n_bins-1 (assumes predicted labels are integers in that range --
    # TODO confirm). With only n_bins edges the last two labels would share
    # a bin and the feature vector would be one entry short.
    bin_edges = np.arange(0, n_bins + 1)

    X = []
    Y = np.zeros(len(dataloader))
    for i, (events, target) in enumerate(dataloader):
        events = events.numpy()
        # Explicit reshape instead of squeeze(): a recording with a single
        # event must stay 2-D for the column indexing below.
        events = events.reshape(-1, events.shape[-1])
        data = events[:, [t_index, x_index, y_index]]
        Y_pred = gmmtree.predict(model, data, inference_level)
        Y[i] = target.numpy()[0]

        X.append(np.histogram(Y_pred, bins=bin_edges)[0])
    return X, Y

def create_spatial_histograms(dataloader, model, n_nodes, inference_level, dataset, K):
    """Build per-grid-cell histograms of predicted GMM node labels.

    The sensor plane is divided into square cells of side K. For every
    recording, each event is assigned to the cell containing its (x, y)
    position, and one label histogram is computed per cell. The per-cell
    histograms are concatenated into one feature vector.

    Arguments:
        dataloader: iterable of (events, target) pairs that supports len();
            both items must expose .numpy()
        model: fitted model, passed through to gmmtree.predict
        n_nodes: indexable; n_nodes[inference_level-1] is the number of
            histogram bins per cell
        inference_level (int): tree level passed to gmmtree.predict
        dataset: object providing `sensor_size` (entries 0 and 1 are paired
            with x and y respectively) and an `ordering` string giving the
            column positions of 'x', 'y' and 't'
        K (int): side length of a grid cell

    Returns:
        X (list): one 1-D array per recording with
            n_cells * n_nodes[inference_level-1] counts, ordered cell by cell
            (cell index = x_cell * n_cells_y + y_cell)
        Y (ndarray): first entry of each recording's target
    """
    sensor_size = dataset.sensor_size
    x_index = dataset.ordering.find("x")
    y_index = dataset.ordering.find("y")
    t_index = dataset.ordering.find("t")

    n_cells_x = sensor_size[0] // K + 1
    n_cells_y = sensor_size[1] // K + 1
    n_cells = n_cells_x * n_cells_y

    n_bins = n_nodes[inference_level-1]
    # n_bins + 1 edges give one unit-wide bin per label 0 .. n_bins-1
    bin_edges = np.arange(0, n_bins + 1)

    X = []
    Y = np.zeros(len(dataloader))
    for i, (events, target) in enumerate(dataloader):
        events = events.numpy()
        # Explicit reshape instead of squeeze(): a recording with a single
        # event must stay 2-D for the column indexing below.
        events = events.reshape(-1, events.shape[-1])
        data = events[:, [t_index, x_index, y_index]]
        # assumes predict returns one label per event, in event order -- TODO confirm
        Y_pred = np.asarray(gmmtree.predict(model, data, inference_level))
        Y[i] = target.numpy()[0]

        # Cell id of every event, kept in event order so that it lines up
        # with Y_pred. Grouping the ids by cell instead would pair labels
        # with the wrong events.
        cell_x = (events[:, x_index] // K).astype(int)
        cell_y = (events[:, y_index] // K).astype(int)
        event_cell = cell_x * n_cells_y + cell_y
        # events that fall outside the grid are not counted in any cell
        in_grid = (cell_x >= 0) & (cell_x < n_cells_x) & (cell_y >= 0) & (cell_y < n_cells_y)

        hists = [
            np.histogram(Y_pred[in_grid & (event_cell == cell)], bins=bin_edges)[0]
            for cell in range(n_cells)
        ]
        X.append(np.concatenate(hists))
    return X, Y

### pytorch classification

In [None]:
from logreg import LogisticRegression
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import TensorDataset, DataLoader
from torch.optim.lr_scheduler import StepLR
from sklearn import preprocessing
from datetime import datetime

start_time = time.time()

# creating histograms from hard clusters
if spatial_histograms:
    train_features, train_labels = create_spatial_histograms(trainloader, model, n_nodes, inference_level, train_set, K)
    test_features, test_labels = create_spatial_histograms(testloader, model, n_nodes, inference_level, test_set, K)
else:
    train_features, train_labels = create_histograms(trainloader, model, n_nodes, inference_level, train_set)
    test_features, test_labels = create_histograms(testloader, model, n_nodes, inference_level, test_set)

# scale features to 0 mean and 1 variance
# (the scaler is fitted on the training features only and reused for the test features)
scaler = preprocessing.StandardScaler().fit(train_features)
train_features = scaler.transform(train_features)
test_features = scaler.transform(test_features)

# creating dataloaders
training_dataset = TensorDataset(torch.Tensor(train_features),torch.Tensor(train_labels))
training_dataloader = DataLoader(training_dataset, batch_size=128)

test_dataset = TensorDataset(torch.Tensor(test_features),torch.Tensor(test_labels))
test_dataloader = DataLoader(test_dataset, batch_size=128)

# finding unique classes
# Collect the labels of both splits: a subsampled test split may not contain
# every class, and counting its labels alone would under-size the classifier.
classes = np.unique(np.concatenate((train_labels, test_labels)))

# training pytorch logistic regression
# (first two arguments: feature-vector length and number of distinct labels)
logreg = LogisticRegression(train_features.shape[1], len(classes), epochs=200, lr=0.01, step_size=30, gamma=1, momentum=0, weight_decay=0)
logreg.fit(training_dataloader)
score = logreg.score(test_dataloader)*100

# print score
print(score)

# save results
p = Path('benchmark_hgmm')
p.mkdir(exist_ok=True)
# NOTE(review): the timestamp contains ':' characters, which are not valid in
# file names on Windows; left unchanged to keep the existing naming scheme.
date = datetime.now().strftime("%Y_%m_%d-%I:%M:%S_%p")
filename = dataset_name+'_'+str(score)+'_'+str(n_nodes[inference_level-1])+'_sp-hist_'+str(spatial_histograms)+'_coreset_'+str(coresets)+'_'+date
np.save(p/filename, score)

print('--- %s seconds ---' % (time.time() - start_time))