In [2]:
%load_ext autoreload
%autoreload 2

import numpy as np
import torch
from torch import nn
import torch.nn.functional as F

import sys
sys.path.append("../")

from cont_speech_experiment import ContinuousSpeechExperiment
from nupic.research.frameworks.backprop_structure.modules.binary_layers import BinaryGatedConv2d, BinaryGatedLinear
from nupic.torch.modules import (
    Flatten,
    KWinners,
    KWinners2d,
    SparseWeights,
    SparseWeights2d,
)

from nupic.research.support.parse_config import parse_config
from nupic.research.frameworks.continuous_learning.utils import train_model, clear_labels
from nupic.research.frameworks.pytorch.model_utils import evaluate_model

import matplotlib.pyplot as plt
import seaborn as sns

%matplotlib inline

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [3]:
config_file = "../experiments.cfg"
with open(config_file) as cf:
    config_init = parse_config(cf)
    
exp = "sparseCNN2"

config = config_init[exp]
config["name"] = exp

experiment = ContinuousSpeechExperiment(config=config)

Creating optimizer with learning rate= 0.01


In [38]:
input_size = (1, 32, 32)
cnn_channels = (128, 64)
kernel_size=5

m = BinaryGatedConv2d(in_channels=1,
                     out_channels=cnn_channels[0],
                     kernel_size=kernel_size).cuda()

k1 = KWinners2d(cnn_channels[0],
               percent_on=0.1,).cuda()

In [9]:
x, y = get_example()

In [103]:
class ToyNet(nn.Module):
    def __init__(self,
                input_size=(1,32,32),
                n_classes=11,
                cnn_channels=(64, 64),
                linear_n=(1000,),
                cnn_droprate_init=(0.5,0.5),
                linear_droprate_init=(0.5, 0.5),
                l0_strength =(0.5, 0.5),
                l2_strength=(0.5, 0.5),
                cnn_pct_on=(0.1,0.1),
                linear_pct_on=(0.1,),
                boost_strength=(1., 1.),
                boost_strength_factor=(0.9, 0.9),
                duty_cycle_period=(1000, 1000),
                batch_norm=True,
                ):
        super(ToyNet, self).__init__()
        
        
        self.cnn_channels = cnn_channels
        self.linear_n = linear_n
        self.cnn_pct_on = cnn_pct_on
        self.linear_pct_on = linear_pct_on
        self.boost_strength = boost_strength
        self.boost_strength_factor = boost_strength_factor
        self.duty_cycle_period = duty_cycle_period
        self.batch_norm = batch_norm
    
        self.cnn1 = BinaryGatedConv2d(in_channels=input_size[0],
                                     out_channels=cnn_channels[0],
                                     kernel_size=5,
                                     droprate_init=cnn_droprate_init[0],
                                     l0_strength=l0_strength[0],
                                     l2_strength=l2_strength[0],
                                     )
        
        self.bn1 = nn.BatchNorm2d(cnn_channels[0], affine=False)
        
        self.mp1 = nn.MaxPool2d(2)
        
        self.k1 = KWinners2d(channels=cnn_channels[0],
                            percent_on=cnn_pct_on[0],
                            boost_strength=boost_strength[0],
                            boost_strength_factor=boost_strength_factor[0],
                            duty_cycle_period=duty_cycle_period[0],)
        
        self.flatten = Flatten()
        
        self.linear1 = BinaryGatedLinear(in_features=self.conv_out(cnn_channels[0]), 
                                        out_features=linear_n[0],
                                        droprate_init=linear_droprate_init[0],
                                        l0_strength=l0_strength[1],
                                        l2_strength=l2_strength[1],
                                    )
        
        self.bn2 = nn.BatchNorm1d(linear_n[0], affine=False)
        
        self.linear1_k = KWinners(n=linear_n[0], 
                                 percent_on=linear_pct_on[0],
                                 boost_strength=boost_strength[1], # -- NOTE -- replace this if you add 2nd conv layer
                                 boost_strength_factor=boost_strength_factor[1],
                                 duty_cycle_period=duty_cycle_period[1],        
                                 )
        
        self.output = nn.Linear(linear_n[0], n_classes)
        self.log_softmax = nn.LogSoftmax(dim=1)


    def forward(self, x):
        # Conv component
        x = self.cnn1(x)
        if self.batch_norm:
            x = self.bn1(x)
        x = self.mp1(x)
        x = self.k1(x)
        # flatten
        x = self.flatten(x)
        # Linear component
        x = self.linear1(x)
        if self.batch_norm:
            x = self.bn2(x)
        x = self.linear1_k(x)
        
        # output
        x = self.output(x)
        x = self.log_softmax(x)
        
        return x
                                         
    def conv_out(self, cnn_channels):
        if cnn_channels == 64:
            out_size = 12544
        elif cnn_channels == 256:
            out_size = 50176
        return out_size

In [104]:
num_classes = 5
net = ToyNet(n_classes=num_classes,
            cnn_channels=(256,256),
            linear_n=(2048,),
            cnn_droprate_init=(0.1,),
            linear_droprate_init=(0.1,),
            boost_strength=(0., 0.),
            boost_strength_factor=(0., 0.),
            duty_cycle_period=(20000, 20000),
            ).cuda()

In [None]:
class_inds = np.arange(1,5).reshape(2,2) # [1, 2], [3, 4]
optimizer = torch.optim.SGD(net.parameters(), lr=0.01)

for frac_batches in np.arange(0.6, 0.05, -0.1):
    for l0 in (1., 0.5, 0.2):
        for l2 in (1., 0.5, 0.2):
    
            net = ToyNet(n_classes=num_classes,
                    cnn_channels=(256,256),
                    linear_n=(2048,),
                    cnn_droprate_init=(0.1,),
                    linear_droprate_init=(0.1,),
                    l0_strength=(l0,l0),
                    l2_strength=(l2,l2),
                    boost_strength=(0., 0.),
                    boost_strength_factor=(0., 0.),
                    duty_cycle_period=(20000, 20000),
                    ).cuda()
        
            for j in range(len(class_inds)):
                experiment.combine_classes(class_inds[j])
                train_model(net, experiment.train_loader, optimizer,
                           device=torch.device("cuda"), sample_fraction=frac_batches,
                           freeze_output=True, layer_type="dense",
                           output_indices=clear_labels(class_inds[j], length=num_classes)
                           )
                accs = [np.round(test_model(net, class_loader),2)
                        for class_loader in experiment.test_loader[:num_classes]]

                print("frac_train: {:.2f}, l_0 reg: {:.2f}, l_2 reg: {:.2f}".format(frac_batches, l0, l2))
                print(accs)

frac_train: 0.60, l_0 reg: 1.00, l_2 reg: 1.00
[0, 0.0, 0.01, 0.58, 0.44]
frac_train: 0.60, l_0 reg: 1.00, l_2 reg: 1.00
[0, 0.02, 0.02, 0.73, 0.22]
frac_train: 0.60, l_0 reg: 1.00, l_2 reg: 0.50
[0, 0.46, 0.06, 0.05, 0.11]
frac_train: 0.60, l_0 reg: 1.00, l_2 reg: 0.50
[0, 0.41, 0.02, 0.1, 0.19]
frac_train: 0.60, l_0 reg: 1.00, l_2 reg: 0.20
[0, 0.01, 0.06, 0.27, 0.06]
frac_train: 0.60, l_0 reg: 1.00, l_2 reg: 0.20
[0, 0.07, 0.12, 0.24, 0.04]
frac_train: 0.60, l_0 reg: 0.50, l_2 reg: 1.00
[0, 0.14, 0.19, 0.35, 0.04]
frac_train: 0.60, l_0 reg: 0.50, l_2 reg: 1.00
[0, 0.2, 0.16, 0.19, 0.07]
frac_train: 0.60, l_0 reg: 0.50, l_2 reg: 0.50
[0, 0.46, 0.06, 0.18, 0.04]
frac_train: 0.60, l_0 reg: 0.50, l_2 reg: 0.50
[0, 0.42, 0.08, 0.32, 0.03]
frac_train: 0.60, l_0 reg: 0.50, l_2 reg: 0.20
[0, 0.17, 0.09, 0.0, 0.26]
frac_train: 0.60, l_0 reg: 0.50, l_2 reg: 0.20
[0, 0.25, 0.17, 0.0, 0.12]
frac_train: 0.60, l_0 reg: 0.20, l_2 reg: 1.00
[0, 0.12, 0.39, 0.34, 0.15]
frac_train: 0.60, l_0 reg: 0.2

In [107]:
loader.dataset.tensors[1].shape[0]

20482

### Little helper functions to avoid crowding

In [8]:
def test_model(model, loader):
    tst = evaluate_model(model,
                        loader,
                        device=torch.device("cuda"),
                        )
    return tst["mean_accuracy"]

def get_example(loader=None):
    if loader is None:
        loader = experiment.full_train_loader
    x, y = next(iter(loader))
    return x, y

In [7]:
net = LeNetBackpropStructure(input_size=(1, 32, 32),
                             num_classes=11,
                            droprate_init=0.1).cuda()

loader = experiment.full_train_loader
test_loader = experiment.gen_test_loader
optimizer = torch.optim.SGD(net.parameters(), lr=0.01)

In [8]:
for epoch in range(3):
    train_model(net, loader, optimizer, device=torch.device("cuda"), normalize_input=False)
    res = evaluate_model(net, test_loader, device=torch.device("cuda"))
    print(res["mean_accuracy"])

0.5184748427672956
0.4669811320754717
0.6344339622641509


In [35]:
evaluate_model(net, test_loader, device=torch.device("cuda"))

{'total_correct': 1740,
 'total_tested': 2544,
 'mean_loss': -2332.2959905660377,
 'mean_accuracy': 0.6839622641509434}

In [80]:
loader = experiment.full_train_loader

In [85]:
loader.dataset.tensors[1].shape

(20482,)

In [86]:
loader.b

16