In [1]:
import dlc_bci as bci

import torch
import numpy as np

from models import *
from callbacks import keep_best_model, store_best_model

from types import SimpleNamespace 


from torch import optim
from torch import nn
from torch import Tensor
from torch.autograd import Variable

# Pick the compute device once for the whole notebook.
# The second GPU ("cuda:1") is preferred when it exists; on a single-GPU
# machine that index is invalid, so fall back to "cuda:0", and to the CPU
# when CUDA is not available at all.
if torch.cuda.is_available():
    device = torch.device("cuda:1" if torch.cuda.device_count() > 1 else "cuda:0")
else:
    device = torch.device("cpu")

%matplotlib inline
%load_ext autoreload
%autoreload 2

### Load

In [2]:
# Flag forwarded unchanged to both bci.load calls below.
one_khz = False

# Training split: the two values returned by bci.load are stored as .X / .y.
train = SimpleNamespace()
train.X, train.y = bci.load(root='./data_bci', one_khz=one_khz)
print("{} {}".format(type(train.X), train.X.size()))
print("{} {}".format(type(train.y), train.y.size()))

# Second split, requested with train=False.
test = SimpleNamespace()
test.X, test.y = bci.load(root='./data_bci', train=False, one_khz=one_khz)
print("{} {}".format(type(test.X), test.X.size()))
print("{} {}".format(type(test.y), test.y.size()))

<class 'torch.Tensor'> torch.Size([316, 28, 50])
<class 'torch.Tensor'> torch.Size([316])
<class 'torch.Tensor'> torch.Size([100, 28, 50])
<class 'torch.Tensor'> torch.Size([100])


In [3]:
# torch.manual_seed(0) 

In [4]:
from collections import OrderedDict

def _default_grid():
    """Build the default search space as a fresh OrderedDict of plain Python values."""
    return OrderedDict([
        # ndarray.tolist() yields native int/float, replacing the removed np.asscalar.
        ("nb_hidden", np.arange(40, 201, 40).tolist()),
        ("activation", [nn.ReLU, nn.Tanh, nn.ELU]),
        ("optimizer", [optim.Adam, optim.Adadelta, optim.Adamax]),
        ("weight_decay", np.logspace(-6, -2, 4).tolist()),
        ("dropout", np.linspace(0, 0.30, 4).tolist()),
    ])


def cross_validate_grid_search(
    modelClass,
    train,
    epochs=100,
    n_splits=5,
    grid_search_on=None
):
    """Run a simplified (greedy, one-parameter-at-a-time) grid search.

    Parameters are optimized in the iteration order of ``grid_search_on``:
    while one parameter is scanned, every other parameter is held at its best
    value found so far (or at its first candidate if it has not been scanned
    yet). This keeps the number of cross-validation runs linear in the number
    of candidate values instead of exponential.

    Args:
        modelClass: model class providing ``prepare_data(train)``, a
            constructor accepting the searched parameters as keyword
            arguments, and ``to``, ``cross_validate`` and ``save_data``
            methods.
        train: training data, passed as-is to ``modelClass.prepare_data``.
        epochs: forwarded to ``model.cross_validate``.
        n_splits: forwarded to ``model.cross_validate``.
        grid_search_on: ordered mapping ``parameter name -> list of candidate
            values``. ``None`` (the default) selects the built-in grid over
            nb_hidden, activation, optimizer, weight_decay and dropout.

    Raises:
        ValueError: if the grid is empty or a parameter has no candidates.

    Side effects:
        Uses the notebook-level ``device`` for the data and every model.
        Through ``model.save_data`` it stores the per-parameter scores under
        "cross_validation/scores/<param>", the searched grid under
        "cross_validation/tried_params" and the selected configuration under
        "cross_validation/best_params".
    """
    if grid_search_on is None:
        grid_search_on = _default_grid()

    # Fail early with a clear message: an empty grid would otherwise leave
    # `model` unbound below, and an empty candidate list would raise a bare
    # IndexError during initialization.
    if len(grid_search_on) == 0:
        raise ValueError("grid_search_on must contain at least one parameter")
    for param in grid_search_on.keys():
        if len(grid_search_on[param]) == 0:
            raise ValueError("no candidate values given for parameter " + repr(param))

    X_tr, y_tr = modelClass.prepare_data(train)
    X_tr = X_tr.to(device)
    y_tr = y_tr.to(device)

    print("Optimizing in order:", grid_search_on.keys())
    # Best parameters found so far, initialized with the first candidate of each.
    bestParams = {}
    for param in grid_search_on.keys():
        bestParams[param] = grid_search_on[param][0]

    for param_name in grid_search_on.keys():
        modelScores = {
            "tr_scores": [],
            "va_scores": []
        }
        param_values = grid_search_on[param_name]
        print("\nCross validation on", param_name+":")
        for n in param_values: # scan through the parameter values
            print("------", param_name, "=", str(n), "------")
            bestParams[param_name] = n
            model = modelClass(**bestParams)
            model.to(device)
            result = model.cross_validate(X_tr, y_tr, verbose=False, epochs=epochs, n_splits=n_splits)

            # .item() converts the NumPy scalar to a plain Python float.
            modelScores["tr_scores"].append(np.mean(result["train_score"]).item())
            modelScores["va_scores"].append(np.mean(result["test_score"]).item())

        # save the obtained scores (via the last model built for this parameter)
        model.save_data(modelScores, "cross_validation/scores/"+param_name, pickle_protocol=0)
        # keep the candidate with the highest mean validation score
        bestParams[param_name] = param_values[np.argmax(modelScores["va_scores"])]
        print("Best", param_name + ":", str(bestParams[param_name]) + ". Score:", np.max(modelScores["va_scores"]))

    model.save_data(grid_search_on, "cross_validation/tried_params", pickle_protocol=0)
    model.save_data(bestParams, "cross_validation/best_params", pickle_protocol=0)

- Cross-validated grid search on `CNN2D_MaxPool`

In [11]:
# One-parameter-at-a-time search for CNN2D_MaxPool with the default grid;
# epochs=100 is forwarded to model.cross_validate for every candidate value.
cross_validate_grid_search(CNN2D_MaxPool, train, epochs=100)

Optimizing in order: odict_keys(['nb_hidden', 'activation', 'optimizer', 'weight_decay', 'dropout'])

Cross validation on nb_hidden:
------ nb_hidden = 40 ------


  sum_loss_train += loss.data[0].item()
  test_loss = self.criterion(self(X_test), y_test).data[0] if compute_test_err else None


------ nb_hidden = 80 ------
------ nb_hidden = 120 ------
------ nb_hidden = 160 ------
------ nb_hidden = 200 ------

Cross validation on activation:
------ activation = <class 'torch.nn.modules.activation.ReLU'> ------
------ activation = <class 'torch.nn.modules.activation.Tanh'> ------
------ activation = <class 'torch.nn.modules.activation.ELU'> ------

Cross validation on optimizer:
------ optimizer = <class 'torch.optim.adam.Adam'> ------
------ optimizer = <class 'torch.optim.adadelta.Adadelta'> ------
------ optimizer = <class 'torch.optim.adamax.Adamax'> ------

Cross validation on weight_decay:
------ weight_decay = 1e-06 ------
------ weight_decay = 2.1544346900318823e-05 ------
------ weight_decay = 0.00046415888336127773 ------
------ weight_decay = 0.01 ------

Cross validation on dropout:
------ dropout = 0.0 ------
------ dropout = 0.09999999999999999 ------
------ dropout = 0.19999999999999998 ------
------ dropout = 0.3 ------


- Cross-validated grid search on `CNN_1D_MaxPool`

In [6]:
# One-parameter-at-a-time search for CNN_1D_MaxPool with the default grid;
# epochs=100 is forwarded to model.cross_validate for every candidate value.
cross_validate_grid_search(CNN_1D_MaxPool, train, epochs=100)

Optimizing in order: odict_keys(['nb_hidden', 'activation', 'optimizer', 'weight_decay', 'dropout'])

Cross validation on nb_hidden:
------ nb_hidden = 40 ------


  sum_loss_train += loss.data[0].item()
  test_loss = self.criterion(self(X_test), y_test).data[0] if compute_test_err else None


------ nb_hidden = 80 ------
------ nb_hidden = 120 ------
------ nb_hidden = 160 ------
------ nb_hidden = 200 ------
Best nb_hidden: 40. Score: 0.765773809524

Cross validation on activation:
------ activation = <class 'torch.nn.modules.activation.ReLU'> ------
------ activation = <class 'torch.nn.modules.activation.Tanh'> ------
------ activation = <class 'torch.nn.modules.activation.ELU'> ------
Best activation: <class 'torch.nn.modules.activation.ReLU'>. Score: 0.740376984127

Cross validation on optimizer:
------ optimizer = <class 'torch.optim.adam.Adam'> ------
------ optimizer = <class 'torch.optim.adadelta.Adadelta'> ------
------ optimizer = <class 'torch.optim.adamax.Adamax'> ------
Best optimizer: <class 'torch.optim.adamax.Adamax'>. Score: 0.753174603175

Cross validation on weight_decay:
------ weight_decay = 1e-06 ------
------ weight_decay = 2.1544346900318823e-05 ------
------ weight_decay = 0.00046415888336127773 ------
------ weight_decay = 0.01 ------
Best weight_d

- Cross-validated grid search on `CNN_1D_BatchNorm`

In [7]:
# One-parameter-at-a-time search for CNN_1D_BatchNorm with the default grid;
# epochs=100 is forwarded to model.cross_validate for every candidate value.
cross_validate_grid_search(CNN_1D_BatchNorm, train, epochs=100)

Optimizing in order: odict_keys(['nb_hidden', 'activation', 'optimizer', 'weight_decay', 'dropout'])

Cross validation on nb_hidden:
------ nb_hidden = 40 ------


  sum_loss_train += loss.data[0].item()
  test_loss = self.criterion(self(X_test), y_test).data[0] if compute_test_err else None


------ nb_hidden = 80 ------
------ nb_hidden = 120 ------
------ nb_hidden = 160 ------
------ nb_hidden = 200 ------
Best nb_hidden: 120. Score: 0.677380952381

Cross validation on activation:
------ activation = <class 'torch.nn.modules.activation.ReLU'> ------
------ activation = <class 'torch.nn.modules.activation.Tanh'> ------
------ activation = <class 'torch.nn.modules.activation.ELU'> ------
Best activation: <class 'torch.nn.modules.activation.Tanh'>. Score: 0.78814484127

Cross validation on optimizer:
------ optimizer = <class 'torch.optim.adam.Adam'> ------
------ optimizer = <class 'torch.optim.adadelta.Adadelta'> ------
------ optimizer = <class 'torch.optim.adamax.Adamax'> ------
Best optimizer: <class 'torch.optim.adam.Adam'>. Score: 0.787996031746

Cross validation on weight_decay:
------ weight_decay = 1e-06 ------
------ weight_decay = 2.1544346900318823e-05 ------
------ weight_decay = 0.00046415888336127773 ------
------ weight_decay = 0.01 ------
Best weight_decay

- Cross-validated grid search on `CNN_1D_BatchNorm_Dial`

In [15]:
# One-parameter-at-a-time search for CNN_1D_BatchNorm_Dial with the default grid;
# epochs=100 is forwarded to model.cross_validate for every candidate value.
cross_validate_grid_search(CNN_1D_BatchNorm_Dial, train, epochs=100)

Optimizing in order: odict_keys(['nb_hidden', 'activation', 'optimizer', 'weight_decay', 'dropout'])

Cross validation on nb_hidden:
------ nb_hidden = 40 ------


  sum_loss_train += loss.data[0].item()
  test_loss = self.criterion(self(X_test), y_test).data[0] if compute_test_err else None


------ nb_hidden = 80 ------
------ nb_hidden = 120 ------
------ nb_hidden = 160 ------
------ nb_hidden = 200 ------
Best nb_hidden: 120. Score: 0.683829365079

Cross validation on activation:
------ activation = <class 'torch.nn.modules.activation.ReLU'> ------
------ activation = <class 'torch.nn.modules.activation.Tanh'> ------
------ activation = <class 'torch.nn.modules.activation.ELU'> ------
Best activation: <class 'torch.nn.modules.activation.Tanh'>. Score: 0.775446428571

Cross validation on optimizer:
------ optimizer = <class 'torch.optim.adam.Adam'> ------
------ optimizer = <class 'torch.optim.adadelta.Adadelta'> ------
------ optimizer = <class 'torch.optim.adamax.Adamax'> ------
Best optimizer: <class 'torch.optim.adamax.Adamax'>. Score: 0.787996031746

Cross validation on weight_decay:
------ weight_decay = 1e-06 ------
------ weight_decay = 2.1544346900318823e-05 ------
------ weight_decay = 0.00046415888336127773 ------
------ weight_decay = 0.01 ------
Best weight_

- Cross-validated grid search on `CNN_1D_Residual`

In [5]:
# One-parameter-at-a-time search for CNN_1D_Residual with the default grid;
# epochs=100 is forwarded to model.cross_validate for every candidate value.
cross_validate_grid_search(CNN_1D_Residual, train, epochs=100)

Optimizing in order: odict_keys(['nb_hidden', 'activation', 'optimizer', 'weight_decay', 'dropout'])

Cross validation on nb_hidden:
------ nb_hidden = 40 ------


  sum_loss_train += loss.data[0].item()
  test_loss = self.criterion(self(X_test), y_test).data[0] if compute_test_err else None


------ nb_hidden = 80 ------
------ nb_hidden = 120 ------
------ nb_hidden = 160 ------
------ nb_hidden = 200 ------
Best nb_hidden: 40. Score: 0.731001984127

Cross validation on activation:
------ activation = <class 'torch.nn.modules.activation.ReLU'> ------
------ activation = <class 'torch.nn.modules.activation.Tanh'> ------
------ activation = <class 'torch.nn.modules.activation.ELU'> ------
Best activation: <class 'torch.nn.modules.activation.Tanh'>. Score: 0.721329365079

Cross validation on optimizer:
------ optimizer = <class 'torch.optim.adam.Adam'> ------
------ optimizer = <class 'torch.optim.adadelta.Adadelta'> ------
------ optimizer = <class 'torch.optim.adamax.Adamax'> ------
Best optimizer: <class 'torch.optim.adadelta.Adadelta'>. Score: 0.731051587302

Cross validation on weight_decay:
------ weight_decay = 1e-06 ------
------ weight_decay = 2.1544346900318823e-05 ------
------ weight_decay = 0.00046415888336127773 ------
------ weight_decay = 0.01 ------
Best weig

### random stuff

In [6]:

# [a.device for a in model.parameters()]

In [6]:
# Scratch cell: rebuild a search grid by hand and fit one CNN2D_MaxPool
# with its constructor defaults.
# ndarray.tolist() produces plain Python numbers; it replaces np.asscalar,
# which was deprecated in NumPy 1.16 and removed in NumPy 1.23.
# NOTE(review): weight_decay spans 1e-6..1e-1 here, whereas the default grid
# of cross_validate_grid_search spans 1e-6..1e-2 — confirm which is intended.
grid_search_on = OrderedDict([
    ("nb_hidden", np.arange(40, 201, 40).tolist()),
    ("activation", [nn.ReLU, nn.Tanh, nn.ELU]),
    ("optimizer", [optim.Adam, optim.Adadelta, optim.Adamax]),
    ("weight_decay", np.logspace(-6, -1, 4).tolist())
])
# First candidate of every parameter.
# NOTE(review): bestParams is built but not passed to the model below.
bestParams = {}
for param in grid_search_on.keys():
    bestParams[param] = grid_search_on[param][0]

X_tr, y_tr = CNN2D_MaxPool.prepare_data(train)
X_tr = X_tr.to(device)
y_tr = y_tr.to(device)

model = CNN2D_MaxPool().to(device)
model.fit(X_tr, y_tr)

  sum_loss_train += loss.data[0]


Epoch 0: Train loss: tensor(16.1970, device='cuda:1'). Train accuracy 52.22%. 
Epoch 1: Train loss: tensor(11.6109, device='cuda:1'). Train accuracy 60.44%. 
Epoch 2: Train loss: tensor(11.1342, device='cuda:1'). Train accuracy 50.95%. 
Epoch 3: Train loss: tensor(11.0515, device='cuda:1'). Train accuracy 63.29%. 
Epoch 4: Train loss: tensor(11.0026, device='cuda:1'). Train accuracy 63.61%. 
Epoch 5: Train loss: tensor(11.0281, device='cuda:1'). Train accuracy 64.87%. 
Epoch 6: Train loss: tensor(10.8853, device='cuda:1'). Train accuracy 66.46%. 
Epoch 7: Train loss: tensor(10.7349, device='cuda:1'). Train accuracy 68.35%. 
Epoch 8: Train loss: tensor(10.6229, device='cuda:1'). Train accuracy 68.35%. 
Epoch 9: Train loss: tensor(10.6490, device='cuda:1'). Train accuracy 66.77%. 
Epoch 10: Train loss: tensor(10.7949, device='cuda:1'). Train accuracy 67.09%. 
Epoch 11: Train loss: tensor(10.1920, device='cuda:1'). Train accuracy 71.20%. 
Epoch 12: Train loss: tensor(10.2017, device='cuda

CNN2D_MaxPool(
  (features): Sequential(
    (0): Conv2d(1, 32, kernel_size=(3, 7), stride=(1, 1), padding=(1, 3))
    (1): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (2): ReLU()
    (3): Dropout(p=0.1)
    (4): Conv2d(32, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (5): MaxPool2d(kernel_size=(2, 5), stride=(2, 5), padding=0, dilation=1, ceil_mode=False)
    (6): ReLU()
    (7): Dropout(p=0.1)
    (8): Conv2d(64, 32, kernel_size=(5, 5), stride=(1, 1), padding=(2, 2))
    (9): MaxPool2d(kernel_size=2, stride=2, padding=(1, 1), dilation=1, ceil_mode=False)
    (10): ReLU()
    (11): Dropout(p=0.1)
  )
  (classifier): Sequential(
    (0): Linear(in_features=384, out_features=50, bias=True)
    (1): ReLU()
    (2): Linear(in_features=50, out_features=2, bias=True)
  )
  (criterion): CrossEntropyLoss()
)

In [10]:
y = y_tr
# For 2-D targets take the index of the per-row maximum (second item returned
# by max over dim 1); targets of any other rank are used as they are.
if y.dim() == 2:
    true = y.data.max(1)[1]
else:
    true = y.data
true

tensor([ 0,  1,  0,  1,  0,  1,  0,  1,  0,  0,  1,  1,  0,  0,
         1,  1,  1,  0,  0,  1,  0,  0,  1,  0,  1,  0,  0,  0,
         0,  0,  0,  0,  1,  1,  1,  1,  1,  1,  0,  1,  0,  1,
         0,  0,  1,  0,  1,  1,  0,  1,  1,  0,  0,  1,  0,  0,
         1,  0,  1,  0,  1,  0,  0,  1,  0,  0,  0,  0,  0,  1,
         0,  1,  0,  0,  0,  0,  0,  1,  1,  1,  0,  1,  0,  1,
         1,  1,  1,  1,  1,  1,  1,  1,  0,  0,  0,  0,  1,  1,
         1,  0,  1,  1,  1,  0,  1,  0,  1,  1,  1,  0,  1,  0,
         1,  0,  0,  0,  1,  0,  0,  1,  1,  1,  1,  0,  1,  1,
         0,  0,  1,  1,  1,  1,  1,  0,  0,  0,  0,  1,  1,  0,
         1,  1,  1,  1,  1,  1,  1,  0,  0,  0,  1,  1,  0,  0,
         1,  1,  0,  0,  0,  0,  0,  1,  1,  1,  1,  1,  1,  0,
         1,  0,  1,  1,  0,  1,  0,  0,  1,  1,  1,  0,  0,  1,
         0,  0,  0,  0,  0,  1,  1,  0,  0,  0,  0,  1,  0,  0,
         0,  1,  0,  0,  1,  0,  0,  0,  1,  1,  1,  0,  0,  0,
         1,  1,  1,  0,  0,  0,  1,  0, 

In [37]:
# Two 3000x3000 CPU matrices with entries drawn uniformly from [-10, 10).
# Allocate with torch.empty and fill in place: wrapping a legacy Tensor(...) in
# torch.tensor(...) makes a redundant copy and triggers PyTorch's
# copy-construct UserWarning.
a = torch.empty(3000, 3000).uniform_(-10, 10)
b = torch.empty(3000, 3000).uniform_(-10, 10)

In [38]:
%timeit a@b
a_gpu = a.to(device)
b_gpu = b.to(device)
%timeit a_gpu@b_gpu

1 loop, best of 3: 221 ms per loop
1000 loops, best of 3: 5.33 ms per loop


In [33]:
# Show whether the original tensor and its .to(device) copy live on a CUDA device.
a.is_cuda, a_gpu.is_cuda

(False, True)

In [6]:
# Quick check of `/` applied to an integer tensor.
# NOTE(review): the result depends on the installed PyTorch version (older
# releases truncate to integers, newer ones presumably return floats) — confirm.
torch.tensor([1, 1])/10

tensor([ 0])

In [None]:
import torch
from torch import nn
from torch import optim  # used below; previously only available via an earlier cell

# NOTE: this rebinds the notebook-level `device`.
# Fall back to the CPU so the snippet also runs on machines without CUDA.
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
# device = torch.device("cpu")

# Create an initialised input directly on the target device
# (torch.Tensor(10, 2) returns uninitialised memory).
x = torch.randn(10, 2, device=device)

# Move the module to the device BEFORE building the optimizer: the PyTorch docs
# ask for this order, so that any per-parameter state the optimizer allocates
# ends up on the same device as the parameters.
linear = nn.Linear(2, 4)
linear.to(device)
o = optim.Adagrad(linear.parameters())
# o = optim.Adam(linear.parameters())

# One backward pass and a single optimizer step.
linear(x).mean().backward()
o.step()