In [1]:
# Remaining TODOs for this notebook
# custom Softmax and custom Cross Entropy loss as torch autograd functions
# run the custom version of this 

## 0. Set Args

In [2]:
from argparse import ArgumentDefaultsHelpFormatter, ArgumentParser

parser = ArgumentParser(formatter_class=ArgumentDefaultsHelpFormatter)
args = parser.parse_args('')

args.data_dir = '/datadrive'
args.seed = 123
args.lr = 0.01

## 1. Off the shelf MLP implementation

In [3]:
import torch
from torch import nn
from torch.nn import functional as F

In [4]:
class MLPLazy(nn.Module):

    def __init__(self, nx, hidden_layer_dims, ny):
        super(MLPLazy, self).__init__()
        self.hidden_layer_dims = hidden_layer_dims
        
        # note that this is only possible with CPU training
        # push filled python list to nn.ModuleList if using GPU
        self.linear_layers = [] 
        last_dim = nx
        for next_dim in hidden_layer_dims:
            linear_layer = nn.Linear(last_dim, next_dim)
            self.linear_layers.append(linear_layer)
            last_dim = next_dim
        self.scorer = nn.Linear(last_dim, ny)

    def forward(self, X):
        '''
        X has shape (m, nx)
        '''
        last_X = X
        for i, linear_layer in enumerate(self.linear_layers):
            # shape (m, self.hidden_layer_dims[i])
            last_X = linear_layer(last_X)
            # shape (m, self.hidden_layer_dims[i])
            last_X = torch.relu(last_X)
        # shape (m, ny)
        z = self.scorer(last_X)
        # shape (m, ny)
        a = torch.softmax(z, dim=1)
        return z, a

## 2. Main Train & Pred Loop

In [5]:
def train(model, train_loader, valid_loader, loss_criterion, optimizer, args, epochs=20):
    '''
    Train model and report losses on train and dev sets per epoch
    '''
    
    history = {
        'train_losses': [],
        'valid_losses': [],        
        'valid_accuracy': [],
    }
    
    for epoch_i in range(epochs):

        # train
        model.train()
        batch_losses = []
        for batch_i, batch_data in enumerate(train_loader):
            logits, activations = model(batch_data['X'])
            loss = loss_criterion(logits, batch_data['y'])
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
            batch_losses.append(loss.item())
        history['train_losses'].append(sum(batch_losses) / len(batch_losses))

        # validate
        batch_val_losses, batch_val_accuracies = pred(model, valid_loader, loss_criterion)
        history['valid_losses'].append(sum(batch_val_losses) / len(batch_val_losses))
        history['valid_accuracy'].append(sum(batch_val_accuracies) / len(batch_val_accuracies))
        
    return history

# def write_param_history(model, history):
#     weights = model.scorer.weight.clone().detach().numpy()
#     bias = model.scorer.bias.data.clone().detach().numpy()
#     history['weights'].append(weights)
#     history['bias'].append(bias)    

In [6]:
@torch.no_grad()
def pred(model, test_loader, loss_criterion):
    '''Propogate forward on dev or test set, report loss and accuracy.'''
    
    # evaluate
    model.eval()
    batch_losses = []
    batch_accuracies = []
    for batch_i, batch_data in enumerate(test_loader):
        logits, activations = model(batch_data['X'])
        loss = loss_criterion(logits, batch_data['y'])
        batch_losses.append(loss.item())
        
        _, max_index = torch.max(logits, dim=1)
        accuracy = torch.mean(max_index.eq(batch_data['y']).type(torch.FloatTensor))
        batch_accuracies.append(accuracy.item())
    
    return batch_losses, batch_accuracies

## 3. Make Toy Dataset

In [11]:
# Pytorch Dataloader
# https://pytorch.org/tutorials/beginner/data_loading_tutorial.html

# Pytorch Data Collate (Further reading, not implemented here)
# https://pytorch.org/docs/stable/data.html#torch.utils.data.DataLoader

from torch.utils.data import Dataset, DataLoader

class ToyDataset(Dataset):
    """Toy dataset construction."""

    def __init__(self, data_dir):
        """
        Args:
            data_dir (string): Path to the directory with data files.
            transform (callable, optional): Optional transform to be applied
                on a sample.
        """
        # shape (m, nx)
        self.X = np.load(os.path.join(data_dir, 'features.npy'))
        # shape (m, ny=1)
        self.y = np.load(os.path.join(data_dir, 'labels.npy'))
        

    def __len__(self):
        return self.X.shape[0]

    def __getitem__(self, idx):
        if torch.is_tensor(idx):
            idx = idx.tolist()
        else:
            X = torch.from_numpy(self.X[idx, :]).type(torch.FloatTensor)
            y = torch.tensor(self.y[idx], dtype=torch.long)
#             y = torch.from_numpy(self.y[idx, :]).type(torch.FloatTensor)
            sample = {'X': X, 'y': y}

        return sample

In [12]:
# give permission to access /datadrive
!sudo chmod -R 777 /datadrive 

In [33]:
import os
import numpy as np
from sklearn.datasets import make_classification

# construct and save toydataset

m_train, m_valid, m_test = 9000, 500, 500
m_total = m_train + m_valid + m_test

X, y = make_classification(n_samples=m_total, n_features=10, n_informative=10, n_redundant=0, n_repeated=0, n_classes=5, n_clusters_per_class=2, weights=None, flip_y=0.01, class_sep=1.0, hypercube=True, shift=0.0, scale=1.0, shuffle=True, random_state=args.seed)
# y = np.expand_dims(y, -1)

np.random.seed(args.seed)
permutation = np.random.permutation(m_total)
print('First 10 training indices', permutation[:10])
print('X shape', X.shape)
print('y shape', y.shape)

train_indices = permutation[0:m_train]
valid_indices = permutation[m_train:m_train+m_valid]
test_indices = permutation[m_train+m_valid:]

dataset_dir = 'toy_mlp_1'
os.makedirs(os.path.join(args.data_dir, dataset_dir, 'train'), mode = 0o777, exist_ok = True) 
os.makedirs(os.path.join(args.data_dir, dataset_dir, 'valid'), mode = 0o777, exist_ok = True) 
os.makedirs(os.path.join(args.data_dir, dataset_dir, 'test'), mode = 0o777, exist_ok = True) 

np.save(os.path.join(args.data_dir, dataset_dir, 'train', 'features.npy'), X[train_indices])
np.save(os.path.join(args.data_dir, dataset_dir, 'train', 'labels.npy'), y[train_indices])

np.save(os.path.join(args.data_dir, dataset_dir, 'valid', 'features.npy'), X[valid_indices])
np.save(os.path.join(args.data_dir, dataset_dir, 'valid', 'labels.npy'), y[valid_indices])

np.save(os.path.join(args.data_dir, dataset_dir, 'test', 'features.npy'), X[test_indices])
np.save(os.path.join(args.data_dir, dataset_dir, 'test', 'labels.npy'), y[test_indices])

First 10 training indices [2656  445 9505  332 4168 2364 6097    7 7752 4453]
X shape (10000, 10)
y shape (10000,)


## 4. Check and set CUDA devices

In [14]:
print('Active CUDA Device: GPU', torch.cuda.current_device())

print ('Available devices ', torch.cuda.device_count())
print ('Current cuda device ', torch.cuda.current_device())

Active CUDA Device: GPU 0
Available devices  2
Current cuda device  0


In [15]:
!nvidia-smi

Mon Oct 26 23:18:08 2020       
+-----------------------------------------------------------------------------+
| NVIDIA-SMI 450.36.06    Driver Version: 450.36.06    CUDA Version: 11.0     |
|-------------------------------+----------------------+----------------------+
| GPU  Name        Persistence-M| Bus-Id        Disp.A | Volatile Uncorr. ECC |
| Fan  Temp  Perf  Pwr:Usage/Cap|         Memory-Usage | GPU-Util  Compute M. |
|                               |                      |               MIG M. |
|   0  Tesla K80           On   | 00006C84:00:00.0 Off |                    0 |
| N/A   45C    P8    26W / 149W |      3MiB / 11441MiB |      0%      Default |
|                               |                      |                  N/A |
+-------------------------------+----------------------+----------------------+
|   1  Tesla K80           On   | 0000AA21:00:00.0 Off |                    0 |
| N/A   37C    P8    33W / 149W |      3MiB / 11441MiB |      0%      Default |
|       

In [16]:
# set cuda devices
cuda0 = torch.device('cuda:0') # same as cuda0 = torch.device('cuda:0') or cuda = torch.device('cuda')
cuda1 = torch.device('cuda:1') # same as cuda1 = torch.device('cuda:1')

## 5. Train and compare results on toy dataset

In [34]:
batch_size = 50
dataset_dir = 'toy_mlp_1'

training_set = ToyDataset(data_dir=os.path.join(args.data_dir, dataset_dir, 'train'))
training_generator = torch.utils.data.DataLoader(training_set, batch_size=batch_size, shuffle=True)

validation_set = ToyDataset(data_dir=os.path.join(args.data_dir, dataset_dir, 'valid'))
validation_generator = torch.utils.data.DataLoader(validation_set, batch_size=batch_size)

test_set = ToyDataset(data_dir=os.path.join(args.data_dir, dataset_dir, 'test'))
test_generator = torch.utils.data.DataLoader(test_set, batch_size=batch_size)

print('Train set X shape:', training_set.X.shape)
print('Train set y shape:', training_set.y.shape)

print('Valid set X shape:', validation_set.X.shape)
print('Valid set y shape:', validation_set.y.shape)

print('Test set X shape:', test_set.X.shape)
print('Test set y shape:', test_set.y.shape)

m = training_set.X.shape[0]
nx = training_set.X.shape[1]
ny = max(training_set.y) + 1

print('Num classes:', ny)

Train set X shape: (9000, 10)
Train set y shape: (9000,)
Valid set X shape: (500, 10)
Valid set y shape: (500,)
Test set X shape: (500, 10)
Test set y shape: (500,)
Num classes: 5


In [40]:
torch.manual_seed(args.seed)

# set off-the-shelf model, loss function and optimizer
model = MLPLazy(nx, [10, 10], ny)
# loss_criterion_lazy = nn.BCEWithLogitsLoss(reduction='mean')
loss_criterion_lazy = nn.CrossEntropyLoss(reduction='mean')
optimizer_lazy = torch.optim.SGD(model.parameters(), lr=args.lr)

history_off_the_shelf = train(model, training_generator, validation_generator, loss_criterion_lazy, optimizer_lazy, args, epochs=1000)

## classes=2 , layers=[10]

In [21]:
print(history_off_the_shelf['valid_losses'][::10])
print(history_off_the_shelf['train_losses'][::10])
print(history_off_the_shelf['valid_accuracy'][::10])

[0.7078015372157097,
 0.559940741956234,
 0.5136001236736775,
 0.49279844135046,
 0.4808144173026085,
 0.47350259937345984,
 0.46876141712069513,
 0.4649814983457327,
 0.4627411755174398,
 0.4607449018955231]

In [22]:
history_off_the_shelf['train_losses'][::10]

[0.7399412658479478,
 0.5025229536824756,
 0.42427631384796566,
 0.3825305799643199,
 0.35708342492580414,
 0.340332708424992,
 0.32642075336641735,
 0.3165907309287124,
 0.30814487404293484,
 0.30177313917213017]

In [23]:
history_off_the_shelf['valid_accuracy'][::10]

[0.49200001135468485,
 0.7340000128746033,
 0.7660000109672547,
 0.7640000106394291,
 0.7780000104010105,
 0.7820000104606152,
 0.7840000106394291,
 0.7860000105202198,
 0.7920000103116035,
 0.7920000103116035]

## classes=5 , layers=[10], 100 epochs

In [36]:
print(history_off_the_shelf['valid_losses'][::10])
print()
print(history_off_the_shelf['train_losses'][::10])
print()
print(history_off_the_shelf['valid_accuracy'][::10])

[1.594672179222107, 1.4679929852485656, 1.444815957546234, 1.4348259568214417, 1.429522454738617, 1.4265439987182618, 1.42513347864151, 1.42394357919693, 1.4238737106323243, 1.4231544494628907]

[1.6281403058105044, 1.4527535690201654, 1.4242660714520348, 1.4123969687355888, 1.4064121895366244, 1.403214869234297, 1.4013509809970857, 1.4003351860576205, 1.399590598874622, 1.3992673403686948]

[0.25399999916553495, 0.3640000030398369, 0.364000004529953, 0.37000000178813935, 0.3699999988079071, 0.3739999979734421, 0.3680000066757202, 0.3700000047683716, 0.3659999996423721, 0.36200000047683717]


## classes=5 , layers=[10], 1000 epochs

In [39]:
print(history_off_the_shelf['valid_losses'][::100])
print()
print(history_off_the_shelf['train_losses'][::100])
print()
print(history_off_the_shelf['valid_accuracy'][::100])

[1.594672179222107, 1.4226765275001525, 1.421881663799286, 1.4215632200241088, 1.4215431332588195, 1.421615970134735, 1.4213977694511413, 1.4214471697807312, 1.4216774225234985, 1.4215827703475952]

[1.6281403058105044, 1.3990414460500082, 1.3984545992480384, 1.3984812352392408, 1.398521096838845, 1.398539662361145, 1.3984148303667705, 1.3984918137391409, 1.398470558722814, 1.3985287633207109]

[0.25399999916553495, 0.3640000015497208, 0.3659999996423721, 0.3679999977350235, 0.3659999966621399, 0.3679999977350235, 0.36999999582767484, 0.3679999977350235, 0.3679999977350235, 0.3679999977350235]


## classes=5 , layers=[10, 10], 1000 epochs

In [42]:
print(history_off_the_shelf['valid_losses'][::100])
print()
print(history_off_the_shelf['train_losses'][::100])
print()
print(history_off_the_shelf['valid_accuracy'][::100])

[1.6235280156135559, 1.4976302146911622, 1.4778198719024658, 1.4685138702392577, 1.4632822513580321, 1.4599974513053895, 1.4577441096305848, 1.4561721086502075, 1.455070471763611, 1.4541317343711853]

[1.6377845022413466, 1.4939362631903754, 1.4735652685165406, 1.4636919293138715, 1.4579832388295069, 1.4542949391735924, 1.4518313255574968, 1.450035469399558, 1.4487053102917142, 1.4477064046594832]

[0.15600000098347663, 0.3600000023841858, 0.36200000047683717, 0.36600000262260435, 0.3659999996423721, 0.3740000009536743, 0.37400000393390653, 0.3680000066757202, 0.37200000286102297, 0.37200000286102297]
