# TODO:

Try adding skip connections, resnet style

1. [x] load dataset into tensor, convert to float32  
    - [x] apply normalization
2. [x] Implement DataParallel training  
    - [x] increase minibatch size to 128 (32 per device across 4 GPUs)
3. [ ] try training and benchmark speed
4. [ ] fix simulation script to get the correct labels and retrain

GPU datasheet (we have the SXM version): https://www.nvidia.com/content/dam/en-zz/Solutions/Data-Center/a100/pdf/nvidia-a100-datasheet-us-nvidia-1758950-r4-web.pdf

TODO: switch from DataParallel to DistributedDataParallel at some point, and move everything from the notebook into a training script.

Links:
https://pytorch.org/docs/stable/notes/cuda.html#tensorfloat-32-tf32-on-ampere-devices


In [1]:
import numpy as np
import scipy.io
import matplotlib.pyplot as plt
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.utils.data import Dataset, TensorDataset, DataLoader
from pathlib import Path
import h5py
import numpy as np
import dask.array as da
from torchvision.transforms import Normalize
from sklearn.model_selection import train_test_split
import sklearn
import pandas as pd
from functools import partial

In [2]:
# Components of the run identifier: run date, dataset, architecture and loss.
date_ = '04-19'
dataset_name = 'spectrogram'
model_type = 'MyCNN-double-conv'
# model_type = 'Simple-NN'
loss_name = 'mse-euc-hybrid-loss'

# Underscore-joined identifier; it names the plot, CSV and checkpoint files
# written at the end of training.
name_parts = (date_, dataset_name, model_type, loss_name)
model_name = '_'.join(name_parts)
model_name

'04-19_spectrogram_MyCNN-double-conv_mse-euc-hybrid-loss'

In [3]:
# Print the name of every CUDA device PyTorch can see, in index order, so the
# training GPUs can be selected by index.
num_devices = torch.cuda.device_count()
for device_idx in range(num_devices):
    print(torch.cuda.get_device_name(device_idx))

# use devices 0-3

NVIDIA A100-SXM4-80GB
NVIDIA A100-SXM4-80GB
NVIDIA A100-SXM4-80GB
NVIDIA A100-SXM4-80GB
NVIDIA DGX Display


### Load dataset

In [4]:
# spectrogram:

# Read the spectrogram features and position labels from the HDF5-backed .mat
# files. The context manager closes both file handles as soon as the data has
# been copied into tensors, instead of leaving them open for the whole session.
with h5py.File('samplesChirp.mat', 'r') as feats, \
        h5py.File('labelsChirp.mat', 'r') as labels:
    feats_da = da.from_array(feats['samples']).astype('float32') # cast to float32
    feats_da = feats_da[:, None, :, :] # add channel dimension
    labels_da = da.from_array(labels['labels']['position']).astype('float32')
    # .compute() has to run while the files are still open, because the dask
    # arrays only reference the on-disk datasets until they are computed.
    # NOTE: feats_da / labels_da cannot be re-computed after this block.
    X = torch.Tensor(feats_da.compute())
    Y = torch.Tensor(labels_da.compute())

X.shape, Y.shape, X.dtype, Y.dtype




# feats4T:

# feats, labels = scipy.io.loadmat('output/feats4T_.1R.mat'), scipy.io.loadmat('output/labels4T_.1R.mat')

# feats = feats['features']
# feats = feats.astype('float32')
# feats = feats.T
# feats = feats.reshape((feats.shape[0], -1, feats.shape[-1]))
# feats = feats[:, None, :, :]

# labels = labels['lp']
# labels = labels.astype('float32')
# labels = labels.T

# X = torch.Tensor(feats)
# Y = torch.Tensor(labels)

# X.shape, Y.shape, X.dtype, Y.dtype

(torch.Size([1920, 1, 300, 1024]),
 torch.Size([1920, 3]),
 torch.float32,
 torch.float32)

In [5]:
# Hold out 20% of the samples for testing; the fixed random_state keeps the
# split identical from run to run.
split = train_test_split(X, Y, test_size=0.2, random_state=42)
X_train, X_test, Y_train, Y_test = split

In [6]:
# Mean and standard deviation over every element of the training features.
# Only the training split is used, and the same statistics later normalize
# both the training and the test dataset.
X_train_mean, X_train_std = X_train.mean(), X_train.std()
X_train_mean, X_train_std

(tensor(3.6102), tensor(26.3429))

### Normalization
1. create custom Dataset class based on TensorDataset that will apply a normalization transform if provided
2. create train and test datasets, pass in X_train_mean and X_train_std

In [7]:
class CustomTensorDataset(Dataset):
    """Dataset over parallel tensors with an optional feature transform.

    ``tensors[0]`` holds the features and ``tensors[1]`` the labels; item ``i``
    is ``(tensors[0][i], tensors[1][i])``. Any further tensors are only
    length-checked and never returned.

    Args:
        tensors: Sequence of tensors with the same size along dimension 0.
        transforms: Optional callable applied to the feature item on every
            lookup. Labels are returned untouched.
    """

    def __init__(self, tensors, transforms=None):
        # All tensors must provide the same number of samples.
        assert all(tensors[0].shape[0] == t.shape[0] for t in tensors)
        self.transforms = transforms
        self.tensors = tensors

    def __len__(self):
        # Number of samples = size of the feature tensor along dimension 0.
        return self.tensors[0].shape[0]

    def __getitem__(self, index):
        sample = self.tensors[0][index]
        if self.transforms is not None:
            sample = self.transforms(sample)

        label = self.tensors[1][index]
        return sample, label

In [8]:
# Both splits are normalized with the *training* mean/std so the test data is
# scaled exactly like the data the model is fitted on.
normalize = Normalize(X_train_mean, X_train_std)
train_dataset = CustomTensorDataset([X_train, Y_train], normalize)
test_dataset = CustomTensorDataset([X_test, Y_test], normalize)

# One minibatch size for both loaders (the notes above plan 128 = 32 per device).
batch_size = 128
train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
# Evaluation gains nothing from shuffling; a fixed order makes the summed test
# loss reproducible from epoch to epoch.
test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False)

### Define models and functions

In [10]:
class MyCNN(nn.Module):
    """CNN regressor: three double-convolution blocks followed by an MLP head.

    Each block is (3x3 conv -> BatchNorm -> ReLU) twice, then a 2x2 max-pool
    that halves both spatial dimensions. Channel widths are 16, 32 and 64.
    The flattened feature map then goes through linear layers of width 1000,
    100 and 20 down to 3 outputs.

    All layers live in one flat ``nn.Sequential`` (``self.seq``), so parameter
    names stay ``seq.<index>.*``.

    Args:
        linear_in_dim: Number of features entering the first linear layer,
            i.e. ``64 * (H // 8) * (W // 8)`` for a ``1 x H x W`` input.
            The default 303104 matches the 1 x 300 x 1024 spectrogram samples;
            pass 768 for the 1 x 16 x 48 feats4T samples.
    """

    def __init__(self, linear_in_dim=303104):
        super().__init__()

        # Dropout2d(p=0.2) after each pooling layer was tried and is left out.
        layers = [
            *self._double_conv_block(1, 16),   # conv block 1
            *self._double_conv_block(16, 32),  # conv block 2
            *self._double_conv_block(32, 64),  # conv block 3

            # linear block
            nn.Flatten(),
            nn.Linear(linear_in_dim, 1000),
            nn.ReLU(),
            nn.Linear(1000, 100),
            nn.ReLU(),
            nn.Linear(100, 20),
            nn.ReLU(),
            nn.Linear(20, 3),
        ]
        self.seq = nn.Sequential(*layers)

    @staticmethod
    def _double_conv_block(in_channels, out_channels):
        """Return the layers of one conv block as a flat list.

        The 3x3 convolutions use stride 1 and padding 1, so they keep the
        spatial size; only the trailing max-pool halves it.
        """
        return [
            nn.Conv2d(in_channels, out_channels, kernel_size=3, stride=1, padding=1),
            nn.BatchNorm2d(out_channels),
            nn.ReLU(),
            nn.Conv2d(out_channels, out_channels, kernel_size=3, stride=1, padding=1),
            nn.BatchNorm2d(out_channels),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=2, stride=2),
        ]

    def forward(self, x):
        """Run ``x`` through the whole sequential stack and return its output."""
        return self.seq(x)
        
class SimpleNN(nn.Module):
    """Small fully connected baseline: flatten -> 20 -> 10 -> 3 with ReLUs.

    Args:
        in_features: Number of values per sample after flattening. The
            default 768 matches the 1 x 16 x 48 feats4T samples.
    """

    def __init__(self, in_features=768):
        super().__init__()
        self.seq = nn.Sequential(
            nn.Flatten(),
            nn.Linear(in_features, 20),
            nn.ReLU(),
            nn.Linear(20, 10),
            nn.ReLU(),
            nn.Linear(10, 3)
        )

    def forward(self, x):
        """Run ``x`` through the sequential stack and return its output."""
        return self.seq(x)

def EucLoss(a: torch.Tensor, b: torch.Tensor, reduction='avg') -> torch.Tensor:
    """Euclidean distance between corresponding 3-vectors, reduced to a scalar.

    Args:
        a: Tensor whose last dimension has size 3.
        b: Tensor with exactly the same shape as ``a``.
        reduction: ``'sum'`` adds up the per-vector distances; any other value
            (including the default ``'avg'``) returns their mean.

    Returns:
        Scalar tensor holding the summed or averaged distance.

    Raises:
        AssertionError: If the shapes differ or the last dimension is not 3.
    """
    assert a.shape == b.shape
    assert b.shape[-1] == 3

    # vector_norm yields the same value as sqrt(sum(square)), but its gradient
    # is defined (zero) where the distance is exactly 0. The explicit sqrt
    # back-propagates inf * 0 = NaN there, which would poison training.
    distances = torch.linalg.vector_norm(a - b, dim=-1)

    if reduction == 'sum':
        return distances.sum()
    return distances.mean()

def EucLossSquared(a: torch.Tensor, b: torch.Tensor, reduction='avg') -> torch.Tensor:
    """Squared Euclidean distance between corresponding 3-vectors, as a scalar.

    Args:
        a: Tensor whose last dimension has size 3.
        b: Tensor with exactly the same shape as ``a``.
        reduction: ``'sum'`` returns the total of all squared element
            differences; any other value (including the default ``'avg'``)
            returns the mean of the per-vector squared distances.

    Returns:
        Scalar tensor holding the reduced squared distance.

    Raises:
        AssertionError: If the shapes differ or the last dimension is not 3.
    """
    assert a.shape == b.shape
    assert b.shape[-1] == 3

    squared_diff = (a - b).square()
    if reduction == 'sum':
        return squared_diff.sum()
    return squared_diff.sum(dim=-1).mean()


In [11]:
# Wrap the CNN in DataParallel restricted to GPU indices 0-3, then move it to
# CUDA. The trailing bare name displays the module tree in the notebook.
gpu_ids = [0, 1, 2, 3]
model = nn.DataParallel(MyCNN(), device_ids=gpu_ids)
model = model.cuda()
model

DataParallel(
  (module): MyCNN(
    (seq): Sequential(
      (0): Conv2d(1, 16, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
      (1): BatchNorm2d(16, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (2): ReLU()
      (3): Conv2d(16, 16, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
      (4): BatchNorm2d(16, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (5): ReLU()
      (6): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
      (7): Conv2d(16, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
      (8): BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (9): ReLU()
      (10): Conv2d(32, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
      (11): BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (12): ReLU()
      (13): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
      (14): Conv2d(32, 64

In [12]:
# Optimizer: Adam with its default hyperparameters over all model parameters.
optimizer = torch.optim.Adam(model.parameters())
# optimizer = torch.optim.RMSprop(model.parameters())

# Training minimizes MSE. Evaluation uses the *summed* Euclidean distance so
# the total can be divided by the number of test samples afterwards.
trainloss = nn.MSELoss()
testloss = partial(EucLoss, reduction='sum')

#### Training/Evaluating NN

In [None]:
# Train for a fixed number of epochs, evaluating on the test set after every
# epoch, then export the loss curves (plot + CSV) and the model weights.
num_epochs = 100 
# One row per epoch; column 0 = training loss, column 1 = test loss.
loss_tracker = np.zeros((num_epochs, 2))

for epoch in range(num_epochs):
    # ---- training pass ----
    train_loss = 0    
    model = model.train()
    
    for batch_idx, (ft, lbl) in enumerate(train_loader):
        # ft, lbl = ft.to(device), lbl.to(device)
        optimizer.zero_grad()
        # NOTE(review): ft is passed as loaded (never moved with .cuda() here);
        # presumably the nn.DataParallel wrapper distributes it to the GPUs — confirm.
        output = model(ft)
        # Labels are moved to the current CUDA device before computing the loss.
        lbl = lbl.cuda()
        # loss = crit(output, lbl, reduction='sum')
        loss = trainloss(output, lbl)
        loss.backward()
        optimizer.step()
        train_loss += loss.item()
    # Average of the per-batch training losses (divides by the number of batches).
    train_loss /= len(train_loader)
    loss_tracker[epoch, 0] = train_loss
        
    
    # ---- evaluation pass ----
    test_loss = 0
    model = model.eval()
    
    # No gradients are needed while evaluating.
    with torch.no_grad():
        for batch_idx, (ft, lbl) in enumerate(test_loader):
            # ft, lbl = ft.to(device), lbl.to(device)
            output = model(ft)
            lbl = lbl.cuda()
            # loss = crit(output, lbl, reduction='sum')
            # testloss is EucLoss with reduction='sum', so each batch adds the
            # total of its per-sample distances.
            loss = testloss(output, lbl)
            test_loss += loss.item()
    test_loss /= len(test_dataset) # get average loss per sample of whole dataset
    loss_tracker[epoch, 1] = test_loss
            
    print('Epoch {} | Training loss (MSE) = {:.4f} | Test loss (Euc Dist) = {:.4f}'.format(epoch, train_loss, test_loss))
    
    
# make plot
# Both columns of loss_tracker are drawn against the epoch index and saved as
# ./loss_plots/<model_name>.png (the directory is created if missing).
img_dir = Path('./loss_plots')
img_dir.mkdir(parents=True, exist_ok=True)
plt.figure()
plt.plot(loss_tracker)
plt.title('Loss vs Epoch')
plt.legend(['Train', 'Test'])
plt.xlabel('Epoch')
plt.ylabel('Loss')
plt.grid()
plt.savefig(img_dir / ('{}.png'.format(model_name)))

# export loss_tracker as csv
# Written to ./loss_data/<model_name>.csv with columns 'Train' and 'Test'.
loss_dir = Path('./loss_data')
loss_dir.mkdir(parents=True, exist_ok=True)
pd.DataFrame(loss_tracker, columns=['Train', 'Test']).to_csv(loss_dir / ('{}.csv'.format(model_name)))

# save model
# Written to ./models/<model_name>.pt.
# NOTE(review): model is the nn.DataParallel wrapper, so the saved state_dict
# keys presumably carry a 'module.' prefix — confirm before loading into a bare MyCNN.
model_dir = Path('./models')
model_dir.mkdir(parents=True, exist_ok=True)
torch.save(model.state_dict(), model_dir / ('{}.pt'.format(model_name)))

Epoch 0 | Training loss (MSE) = 57.2544 | Test loss (Euc Dist) = 3.6169


### Notes:

The CNN seemed to help accuracy, as did adding more linear layers. However, the model is overfitting heavily. BatchNorm didn't really make a difference, and dropout seems to make things worse.