Implementing baseline neural networks defined in this paper `https://arxiv.org/pdf/1611.06455.pdf`

In [1]:
import torch
from torch import nn
from torch.utils.data import Dataset, DataLoader
import os
from tqdm import tqdm
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
from smokingml.modules import test, train, validate_on_holdouts

In [2]:
WIN_SIZE = 101

class SmokingDataset(Dataset):
    def __init__(self, dir):
        self.dir = dir
    
    def __len__(self):
        return len(os.listdir(self.dir))
    
    def __getitem__(self, key):
        if isinstance(key, slice):
            stop, start, step = key.indices(len(self))

            length = len(range(stop, start, step))
            X = torch.zeros([length, 3*WIN_SIZE])
            y = torch.zeros([length, 1])

            for j,i in enumerate(range(stop, start, step)):
                xi, yi = self[i]
                X[j] = xi
                y[j] = yi

            return (X, y)

        elif isinstance(key, int):
            X, y = torch.load(os.path.join(self.dir, f'{key}.pt'))
            return (X.flatten(), y)

In [3]:
dir = '../data/working-dataset-20Hz/'
train_dataset = SmokingDataset(f'{dir}/4_all/train/')
test_dataset = SmokingDataset(f'{dir}/4_all/test/')

In [4]:
# Get cpu or gpu device for training
device = "cuda:1" if torch.cuda.is_available() else "mps" if torch.backends.mps.is_available() else "cpu"
print(f"Using {device} device")

Using cuda:1 device


# MLP

Three hidden layer MLP, with 500 neurons each

Dropout after each layer with probabilites:
- 0.1 after input layer
- 0.2 between hidden layers
- 0.3 before output neuron


one output neuron to predict smoking

In [5]:
# Define Model
nhl = 500

class MLP(nn.Module):
    def __init__(self):
        super().__init__()
        
        # First Hidden Layer
        self.dropout1 = nn.Dropout(p=0.1)
        self.h1 = nn.Linear(in_features=WIN_SIZE*3, out_features=nhl)
        self.relu1 = nn.ReLU()

        # Second Hidden Layer
        self.dropout2 = nn.Dropout(p=0.2)
        self.h2 = nn.Linear(in_features=nhl, out_features=nhl)
        self.relu2 = nn.ReLU()

        # Third Hidden Layer
        self.dropout3 = nn.Dropout(p=0.2)
        self.h3 = nn.Linear(in_features=nhl, out_features=nhl)
        self.relu3 = nn.ReLU()

        # Output Layer
        self.dropout4 = nn.Dropout(p=0.3)
        self.h4 = nn.Linear(in_features=nhl, out_features=1)

    def forward(self, x):
        # First Hidden Layer
        x = self.dropout1(x)
        x = self.h1(x)
        x = self.relu1(x)

        # Second Hidden Layer
        x = self.dropout2(x)
        x = self.h2(x)
        x = self.relu2(x)

        # Third Hidden Layer
        x = self.dropout3(x)
        x = self.h3(x)
        x = self.relu3(x)

        # Output Layer
        x = self.dropout4(x)
        logits = self.h4(x)

        return logits

model = MLP().to(device)

criterion = nn.BCEWithLogitsLoss()
optimizer = torch.optim.Adadelta(model.parameters(), lr=0.1, rho=0.95, eps=1e-8)

### Train and test

In [None]:
epochs = 40
batch_size = 64

train(
    train_dataset=train_dataset,
    test_dataset=test_dataset,
    model=model,
    epochs=epochs,
    batch_size=batch_size,
    test_batch_size=10000,
    optimizer=optimizer,
    criterion=criterion,
    date='',
    device=device,
    project='.'
)

In [8]:
model.load_state_dict(torch.load('./model/model-epoch-22.pt'))

<All keys matched successfully>

In [6]:
# Evaluate on holdouts
validate_on_holdouts(
    model=model,
    holdout_dir=f'{dir}/holdouts',
    df_dir=f'{dir}/1_xyz',
    raw_dir=f'{dir}/0_raw',
    date='',
    criterion=criterion,
    batch_size=10000,
    win_size=WIN_SIZE,
    device=device,
    project='.',
    dm_factor=5
)

[92mStarting validate_on_holdouts[0m


mkdir: cannot create directory ‘./results/holdouts/60’: File exists
mkdir: cannot create directory ‘./results/holdouts/57’: File exists
mkdir: cannot create directory ‘./results/holdouts/53’: File exists


KeyboardInterrupt: 

### Evaluate on Holdouts

# FCN
Fully Convolutional Network
All convolution with 1D kernels, no stride

Three convolution blocks, all following by batch norm and relu:
1. 128 kernels, kernel size 8
2. 256 kernels, kernel size 5
3. 128 kernels, kernel size 3

Final global pooling layer before output neuron

In [7]:
# Reshape X to contain 3 channels (shape 3x101 instead of 303)

WIN_SIZE = 101

class SmokingDatasetCNN(Dataset):
    def __init__(self, dir):
        self.dir = dir
    
    def __len__(self):
        return len(os.listdir(self.dir))
    
    def __getitem__(self, key):
        if isinstance(key, slice):
            stop, start, step = key.indices(len(self))

            length = len(range(stop, start, step))
            X = torch.zeros([length, 3, WIN_SIZE])
            y = torch.zeros([length, 1])

            for j,i in enumerate(range(stop, start, step)):
                xi, yi = self[i]
                X[j] = xi.reshape(3, WIN_SIZE)
                y[j] = yi

            return (X, y)

        elif isinstance(key, int):
            X, y = torch.load(os.path.join(self.dir, f'{key}.pt'))
            X = X.reshape([3, WIN_SIZE])
            return (X, y)

In [8]:
train_dataset = SmokingDatasetCNN(f'{dir}/4_all/train')
test_dataset = SmokingDatasetCNN(f'{dir}/4_all/test')

In [9]:
class FCN(nn.Module):
    def __init__(self):
        super().__init__()
        
        # First Convolution Block
        self.conv1 = nn.Conv1d(in_channels=3, out_channels=128, kernel_size=8)
        self.bn1 = nn.BatchNorm1d(num_features=128)
        self.relu1 = nn.ReLU()

        # Second Convolution Block
        self.conv2 = nn.Conv1d(in_channels=128, out_channels=256, kernel_size=5)
        self.bn2 = nn.BatchNorm1d(num_features=256)
        self.relu2 = nn.ReLU()

        # Third Convolution Block
        self.conv3 = nn.Conv1d(in_channels=256, out_channels=128, kernel_size=3)
        self.bn3 = nn.BatchNorm1d(num_features=128)
        self.relu3 = nn.ReLU()

        # Global Average Pooling
        self.gp = lambda x: torch.mean(x, dim=2)    # Take mean across each feature map (N, C, L) => (N,C)
        
        # Output Later
        self.output = nn.Linear(in_features=128, out_features=1)
    
    def forward(self, x):
        x = self.conv1(x)
        x = self.bn1(x)
        x = self.relu1(x)

        x = self.conv2(x)
        x = self.bn2(x)
        x = self.relu2(x)

        x = self.conv3(x)
        x = self.bn3(x)
        x = self.relu3(x)

        x = self.gp(x)
        logits = self.output(x)

        return logits

model = FCN().to(device)

criterion = nn.BCEWithLogitsLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=0.001, betas=(0.9, 0.999), eps=1e-8)

In [17]:
epochs = 40
batch_size = 64

train(
    train_dataset=train_dataset,
    test_dataset=test_dataset,
    model=model,
    epochs=epochs,
    batch_size=batch_size,
    test_batch_size=10000,
    optimizer=optimizer,
    criterion=criterion,
    date='',
    device=device,
    project='.'
)

[92mStarting train[0m


Epoch 39: Testing: 100%|██████████| 40/40 [12:21<00:00, 18.55s/it] 

[93mFinished train. Elapsed time: 741.856[0m





In [10]:
model.load_state_dict(torch.load('./model/model-epoch-29.pt'))

<All keys matched successfully>

In [11]:
validate_on_holdouts(
    model=model,
    holdout_dir=f'{dir}/holdouts',
    df_dir=f'{dir}/1_xyz',
    raw_dir=f'{dir}/0_raw',
    date='',
    criterion=criterion,
    batch_size=10000,
    win_size=WIN_SIZE,
    device=device,
    project='.',
    dm_factor=5,
    cnn=True
)

[92mStarting validate_on_holdouts[0m


mkdir: cannot create directory ‘./results/holdouts/60’: File exists
mkdir: cannot create directory ‘./results/holdouts/57’: File exists
mkdir: cannot create directory ‘./results/holdouts/53’: File exists
mkdir: cannot create directory ‘./results/holdouts/8’: File exists
mkdir: cannot create directory ‘./results/holdouts/31’: File exists
mkdir: cannot create directory ‘./results/holdouts/27’: File exists


┌──────────┬──────────┬──────────┬──────────┬──────────┬──────────┬──────────┐
│          │        8 │       27 │       31 │       53 │       57 │       60 │
├──────────┼──────────┼──────────┼──────────┼──────────┼──────────┼──────────┤
│ Accuracy │ 0.927887 │ 0.937633 │ 0.952751 │ 0.901715 │ 0.973956 │ 0.975031 │
├──────────┼──────────┼──────────┼──────────┼──────────┼──────────┼──────────┤
│ Loss     │ 1.79284  │ 0.502777 │ 0.847194 │ 0.483618 │ 1.14741  │ 0.298814 │
└──────────┴──────────┴──────────┴──────────┴──────────┴──────────┴──────────┘
[93mFinished validate_on_holdouts. Elapsed time: 6.392[0m


# ResNet

Adds shortcut connection in ecah residual block
Three residual blocks. Each consist of three convolutional blocks 
- conv blocks are same as before of 64, 128, and 128 kernels each
    - conv kernel size goes 8->5->3 for each residual block
- final output of residual block is output of third convolutional block + input to residual block

output of third residual block passed to a global pooling layer before a linear layer to a single output neuron

In [21]:
class ResNet(nn.Module):
    def conv_block(self, in_channels, out_channels, kernel_size):
        return nn.Sequential(
            nn.Conv1d(in_channels=in_channels, out_channels=out_channels, kernel_size=kernel_size),
            nn.BatchNorm1d(num_features=out_channels),
            nn.ReLU()
        )
    
    def res_block(self, in_channels, out_channels):
        return nn.Sequential(
            self.conv_block(in_channels=in_channels, out_channels=out_channels, kernel_size=8),
            self.conv_block(in_channels=out_channels, out_channels=out_channels, kernel_size=5),
            self.conv_block(in_channels=out_channels, out_channels=out_channels, kernel_size=3)
        )

    def __init__(self):
        super().__init__()
        self.res1 = self.res_block(in_channels=3, out_channels=64)
        self.res2 = self.res_block(in_channels=64, out_channels=128)
        self.res3 = self.res_block(in_channels=128, out_channels=128)
        self.gp = lambda x: torch.mean(x, dim=2)    # Take mean across each feature map (N, C, L) => (N,C)
        self.output = nn.Linear(in_features=128, out_features=1)

    def forward(x):
        pass

model = ResNet().to(device)

criterion = nn.BCEWithLogitsLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=0.001, betas=(0.9, 0.999), eps=1e-8)

In [22]:
x,y = test_dataset[0]
x,y = x.to(device), y.to(device)
model.res1(x)

RuntimeError: running_mean should contain 94 elements not 64