In [None]:
import pandas as pd
import numpy as np
import torch
from torch import nn
import seaborn as sns

In [None]:
# Get raw, windowed data
import os
import urllib.request

if not os.path.exists('./data/raw/smoking_input.csv') or not os.path.exists('./data/raw/smoking_targets.csv'):
    os.system('mkdir -p data/raw')
    urllib.request.urlretrieve("http://ifestos.cse.sc.edu/datasets/smoking_data.tar.gz", "data/smoking_data.tar.gz")

    os.system('tar -xzvf data/smoking_data.tar.gz -C data/raw/ --strip-components=1')

In [12]:
# Load data
X = pd.read_csv('data/raw/smoking_input.csv', header=None)
y = pd.read_csv('data/raw/smoking_targets.csv', header=None)

In [18]:
# Formatting & Preprocessing

# Get cpu or gpu device for training
device = "cuda" if torch.cuda.is_available() else "mps" if torch.backends.mps.is_available() else "cpu"
print(f"Using {device} device")

y.columns = ["label"]

# Fill NaNs
X = X.fillna(method='bfill')

# Put in torch tensors
X_pt = torch.from_numpy(X.to_numpy()).float().to(device)
y_pt = torch.from_numpy(y.to_numpy()).float().to(device)

# train-test split

from sklearn.model_selection import train_test_split

(X_train, X_test, y_train, y_test) = train_test_split(X_pt, y_pt, test_size=0.2)

Using cuda device


# Neural network

In [127]:
# Hidden layer

# X_test:   70306 x 300
# W1:         300 x  10
# b1:                10 -> broadcasted to 70306 x 10
W1 = torch.rand(300, 10, requires_grad=True).to(device)
b1 = torch.rand(10, requires_grad=True).to(device)
h1 = nn.ReLU()(X_test @ W1 + b1)

# Output Layer
# h1:   70306 x 10
# W2:      10 x  1
# b2:            1 -> broadcasted to 70306 x 1
W2 = torch.rand(10, 1, requires_grad=True).to(device)
b2 = torch.rand(1, requires_grad=True).to(device)
logits = h1 @ W2 + b2
probs = 1 / (1 + torch.exp(-logits)) # sigmoid

loss = y_test * torch.log(probs) + (1-y_test) * torch.log(1-probs)
loss

tensor([[-0.1700],
        [-0.6913],
        [    nan],
        ...,
        [-0.6913],
        [-0.6950],
        [-0.6950]], device='cuda:0', grad_fn=<AddBackward0>)

In [129]:
logits

tensor([2208.2280], device='cuda:0', grad_fn=<UnbindBackward0>)

## Pytorch Class Model
- input layer 300 -> 10
- activation function ReLU
- hidden layer 10 -> 1
- output function Tanh

In [14]:
class MLP(nn.Module):
    def __init__(self):
        super().__init__()
        self.linear_relu_stack = nn.Sequential(
            nn.Linear(300, 10),
            nn.ReLU(),
            nn.Linear(10, 1)
        )
    
    def forward(self, x):
        logits = self.linear_relu_stack(x)
        return logits    

### Train Model

In [None]:
model = MLP().to(device)

# Loss function and optimizer
loss_fn = nn.BCEWithLogitsLoss()
optimizer = torch.optim.SGD(model.parameters(), lr=1e-3)

model.train()

epochs = 1000
for i in range(epochs):
    # Predict and calulate loss on X train
    pred = model(X_train)
    loss = loss_fn(pred, y_train)

    # Backpropagation
    optimizer.zero_grad()
    loss.backward()
    optimizer.step()
    if i%50 == 0:
        print(f"Epoch {i}: ", end='')
        print(f'\tLoss={loss.item()}')

### Test Model

In [None]:
model.eval()
with torch.no_grad():
    pred = model(X_test)

loss = loss_fn(pred, y_test).item()

# couldn't help but make this a one-liner but not sure about this, how to choose whether pred is 0 or 1 label? (right now: 1 if pred >=0 else 0)
accuracy = (y_test.to('cpu').numpy() == np.array([ (lambda p: 1 if p>=0 else 0)(p) for p in pred.detach().to('cpu').numpy()]).reshape(-1,1)).sum()/len(y_test)

print(f'Test Loss: {loss:.3}')
print(f'Accuracy: {100*accuracy:.3}%')

# Test on Continous Signal

In [10]:
df = pd.read_csv('data/16/raw_data.csv', header=None)
df

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,10
0,86932173146667,0,0.823242,0.227417,10.031250,0,0,19-Apr-22 11:25:49,ACCEL,NONE,1
1,86932183046667,0,0.809082,0.318359,10.125000,0,0,19-Apr-22 11:25:49,ACCEL,NONE,2
2,86932192946667,0,0.749023,0.406982,10.031250,0,0,19-Apr-22 11:25:49,ACCEL,NONE,3
3,86932202846667,0,0.780273,0.437988,10.054688,0,0,19-Apr-22 11:25:49,ACCEL,NONE,4
4,86932212746667,0,0.878418,0.442871,10.179688,0,0,19-Apr-22 11:25:49,ACCEL,NONE,5
...,...,...,...,...,...,...,...,...,...,...,...
142495,88370976246667,0,-3.537109,1.498047,10.898438,5,3,19-Apr-22 11:49:49,ACCEL,Medication,142496
142496,88370986346667,0,-1.946289,-2.875000,8.726562,5,3,19-Apr-22 11:49:49,ACCEL,Medication,142497
142497,88370996446667,0,-2.613281,-1.670898,9.234375,5,3,19-Apr-22 11:49:49,ACCEL,Medication,142498
142498,88371006546667,0,-1.484375,-0.368652,9.132812,5,3,19-Apr-22 11:49:49,ACCEL,Medication,142499
