In [166]:
import pandas as pd
import numpy as np
import torch
from torch import nn
import seaborn as sns

In [None]:
# Get raw, windowed data
import os
import urllib.request

if not os.path.exists('./data/raw/smoking_input.csv') or not os.path.exists('./data/raw/smoking_targets.csv'):
    os.system('mkdir -p data/raw')
    urllib.request.urlretrieve("http://ifestos.cse.sc.edu/datasets/smoking_data.tar.gz", "data/smoking_data.tar.gz")

    os.system('tar -xzvf data/smoking_data.tar.gz -C data/raw/ --strip-components=1')

In [12]:
# Load data
X = pd.read_csv('data/raw/smoking_input.csv', header=None)
y = pd.read_csv('data/raw/smoking_targets.csv', header=None)

In [167]:
# Formatting & Preprocessing

# Get cpu or gpu device for training
device = "cuda" if torch.cuda.is_available() else "mps" if torch.backends.mps.is_available() else "cpu"
print(f"Using {device} device")

y.columns = ["label"]

# Fill NaNs
X = X.fillna(method='bfill')

# Put in torch tensors
X_pt = torch.from_numpy(X.to_numpy()).float().to(device)
y_pt = torch.from_numpy(y.to_numpy()).float().to(device)

# train-test split

from sklearn.model_selection import train_test_split

(X_train, X_test, y_train, y_test) = train_test_split(X_pt, y_pt, test_size=0.2)

Using cuda device


# Neural network

## Pytorch Model
- input layer 300 -> 10
- activation function ReLU
- hidden layer 10 -> 1
- output function Tanh

In [237]:
class MLP(nn.Module):
    def __init__(self):
        super().__init__()
        self.linear_relu_stack = nn.Sequential(
            nn.Linear(300, 10),
            nn.ReLU(),
            nn.Linear(10, 1)
        )
    
    def forward(self, x):
        logits = self.linear_relu_stack(x)
        return logits  

model = MLP().to(device)

### Train Model

In [238]:
# Loss function and optimizer
loss_fn = nn.BCEWithLogitsLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=0.01)

model.train()

epochs = 2500
for i in range(epochs):
    # Predict and calulate loss on X train
    logits = model(X_train)
    loss = loss_fn(logits, y_train)

    # Backpropagation


    optimizer.zero_grad()
    loss.backward()
    optimizer.step()
    if i%100 == 0:
        print(f"Epoch {i}: ", end='')
        print(f'\tLoss={loss.item()}')

Epoch 0: 	Loss=0.7529674768447876
Epoch 100: 	Loss=0.03200244903564453
Epoch 200: 	Loss=0.016902804374694824
Epoch 300: 	Loss=0.010966378264129162
Epoch 400: 	Loss=0.006567590404301882
Epoch 500: 	Loss=0.00480006355792284
Epoch 600: 	Loss=0.0037112447898834944
Epoch 700: 	Loss=0.0028870324604213238
Epoch 800: 	Loss=0.0023192083463072777
Epoch 900: 	Loss=0.0019045922672376037
Epoch 1000: 	Loss=0.0015605189837515354
Epoch 1100: 	Loss=0.001278255949728191
Epoch 1200: 	Loss=0.0010367868235334754
Epoch 1300: 	Loss=0.0008328840485773981
Epoch 1400: 	Loss=0.000656703719869256
Epoch 1500: 	Loss=0.0005191547679714859
Epoch 1600: 	Loss=0.0004152397159487009
Epoch 1700: 	Loss=0.0003379601112101227
Epoch 1800: 	Loss=0.00027638388564810157
Epoch 1900: 	Loss=0.00022878227173350751
Epoch 2000: 	Loss=0.0001926984405145049
Epoch 2100: 	Loss=0.000163731470820494
Epoch 2200: 	Loss=0.00014037500659469515
Epoch 2300: 	Loss=0.00012152841372881085
Epoch 2400: 	Loss=0.00010690922499634326


### Test Model

In [239]:
model.eval()
with torch.no_grad():
    logits = model(X_test)

loss = loss_fn(logits, y_test)

pred = torch.round(nn.Sigmoid()(logits))
accuracy = (sum(y_test == pred) / len(y_test)).item()

print(f'Test Loss: {loss:.4}')
print(f'Accuracy: {100*accuracy:.4}%')

Test Loss: 0.004102
Accuracy: 99.93%


# Test on Continous Signal

In [305]:
file_index = 16

In [303]:
# Features

df = pd.read_csv(f'data/{file_index}/raw_data.csv', header=None)

# Window Data
X_raw = df[[2,3,4]]

X = np.empty((len(X_raw)-99, 300), dtype=float)
for i in range(len(X_raw)-99):
    X[i] = X_raw[i:i+100].to_numpy().T.flatten()

In [333]:
# Labels
import json

annot = {}
y = np.zeros((len(X)))


with open(f'data/{file_index}/16_data.json') as f:
    annot = json.load(f)

for i in range(annot['start'], annot['end']):
    for puff in annot['puffs']:
        if i >= puff['start'] and i <= puff['end'] - 99:
            y[i] = 1

In [375]:
import seaborn as sns
import plotly.express as px



fig = px.line(data_frame=df[[2,3,4]])
# fig.show(renderer='browser')
y_graph = np.pad(y, (0,99), mode='edge')
pd.concat([X_raw, pd.DataFrame(y_graph)])

Unnamed: 0,2,3,4,0
0,0.823242,0.227417,10.031250,
1,0.809082,0.318359,10.125000,
2,0.749023,0.406982,10.031250,
3,0.780273,0.437988,10.054688,
4,0.878418,0.442871,10.179688,
...,...,...,...,...
142495,,,,0.0
142496,,,,0.0
142497,,,,0.0
142498,,,,0.0
