In [None]:
import pandas as pd
import numpy as np
import torch
from torch import nn
import seaborn as sns
import plotly.express as px
import json
from sklearn.model_selection import train_test_split
import matplotlib as plt
import os

In [None]:
# Select the compute device for training: CUDA first, then Apple MPS, otherwise CPU.
if torch.cuda.is_available():
    device = "cuda"
elif torch.backends.mps.is_available():
    device = "mps"
else:
    device = "cpu"
print(f"Using {device} device")

In [None]:
class MLP(nn.Module):
    """Small fully connected network: Linear -> ReLU -> Linear.

    The defaults reproduce the original fixed architecture (300 -> 5 -> 1), so
    ``MLP()`` behaves exactly as before. 300 matches the flattened window width
    built elsewhere in this notebook (100 samples x 3 columns).

    Args:
        in_features: Width of each input row.
        hidden_features: Number of units in the single hidden layer.
        out_features: Number of raw output scores (logits) per input row.
    """

    def __init__(self, in_features=300, hidden_features=5, out_features=1):
        super().__init__()
        # Attribute name kept so existing checkpoints / state_dict keys still match.
        self.linear_relu_stack = nn.Sequential(
            nn.Linear(in_features, hidden_features),
            nn.ReLU(),
            nn.Linear(hidden_features, out_features),
        )

    def forward(self, x):
        """Return raw logits of shape ``(..., out_features)``; no sigmoid is applied here."""
        return self.linear_relu_stack(x)


In [None]:
# Build the network, then move its parameters onto the selected device.
model = MLP()
model = model.to(device)

# Read Data

In [None]:
# Load up to `nfiles` annotated datasets from ./data/<subdir>/.
# json_labels[k] and raw_dfs[k] always describe the same dataset.
json_labels = []   # parsed annotation JSON, one entry per loaded dataset
raw_dfs = []       # CSV columns 2, 3, 4 of the matching raw data file

nfiles = 3         # maximum number of annotated datasets to load
data_dir = './data'

# Sorted because os.listdir returns entries in arbitrary order; sorting makes the
# choice of datasets reproducible between runs and machines.
for subdir in sorted(os.listdir(data_dir)):
    subdir_path = os.path.join(data_dir, subdir)

    if not os.path.isdir(subdir_path):
        # A stray file in ./data (e.g. .DS_Store) would make os.listdir raise.
        print(f'Skipped {subdir}')
        continue

    names = sorted(os.listdir(subdir_path))
    annot_names = [name for name in names if name.endswith('.json')]
    raw_names = [name for name in names if name.startswith('raw_data')]

    # Load a dataset only when BOTH files are present. Appending them as a pair keeps
    # the two lists aligned regardless of the order in which the files are listed,
    # and regardless of missing or duplicate files in a folder.
    if not annot_names or not raw_names:
        print(f'Skipped {subdir}')
        continue

    # If a folder holds several candidates, the first one in sorted order is used.
    with open(os.path.join(subdir_path, annot_names[0]), 'r') as annot_file:
        json_labels.append(json.load(annot_file))

    # The raw file has no header row; columns 2-4 are kept
    # (presumably the three signal channels - TODO confirm).
    df = pd.read_csv(os.path.join(subdir_path, raw_names[0]), header=None)
    raw_dfs.append(df[[2, 3, 4]])
    print(subdir)

    if len(raw_dfs) >= nfiles:
        break

In [None]:
# Window data
#
# Every run of WINDOW_SIZE consecutive rows becomes one feature vector laid out
# column by column: [col0[t : t+W], col1[t : t+W], col2[t : t+W]].

WINDOW_SIZE = 100   # rows per window; with 3 columns this gives 300 features per window

windowed = []

for i, df in enumerate(raw_dfs):
    print(f'Raw {i}: {df.shape}')

    values = df.to_numpy(dtype=float)
    if len(values) < WINDOW_SIZE:
        # Too short for a single window: keep an empty entry so that windowed[i]
        # still lines up with raw_dfs[i] / json_labels[i].
        w = np.empty((0, WINDOW_SIZE * values.shape[1]), dtype=float)
    else:
        # sliding_window_view returns a view of shape
        # (len(df) - WINDOW_SIZE + 1, n_columns, WINDOW_SIZE). Flattening the last two
        # axes yields, for each start row j, the same vector as
        # df[j:j+WINDOW_SIZE].to_numpy().T.flatten(), without a Python-level loop.
        views = np.lib.stride_tricks.sliding_window_view(values, WINDOW_SIZE, axis=0)
        w = views.reshape(len(views), -1)

    print(f'Windowed {i}: {w.shape}')
    windowed.append(w)

X = np.concatenate(windowed)
print(f'X: {X.shape}')

In [None]:
# Labels
#
# One 0/1 label per window. Window j is positive when index j lies inside the
# annotated span [annot['start'], annot['end']) AND inside at least one puff
# (puff bounds inclusive).
all_labels = []

for i, annot in enumerate(json_labels):
    # Each json file corresponds to the windowed data at the same index.
    n_windows = len(windowed[i])

    # Only indices 0 .. n_windows-1 exist as windows. Building masks over exactly these
    # indices means annotations that extend past the last window cannot raise
    # IndexError, and negative indices cannot wrap around to the end of the array.
    idx = np.arange(n_windows)
    in_span = (idx >= annot['start']) & (idx < annot['end'])

    in_puff = np.zeros(n_windows, dtype=bool)
    for puff in annot['puffs']:
        in_puff |= (idx >= puff['start']) & (idx <= puff['end'])

    labels = (in_span & in_puff).astype(float)
    all_labels.append(labels)

y = np.concatenate(all_labels)
print(f'y: {y.shape}')

In [None]:
# Plot the ground-truth labels on top of the concatenated raw signal.
df = pd.concat(raw_dfs, ignore_index=True)

# Each label vector is 99 entries shorter than its recording (one label per window),
# so it is zero-padded at the end to match. Labels are multiplied by 10 before plotting
# (presumably so they are visible next to the signal amplitude - TODO confirm).
df['labels'] = np.concatenate([np.pad(10 * segment, pad_width=(0, 99)) for segment in all_labels])

# Plot only every 10th row to keep the interactive figure light.
fig = px.line(df.iloc[::10])
fig.show(renderer='browser')
df

In [None]:
# Train model
#
# NOTE(review): `X_pt`, `y_pt` and `train` are not defined in the cells visible above this
# one - confirm they exist before this cell on a fresh kernel (Restart & Run All).
# NOTE(review): `annot` appears to be the variable left over from the last iteration of the
# label-building loop, so the slice below uses only the LAST file's start/end to index the
# data - confirm this is intended when more than one file is loaded.

# Start from a freshly initialised network.
model = MLP().to(device)

# Restrict features and targets to the annotated range, then make a stratified 75/25 split.
span = slice(annot['start'], annot['end'])
X_span = X_pt[span]
y_span = y_pt[span]
X_train, X_test, y_train, y_test = train_test_split(
    X_span,
    y_span,
    test_size=0.25,
    stratify=y_span.to('cpu'),
)

# Loss function and optimizer: the model outputs raw logits, hence BCEWithLogitsLoss.
loss_fn = nn.BCEWithLogitsLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=0.01)

epochs = 100
batch_size = 128

losses, test_losses = train(
    model, epochs, batch_size, loss_fn, optimizer,
    X_train, X_test, y_train, y_test,
)

# Training vs. validation loss curves.
history = pd.DataFrame({"loss": losses, "validation loss": test_losses})
figure = px.line(history)
figure.show(renderer='browser')