In [1]:
import torch
from torch import nn
from torch.utils.data import DataLoader, Dataset, TensorDataset
from tqdm import tqdm
from pathlib import Path
from sklearn.model_selection import train_test_split
import numpy as np
from time import time
import matplotlib.pyplot as plt
from smokingml.utils import plot_and_save_cm
from smokingml.models import MLP_1hl
from smokingml.modules import optimization_loop, evaluate_loop
from smokingml.datasets.nursing_dataset_v1 import (
    NursingDatasetV1,
    WINSIZE,
    nursingv1_train_dev_test_split,
    load_one_session,
    load_sessions,
    load_one_windowed_session,
    load_windowed_sessions,
    utils
)
from sklearn.metrics import precision_recall_fscore_support

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
# nursingv1_dir = Path('../data/nursingv1_dataset')
# np.random.seed(0)

# # # Using train dev test split function on all sessions
# # train_dataset, dev_dataset, test_dataset = nursingv1_train_dev_test_split(nursingv1_dir, 0.5, 0.2, 0.3)

# session_ids = utils.get_all_session_ids(nursingv1_dir)

# ## Using all sessions in fs - takes 13.5 minutes
# start_time = time()
# dataset = NursingDatasetV1(nursingv1_dir, session_ids)
# for X,y in DataLoader(dataset):
#     pass
# print(f'Elapsed Time fs: {time() - start_time}')

# ## Load all sessions into memory - takes 1 minute
# start_time = time()
# dataset = load_windowed_sessions(nursingv1_dir, session_ids)
# for X,y in DataLoader(dataset):
#     pass
# print(f'Elapsed Time fs: {time() - start_time}')

Create a dataset as follows. 
1. Load and individually-window data from 10 participants
2. aggregate (concatenate)
3. shuffle windows (across all 10)
4. train-dev split (80-20, no testing here)
5. optimize a simple MLP for a number of epochs
6. plot train and dev loss
7. print f1 score for trainloader and devloader
8. plot confusion matrix for both train and dev sets.

In [2]:
# Load 10 sessions
dev_size = 0.2
batch_size = 64

nursingv1_dir = Path('../data/nursingv1_dataset')
session_ids = utils.get_all_session_ids(nursingv1_dir)
dataset = load_windowed_sessions(nursingv1_dir, session_ids=session_ids)

train_dataset, dev_dataset = utils.train_test_split_windows(dataset, test_size=dev_size)
trainloader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
devloader = DataLoader(dev_dataset, batch_size=batch_size, shuffle=True)

device = 'cuda' if torch.cuda.is_available() else 'cpu'

In [3]:
model = MLP_1hl(n_hl=10, n_features=WINSIZE*3).to(device)
optimizer = MLP_1hl.get_optimizer(model)
criterion = MLP_1hl.get_criterion()
epochs = 1

optimization_loop(model, trainloader, devloader, criterion, optimizer, epochs, device)

y_train_pred = evaluate_loop(model, trainloader, device)
y_train_true = train_dataset.tensors[1]
prec_tr, recall_tr, f1score_tr, _ = precision_recall_fscore_support(
    y_train_true, y_train_pred
)
print(f'Train: precision: {prec_tr}, recall: {recall_tr}, f1 score: {f1score_tr}')
plot_and_save_cm(y_train_pred, y_train_true, "train_cm.jpg")

y_dev_pred = evaluate_loop(model, devloader, device)
y_dev_true = dev_dataset.tensors[1]
prec_dev, recall_dev, f1score_dev, _ = precision_recall_fscore_support(
    y_dev_true, y_dev_pred
)
print(f'Dev: precision: {prec_dev}, recall: {recall_dev}, f1 score: {f1score_dev}')
plot_and_save_cm(y_dev_pred, y_dev_true, "dev_cm.jpg")

Epoch 0: Train Loss: 0.091939: Dev Loss: 0.087565: 100%|██████████| 1/1 [00:44<00:00, 44.74s/it]


Train: precision: [0.95947572 0.04012306], recall: [0.98413262 0.01570628], f1 score: [0.97164777 0.02257537]
Dev: precision: [0.96014772 0.04406662], recall: [0.98444184 0.01724958], f1 score: [0.97214303 0.02479379]
