In [None]:
import torch
from torch import nn
from torch.utils.data import DataLoader, Dataset, TensorDataset
from tqdm import tqdm
from pathlib import Path
from sklearn.model_selection import train_test_split
import numpy as np
from time import time
import matplotlib.pyplot as plt
from smokingml.utils import plot_and_save_cm
from smokingml.models import MLP_1hl
from smokingml.modules import optimization_loop, evaluate_loop
from smokingml.datasets.nursing_dataset_v1 import (
    NursingDatasetV1,
    WINSIZE,
    nursingv1_train_dev_test_split,
    load_one_session,
    load_sessions,
    load_one_windowed_session,
    load_windowed_sessions,
    utils
)
from sklearn.metrics import precision_recall_fscore_support

In [None]:
# nursingv1_dir = Path('../data/nursingv1_dataset')
# np.random.seed(0)

# # # Using train dev test split function on all sessions
# # train_dataset, dev_dataset, test_dataset = nursingv1_train_dev_test_split(nursingv1_dir, 0.5, 0.2, 0.3)

# session_ids = utils.get_all_session_ids(nursingv1_dir)

# ## Using all sessions with custom class - takes 90 seconds
# start_time = time()
# dataset = NursingDatasetV1(nursingv1_dir, session_ids)
# for X,y in DataLoader(dataset):
#     pass
# print(f'Elapsed Time Custom: {time() - start_time}')

# ## Load all sessions into memory - takes 69 seconds
# start_time = time()
# dataset = load_windowed_sessions(nursingv1_dir, session_ids)
# for X,y in DataLoader(dataset):
#     pass
# print(f'Elapsed Time TensorDataset: {time() - start_time}')

In [38]:
# Load 10 sessions
dev_size = 0.2
batch_size = 64

nursingv1_dir = Path('../data/nursingv1_dataset')
session_ids = utils.get_all_session_ids(nursingv1_dir)[:10]
dataset = load_windowed_sessions(nursingv1_dir, session_ids=session_ids)

train_dataset, dev_dataset = utils.train_test_split_windows(dataset, test_size=dev_size)
trainloader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
devloader = DataLoader(dev_dataset, batch_size=batch_size, shuffle=True)

device = 'cuda' if torch.cuda.is_available() else 'cpu'

In [None]:
model = MLP_1hl(n_hl=10, n_features=WINSIZE*3).to(device)
optimizer = MLP_1hl.get_optimizer(model)
criterion = MLP_1hl.get_criterion()
epochs = 100

optimization_loop(model, trainloader, devloader, criterion, optimizer, epochs, device)

In [62]:
y_train_true, y_train_pred = evaluate_loop(model, trainloader, device)
y_train_true,y_train_pred = y_train_true.flatten(), y_train_pred.flatten()
prec_tr, recall_tr, f1score_tr, _ = precision_recall_fscore_support(
    y_train_true, y_train_pred
)
print(f'Train: precision: {prec_tr}, recall: {recall_tr}, f1 score: {f1score_tr}')
plot_and_save_cm(y_train_true, y_train_pred, "train_cm.jpg")

y_dev_true, y_dev_pred = evaluate_loop(model, devloader, device).flatten()
y_dev_true,y_dev_pred = y_dev_true.flatten(), y_dev_pred.flatten()
prec_dev, recall_dev, f1score_dev, _ = precision_recall_fscore_support(
    y_dev_true, y_dev_pred
)
print(f'Dev: precision: {prec_dev}, recall: {recall_dev}, f1 score: {f1score_dev}')
plot_and_save_cm(y_dev_true, y_dev_pred, "dev_cm.jpg")

ValueError: too many values to unpack (expected 2)