In [2]:
import os
from datetime import timedelta

In [3]:
import torch
from torch.utils.data import Subset
import pytorch_lightning as pl
from tqdm.notebook import trange
from pytorch_lightning.callbacks.early_stopping import EarlyStopping
from pytorch_lightning.callbacks.model_checkpoint import ModelCheckpoint
from IPython.display import clear_output
from sklearn.model_selection import KFold, train_test_split
from sklearn.metrics import roc_auc_score

In [4]:
from system import System
from conflab.data_loaders.pose import ConflabPoseExtractor
from conflab.data_loaders.accel import ConflabAccelExtractor
from conflab.data_loaders.person import ConflabDataset, ConflabSubset, ConflabLabelExtractor
from conflab.constants import conflab_pose_path, midge_data_path, conflab_speaking_status_path
from conflab.constants import vid2_start, vid3_start

In [5]:
vid2_seg8_start = vid2_start + timedelta(minutes=14)
vid2_len = (vid3_start - vid2_seg8_start).total_seconds()

In [6]:
vid2_len

217.0

In [7]:
def do_fold(train_ds, test_ds, model_name='resnet', deterministic=False, log_prefix=None):
    # split the train set into train and val
    # use 10% of data for val
    val_ds, train_ds = train_ds.random_split(0.1)

    # data loaders
    data_loader_train = torch.utils.data.DataLoader(
        train_ds, batch_size=64, shuffle=True, num_workers=4,
        collate_fn=None)
    data_loader_val = torch.utils.data.DataLoader(
        val_ds, batch_size=128, shuffle=False, num_workers=4,
        collate_fn=None)
    data_loader_test = torch.utils.data.DataLoader(
        test_ds, batch_size=128, shuffle=False, num_workers=4,
        collate_fn=None)

    system = System(model_name)

    checkpoint_callback = ModelCheckpoint(dirpath="./checkpoints", save_top_k=1, mode="max", monitor="val_auc")
    trainer = pl.Trainer(
        callbacks=[
            EarlyStopping(monitor="val_auc", patience=6, mode="max"),
            checkpoint_callback
        ],
        accelerator='gpu',
        log_every_n_steps=1,
        max_epochs=20,
        deterministic=deterministic,
        logger=pl.loggers.TensorBoardLogger(save_dir="logs/", version=log_prefix))
    trainer.fit(system, data_loader_train, data_loader_val)

    trainer.test(system, data_loader_test, ckpt_path='best')
    return system.test_results

In [8]:
def get_metrics(outputs, labels, type='binary'):
    if type == 'binary':
        proba = torch.sigmoid(outputs)
        pred = (proba > 0.5)

        correct = pred.eq(outputs.bool()).sum().item()
        return {
            'auc': roc_auc_score(labels, proba),
            'correct': correct
        }
    elif type == 'regression':
        return {
            'mse': torch.nn.functional.mse_loss(outputs, labels, reduction='mean'),
            'l1': torch.nn.functional.l1_loss(outputs, labels, reduction='mean')
        }

In [9]:
def do_run(dataset, model_name, random_state, metrics_name='binary', deterministic=False, log_prefix='cv'):
    # split per pid
    pids = set(dataset.get_groups())
    pid_splits = KFold(n_splits=10, random_state=random_state, shuffle=True).split(range(len(pids)))

    outputs = torch.empty((len(ds),))
    for f, (train_pids, test_pids) in enumerate(pid_splits):
        # create datasets   
        train_idx = [i for i, e in enumerate(ds.examples) if e[0] in train_pids]
        test_idx = [i for i, e in enumerate(ds.examples) if e[0] in test_pids]
        print(f'ds split into {len(train_idx)} train and {len(test_idx)} test')

        train_ds = ConflabSubset(dataset, train_idx)
        test_ds = ConflabSubset(dataset, test_idx)

        fold_outputs = do_fold(train_ds, test_ds, model_name, deterministic=deterministic, log_prefix=log_prefix+f'_fold{f}')
        outputs[test_idx] = fold_outputs['proba'].cpu()
        clear_output(wait=True)

    labels = torch.Tensor(ds.get_all_labels())
    run_metrics = get_metrics(outputs, labels, metrics_name)

    return outputs, run_metrics

In [10]:
pose_extractor = ConflabPoseExtractor(conflab_pose_path)
pose_extractor.load_from_pickle('../tracks.pkl')

In [11]:
accel_extractor = ConflabAccelExtractor(midge_data_path)
label_extractor = ConflabLabelExtractor(os.path.join(conflab_speaking_status_path, 'speaking'))

In [12]:
# make windowed examples using the pose tracks.
examples = pose_extractor.make_examples()
# compose the dataset
ds = ConflabDataset(examples, {
    'accel': accel_extractor,
    'label': label_extractor
})

100%|██████████| 8/8 [00:00<00:00, 540.83it/s]


In [13]:
seed=22
pl.utilities.seed.seed_everything(seed, workers=True)
outputs, metrics = do_run(ds, 'inception', random_state=seed, metrics_name='binary', deterministic=False, log_prefix="inception")

GPU available: True, used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs


ds split into 40098 train and 1836 test


  rank_zero_warn(f"Checkpoint directory {dirpath} exists and is not empty.")
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name  | Type          | Params
----------------------------------------
0 | model | InceptionTime | 455 K 
----------------------------------------
455 K     Trainable params
0         Non-trainable params
455 K     Total params
1.821     Total estimated model params size (MB)


Sanity Checking: 0it [00:00, ?it/s]

Training: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Restoring states from the checkpoint path at /mnt/c/Users/Jose/Documents/furnace/conflab/baselines/speaking_status/accel/checkpoints/epoch=12-step=7332.ckpt
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
Loaded model weights from checkpoint at /mnt/c/Users/Jose/Documents/furnace/conflab/baselines/speaking_status/accel/checkpoints/epoch=12-step=7332.ckpt


Testing: 0it [00:00, ?it/s]

────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
       Test metric             DataLoader 0
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
        test_auc            0.8130311264822135
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────


In [12]:
seed=22
pl.utilities.seed.seed_everything(seed, workers=True)
outputs, metrics = do_run(ds, 'minirocket', random_state=seed, metrics_name='binary', deterministic=False, log_prefix="minirocket")

GPU available: True, used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
  rank_zero_warn(f"Checkpoint directory {dirpath} exists and is not empty.")
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name  | Type       | Params
-------------------------------------
0 | model | MiniRocket | 32.3 K
-------------------------------------
30.0 K    Trainable params
2.3 K     Non-trainable params
32.3 K    Total params
0.129     Total estimated model params size (MB)


ds split into 40098 train and 1836 test


Sanity Checking: 0it [00:00, ?it/s]

Training: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Restoring states from the checkpoint path at /mnt/c/Users/Jose/Documents/furnace/conflab/baselines/speaking_status/accel/checkpoints/epoch=1-step=1128-v1.ckpt
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
Loaded model weights from checkpoint at /mnt/c/Users/Jose/Documents/furnace/conflab/baselines/speaking_status/accel/checkpoints/epoch=1-step=1128-v1.ckpt


Testing: 0it [00:00, ?it/s]

────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
       Test metric             DataLoader 0
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
        test_auc            0.8101333992094861
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────


In [14]:
metrics

{'auc': 0.814978122230924, 'correct': 14362}

# Alexnet architecture

In [12]:
seed=22
pl.utilities.seed.seed_everything(seed, workers=True)
outputs, metrics = do_run(ds, 'alexnet', random_state=seed, metrics_name='binary', deterministic=False, log_prefix="alexnet_bs32")

GPU available: True, used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
  rank_zero_warn(f"Checkpoint directory {dirpath} exists and is not empty.")
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name  | Type      | Params
------------------------------------
0 | model | MyAlexNet | 316 K 
------------------------------------
316 K     Trainable params
0         Non-trainable params
316 K     Total params
1.265     Total estimated model params size (MB)


ds split into 40098 train and 1836 test


Sanity Checking: 0it [00:00, ?it/s]

Training: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Restoring states from the checkpoint path at /mnt/c/Users/Jose/Documents/furnace/conflab/baselines/speaking_status/accel/checkpoints/epoch=11-step=13536.ckpt
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
Loaded model weights from checkpoint at /mnt/c/Users/Jose/Documents/furnace/conflab/baselines/speaking_status/accel/checkpoints/epoch=11-step=13536.ckpt


Testing: 0it [00:00, ?it/s]

────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
       Test metric             DataLoader 0
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
        test_auc            0.7768465909090908
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────


In [13]:
metrics

{'auc': 0.7859898687289133, 'correct': 11660}

# Resnet architecture

In [13]:
seed=22
pl.utilities.seed.seed_everything(seed, workers=True)
outputs, metrics = do_run(ds, 'resnet', random_state=seed, metrics_name='binary', deterministic=True, log_prefix="resnet_best")

GPU available: True, used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
  rank_zero_warn(f"Checkpoint directory {dirpath} exists and is not empty.")
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name  | Type           | Params
-----------------------------------------
0 | model | ResNetBaseline | 505 K 
-----------------------------------------
505 K     Trainable params
0         Non-trainable params
505 K     Total params
2.020     Total estimated model params size (MB)


ds split into 40098 train and 1836 test


Sanity Checking: 0it [00:00, ?it/s]

Training: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Restoring states from the checkpoint path at /mnt/c/Users/Jose/Documents/furnace/conflab/baselines/speaking_status/accel/checkpoints/epoch=2-step=1083.ckpt
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
Loaded model weights from checkpoint at /mnt/c/Users/Jose/Documents/furnace/conflab/baselines/speaking_status/accel/checkpoints/epoch=2-step=1083.ckpt


Testing: 0it [00:00, ?it/s]

────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
       Test metric             DataLoader 0
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
        test_auc            0.7452346837944664
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
