# Apply CNN and save skeletons as parquet files

In [1]:
from datetime import datetime

# Stamp the notebook with the wall-clock time of this run (day.month.year hour:minute:second).
now = datetime.now()
dt_string = f"{now:%d.%m.%Y %H:%M:%S}"
print("Run at:", dt_string)

Run at: 15.04.2024 06:34:32


In [2]:
# Render plots inline and auto-reload edited project modules
#@formatter:off
%matplotlib inline
%load_ext autoreload
%autoreload 2
#@formatter:on

import pandas as pd

import torch
from datasets.RSO_LModule import RSO_LModule
from torch.utils.data import DataLoader, ConcatDataset

import lightning as pl

from pathlib import Path
import importlib

from run import path_resolution, extract_pred

  from .autonotebook import tqdm as notebook_tqdm


In [3]:
# Model selection: `lib` names the module imported from the `models` package and
# `params` names the hyper-parameter set looked up on that module.
# Other model candidates: "OwnBaselineCNN", "CNNModel_LiChan2014", "CNNModel_LiChan2014_AvgPool"
lib = "ResNet50_SunShangetAl"
# Other hyper-parameter sets: "hp_default", "hp_l1_loss"
params = "hp_dropna"

# Execution switches.
debug = False        # forwarded to the data module; also adds a ".debug" tag to output file names
dry_run = False      # forwarded to the Trainer as `fast_dev_run`
computease = False   # when True, float32 matmul precision is set to 'medium'
num_worker = 5       # forwarded to the data module as `n_jobs`

# Plot window: half a minute of samples at SAMPLE_RATE samples per second.
SAMPLE_RATE = 30.0
plot_length = int(30 * SAMPLE_RATE)

In [4]:
# path resolution
# Resolve the root directory of the data; it is later handed to the data module as `data_dir`.
# In the recorded run this call rsync-ed the shared data directory to a local copy and
# printed the resulting base path.
base_path = path_resolution()

 -- rsync --
Calling: rsync -av /share/temp/yhartmann/smart-cities-journal-based-on-jonahs-ma/data/ /home/yale1/ma-jonah-data/
sending incremental file list

sent 2,949 bytes  received 31 bytes  5,960.00 bytes/sec
total size is 20,378,829,669  speedup is 6,838,533.45
 -- rsync finished --

Base path: /home/yale1/ma-jonah-data/


In [5]:
# Locate the checkpoint directory relative to the current working directory (creating it
# if it is missing), then derive the checkpoint file name from the selected model and
# hyper-parameter set.
checkpoint_path = Path("./checkpoints/").resolve()
checkpoint_path.mkdir(parents=True, exist_ok=True)
model_checkpoint_path = str(checkpoint_path / f"{lib}.{params}.ckpt")
print("model_checkpoint_path:", model_checkpoint_path)

model_checkpoint_path: /share/temp/yhartmann/smart-cities-journal-based-on-jonahs-ma/pipeline/checkpoints/ResNet50_SunShangetAl.hp_dropna.ckpt


# Load model

In [6]:
# Import the model module selected by `lib` and restore the trained network from its checkpoint.
NNModule = importlib.import_module("models." + lib)
model = NNModule.NeuralNetwork.load_from_checkpoint(checkpoint_path=model_checkpoint_path)

# Fetch the hyper-parameter set named by `params` and pin the run to a single device.
# Note: this writes into the object stored on the model module itself, not into a copy.
hyper_params = getattr(NNModule, params)
hyper_params["trainer_params"]["devices"] = 1

2024-04-15 06:34:37,325 - torch.distributed.nn.jit.instantiator - INFO - Created a temporary directory at /tmp/tmprxj1rw79
2024-04-15 06:34:37,329 - torch.distributed.nn.jit.instantiator - INFO - Writing /tmp/tmprxj1rw79/_remote_module_non_scriptable.py


/home/yale1/miniconda3/envs/smart-cities/lib/python3.10/site-packages/lightning/pytorch/utilities/parsing.py:199: Attribute 'loss_function' is an instance of `nn.Module` and is already saved during checkpointing. It is recommended to ignore them using `self.save_hyperparameters(ignore=['loss_function'])`.


In [7]:
# Show the active hyper-parameter set so the run is self-documenting.
# hyper_params['data_params']['fix_nan'] = True  # optional override, left disabled
hyper_params

{'loss_function': L1Loss(),
 'optimizer': torch.optim.adam.Adam,
 'data_params': {'shuffle': True, 'batch_size': 46, 'fix_nan': True},
 'model_params': {'output_size': 63},
 'scheduler_params': {'milestones': [10, 20], 'gamma': 0.1},
 'optimizer_params': {'lr': 0.0001},
 'trainer_params': {'max_epochs': 100, 'precision': '32', 'devices': 1},
 'early_stopping_params': {'monitor': 'val_loss',
  'mode': 'min',
  'patience': 5}}

In [8]:
# use tensor cores
# Only when the `computease` switch is set: lower the float32 matmul precision to 'medium'.
if computease:
    torch.set_float32_matmul_precision('medium')
# Disabled alternative for the other case: shrink the batch size instead.
# else:
#     hyper_params['data_params']['batch_size'] = 8

## Create Datasets

In [9]:
class RSO_LModule_apply_cnn(RSO_LModule):
    """Data module variant that serves sessions 1-6 together for test/predict runs."""

    def setup(self, stage: str):
        """Load sessions 1-6 and concatenate them when `stage` is "test" or "predict".

        Any other stage is a no-op here. The per-session datasets are kept on
        `self.d`; their concatenation is stored as `self.test_data`.
        """
        if stage not in ("test", "predict"):
            return
        self.d = self._load_datasets(sessions=list(range(1, 7)), only_full_visible=False)
        self.test_data = ConcatDataset(self.d)

    def test_dataloader(self):
        """Return a DataLoader over the concatenated sessions, built with `self.data_loader_args`."""
        # NOTE(review): `data_loader_args` presumably carries the `shuffle` flag from the
        # hyper-parameters -- confirm evaluation loaders are not shuffled, since session
        # labels are attached to the output rows by position further down.
        loader_kwargs = self.data_loader_args
        return DataLoader(self.test_data, **loader_kwargs)

# Build the data module from the resolved data directory, the run switches and the
# data-related entries of the selected hyper-parameter set.
data_module = RSO_LModule_apply_cnn(
    data_dir=base_path,
    n_jobs=num_worker,
    debug=debug,
    **hyper_params["data_params"],
)

In [10]:
# actually run the model and prediction
trainer = pl.Trainer(logger=False, 
            fast_dev_run=dry_run,
            default_root_dir=checkpoint_path, 
            **hyper_params['trainer_params'])

res = trainer.predict(model, data_module)
prediction, target = extract_pred(res)

GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
QUEUEING TASKS | : 100%|██████████| 6/6 [00:00<00:00, 2552.83it/s]
PROCESSING TASKS | : 100%|██████████| 6/6 [02:08<00:00, 21.39s/it] 
COLLECTING RESULTS | : 100%|██████████| 6/6 [00:00<00:00, 28086.86it/s]
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


Predicting DataLoader 0: 100%|██████████| 3431/3431 [22:22<00:00,  2.56it/s]


# Save Skeleton

In [11]:
# One 1-based session label per sample, in the order the sessions were concatenated.
# The number of labels per session is the length of its dataset, i.e. the number of
# samples it contributes to the ConcatDataset that was predicted on. This keeps the
# labels aligned with the prediction rows even if a dataset serves fewer samples than
# it has raw optitrack frames, and avoids copying whole frames just to read one column.
session_info = pd.Series(
    [
        session
        for session, dataset in enumerate(data_module.d, start=1)
        for _ in range(len(dataset))
    ],
    name="session",
    dtype="int64",
)

In [12]:
def _skeleton_frame(values, columns, sessions):
    """Wrap model output in a DataFrame with skeleton column labels and a session id.

    Args:
        values: Array-like accepted by `pd.DataFrame`, one row per sample.
        columns: Column labels to apply to `values`.
        sessions: Series of session ids; assigned to the rows by index alignment.

    Returns:
        A DataFrame carrying `columns` plus a trailing 'Session' column.

    Raises:
        ValueError: If there are fewer session labels than rows, which would
            otherwise leave silent NaN labels on the trailing rows.
    """
    frame = pd.DataFrame(values)
    frame.columns = columns
    if len(sessions) < len(frame):
        raise ValueError(
            f"Got {len(sessions)} session labels for {len(frame)} rows; "
            "session labels do not cover every row."
        )
    frame['Session'] = sessions
    return frame


# Predictions and targets share the same layout, so both go through the same helper.
prd_df = _skeleton_frame(prediction, data_module.columns, session_info)
trg_df = _skeleton_frame(target, data_module.columns, session_info)

prd_df

Unnamed: 0_level_0,Ab,Ab,Ab,Chest,Chest,Chest,Head,Head,Head,Hip,...,RThigh,RThigh,RThigh,RToe,RToe,RToe,RUArm,RUArm,RUArm,Session
Unnamed: 0_level_1,Position,Position,Position,Position,Position,Position,Position,Position,Position,Position,...,Position,Position,Position,Position,Position,Position,Position,Position,Position,Unnamed: 21_level_1
Frame,X,Y,Z,X,Y,Z,X,Y,Z,X,...,X,Y,Z,X,Y,Z,X,Y,Z,Unnamed: 21_level_2
0,34.816116,103.440819,15.523431,36.557854,118.369209,18.232561,35.734913,149.870422,15.792988,34.741917,...,46.965675,95.755692,15.648817,51.037663,5.239282,10.634644,61.727154,133.757492,23.645472,1
1,34.203415,102.704933,15.077739,36.044434,117.543175,17.620770,35.115643,148.816559,15.228624,34.160145,...,46.104671,95.064514,14.658244,49.567928,5.129473,11.877818,60.893978,132.825668,21.551857,1
2,34.006805,103.156639,14.096414,35.741703,118.054741,16.746607,34.933910,149.479263,14.349734,33.946651,...,45.967392,95.478157,14.166105,49.762302,5.163775,10.654462,60.513672,133.431076,21.825674,1
3,33.516331,102.866081,15.092410,35.373463,117.712494,17.684219,34.480320,149.055817,15.284741,33.458916,...,45.597267,95.200394,14.741529,49.210785,5.161929,10.817093,60.550331,133.010559,21.923864,1
4,34.912457,103.369965,15.205575,36.687561,118.299698,17.863331,35.833111,149.792450,15.432262,34.849430,...,46.997116,95.680031,15.149675,50.733173,5.178285,11.581563,61.744392,133.715897,22.718693,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
157814,-1.903551,100.026764,126.187622,-3.516900,114.475479,127.517693,-1.974476,145.278946,126.885803,-2.200559,...,-1.599732,92.477371,134.393753,1.773480,4.169450,143.149612,-5.649695,129.613464,144.311295,6
157815,-3.666246,100.004555,126.297203,-5.561305,114.427002,127.343903,-3.887467,145.249863,126.997437,-3.966238,...,-4.814098,92.427856,134.738129,-1.575662,4.146951,143.137634,-10.842616,129.547974,144.222992,6
157816,-5.001548,100.383865,126.464470,-6.573536,114.887848,127.671501,-5.007646,145.769806,127.075691,-5.310528,...,-4.889763,92.860359,134.379837,-2.035601,4.295224,143.728439,-8.870429,129.999176,143.726639,6
157817,-7.911106,99.969940,127.503952,-9.708652,114.401375,128.605774,-8.026447,145.195099,128.206757,-8.215654,...,-8.728952,92.424667,135.923279,-5.961750,4.147422,145.940964,-14.180821,129.508820,145.388779,6


In [13]:
# Write predictions and targets next to each other; debug runs get a ".debug" tag so
# they never overwrite the results of a full run.
output_dir = Path("../data/transformed")
# Create the target directory up front: to_parquet does not create missing parents.
output_dir.mkdir(parents=True, exist_ok=True)
debug_tag = ".debug" if debug else ""
prd_df.to_parquet(output_dir / f"{lib}.{params}.prd{debug_tag}.parquet")
trg_df.to_parquet(output_dir / f"{lib}.{params}.trg{debug_tag}.parquet")