In [1]:
# Scientific computing
import numpy as np
import pandas as pd

# Pytorch
import torch
from torch import Tensor

#Pytorch Lightning
import pytorch_lightning
from pytorch_lightning.callbacks import ModelCheckpoint, EarlyStopping
from pytorch_lightning.loggers import TensorBoardLogger

# Python libraries
import _pickle as pkl
import bz2
import os
import re
import time
import warnings
from typing import List
from collections import OrderedDict

# Laforge animation dataset
from lafan1.extract import Anim

# Plotting
import plotly.express as ex
import plotly.graph_objects as go

# Local scripts
from src.prepare_data import DataPreprocess

  warn(f"Failed to load image Python extension: {e}")


In [2]:
# --- Filesystem layout ---
DATA_DIR = "data"                   # directory listed later to find the raw "walk" clips
DATASET_DIR = "dataset"             # directory the preprocessed clips (bz2 pickles) are read from
MODEL_DIR = "model"                 # default directory for model checkpoints
MOTION_ZIP = "lafan1/lafan1.zip"    # presumably the LAFAN1 motion archive -- TODO confirm

# --- Clip timing ---
FRAMES = 7840                       # frames per preprocessed clip (clips are 7840 x 12)
FRAME_TIME = 0.03333                # presumably seconds per frame (~30 fps) -- TODO confirm

In [3]:
def validate_config(config:dict=None) -> None:
    '''
    Validate a model config in place and fill in defaults for optional entries.

    Parameters:
        config (dict): a dictionary containing the configuration of NN model.
            It must contain 'input_dim', 'output_dim' and 'hidden_dim'. A value
            of the wrong type for one of those keys only triggers a warning.
            Every optional key that is missing is added with its default value,
            so the dictionary passed in is modified.
    Returns:
        None
    Raises:
        ValueError: if config is None or a required key is missing.
    '''
    # Required entries and the type each of them is expected to have.
    # These are kept apart from the defaults below: several defaults are classes
    # (e.g. torch.optim.AdamW), and must not be mistaken for type requirements.
    required_types = {
        "input_dim" : int,
        "output_dim" : int,
        "hidden_dim" : int,
    }

    # Optional entries and the value used when the caller does not provide one.
    defaults = {
        "dropout" : 0.2,
        "optimizer" : torch.optim.AdamW,
        "loss_fn" : torch.nn.functional.mse_loss,
        "scheduler" : torch.optim.lr_scheduler.StepLR,
        # Keyword arguments for the scheduler: decay with 95% every 10 epochs.
        # Must be a dict because the model expands it with ** when building the scheduler.
        "scheduler_params" : {"step_size": 10, "gamma": 0.95},
        "batch_size" : 16,
        "learning_rate" : 1e-3,
        "num_workers" : os.cpu_count(),
        "checkpoint_save_period" : 10, # saves a checkpoint every 10 epochs if it's better than prev checkpoint
        "model_dir" : MODEL_DIR,
        "device" : "cuda" if torch.cuda.is_available() else "cpu",
    }

    if config is None or not all(param in config for param in required_types):
        raise ValueError("""
        Model config is not provided or not valid.
        Please provide a dictionary with at least the following three parameters:
        {
            'input_dim' : int,
            'output_dim' : int,
            'hidden_dim' : int,
        }
        """)

    # Wrong types are reported but tolerated, so existing callers keep working.
    for key, expected_type in required_types.items():
        if not isinstance(config[key], expected_type):
            warnings.warn("Received {} of type {} should be {}".format(key, type(config[key]), expected_type))

    # Values supplied by the caller always win over the defaults.
    for key, default in defaults.items():
        config.setdefault(key, default)

def read_pbz2(filename:str) -> Anim:
    '''
    Load one pickled object from a bz2-compressed file.

    Parameters:
        filename (str): path of the compressed pickle file.
    Returns:
        The unpickled object.
        NOTE(review): annotated as Anim, but this notebook slices the result
        like an array -- confirm the stored type.
    '''
    # Unpickling can execute arbitrary code: only read files from a trusted source.
    with bz2.open(filename, 'rb') as compressed:
        return pkl.load(compressed)
    

In [4]:
class MLP_Lightning(pytorch_lightning.LightningModule):
    '''
    Fully connected network (Linear -> ReLU -> Dropout, twice, followed by a
    Linear output layer) packaged as a LightningModule.

    Besides the network, the module holds its datasets (served through the
    *_dataloader hooks) and implements its own checkpoint format: a
    bz2-compressed pickle written by save_checkpoint and read by load_checkpoint.
    '''

    def __init__(self, config:dict, train_set:List[tuple]=None, test_set:List[tuple]=None, val_set:List[tuple]=None):
        '''
        Parameters:
            config (dict): model configuration; validated and completed in place
                by validate_config.
            train_set: dataset served by train_dataloader.
            test_set: dataset served by test_dataloader.
            val_set: dataset served by val_dataloader; falls back to test_set.
        '''
        validate_config(config)
        super().__init__()

        self.config = config
        self.train_set = train_set
        self.test_set = test_set
        self.val_set = val_set if val_set is not None else test_set
        self.best_val_loss = np.inf # for monitoring validation error during training

        self.module = torch.nn.Sequential(  # ReLu and Dropout are applied as layers
            OrderedDict([
                ('input', torch.nn.Linear(config["input_dim"], config["hidden_dim"])),
                ('relu1', torch.nn.ReLU()),
                ('dropout1', torch.nn.Dropout(p=config["dropout"])),
                ('hidden', torch.nn.Linear(config["hidden_dim"], config["hidden_dim"])),
                ('relu2', torch.nn.ReLU()),
                ('dropout2', torch.nn.Dropout(p=config["dropout"])),
                ('output', torch.nn.Linear(config["hidden_dim"], config["output_dim"])),
            ])
        )
        self.init_params()
        self.module.to(config["device"])

    ####################################
    # Essential functions
    ####################################

    def init_params(self):
        '''Xavier-uniform weights and a constant bias of 0.001 for every Linear layer.'''
        for layer in self.module.modules():
            if isinstance(layer, torch.nn.Linear):
                torch.nn.init.xavier_uniform_(layer.weight)
                layer.bias.data.fill_(.001)

    def forward(self, x:Tensor) -> Tensor:
        '''Run the network on x. The input is not moved between devices here.'''
        return self.module(x)

    def loss(self, x, y):
        '''Apply the configured loss function to a prediction x and a target y.'''
        return self.config["loss_fn"](x,y)

    def _shared_step(self, batch:tuple) -> Tensor:
        '''Forward an (input, target) batch and return its loss.'''
        x, y = batch
        # Follow the device the weights are actually on rather than
        # config["device"]: the model may have been moved since construction
        # (by the Trainer, or by an explicit model.to(...)).
        device = next(self.module.parameters()).device
        prediction = self(x.to(device))
        return self.loss(prediction, y.to(device))

    def _make_loader(self, dataset):
        '''Build a DataLoader over dataset with the configured batch size and workers.'''
        return torch.utils.data.DataLoader(
            dataset,
            batch_size=self.config["batch_size"],
            pin_memory=True,
            num_workers=self.config["num_workers"],
        )

    def _is_sanity_checking(self) -> bool:
        '''True while an attached Trainer runs its pre-training sanity validation.'''
        try:
            return bool(self.trainer.sanity_checking)
        except (AttributeError, RuntimeError):
            # No trainer attached, or a trainer without that attribute.
            return False

    ####################################
    # Extra Pytorch Lightning functions
    ####################################

    def configure_optimizers(self):
        '''
        Build the optimizer and, when one is configured, the LR scheduler.

        "scheduler_params" may be a dict (expanded as keyword arguments) or a
        sequence (expanded as positional arguments).
        '''
        optimizer = self.config["optimizer"](self.parameters(), lr=self.config["learning_rate"])
        scheduler_cls = self.config.get("scheduler")
        if scheduler_cls is None:
            return optimizer

        scheduler_params = self.config.get("scheduler_params") or {}
        if isinstance(scheduler_params, dict):
            scheduler = scheduler_cls(optimizer, **scheduler_params)
        else:
            scheduler = scheduler_cls(optimizer, *scheduler_params)
        return [optimizer], [scheduler]

    def train_dataloader(self):
        return self._make_loader(self.train_set)

    def val_dataloader(self):
        return self._make_loader(self.val_set)

    def test_dataloader(self):
        return self._make_loader(self.test_set)

    def training_step(self, batch:tuple, batch_idx:int) -> Tensor:
        mse_loss = self._shared_step(batch)
        self.log("ptl/train_loss", mse_loss)
        return mse_loss

    def validation_step(self, batch:tuple, batch_idx:int) -> dict:
        mse_loss = self._shared_step(batch)
        self.log("ptl/val_loss", mse_loss, prog_bar=True)
        return {"val_loss":mse_loss}

    def test_step(self, batch:tuple, batch_idx:int) -> dict:
        mse_loss = self._shared_step(batch)
        self.log("ptl/test_loss", mse_loss)
        return {"test_loss":mse_loss}

    def validation_epoch_end(self, outputs):
        '''
        Log the epoch's mean validation loss and checkpoint on improvement.

        A checkpoint is written only when the loss improves on the best value
        seen so far and the epoch index is a multiple of
        config["checkpoint_save_period"].
        '''
        if not outputs:
            return  # nothing was validated; torch.stack would fail on an empty list
        avg_loss = torch.stack([x["val_loss"] for x in outputs]).mean()
        self.log("avg_val_loss", avg_loss)

        # The sanity check validates a few batches of the untrained model. It
        # must neither seed best_val_loss nor write a checkpoint.
        if self._is_sanity_checking():
            return

        if avg_loss < self.best_val_loss:
            self.best_val_loss = avg_loss
            if self.current_epoch % self.config["checkpoint_save_period"] == 0:
                self.save_checkpoint()

    def save_checkpoint(self, checkpoint_dir=None):
        '''
        Write config, best validation loss and weights to a bz2-compressed pickle.

        Parameters:
            checkpoint_dir: target directory; defaults to config["model_dir"].
                The directory is created if it does not exist.
        The file is named "MLP_<loss>.pbz2", with the loss rounded to 2 decimals.
        '''
        if checkpoint_dir is None:
            checkpoint_dir = self.config["model_dir"]

        # Store CPU copies so the file can be unpickled on a host without a GPU.
        cpu_parameters = OrderedDict(
            (name, tensor.detach().cpu()) for name, tensor in self.module.state_dict().items()
        )
        model_dict = {
            "config":self.config,
            # float() accepts both the initial np.inf and a 0-dim loss tensor.
            "avg_val_loss":np.round(float(self.best_val_loss),2),
            "parameters":cpu_parameters,
        }

        os.makedirs(checkpoint_dir, exist_ok=True)

        filePath = os.path.join(checkpoint_dir, "MLP_{}.pbz2".format(model_dict["avg_val_loss"]))
        with bz2.BZ2File(filePath, "w") as file_ref:
            pkl.dump(model_dict, file_ref)

    @staticmethod
    def load_checkpoint(filePath:str):
        '''
        Rebuild a model from a file written by save_checkpoint.

        Parameters:
            filePath (str): path of the checkpoint file.
        Returns:
            MLP_Lightning: a model built from the stored config with the stored
            weights loaded. No datasets are attached.
        '''
        # Unpickling can execute arbitrary code: only load trusted checkpoints.
        with bz2.BZ2File(filePath, "rb") as f:
            model_dict = pkl.load(f)

        # Reconstruct the model from config. A checkpoint written on a GPU host
        # records device "cuda"; fall back to CPU when no GPU is available here.
        config = dict(model_dict["config"])
        if str(config.get("device", "")).startswith("cuda") and not torch.cuda.is_available():
            config["device"] = "cpu"
        model = MLP_Lightning(config=config)

        # Load the pre-trained parameters
        model.module.load_state_dict(model_dict["parameters"])

        return model
    

In [5]:
# Read all preprocessed walk clips from disk from 'prepare_data.ipynb'
# clips = a list of clips of dimension 7840 x 12
# os.listdir returns entries in arbitrary order. Sort them so the order in which
# clips are concatenated -- and therefore the seeded train/val/test split built
# from them -- is the same on every run and every machine.
clip_files = sorted(os.listdir(DATASET_DIR))
clips = [read_pbz2(os.path.join(DATASET_DIR, file)) for file in clip_files]
assert clips, "no preprocessed clips found in '{}'".format(DATASET_DIR)

In [6]:
# Prepare input samples and ground truths
# Splitting the dataset into three sets for training, validating, testing
# Now we are working with tensors (type = torch.Tensor)

torch.set_default_dtype(torch.float64)  # set the default data type to be double (default: float)

# Remember that clip has shape (Frames x Features) = (7840 x 12)
# Pairs are built per clip, so no sample pairs the last frame of one clip with
# the first frame of the next.
x_tensors = torch.cat([torch.from_numpy(clip[:-1]) for clip in clips], dim=0)  # inputs for the model
y_tensors = torch.cat([torch.from_numpy(clip[1:]) for clip in clips], dim=0)   # ground truth for the data, which is the next frame of the input frame

# dropping acceleration data from ground_truth data, which are 7th to 9th column
dropped_feature_indices = (6, 7, 8)
target_feature_indices = [i for i in range(x_tensors.shape[1]) if i not in dropped_feature_indices]
y_tensors = y_tensors[:, target_feature_indices]

input_dim = x_tensors.shape[1]  # 12
output_dim = y_tensors.shape[1] # 9

dataset = torch.utils.data.TensorDataset(x_tensors, y_tensors)

# 80% for training; the remaining 20% is split into a validation half and a
# held-out half.
num_samples = len(x_tensors) # sample = frame = row
num_train_samples = int(0.8 * num_samples)
num_test_samples = int(num_samples - num_train_samples)
num_val_samples = num_test_samples // 2
# The held-out part takes whatever is left, so the three lengths always add up
# to the dataset size (random_split rejects lengths that do not), even when the
# 20% remainder is odd.
num_holdout_samples = num_test_samples - num_val_samples
assert num_train_samples + num_val_samples + num_holdout_samples == num_samples

# Randomly split dataset into the 3 sets given the number of samples in each set
train_set, val_set, test_set = torch.utils.data.random_split(
    dataset, [num_train_samples, num_val_samples, num_holdout_samples],
    generator=torch.Generator().manual_seed(2048))

# reuse validation set for testing: the final test set is the held-out part
# plus the validation part (num_test_samples samples in total)
test_set = test_set + val_set
assert len(test_set) == num_test_samples


In [7]:
# Model and training configuration. Every key is spelled out here, so the
# defaults filled in by validate_config are not relied on.
num_cpus = os.cpu_count()
config = dict(
    input_dim=input_dim,
    output_dim=output_dim,
    hidden_dim=32,
    dropout=0.2,
    optimizer=torch.optim.AdamW,
    loss_fn=torch.nn.functional.mse_loss,
    scheduler=torch.optim.lr_scheduler.StepLR,
    scheduler_params=dict(step_size=10, gamma=.95),  # decay with 95% every 10 epochs
    batch_size=512,
    learning_rate=1e-3,
    num_workers=num_cpus,  # one DataLoader worker per CPU core
    checkpoint_save_period=10,  # saves a checkpoint every 10 epochs if it's better than prev checkpoint
    model_dir=MODEL_DIR,
    device="cuda" if torch.cuda.is_available() else "cpu",
)

# test_set is passed before val_set: that is the constructor's parameter order.
model = MLP_Lightning(config, train_set, test_set, val_set)
# NOTE(review): the recorded output echoes this line as a warning; summarize()
# may be deprecated in the installed Lightning version -- confirm.
print(model.summarize(max_depth=2))



  | Name            | Type       | Params
-----------------------------------------------
0 | module          | Sequential | 1.8 K 
1 | module.input    | Linear     | 416   
2 | module.relu1    | ReLU       | 0     
3 | module.dropout1 | Dropout    | 0     
4 | module.hidden   | Linear     | 1.1 K 
5 | module.relu2    | ReLU       | 0     
6 | module.dropout2 | Dropout    | 0     
7 | module.output   | Linear     | 297   
-----------------------------------------------
1.8 K     Trainable params
0         Non-trainable params
1.8 K     Total params
0.007     Total estimated model params size (MB)


  print(model.summarize(max_depth=2))


In [10]:
# Training-loop settings
min_epochs, max_epochs = 1, 10   # lower / upper bound on the number of training epochs
useEarlyStopping = True          # whether an EarlyStopping callback is attached to the trainer
early_stopping_patience = 2      # presumably the EarlyStopping patience, in validation rounds -- TODO confirm

In [11]:
# Optionally stop training once 'avg_val_loss' (logged at the end of every
# validation epoch) has not improved for `early_stopping_patience` rounds.
if useEarlyStopping:
    earlystopping = EarlyStopping(monitor="avg_val_loss", patience=early_stopping_patience)
    callbacks = [earlystopping]
else:
    callbacks = []

logger = TensorBoardLogger(save_dir="log/", name="MLP", version="0.1")

# Hardware settings shared by both trainers. Use one GPU with 16-bit mixed
# precision when CUDA is available; otherwise fall back to the Trainer's CPU
# defaults, so the notebook also runs on a machine without a GPU.
use_gpu = torch.cuda.is_available()
hardware_args = {
    "gpus": 1 if use_gpu else None,
    "precision": 16 if use_gpu else 32,
}

# Baseline: test loss of the untrained model.
trainer = pytorch_lightning.Trainer(logger=logger, **hardware_args)
result_1 = trainer.test(model)

# Train, then measure the test loss again with the trained model.
trainer = pytorch_lightning.Trainer(
    default_root_dir="checkpoints",
    callbacks=callbacks,
    min_epochs=min_epochs,
    max_epochs=max_epochs,
    logger=logger,
    **hardware_args,
)

trainer.fit(model)
result_2 = trainer.test(model)

Using 16bit native Automatic Mixed Precision (AMP)
GPU available: True, used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


Testing:   3%|▎         | 1/34 [00:01<00:33,  1.00s/it]--------------------------------------------------------------------------------
DATALOADER:0 TEST RESULTS
{'ptl/test_loss': 0.8225607309556744}
--------------------------------------------------------------------------------
Testing: 100%|██████████| 34/34 [00:01<00:00, 31.75it/s]

Using 16bit native Automatic Mixed Precision (AMP)
GPU available: True, used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name   | Type       | Params
--------------------------------------
0 | module | Sequential | 1.8 K 
--------------------------------------
1.8 K     Trainable params
0         Non-trainable params
1.8 K     Total params
0.004     Total estimated model params size (MB)



Epoch 9: 100%|██████████| 153/153 [00:01<00:00, 87.28it/s, loss=0.0681, v_num=0.1, ptl/val_loss=0.0116] 

LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]



Testing:   3%|▎         | 1/34 [00:00<00:16,  1.96it/s]--------------------------------------------------------------------------------
DATALOADER:0 TEST RESULTS
{'ptl/test_loss': 0.01159143799431821}
--------------------------------------------------------------------------------
Testing: 100%|██████████| 34/34 [00:00<00:00, 55.60it/s]


In [12]:
# trainer.test returns one metrics dict per test dataloader; there is a single
# test dataloader here, hence index 0.
loss_before_training = result_1[0]["ptl/test_loss"]
loss_after_training = result_2[0]["ptl/test_loss"]

print("Avg. test loss before training: {:.3f}".format(loss_before_training))
print("Avg. test loss after training: {:.3f}".format(loss_after_training))

Avg. test loss before training: 0.823
Avg. test loss after training: 0.012


In [16]:
def _checkpoint_loss(file_name:str) -> float:
    '''
    Validation loss encoded in a checkpoint file name of the form "MLP_<loss>.pbz2".

    Returns inf for names that do not follow that pattern or whose loss is not
    a finite-comparable number, so such files are ranked last.
    '''
    match = re.fullmatch(r"MLP_(.+)\.pbz2", file_name)
    if match is None:
        return np.inf
    try:
        loss = float(match.group(1))
    except ValueError:
        return np.inf
    return np.inf if np.isnan(loss) else loss


# os.listdir order is arbitrary, so "the last entry" is not the best checkpoint.
# Rank the files by the loss stored in their names instead (best first); the
# name itself breaks ties so the choice is deterministic.
saved_checkpoints = sorted(
    os.listdir(config["model_dir"]),
    key=lambda name: (_checkpoint_loss(name), name),
)
if not saved_checkpoints:
    raise FileNotFoundError("No checkpoints found in '{}'".format(config["model_dir"]))

best_model_path = os.path.join(config["model_dir"], saved_checkpoints[0])
model2 = MLP_Lightning.load_checkpoint(best_model_path)
print(best_model_path)
print(model2.summarize(max_depth=2))

model/MLP_0.06.pbz2
  | Name            | Type       | Params
-----------------------------------------------
0 | module          | Sequential | 1.8 K 
1 | module.input    | Linear     | 416   
2 | module.relu1    | ReLU       | 0     
3 | module.dropout1 | Dropout    | 0     
4 | module.hidden   | Linear     | 1.1 K 
5 | module.relu2    | ReLU       | 0     
6 | module.dropout2 | Dropout    | 0     
7 | module.output   | Linear     | 297   
-----------------------------------------------
1.8 K     Trainable params
0         Non-trainable params
1.8 K     Total params
0.007     Total estimated model params size (MB)




In [18]:
# Using the model: predict the next frame for every frame of one raw walk clip,
# one frame at a time, and report the inference time and the loss per frame.

# os.listdir order is arbitrary; sort so that "the last walk clip" is always the same file.
bvh_files = sorted(os.listdir(DATA_DIR))
walk_clips = [os.path.join(DATA_DIR, f) for f in bvh_files if "walk" in f]
assert walk_clips, "no file containing 'walk' found in '{}'".format(DATA_DIR)
data = DataPreprocess(write=False)(walk_clips[-1])

# Same input/target construction as for training: the target of a frame is the
# next frame, restricted to the predicted feature columns.
input_frames = torch.from_numpy(data["data"][:-1])
target_frames = torch.from_numpy(data["data"][1:])
target_frames = target_frames[:, target_feature_indices]

num_frames = len(target_frames)
predicted_frames = []

model.eval()        # disables dropout for inference
model.to("cpu")
loss = 0
# perf_counter is a monotonic, high-resolution clock meant for measuring durations.
start_time = time.perf_counter()
with torch.no_grad():
    for x, y in zip(input_frames, target_frames):
        predicted = model(x)
        loss += torch.nn.functional.mse_loss(predicted, y)
        predicted_frames.append(predicted)
end_time = time.perf_counter()
predicted_frames = np.asarray([p.numpy() for p in predicted_frames])

print(
    f"""Finished one clip with {num_frames} frames in {end_time - start_time} sec\n
    Avg time per frame: {(end_time-start_time)/num_frames}\n 
    Avg. loss: {loss/num_frames}\n"""
    )


Finished one clip with 4887 frames in 0.3156147003173828 sec

    Avg time per frame: 6.458250466899587e-05
 
    Avg. loss: 0.010777842586677274



In [19]:
# Overlay target and predicted values of the first F frames in 3D, using output
# features 0-2 as the x/y/z coordinates.
# NOTE(review): features 0-2 are presumably a 3D position -- confirm against the preprocessing.
F = 20
# Hand plotly plain numpy arrays (target_frames is a torch tensor).
target_head = np.asarray(target_frames[:F])
predicted_head = np.asarray(predicted_frames[:F])

fig = go.Figure(
    data=[
        go.Scatter3d(x=target_head[:, 0], y=target_head[:, 1], z=target_head[:, 2], name="Target"),
        go.Scatter3d(x=predicted_head[:, 0], y=predicted_head[:, 1], z=predicted_head[:, 2], name="Predicted"),
    ]
)
# A title lets the figure stand alone when the notebook is skimmed.
fig.update_layout(title="Target vs. predicted: first {} frames (features 0-2)".format(F))
fig.show()