<a href="https://colab.research.google.com/github/NikitaSUAI/EmotionRecognition/blob/main/RNN_w2v.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# INIT

In [34]:
!pip install pytorch-lightning > /dev/null
!pip install torchmetrics > /dev/null
!pip install comet-ml > /dev/null
!pip install torch-ema > /dev/null

In [35]:
from pathlib import Path
from google.colab import drive
drive.mount('/content/drive')

# !cp -r '/content/drive/MyDrive/OMG_EMO' 'OMG_EMO'

BASE_PATH = Path("/content/drive/MyDrive/OMG_EMO")

TRAIN_PATH = BASE_PATH / "train_set.csv"
VAL_PATH = BASE_PATH / "val_set.csv"
TEST_PATH = BASE_PATH / "test_set.csv"

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [36]:
import pandas as pd

train_df = pd.read_csv(TRAIN_PATH).dropna()
val_df = pd.read_csv(VAL_PATH).dropna()
test_df = pd.read_csv(TEST_PATH).dropna()

# Sequance DataLoader

In [39]:
import torch
import pickle
import numpy as np
from torch.utils.data import Dataset, DataLoader

In [40]:
from re import X
class Seq_w2v(Dataset):
  def __init__(self, paths, dfs):
    train_fbanks = dict()
    for path in paths:
      with open(path, "rb") as f:
        train_fbanks.update(pickle.load(f))
    df = pd.concat(dfs)
    x, clf, val, aro, reg = [], [], [], [], []
    row_path = []
    for idx, row in df.iterrows():
      x.append(train_fbanks[row.path])
      clf.append(row.EmotionMaxVote)
      reg.append(np.array((row.valence, row.arousal)))
      val.append(row.valence)
      aro.append(row.arousal)
      row_path.append(row.path)

    tmp = [len(x_) for x_ in x]
    x = [(x_, clf, val, aro, row_path) for _, x_, clf, val, aro, row_path in sorted(zip(tmp, x, clf, val, aro, row_path),  
                                              key=lambda pair: pair[0])][::-1]
    x, clf, val, aro, row_path  = list(map(list, zip(*x)))
    self.x = x
    self.label = [clf, val, aro]
    self.row_path = row_path
  
  def __len__(self):
    return len(self.label[0])
  
  def __getitem__(self, idx):
    labels = [label[idx] for label in self.label]
    feats = self.x[idx]
    return feats[None, :, :], labels[0], labels[1], labels[2]

# Simple RNN Model

In [41]:
from torch import nn
import torch.nn.functional as F
from torchvision.models import resnet34, ResNet34_Weights, mobilenet_v3_small, efficientnet_b0, MobileNet_V3_Small_Weights

class RNN_model(nn.Module):
  def __init__(self) -> None:
    super(RNN_model, self).__init__()
    self.rnn = nn.LSTM(300, 32, 1, batch_first=True, bidirectional=True, dropout=0.5)
    self.norm_1 = nn.LayerNorm(64)
    # self.mid_layer = nn.Linear(300, 32)
    # self.norm_2 = nn.BatchNorm1d(32)
    self.clf = nn.Linear(64, 7)
    self.aro = nn.Linear(64, 1)
    self.val = nn.Linear(64, 1)

  def forward(self, x):
    x, (hn, cn) = self.rnn(x)
    x = torch.mean(x, dim=1)
    # x = x[:, -1, :]
    x = self.norm_1(x)
    
    # x_std = torch.std(x, dim=1)
    # x = torch.cat((x_mean, x_std), dim=1)
    
    # x = self.mid_layer(x)
    # x = self.norm_2(x)
    x = F.leaky_relu(x)
    clf = self.clf(x)
    val = self.val(x)
    aro = self.aro(x)
    return clf, val, aro

In [42]:
def init_weight(distribution):
  def weights_init(m):
    if isinstance(m, nn.Linear):
      distribution(m.weight.data)
      nn.init.constant_(m.bias.data, 0)
    if type(m) in [nn.GRU, nn.LSTM, nn.RNN]:
      for name, param in m.named_parameters():
          if 'weight_ih' in name:
              torch.nn.init.xavier_uniform_(param.data)
          elif 'weight_hh' in name:
              torch.nn.init.orthogonal_(param.data)
          elif 'bias' in name:
              param.data.fill_(0)
  return weights_init

In [43]:
from torchmetrics import Metric
import numpy as np

from sklearn.metrics import f1_score
from scipy.stats import pearsonr

def ccc(y_true, y_pred, **kwargs):
    true_mean = np.mean(y_true)
    true_variance = np.var(y_true)
    pred_mean = np.mean(y_pred)
    pred_variance = np.var(y_pred)

    rho,_ = pearsonr(y_pred,y_true)

    std_predictions = np.std(y_pred)

    std_gt = np.std(y_true)


    ccc = 2 * rho * std_gt * std_predictions / (
        std_predictions ** 2 + std_gt ** 2 +
        (pred_mean - true_mean) ** 2)

    return float(ccc)

class CCC_computer(Metric):
    def __init__(self):
        super().__init__()
        self.add_state("y_hat", default=torch.Tensor([]), dist_reduce_fx="cat")
        self.add_state("y_true", default=torch.Tensor([]), dist_reduce_fx="cat")

    def update(self, preds: torch.Tensor, target: torch.Tensor):
        assert preds.shape == target.shape
        self.y_hat = torch.cat((self.y_hat.double(), preds[0].double()))
        self.y_true = torch.cat((self.y_true.double(), target[0].double()))

    def compute(self):
        res = ccc(self.y_true.detach().cpu().numpy(), self.y_hat.detach().cpu().numpy())
        return res

In [44]:
from torch import optim, Tensor
import pytorch_lightning as pl
import torch.nn.functional as F
from torchmetrics import F1Score, MeanAbsoluteError
from torch_ema import ExponentialMovingAverage
import math


class OMG_model(pl.LightningModule):
    def __init__(self):
        super().__init__()
        self.model = RNN_model()
        self.model.apply(init_weight(nn.init.kaiming_uniform_))
        self.ema = ExponentialMovingAverage(self.model.parameters(), decay=0.99)
        
        self.metric_clf_val_micro = F1Score(num_classes = 7, average="micro")
        self.metric_clf_train_micro = F1Score(num_classes = 7, average="micro")

        self.metric_clf_val_macro = F1Score(num_classes = 7, average="macro")
        self.metric_clf_train_macro = F1Score(num_classes = 7, average="macro")

        self.metric_aro_val = CCC_computer()
        self.metric_aro_train = CCC_computer()

        self.metric_val_val = CCC_computer()
        self.metric_val_train = CCC_computer()

    def training_epoch_end(self, training_step_outputs):
        training_step_outputs = [x["loss"] for x in training_step_outputs]
        train_loss = torch.tensor(training_step_outputs).mean()

        train_clf_micro = self.metric_clf_train_micro.compute()
        self.metric_clf_train_micro.reset()

        train_clf_macro = self.metric_clf_train_macro.compute()
        self.metric_clf_train_macro.reset()

        train_aro = self.metric_aro_train.compute()
        self.metric_aro_train.reset()

        train_val = self.metric_val_train.compute()
        self.metric_val_train.reset()
        
        self.log("train_loss", torch.mean(train_loss))
        self.log("train_F1_micro", train_clf_micro)
        self.log("train_F1_macro", train_clf_micro)
        self.log("train_aro", train_aro)
        self.log("train_val", train_val)

    def training_step(self, batch, batch_idx):
        x, y, z, h = batch
        y_hat, z_hat, h_hat = self.model(x)
        
        self.metric_clf_train_micro.update(y_hat, y)
        self.metric_clf_train_macro.update(y_hat, y)
        self.metric_aro_train.update(z_hat, z[:, None])
        self.metric_val_train.update(h_hat, h[:, None])

        loss = F.cross_entropy(y_hat, y)
        loss += F.mse_loss(z_hat, z[:, None].float())#, log_input=False)
        loss += F.mse_loss(h_hat, h[:, None].float())#, log_input=False)

        return loss

    def configure_optimizers(self):
        optimizer = optim.AdamW(self.model.parameters())
        sheduler = torch.optim.lr_scheduler.ExponentialLR(optimizer, 0.99)
        return [optimizer], [sheduler]

    def validation_step(self, batch, batch_idx):
        x, y, z, h = batch
        y_hat, z_hat, h_hat = self.model(x)

        loss = F.cross_entropy(y_hat, y)
        loss += F.mse_loss(z_hat, z[:, None].float())#, log_input=False)
        loss += F.mse_loss(h_hat, h[:, None].float())#, log_input=False)
        
        self.metric_clf_val_micro.update(y_hat, y)
        self.metric_clf_val_macro.update(y_hat, y)
        self.metric_aro_val.update(z_hat, z[:, None])
        self.metric_val_val.update(h_hat, h[:, None])
        return loss

    def validation_epoch_end(self, val_step_outputs):
        loss = torch.tensor(val_step_outputs).mean()

        val_clf_macro = self.metric_clf_val_macro.compute()
        self.metric_clf_val_macro.reset()

        val_clf_micro = self.metric_clf_val_micro.compute()
        self.metric_clf_val_micro.reset()

        val_aro = self.metric_aro_val.compute()
        self.metric_aro_val.reset()
        
        val_val = self.metric_val_val.compute()
        self.metric_val_val.reset()

        self.log("val_loss",  loss)
        self.log("val_F1_macro",    val_clf_macro)
        self.log("val_F1_micro",    val_clf_micro)
        self.log("val_aro",   val_aro)
        self.log("val_val",   val_val)

    def predict_step(self, batch, batch_idx, dataloader_idx=0):
        x, y, z, h = batch
        return self.model(x.float())
        
    def optimizer_step(self, *args, **kwargs):
      super().optimizer_step(*args, **kwargs)
      self.ema.update(self.model.parameters())

model = OMG_model()

  "num_layers={}".format(dropout, num_layers))
                not been set for this class (CCC_computer). The property determines if `update` by
                default needs access to the full metric state. If this is not the case, significant speedups can be
                achieved and we recommend setting this to `False`.
                We provide an checking function
                `from torchmetrics.utilities import check_forward_full_state_property`
                that can be used to check if the `full_state_update=True` (old and potential slower behaviour,
                default for now) or if `full_state_update=False` can be used safely.
                


In [45]:
idx = 0

In [48]:
import comet_ml
import random
import numpy as np
import torch
from pytorch_lightning.loggers import CometLogger
from pytorch_lightning.callbacks import LearningRateMonitor

EXPIREMENT_NAME = f"W2V_RNN_multytask_CE+MSE"
# EXPIREMENT_NAME = f"tmp_{idx}"

idx += 1

comet_logger = CometLogger(
    api_key="******************",
    workspace="nikittossii",  # Optional
    save_dir=f"/content/drive/MyDrive/model_results/{EXPIREMENT_NAME}/",  # Optional
    project_name="omgemmotion",  # Optional
    experiment_name=EXPIREMENT_NAME,  # Optional
)

lr_monitor = LearningRateMonitor(logging_interval='step')

random.seed(42)
np.random.seed(42)
torch.manual_seed(42)
torch.cuda.manual_seed(42)
torch.backends.cudnn.deterministic = True
model = OMG_model()

trainer = pl.Trainer(limit_train_batches=128, max_epochs=20,# accelerator="gpu",
                     logger=comet_logger, callbacks=[lr_monitor], gradient_clip_val=0.5, gradient_clip_algorithm="value",
                     default_root_dir=f"/content/drive/MyDrive/model_results/{EXPIREMENT_NAME}/")


INFO:pytorch_lightning.loggers.comet:CometLogger will be initialized in online mode
INFO:pytorch_lightning.utilities.rank_zero:GPU available: False, used: False
INFO:pytorch_lightning.utilities.rank_zero:TPU available: False, using: 0 TPU cores
INFO:pytorch_lightning.utilities.rank_zero:IPU available: False, using: 0 IPUs
INFO:pytorch_lightning.utilities.rank_zero:HPU available: False, using: 0 HPUs


In [49]:
def collate_fn(batch):
  
  feats, x, y, z = list(map(list, zip(*batch)))
  
  x, y, z = (
      torch.Tensor(x).long(),
      torch.Tensor(y).float(),
      torch.Tensor(z).float()
  )
  feats_len = [f.shape[1] for f in feats]
  max_len = min(feats_len)
  out_data = torch.zeros((len(feats), max_len, 300))
  for idx, feat in enumerate(feats):
    f_len = feat.shape[1]
    out_data[idx, :, :] = torch.from_numpy(feat[0, :max_len, :])
  return out_data.float(), x, y, z

train_dataset = Seq_w2v(["/content/drive/MyDrive/OMG_EMO/feats/w2v_train.pkl",
                          "/content/drive/MyDrive/OMG_EMO/feats/w2v_test.pkl"
                         ],
                         [train_df,
                          test_df
                          ])

test_dataset = Seq_w2v(["/content/drive/MyDrive/OMG_EMO/feats/w2v_val.pkl"], [val_df])

train_dataloader = DataLoader(train_dataset, collate_fn=collate_fn, batch_size=128)
test_dataloader = DataLoader(test_dataset, collate_fn=collate_fn, batch_size=1)

In [50]:
trainer.fit(model=model, train_dataloaders=train_dataloader, val_dataloaders=test_dataloader)

INFO:pytorch_lightning.callbacks.model_summary:
  | Name                   | Type         | Params
--------------------------------------------------------
0 | model                  | RNN_model    | 86.2 K
1 | metric_clf_val_micro   | F1Score      | 0     
2 | metric_clf_train_micro | F1Score      | 0     
3 | metric_clf_val_macro   | F1Score      | 0     
4 | metric_clf_train_macro | F1Score      | 0     
5 | metric_aro_val         | CCC_computer | 0     
6 | metric_aro_train       | CCC_computer | 0     
7 | metric_val_val         | CCC_computer | 0     
8 | metric_val_train       | CCC_computer | 0     
--------------------------------------------------------
86.2 K    Trainable params
0         Non-trainable params
86.2 K    Total params
0.345     Total estimated model params size (MB)


Sanity Checking: 0it [00:00, ?it/s]



Training: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

COMET ERROR: Failed to calculate active processors count. Fall back to default CPU count 1
COMET INFO: Couldn't find a Git repository in '/content' nor in any parent directory. You can override where Comet is looking for a Git Patch by setting the configuration `COMET_GIT_DIRECTORY`
COMET INFO: Experiment is live on comet.ml https://www.comet.com/nikittossii/omgemmotion/7e9d2e1b81bd4b01938c1e869c427b5d



Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

INFO:pytorch_lightning.utilities.rank_zero:`Trainer.fit` stopped: `max_epochs=20` reached.
COMET INFO: ---------------------------
COMET INFO: Comet.ml Experiment Summary
COMET INFO: ---------------------------
COMET INFO:   Data:
COMET INFO:     display_summary_level : 1
COMET INFO:     url                   : https://www.comet.com/nikittossii/omgemmotion/7e9d2e1b81bd4b01938c1e869c427b5d
COMET INFO:   Metrics [count] (min, max):
COMET INFO:     lr-AdamW [10]       : (0.0008261686238355864, 0.00099)
COMET INFO:     train_F1_macro [20] : (0.2759961187839508, 0.535795271396637)
COMET INFO:     train_F1_micro [20] : (0.2759961187839508, 0.535795271396637)
COMET INFO:     train_aro [20]      : (-0.0010275086387991905, 0.7393102049827576)
COMET INFO:     train_loss [20]     : (1.2879056930541992, 2.511777400970459)
COMET INFO:     train_val [20]      : (-0.25059565901756287, 0.19976156949996948)
COMET INFO:     val_F1_macro [20]   : (0.10792775452136993, 0.21553131937980652)
COMET INFO:    

# Save model result

In [51]:
train_dataloader = DataLoader(train_dataset, batch_size=1, shuffle=False, collate_fn=collate_fn,)
test_dataloader = DataLoader(test_dataset, batch_size=1, shuffle=False, collate_fn=collate_fn,)

prediction_train = trainer.predict(model, train_dataloader)
prediction_test = trainer.predict(model, test_dataloader)

Predicting: 25it [00:00, ?it/s]

COMET ERROR: Failed to calculate active processors count. Fall back to default CPU count 1
COMET INFO: Experiment is live on comet.ml https://www.comet.com/nikittossii/omgemmotion/7e9d2e1b81bd4b01938c1e869c427b5d

COMET INFO: -----------------------------------
COMET INFO: Comet.ml ExistingExperiment Summary
COMET INFO: -----------------------------------
COMET INFO:   Data:
COMET INFO:     display_summary_level : 1
COMET INFO:     url                   : https://www.comet.com/nikittossii/omgemmotion/7e9d2e1b81bd4b01938c1e869c427b5d
COMET INFO:   Others:
COMET INFO:     Name : W2V_RNN_multytask_CE+MSE
COMET INFO: -----------------------------------
COMET INFO: Uploading 1 metrics, params and output messages


Predicting: 25it [00:00, ?it/s]

COMET ERROR: Failed to calculate active processors count. Fall back to default CPU count 1
COMET INFO: Experiment is live on comet.ml https://www.comet.com/nikittossii/omgemmotion/7e9d2e1b81bd4b01938c1e869c427b5d

COMET INFO: -----------------------------------
COMET INFO: Comet.ml ExistingExperiment Summary
COMET INFO: -----------------------------------
COMET INFO:   Data:
COMET INFO:     display_summary_level : 1
COMET INFO:     url                   : https://www.comet.com/nikittossii/omgemmotion/7e9d2e1b81bd4b01938c1e869c427b5d
COMET INFO:   Others:
COMET INFO:     Name : W2V_RNN_multytask_CE+MSE
COMET INFO: -----------------------------------
COMET INFO: Uploading 1 metrics, params and output messages


In [52]:
with open(f"/content/drive/MyDrive/OMG_EMO/unswers/{EXPIREMENT_NAME}_train.pkl", "wb") as f:
  pickle.dump([prediction_train, train_dataset.row_path], f)

with open(f"/content/drive/MyDrive/OMG_EMO/unswers/{EXPIREMENT_NAME}_test.pkl", "wb") as f:
  pickle.dump([prediction_test, test_dataset.row_path], f)