In [1]:
!pip install pandas
!pip install transformers
!pip install accelerate -U
!pip install datasets
!pip install scikit-learn
!pip install wandb



Collecting wandb
  Obtaining dependency information for wandb from https://files.pythonhosted.org/packages/1c/5e/0362fa88679852c7fd3ac85ee5bd949426c4a51a61379010d4089be6d7ac/wandb-0.15.12-py3-none-any.whl.metadata
  Downloading wandb-0.15.12-py3-none-any.whl.metadata (9.8 kB)
Collecting GitPython!=3.1.29,>=1.0.0 (from wandb)
  Obtaining dependency information for GitPython!=3.1.29,>=1.0.0 from https://files.pythonhosted.org/packages/8a/7e/20f7e45878b5aed34320fbeeae8f78acc806e7bd708d00b1c6e64b016f5b/GitPython-3.1.37-py3-none-any.whl.metadata
  Downloading GitPython-3.1.37-py3-none-any.whl.metadata (12 kB)
Collecting sentry-sdk>=1.0.0 (from wandb)
  Obtaining dependency information for sentry-sdk>=1.0.0 from https://files.pythonhosted.org/packages/63/25/d22e1e152e4eac10d39d9132d7b5f1ea4bdfa0b9a1d65fc606a7b90aeefb/sentry_sdk-1.32.0-py2.py3-none-any.whl.metadata
  Downloading sentry_sdk-1.32.0-py2.py3-none-any.whl.metadata (9.8 kB)
Collecting docker-pycreds>=0.4.0 (from wandb)
  Using cach

In [1]:
import os
import sys
import torch
import pandas as pd
import numpy as np
import torch.nn as nn
import torch.nn.functional as F
from torch.utils.data import DataLoader
from transformers import AutoFeatureExtractor, WhisperForAudioClassification, Trainer, TrainingArguments
import datetime
from sklearn.metrics import accuracy_score
# sys.path.append("E:/university/FYT/repos/multi_modal_ser")
sys.path.append("/home/multi_modal_ser")
from utils.dataset import MMSERDataset
from datasets import load_metric
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
from torch.utils.data import Dataset, DataLoader
import torch
from torch.utils.data import Dataset, Subset
from processed_dataset import ProcessedDataset

### HUBERT

In [2]:
# Checkpoint identifier shared by ProcessedDataset, AutoModel.from_pretrained
# and CustomClassifier in the cells below.
MODEL_NAME = "facebook/hubert-large-ls960-ft"

In [3]:
# Alternative (Windows) location of the same file, kept for reference:
# mmser_ds = torch.load("E:/datasets/preprocessed/dataset/mmser_ds.pt")
# NOTE(review): torch.load unpickles the whole dataset object, which executes
# arbitrary code from the file — only load files you produced yourself.
mmser_ds = torch.load("/home/mmser_ds.pt")
# Show which emotion ids occur in the metadata frame.
print("Emotion ID: ", mmser_ds.df_["emotion_id"].unique())
# Wrap the raw dataset for MODEL_NAME; the captured output shows a progress bar
# over 5531 items for this step.
processed_ds = ProcessedDataset(mmser_ds, MODEL_NAME)

Emotion ID:  [2. 1. 3. 0.]


100%|████████████████████████████████████████████████████| 5531/5531 [01:22<00:00, 66.86it/s]


### Build Model

In [4]:
from transformers import AutoProcessor, HubertModel, AutoModel
# Load the pretrained checkpoint named by MODEL_NAME.
# NOTE(review): `encoder_model` is not referenced by any other visible cell
# (CustomClassifier is built from MODEL_NAME directly) — confirm it is still
# needed, since it keeps a second copy of a large model in memory.
encoder_model = AutoModel.from_pretrained(MODEL_NAME)

In [5]:
from custom_classifier import CustomClassifier
# Build the classifier from the checkpoint name and the number of distinct
# emotion ids in the metadata. Later cells access model.encoder,
# model.projector and model.classifier on this instance.
model = CustomClassifier(MODEL_NAME, mmser_ds.df_["emotion_id"].nunique())

### Split Dataset

In [6]:
# Metadata frame of the underlying dataset.
meta_df_ = processed_ds.base_ds.df_
# Mapping: session value -> index labels of the rows belonging to that session.
sess_dict = meta_df_.groupby("session").groups
# Every index label in the metadata frame.
# NOTE(review): these labels are later passed to torch Subset as positional
# indices into processed_ds; that only lines up if the frame's index is the
# default 0..n-1 range — confirm.
all_indices = set(meta_df_.index.tolist())

In [7]:
# Build one leave-one-session-out fold per session:
#   "<session>_train" -> every sample NOT in that session
#   "<session>_test"  -> the samples of that session
# Both subsets receive plain lists of indices. Previously the test subset was
# handed the raw pandas Index from `groupby(...).groups`, while the train
# subset got a list in set-iteration order; the train indices are now sorted so
# the fold contents are listed in a stable order.
sess_ds = {}
for sess, sess_indices in sess_dict.items():
    test_indices = list(sess_indices)
    train_indices = sorted(all_indices - set(test_indices))
    sess_ds[sess+"_train"] = Subset(processed_ds, indices=train_indices)
    sess_ds[sess+"_test"] = Subset(processed_ds, indices=test_indices)
    

### Set SESS_ID

In [8]:
def build_ds(sess_id, train_fraction=0.75, seed=None):
    """Return the train/validation/test datasets for one held-out session.

    The "<sess_id>_train" entry of ``sess_ds`` is split at random into a
    training and a validation part; the "<sess_id>_test" entry is returned
    unchanged as the test set.

    Args:
        sess_id: Session key; ``sess_ds`` must contain ``sess_id + "_train"``
            and ``sess_id + "_test"``.
        train_fraction: Share of the fold's training pool used for training;
            the remainder becomes the validation set. Defaults to 0.75.
        seed: Optional integer seed for the random split. ``None`` (default)
            uses torch's global RNG, so the split differs between runs; pass
            an integer to get the same split every time.

    Returns:
        Tuple ``(train_set, val_set, test_set)``.
    """
    fold_pool = sess_ds[sess_id+"_train"]
    train_size = int(len(fold_pool)*train_fraction)
    # Validation takes the remainder so the two lengths always sum to the pool.
    val_size = len(fold_pool)-train_size
    lengths = [train_size, val_size]

    if seed is None:
        train_set, val_set = torch.utils.data.random_split(fold_pool, lengths)
    else:
        # A dedicated generator keeps the split reproducible without touching
        # the global RNG state.
        generator = torch.Generator().manual_seed(seed)
        train_set, val_set = torch.utils.data.random_split(fold_pool, lengths, generator=generator)
    test_set = sess_ds[sess_id+"_test"]

    print("Train Samples:", len(train_set))
    print("Val Samples:", len(val_set))
    print("Test Samples:", len(test_set))

    return train_set, val_set, test_set

##### Freeze

In [11]:
# Freeze the encoder and leave the projector trainable. The classifier's
# parameters are not touched here and keep whatever flag they already have.
for submodule, trainable in ((model.encoder, False), (model.projector, True)):
    for weight in submodule.parameters():
        weight.requires_grad = trainable

In [13]:
from custom_trainer import CustomTrainer, compute_metrics

### Pipeline

In [15]:
import wandb
# Authenticate with Weights & Biases; pipeline_audio reports to "wandb".
# NOTE(review): supply the key via the environment or the interactive prompt —
# never paste it into the notebook, since cells and outputs get committed.
wandb.login()

Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.
[34m[1mwandb[0m: Currently logged in as: [33mbrucehu[0m. Use [1m`wandb login --relogin`[0m to force relogin


True

API: <redacted — this key was committed in plain text; revoke it and supply a new one via the WANDB_API_KEY environment variable>

In [14]:
import json

def pipeline_audio(sess_id):
    """Train, validate and test the module-level ``model`` for one session fold.

    Steps: build the fold's datasets with ``build_ds``, train for one epoch
    with ``CustomTrainer``, evaluate on the validation set, predict on the
    held-out test set, save the projector and classifier weights under
    ``/home/freeze/<sess_id>/``, and write the test metrics to
    ``<sess_id>.json`` in the current working directory.

    Args:
        sess_id: Session key understood by ``build_ds``.

    Returns:
        None. Results are written to disk and reported to Weights & Biases.

    NOTE(review): ``model`` is the shared module-level instance and is not
    re-initialised here. Calling this function for several sessions in a row
    therefore keeps training the same weights, and a session held out in one
    call was part of the training pool of the earlier calls — confirm this is
    intended.
    """
    print(sess_id)
    train_set, val_set, test_set = build_ds(sess_id)

    # Resolve the date once so the checkpoint directory and both weight files
    # carry the same stamp even if the run crosses midnight.
    run_date = datetime.datetime.now().date().strftime("%Y-%m-%d")

    output_dir = os.path.join(
        "/home/multi_modal_ser/finetune_encoder/check_pts", "HUBERT", sess_id, run_date
    )

    train_batch_size = 40
    # Log and evaluate roughly every 1000 training samples.
    steps_per_report = int(1000/train_batch_size)

    # Pass every setting to the constructor so TrainingArguments validates them;
    # attributes assigned after construction skip that validation.
    training_args = TrainingArguments(
        output_dir,
        report_to="wandb",
        remove_unused_columns=False,
        per_device_train_batch_size=train_batch_size,
        per_device_eval_batch_size=20,
        logging_strategy="steps",
        logging_steps=steps_per_report,
        evaluation_strategy="steps",
        eval_steps=steps_per_report,
        save_strategy="no",
        # The original assigned `True,` (a one-element tuple) here. With
        # save_strategy="no" no checkpoint is ever written, so there is no
        # "best model" to reload; keep this off until checkpoints are saved on
        # the same schedule as evaluation.
        load_best_model_at_end=False,
        learning_rate=1e-3,
        num_train_epochs=1,
    )

    trainer = CustomTrainer(
        model=model,
        args=training_args,
        train_dataset=train_set,
        eval_dataset=val_set,
        compute_metrics=compute_metrics,
    )

    trainer.train()
    trainer.evaluate()
    test_result = trainer.predict(test_set)

    freeze_proj_dir = "/home/freeze/{}/projector".format(sess_id)
    freeze_clas_dir = "/home/freeze/{}/classifier".format(sess_id)
    os.makedirs(freeze_proj_dir, exist_ok=True)
    os.makedirs(freeze_clas_dir, exist_ok=True)

    freeze_proj_file = os.path.join(freeze_proj_dir, run_date+".pt")
    freeze_clas_file = os.path.join(freeze_clas_dir, run_date+".pt")

    torch.save(model.projector.state_dict(), freeze_proj_file)
    torch.save(model.classifier.state_dict(), freeze_clas_file)

    # Load the files straight back: the weights do not change, but this fails
    # immediately if either file was not written in a loadable form.
    model.projector.load_state_dict(torch.load(freeze_proj_file))
    model.classifier.load_state_dict(torch.load(freeze_clas_file))

    # Serialise before opening the file so a failure leaves no partial output.
    json_object = json.dumps(test_result.metrics, indent=4)

    # Write the test metrics to "<sess_id>.json".
    with open("{}.json".format(sess_id), "w") as outfile:
        outfile.write(json_object)


In [None]:
# Run the pipeline for every session key from position 6 onwards.
# NOTE(review): the offset 6 skips the first six sessions — presumably they
# were processed in an earlier run; confirm before a full re-run.
for sess_id in list(sess_dict)[6:]:
    pipeline_audio(sess_id)

