In [1]:
!pip install pandas
!pip install transformers
!pip install accelerate -U
!pip install datasets
!pip install scikit-learn
!pip install wandb

[0m

In [2]:
import os
import sys
import torch
import pandas as pd
import numpy as np
import torch.nn as nn
import torch.nn.functional as F
from torch.utils.data import DataLoader
from transformers import AutoFeatureExtractor, WhisperForAudioClassification, Trainer, TrainingArguments
import datetime
from sklearn.metrics import accuracy_score
# sys.path.append("E:/university/FYT/repos/multi_modal_ser")
sys.path.append("/home/multi_modal_ser")
from utils.dataset import MMSERDataset
from datasets import load_metric
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
from torch.utils.data import Dataset, DataLoader
import torch
from torch.utils.data import Dataset, Subset
from processed_dataset import ProcessedDataset

In [3]:
import sys
import logging

nblog = open("hubert_pipe_nb.log", "a+")
sys.stdout.echo = nblog
sys.stderr.echo = nblog

get_ipython().log.handlers[0].stream = nblog
get_ipython().log.setLevel(logging.INFO)

%autosave 5

Autosaving every 5 seconds


### HUBERT

In [4]:
# Identifier of the pretrained checkpoint used throughout the notebook: it is
# passed to ProcessedDataset, AutoModel.from_pretrained and CustomClassifier.
MODEL_NAME = "facebook/hubert-large-ls960-ft"

In [5]:
# mmser_ds = torch.load("E:/datasets/preprocessed/dataset/mmser_ds.pt")
# The file holds a pickled Python object (it exposes a `.df_` DataFrame), not a
# plain tensor/state-dict, so it cannot be read in `weights_only=True` mode,
# which newer PyTorch releases use by default. Request full unpickling
# explicitly so the cell behaves the same on every PyTorch version.
# SECURITY: full unpickling can execute arbitrary code - only load files that
# were produced by this project.
mmser_ds = torch.load("/home/mmser_ds.pt", weights_only=False)
print("Emotion ID: ", mmser_ds.df_["emotion_id"].unique())
# Wrap the raw dataset for the chosen checkpoint (see processed_dataset.py).
processed_ds = ProcessedDataset(mmser_ds, MODEL_NAME)

Emotion ID:  [2. 1. 3. 0.]


100%|██████████| 5531/5531 [00:09<00:00, 602.10it/s]


### Build Model

In [6]:
from transformers import AutoProcessor, HubertModel, AutoModel
# Load the pretrained checkpoint named by MODEL_NAME.
# NOTE(review): `encoder_model` is not referenced by any other cell visible in
# this notebook (pipeline_audio builds its own CustomClassifier from
# MODEL_NAME) - confirm it is still needed, since it keeps a second copy of a
# large model in memory.
encoder_model = AutoModel.from_pretrained(MODEL_NAME)

Some weights of HubertModel were not initialized from the model checkpoint at facebook/hubert-large-ls960-ft and are newly initialized: ['hubert.encoder.pos_conv_embed.conv.parametrizations.weight.original0', 'hubert.encoder.pos_conv_embed.conv.parametrizations.weight.original1']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


In [7]:
from custom_classifier import CustomClassifier

### Split Dataset

In [8]:
# Metadata DataFrame of the dataset wrapped by processed_ds.
meta_df_ = processed_ds.base_ds.df_
# Mapping from each value of the "session" column to the index labels of the
# rows that carry it.
sess_dict = meta_df_.groupby("session").groups
# Every index label of the frame as a set, so that one session's rows can be
# removed with a set difference.
all_indices = set(meta_df_.index.tolist())

In [9]:
# Leave-one-session-out splits: for every session, "<session>_test" contains
# that session's rows and "<session>_train" contains all remaining rows.
# NOTE(review): DataFrame index labels are used directly as positions into
# processed_ds - assumes the frame has a default 0..n-1 index; confirm.
sess_ds = {}
for sess, held_out_rows in sess_dict.items():
    remaining_rows = list(all_indices - set(held_out_rows))
    sess_ds[sess + "_train"] = Subset(processed_ds, indices=remaining_rows)
    sess_ds[sess + "_test"] = Subset(processed_ds, indices=held_out_rows)
    

### Set SESS_ID

In [10]:
def build_ds(sess_id, train_frac=0.75, seed=None):
    """Build the train/validation/test datasets for one held-out session.

    The "<sess_id>_train" entry of ``sess_ds`` is randomly divided into a
    training and a validation part; the "<sess_id>_test" entry is returned
    unchanged as the test set. The three sizes are printed.

    Args:
        sess_id: Session key; ``sess_ds`` must contain ``sess_id + "_train"``
            and ``sess_id + "_test"``.
        train_frac: Fraction of the training pool kept for training (rounded
            down to a whole number of samples); the rest becomes validation.
        seed: Optional integer seed for the random split. ``None`` (default)
            uses PyTorch's global random generator, so the split differs
            between runs unless that generator was seeded by the caller.

    Returns:
        Tuple ``(train_set, val_set, test_set)``.
    """
    train_pool = sess_ds[sess_id + "_train"]
    train_size = int(len(train_pool) * train_frac)
    val_size = len(train_pool) - train_size

    # Only pass a generator when a seed was requested, so the default call
    # keeps drawing from the global generator exactly as before.
    split_kwargs = {}
    if seed is not None:
        split_kwargs["generator"] = torch.Generator().manual_seed(seed)

    train_set, val_set = torch.utils.data.random_split(
        train_pool, [train_size, val_size], **split_kwargs
    )
    test_set = sess_ds[sess_id + "_test"]

    print("Train Samples:", len(train_set))
    print("Val Samples:", len(val_set))
    print("Test Samples:", len(test_set))

    return train_set, val_set, test_set

In [11]:
from custom_trainer import CustomTrainer, compute_metrics

### Pipeline

In [12]:
import wandb
# Authenticate with Weights & Biases before any run is started.
# NOTE(review): no key is passed here, so this presumably relies on stored
# credentials or an interactive prompt - never paste the API key into the
# notebook itself.
wandb.login()

Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.
[34m[1mwandb[0m: Currently logged in as: [33mmmser[0m. Use [1m`wandb login --relogin`[0m to force relogin


True

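API: <redacted - this key was stored in the notebook in plain text; revoke it and provide the replacement through the WANDB_API_KEY environment variable>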

In [13]:
import json

def pipeline_audio(sess_id):
    """Train and evaluate the audio classifier with one session held out.

    Steps:
      1. Build a ``CustomClassifier`` on ``MODEL_NAME`` with one output per
         distinct ``emotion_id`` and freeze its encoder.
      2. Train with ``CustomTrainer`` on the splits from ``build_ds(sess_id)``,
         logging to Weights & Biases.
      3. Evaluate on the validation split and predict on the held-out session.
      4. Save the projector and classifier weights under
         ``/home/freeze/<sess_id>/{projector,classifier}/<date>.pt``.
      5. Print both metric dicts and write them to ``<sess_id>_eval.json`` and
         ``<sess_id>_test.json`` in the current working directory.

    Args:
        sess_id: Session key understood by ``build_ds``.

    Returns:
        None. Results are delivered through the files and printed output.
    """
    model = CustomClassifier(MODEL_NAME, mmser_ds.df_["emotion_id"].nunique())
    # Freeze the encoder; the projector is explicitly marked trainable.
    for param in model.encoder.parameters():
        param.requires_grad = False
    for param in model.projector.parameters():
        param.requires_grad = True

    wandb.init()
    try:
        print(sess_id)
        train_set, val_set, test_set = build_ds(sess_id)

        # Compute the date once so every path of this run agrees, even if the
        # run crosses midnight.
        run_date = datetime.datetime.now().date().strftime("%Y-%m-%d")
        output_dir = os.path.join(
            "/home/multi_modal_ser/finetune_encoder/check_pts",
            "HUBERT",
            sess_id,
            run_date,
        )

        train_batch_size = 40
        # Log and evaluate roughly every 1000 training samples.
        steps_per_report = int(1000 / train_batch_size)

        # All options go through the constructor so TrainingArguments can
        # validate and normalise them; assigning attributes afterwards skips
        # that step.
        training_args = TrainingArguments(
            output_dir,
            report_to="wandb",
            remove_unused_columns=False,
            per_device_train_batch_size=train_batch_size,
            per_device_eval_batch_size=20,
            logging_strategy="steps",
            logging_steps=steps_per_report,
            evaluation_strategy="steps",
            eval_steps=steps_per_report,
            # No checkpoints are written (save_strategy="no"), so there is no
            # "best" checkpoint to reload when training ends. The previous code
            # assigned the tuple `(True,)` here (stray trailing comma), which
            # had no effect for the same reason.
            # NOTE(review): enable a save strategy if best-model selection is
            # actually wanted.
            load_best_model_at_end=False,
            save_strategy="no",
            learning_rate=7e-4,
            num_train_epochs=50,
        )

        trainer = CustomTrainer(
            model=model,
            args=training_args,
            train_dataset=train_set,
            eval_dataset=val_set,
            compute_metrics=compute_metrics,
        )

        trainer.train()
        eval_result = trainer.evaluate()
        test_result = trainer.predict(test_set).metrics

        # Persist the two sub-modules separately.
        freeze_root = os.path.join("/home/freeze", sess_id)
        for part_name, part_module in (
            ("projector", model.projector),
            ("classifier", model.classifier),
        ):
            part_dir = os.path.join(freeze_root, part_name)
            os.makedirs(part_dir, exist_ok=True)
            weights_path = os.path.join(part_dir, run_date + ".pt")
            torch.save(part_module.state_dict(), weights_path)
            # Read the file straight back to confirm it is loadable.
            part_module.load_state_dict(torch.load(weights_path))

        print(eval_result)
        print(test_result)

        # Write each metric dict to "<sess_id>_<split>.json".
        for split_name, metrics in (("eval", eval_result), ("test", test_result)):
            with open("{}_{}.json".format(sess_id, split_name), "w") as outfile:
                json.dump(metrics, outfile, indent=4)
    finally:
        # Close this session's W&B run (also on failure) so that the next call
        # starts a fresh run.
        wandb.finish()


In [None]:
# Run the pipeline for every session key from position 7 onwards.
# NOTE(review): the offset skips the first seven sessions - presumably they
# were handled in an earlier run; confirm before re-running from a fresh kernel.
pending_sessions = list(sess_dict)[7:]
for sess_id in pending_sessions:
    pipeline_audio(sess_id)

Some weights of HubertModel were not initialized from the model checkpoint at facebook/hubert-large-ls960-ft and are newly initialized: ['hubert.encoder.pos_conv_embed.conv.parametrizations.weight.original0', 'hubert.encoder.pos_conv_embed.conv.parametrizations.weight.original1']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Ses04M
Train Samples: 3789
Val Samples: 1264
Test Samples: 478


Step,Training Loss,Validation Loss,Wa,Ua,F1,Accuracy
25,1.3012,1.2782,0.380538,0.416567,0.349504,0.380538
50,1.2167,1.25139,0.422468,0.443883,0.417946,0.422468
75,1.1786,1.180443,0.456487,0.483109,0.439458,0.456487


  metric_f1 = load_metric("f1")
