## MLP-mixer _convert 


In [5]:
import torch
import numpy as np
from erc.model import MLPMixer

audio_output = torch.ones([3, 1024]) # Batch, Seq
text_output = torch.ones([3, 768]) # Batch, Seq


# Performs a batch matrix-matrix product of matrices stored in input and mat2.
matmul_output = torch.bmm(audio_output.unsqueeze_(2), text_output.unsqueeze_(1))
print(matmul_output.unsqueeze_(1).shape) # Batch, Color, Width, Hight


model = MLPMixer(
    image_size = (1024, 768),
    channels = 1,
    patch_size = 16,
    dim = 512,
    depth = 12,
    num_classes = 7
)
pred = model(matmul_output) 
print(pred.shape)



## AI-Hub dataset


In [6]:
import os 
from glob import glob

def get_hub_txt(self, txt: str, encoding: str = None)-> Tuple[torch.Tensor, torch.Tensor]:
    if self.tokenizer:
        result: dict = self.tokenizer(text=txt,
                                    padding="max_length",
                                    truncation="only_first",
                                    max_length=self.max_length_txt,
                                    return_attention_mask=True,
                                    return_tensors="pt")
        input_ids = result["input_ids"].squeeze()
        mask = result["attention_mask"].squeeze()
        return input_ids, mask
    else:
        return txt, None
    
class AIHubDialog():
    # PRETRAINED_DATA_PATH = '/home/hoesungryu/workspace/AI-Hub_emotion_dialog'
    def __init__(self, PRETRAINED_DATA_PATH):
        self.txt_folder = sorted(glob(os.path.join(PRETRAINED_DATA_PATH,'annotation')+'/*.csv'))
        self.wav_folder = sorted(glob(os.path.join(PRETRAINED_DATA_PATH,'wav')+'/*.wav'))

    def __len__(self):
        assert len(glob(self.wav_folder)) == len(glob(self.txt_folder))
        return len(glob(self.wav_folder)) 
    
    def __getitem__(self, idx:int):
        data = {}
        txt, _, emotion = pd.read_csv(self.txt_folder[idx]).iloc[0].values
    
        # Txt File
        txt, txt_mask = self.get_hub_txt(txt_path=txt, encoding=self.TEXT_ENCODING)
        data["txt"] = txt
        data["txt_mask"] = txt_mask
    
        # emotion 
        data["emotion"] = self.get_emo(emotion)

        sampling_rate, wav, wav_mask = self.get_wav(wav_path=self.wav_folder[idx])
        data["sampling_rate"] = sampling_rate
        data["wav"] = wav
        data["wav_mask"] = wav_mask

        return data
    
import random 
random.seed(42)

@staticmethod
def sampling_with_ratio(total_len : int, train_ratio = 0.8):
    total_len = wav_folder 
    total_idx = [i for i in range(total_len)]
    train_num = int(total_len * train_ratio)

    train_idx = random.sample(total_idx, train_num)
    valid_idx = list(set(total_idx) - set(train_idx))

    return train_idx, valid_idx

@staticmethod
def get_multiple_elements_in_list(in_list, in_indices):
    """리스트에서 복수인덱스 값을 가져온다"""
    return [in_list[i] for i in in_indices]

torch.Size([3, 7])


## Weighted Cross-entropy


In [14]:
import logging
import hydra

import erc
from torch.utils.data import DataLoader
from datasets import load_from_disk

train_dataset = load_from_disk("/home/hoesungryu/etri-erc/kemdy19-kemdy20_train4")
valid_dataset = load_from_disk("/home/hoesungryu/etri-erc/kemdy19-kemdy20_valid4")


train_dataloadaer = DataLoader(train_dataset, batch_size=2)
sample = next(iter(train_dataloadaer))

In [25]:
train_dataset['emotion'].numpy()

array([0, 1, 2, ..., 3, 3, 3])

In [37]:

class_count = Counter(train_dataset['emotion'].detach().cpu().numpy())
class_count

Counter({0: 898, 1: 365, 2: 1564, 3: 11586, 4: 2088, 5: 701, 6: 308})

In [31]:
nSamples = class_count.values()
nSamples

dict_values([898, 365, 1564, 11586, 2088, 701, 308])

In [35]:
import torch
# nSamples = [887, 6130, 480, 317, 972, 101, 128]
normedWeights = torch.FloatTensor([1 - (x / sum(nSamples)) for x in nSamples])
# normedWeights = torch.FloatTensor(normedWeights)

loss = nn.CrossEntropyLoss(normedWeights)


In [40]:
import logging
import hydra
import torch.nn as  nn 
import erc
from torch.utils.data import DataLoader
from datasets import load_from_disk

train_dataset = load_from_disk("/home/hoesungryu/etri-erc/kemdy19-kemdy20_train4")
valid_dataset = load_from_disk("/home/hoesungryu/etri-erc/kemdy19-kemdy20_valid4")


from collections import Counter

def get_classweights(traindataset)-> torch.FloatTensor:
    class_count = Counter(traindataset['emotion'].numpy())
    nSamples = class_count.values()
    normedWeights = [1 - (x / sum(nSamples)) for x in nSamples]
    return torch.FloatTensor(normedWeights)

train_normedWeight = get_classweights(train_dataset)
valid_normedWeight = get_classweights(valid_dataset)


In [41]:
# loss = nn.CrossEntropyLoss(train_normedWeight)
train_normedWeight


tensor([0.9487, 0.9792, 0.9107, 0.3383, 0.8808, 0.9600, 0.9824])

## Get Ground Truth emotion in Multi-label-class


In [1]:

import erc

validation_fold: int = 4
PRETRAINED_DATA_PATH: str = "./aihub"
mode: str = "train"
wav_processor: str = "kresnik/wav2vec2-large-xlsr-korean"
sampling_rate: int = 16_000
wav_max_length: int = 112_000 # 16_000 * 7, 7secs duration
txt_processor: str = "klue/bert-base"
txt_max_length: int = 64
multilabel: bool = True
load_from_cache_file: bool = True
num_proc: int = 8
batched: bool = True
batch_size: int = 1000 # Not a torch batch_size
writer_batch_size: int = 1000
num_data: int = None
preprocess: bool = True

In [2]:
ds_kwargs = dict(
    # Note for hard-coded kwargs
    # generate_csv=False,
    return_bio=False,
    tokenizer_name=None,
    max_length_wav=wav_max_length,
    max_length_txt=txt_max_length,
    multilabel=multilabel,
    validation_fold=validation_fold,
    mode=mode,
    num_data=num_data,
    # PRETRAINED_DATA_PATH=PRETRAINED_DATA_PATH,
)

ds = erc.datasets.KEMDDataset(**ds_kwargs)

INFO:erc.datasets:Instantiate train Dataset


In [3]:
ds[0]

{'segment_id': 'Sess01_script01_M001',
 'sampling_rate': 16000,
 'wav': tensor([4.5776e-04, 1.5259e-04, 3.0518e-04,  ..., 6.1035e-05, 1.8311e-04,
         2.7466e-04]),
 'wav_mask': tensor([1, 1, 1,  ..., 1, 1, 1]),
 'txt': '어 저 지그 지금 사람 친 거야? 지금 사람 친 거 맞지? 그치?\n',
 'txt_mask': None,
 'emotion': array([0.9, 0.1, 0.0, 0.0, 0.0, 0.0, 0.0], dtype=object),
 'valence': tensor(1.7000),
 'arousal': tensor(4.),
 'gender': tensor(0)}

## Load weight with Torchlightning


In [None]:
from erc.

WEIGHTS_PATH = '/home/hoesungryu/etri-erc/weights_AI_HUB/26908-valid_acc0.994.ckpt'