In [1]:
import numpy as np
import pandas as pd
from sklearn.model_selection import StratifiedKFold
from transformers import AutoTokenizer, AutoModelForSequenceClassification, AutoModel, AutoConfig
import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
from torch.utils.data import Dataset, DataLoader
from transformers import AdamW
from transformers.optimization import get_cosine_schedule_with_warmup, get_linear_schedule_with_warmup
from sklearn.metrics import f1_score
import time
import math
from tqdm import tqdm
import torchaudio
import librosa
import IPython.display as ipd
from transformers import AutoConfig, Wav2Vec2Processor
from transformers.models.wav2vec2.modeling_wav2vec2 import (
    Wav2Vec2PreTrainedModel,
    Wav2Vec2Model
)

In [None]:
#Check Parameters
fold = 0
model_name = 'klue/roberta-base'
BATCH_SIZE =64
MAX_LEN =196
MAX_WV_LEN = 4 * 16000
EPOCHS = 30
set_lr = 1e-4

In [2]:
import random
import os 
def seed_everything(seed: int):
    random.seed(seed)
    os.environ["PYTHONHASHSEED"] = str(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    torch.cuda.manual_seed(seed)
    torch.backends.cudnn.deterministic = True
    torch.backends.cudnn.benchmark = True

seed_everything(42)

In [3]:
targets = ['neutral',
    'angry',
    'disgust',
    'fear',
    'happy',
    'sad',
    'surprise']

In [4]:
df = pd.read_csv('/workspace/ETRI/new_data.csv')
df = df[['Numb','Segment ID','Total Evaluation','text','max_count']+ targets]

In [5]:
mapping_info = {"neutral":0,"angry":1,"disgust":2,"fear":3,"happy":4,"sad":5,"surprise":6}
df['target'] = df['Total Evaluation'].map(mapping_info)

In [None]:
import re
s1 = re.compile('\n')
s1 = re.compile('\n')

def remove_characters(sentence, lower=True):
    sentence = s1.sub(' ', str(sentence))
    if lower:
        sentence = sentence.lower()
    return sentence

df['text'] = df['text'].map(remove_characters)

In [7]:
df['not_neutral'] = 1 - df['neutral']

In [8]:
wav_dir = '/workspace/ETRI/KEMDy20/wav/'
for i in range(len(df)):
    SegmentID = df.iloc[i,1]
    tmp_dir = wav_dir + "Session" + SegmentID[4:6] +"/" + SegmentID + ".wav"
    df.loc[i,'wav_dir'] = tmp_dir

In [9]:
folds = [['Sess01','Sess02','Sess03','Sess04','Sess05','Sess06','Sess07','Sess08'],
['Sess09','Sess10','Sess11','Sess12','Sess13','Sess14','Sess15','Sess16'],
['Sess17','Sess18','Sess19','Sess20','Sess21','Sess22','Sess23','Sess24'],
['Sess25','Sess26','Sess27','Sess28','Sess29','Sess30','Sess31','Sess32'],
['Sess33','Sess34','Sess35','Sess36','Sess37','Sess38','Sess39','Sess40']]

In [10]:
test_list = '|'.join(folds[fold])

In [11]:
test = df[df['Segment ID'].str.contains(test_list)]

In [12]:
train =df[df['Segment ID'].str.contains(test_list) == False]

In [19]:
train = train.reset_index(drop=True)

In [20]:
test = test.reset_index(drop=True)

In [22]:
train=train.dropna( subset=['Total Evaluation'])

In [23]:
num_labels = 2
input_column = "wav_dir"
output_column = "Total Evaluation"

model_name_or_path = "facebook/wav2vec2-base-960h"
pooling_mode = "mean"

audio_config = AutoConfig.from_pretrained(
    model_name_or_path,
    num_labels=num_labels,
    finetuning_task="wav2vec2_clf",
)
setattr(audio_config, 'pooling_mode', pooling_mode)

In [24]:
processor = Wav2Vec2Processor.from_pretrained(model_name_or_path,)
target_sampling_rate = processor.feature_extractor.sampling_rate
print(f"The target sampling rate: {target_sampling_rate}")

The target sampling rate: 16000


In [25]:
def speech_file_to_array_fn(path):
    speech_array, sampling_rate = torchaudio.load(path)
    resampler = torchaudio.transforms.Resample(sampling_rate, target_sampling_rate)
    speech = resampler(speech_array).squeeze().numpy()
    input_values = processor(speech, sampling_rate=sampling_rate, return_tensors="pt").input_values
    return input_values

In [26]:
class SentimentDataset(Dataset):
  def __init__(self, subjects, df, tokenizer, max_len,wav_dir,max_wv_len):
    self.subjects = subjects
    self.df = df
    self.tokenizer = tokenizer
    self.max_len = max_len
    self.wav_dir = wav_dir
    self.max_wv_len = max_wv_len
  def __len__(self):
    return len(self.subjects)
  def __getitem__(self, item):
    subject = str(self.subjects[item])
    target = self.df.loc[item,['neutral','not_neutral']].values.astype('float')
    encoding = self.tokenizer.encode_plus(
      subject,
      add_special_tokens=True,
      max_length=self.max_len,
      return_token_type_ids=False,
      padding = 'max_length',
      truncation = True,
      return_attention_mask=True,
      return_tensors='pt',
    )

    wav_data = speech_file_to_array_fn(self.wav_dir[item])
    if wav_data.size(-1) > self.max_wv_len:
      wav_data = wav_data[:, :self.max_wv_len]
    else:
      k = self.max_wv_len // wav_data.size(-1)
      tmp = torch.zeros(self.max_wv_len - k * wav_data.size(-1)).unsqueeze(0)
      tmp2 = wav_data
      for i in range(k-1):
        wav_data = torch.cat([wav_data,tmp2], dim=1) 
      wav_data = torch.cat([wav_data,tmp], dim=1) 

    return {
      'subject_text': subject,
      'input_ids': encoding['input_ids'].flatten(),
      'attention_mask': encoding['attention_mask'].flatten(),
      'targets': torch.tensor(target, dtype=torch.float32),
      'wav_data': wav_data.flatten(),
    }
def create_data_loader(df, tokenizer, max_len, max_wv_len, batch_size, shuffle_=False, valid=False):
  ds = SentimentDataset(
    subjects=df.text.to_numpy(),
    df=df,
    tokenizer=tokenizer,
    max_len=max_len,
    wav_dir=df.wav_dir.to_numpy(),
    max_wv_len = max_wv_len,
  )
  return DataLoader(
    ds,
    batch_size=batch_size,
    num_workers=4,
    shuffle = shuffle_
  )

In [27]:
def calc_review_acc(pred, label):
    _, idx = pred.max(1)
    
    acc = torch.eq(idx, label).sum().item() / idx.size()[0] 
    x = label.cpu().numpy()
    y = idx.cpu().numpy()
    f1_acc = f1_score(x, y, average='macro')
    return acc,f1_acc


class AverageMeter(object):
    """Computes and stores the average and current value"""
    def __init__(self):
        self.reset()

    def reset(self):
        self.val = 0
        self.avg = 0
        self.sum = 0
        self.count = 0

    def update(self, val, n=1):
        self.val = val
        self.sum += val * n
        self.count += n
        self.avg = self.sum / self.count


def asMinutes(s):
    m = math.floor(s / 60)
    s -= m * 60
    return '%dm %ds' % (m, s)


def timeSince(since, percent):
    now = time.time()
    s = now - since
    es = s / (percent)
    rs = es - s
    return '%s (remain %s)' % (asMinutes(s), asMinutes(rs))

In [28]:

tokenizer = AutoTokenizer.from_pretrained(model_name)
train_data_loader = create_data_loader(train, tokenizer, MAX_LEN, MAX_WV_LEN, BATCH_SIZE, shuffle_=True)
test_data_loader = create_data_loader(test, tokenizer, MAX_LEN, MAX_WV_LEN, 1, valid=True)

In [29]:
class SentimentClassifier(nn.Module):
  def __init__(self, n_classes,audio_config):
    super(SentimentClassifier, self).__init__()
    self.bert = AutoModel.from_pretrained(model_name)
    self.drop = nn.Dropout(p=0.1)
    self.audio_config = audio_config
    self.pooling_mode = audio_config.pooling_mode
    self.wav2vec2 = Wav2Vec2Model(audio_config)
    def get_cls(target_size= n_classes):
      return nn.Sequential(
          nn.Linear(self.bert.config.hidden_size + self.audio_config.hidden_size, self.bert.config.hidden_size + self.audio_config.hidden_size),
          nn.LayerNorm(self.bert.config.hidden_size + self.audio_config.hidden_size),
          nn.Dropout(p = 0.1),
          nn.ReLU(),
          nn.Linear(self.bert.config.hidden_size + self.audio_config.hidden_size, target_size),
      )  
    self.cls = get_cls(n_classes)



  def freeze_feature_extractor(self):
      self.wav2vec2.feature_extractor._freeze_parameters()

  def merged_strategy(
          self,
          hidden_states,
          mode="mean"
  ):
      if mode == "mean":
          outputs = torch.mean(hidden_states, dim=1)
      elif mode == "sum":
          outputs = torch.sum(hidden_states, dim=1)
      elif mode == "max":
          outputs = torch.max(hidden_states, dim=1)[0]
      else:
          raise Exception(
              "The pooling method hasn't been defined! Your pooling mode must be one of these ['mean', 'sum', 'max']")

      return outputs

  def forward(self, input_ids, attention_mask,input_values,
            audio_attention_mask=None,
            output_attentions=None,
            output_hidden_states=None,
            return_dict=None,):
    _, pooled_output = self.bert(
      input_ids=input_ids,
      attention_mask=attention_mask,
      return_dict=False
    )
    output = self.drop(pooled_output)

    return_dict = return_dict if return_dict is not None else self.audio_config.use_return_dict
    output2 = self.wav2vec2(
            input_values,
            attention_mask=audio_attention_mask,
            output_attentions=output_attentions,
            output_hidden_states=output_hidden_states,
            return_dict=return_dict,
        )
    hidden_states = output2[0]
    hidden_states = self.merged_strategy(hidden_states, mode=self.pooling_mode)

    output2 = self.drop(hidden_states)

    output = torch.cat([output,output2],1) 
    out = self.cls(output)

    return out

In [30]:
device = torch.device("cuda")

EPOCHS = 20
model = SentimentClassifier(n_classes=2,audio_config = audio_config).to(device)
optimizer = optim.AdamW(model.parameters(), lr=1e-5)
total_steps = len(train_data_loader) * EPOCHS
scheduler = get_cosine_schedule_with_warmup(
  optimizer,
  num_warmup_steps=int(total_steps*0.1),
  num_training_steps=total_steps
)

nSamples = train.target.value_counts().sort_index().tolist()
num = 0
for target in targets:
    nSamples[num] *=train[target].mean()
    num +=1
    
normedWeights = [1 - (x / sum(nSamples)) for x in nSamples]
for i in range(2,len(normedWeights)):
  normedWeights[1] += normedWeights[i]
normedWeights = normedWeights[:2]

normedWeights = torch.FloatTensor(normedWeights).to(device)


loss_fn = nn.MultiLabelSoftMarginLoss(weight=normedWeights).to(device)

Some weights of the model checkpoint at klue/roberta-base were not used when initializing RobertaModel: ['lm_head.decoder.bias', 'lm_head.bias', 'lm_head.layer_norm.weight', 'lm_head.decoder.weight', 'lm_head.dense.weight', 'lm_head.dense.bias', 'lm_head.layer_norm.bias']
- This IS expected if you are initializing RobertaModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing RobertaModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of RobertaModel were not initialized from the model checkpoint at klue/roberta-base and are newly initialized: ['roberta.pooler.dense.weight', 'roberta.pooler.dense.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for

In [32]:
model = nn.DataParallel(model)

In [33]:
def train_epoch(model,data_loader,loss_fn,optimizer,device,scheduler,n_examples):

  batch_time = AverageMeter()     
  data_time = AverageMeter()      
  losses = AverageMeter()         
  accuracies = AverageMeter()
  f1_accuracies = AverageMeter()
  
  sent_count = AverageMeter()   
    

  start = end = time.time()

  model = model.train()
  correct_predictions = 0
  for step,d in enumerate(data_loader):
    data_time.update(time.time() - end)
    batch_size = d["input_ids"].size(0) 
    wav_data = d["wav_data"].to(device)
    input_ids = d["input_ids"].to(device)
    attention_mask = d["attention_mask"].to(device)
    targets = d["targets"].to(device)
    outputs = model(
      input_ids=input_ids,
      attention_mask=attention_mask,
      input_values=wav_data,
    )
    _, preds = torch.max(outputs, dim=1)
    loss = loss_fn(outputs, targets)
    _, targets_max = torch.max(targets, dim=1)
    correct_predictions += torch.sum(preds == targets_max)
    losses.update(loss.item(), batch_size)
    loss.backward()
    nn.utils.clip_grad_norm_(model.parameters(), max_norm=1.0)
    optimizer.step()
    scheduler.step()
    optimizer.zero_grad()

    batch_time.update(time.time() - end)
    end = time.time()

    sent_count.update(batch_size)
    if step % 50 == 0 or step == (len(data_loader)-1):
                acc,f1_acc = calc_review_acc(outputs, targets_max)
                accuracies.update(acc, batch_size)
                f1_accuracies.update(f1_acc, batch_size)

                
                print('Epoch: [{0}][{1}/{2}] '
                      'Data {data_time.val:.3f} ({data_time.avg:.3f}) '
                      'Elapsed {remain:s} '
                      'Loss: {loss.val:.3f}({loss.avg:.3f}) '
                      'Acc: {acc.val:.3f}({acc.avg:.3f}) '   
                      'f1_Acc: {f1_acc.val:.3f}({f1_acc.avg:.3f}) '           
                      'sent/s {sent_s:.0f} '
                      .format(
                      epoch, step+1, len(data_loader),
                      data_time=data_time, loss=losses,
                      acc=accuracies,
                      f1_acc=f1_accuracies,
                      remain=timeSince(start, float(step+1)/len(data_loader)),
                      sent_s=sent_count.avg/batch_time.avg
                      ))

  return correct_predictions.double() / n_examples, losses.avg

def validate(model,data_loader,loss_fn,optimizer,device,scheduler,n_examples):
  model = model.eval()
  losses = []
  outputs_arr = []
  preds_arr = []
  targets_max_arr = []
  correct_predictions = 0
  for d in tqdm(data_loader):
    input_ids = d["input_ids"].to(device)
    attention_mask = d["attention_mask"].to(device)
    wav_data = d["wav_data"].to(device)
    targets = d["targets"].to(device)
    outputs = model(
      input_ids=input_ids,
      attention_mask=attention_mask,
      input_values=wav_data,
    )
    _, preds = torch.max(outputs, dim=1)
    outputs_arr.append(outputs.cpu().detach().numpy()[0])
    preds_arr.append(preds.cpu().numpy())
    
    loss = loss_fn(outputs, targets)
    _, targets_max = torch.max(targets, dim=1)
    correct_predictions += torch.sum(preds == targets_max)
    targets_max_arr.append(targets_max.cpu().numpy())
    losses.append(loss.item())
    nn.utils.clip_grad_norm_(model.parameters(), max_norm=1.0)
  return correct_predictions.double() / n_examples, np.mean(losses), outputs_arr, preds_arr, targets_max_arr





In [None]:
for epoch in range(EPOCHS):
  print('-' * 10)
  print(f'Epoch {epoch}/{EPOCHS-1}')
  print('-' * 10)
  train_acc, train_loss = train_epoch(
    model,
    train_data_loader,
    loss_fn,
    optimizer, 
    device,
    scheduler,
    len(train)
  )
  print(f'Train loss {train_loss} accuracy {train_acc}')
  print("")
  print("")

In [None]:
validate_acc, validate_loss, outputs_arr, preds_arr, targets_max_arr= validate(
    model,
    test_data_loader,
    loss_fn,
    optimizer,
    device,
    scheduler,
    len(test)
)

In [35]:
tempdf = pd.DataFrame()
k = 0
for i in preds_arr:
    tempdf.loc[k,'pred'] = i[0]
    k +=1

In [None]:
targets = ['neutral',
    'angry',
    'disgust',
    'fear',
    'happy',
    'sad',
    'surprise']
df = pd.read_csv('/workspace/ETRI/new_data.csv')
df = df[['Segment ID','Total Evaluation','text','max_count']+ targets]
mapping_info = {"neutral":0,"angry":1,"disgust":2,"fear":3,"happy":4,"sad":5,"surprise":6}
df['target'] = df['Total Evaluation'].map(mapping_info)
df['text'] = df['text'].map(remove_characters)
wav_dir = '/workspace/ETRI/KEMDy20/wav/'
for i in range(len(df)):
    SegmentID = df.iloc[i,0]
    tmp_dir = wav_dir + "Session" + SegmentID[4:6] +"/" + SegmentID + ".wav"
    df.loc[i,'wav_dir'] = tmp_dir

test_list = '|'.join(folds[fold])
test = df[df['Segment ID'].str.contains(test_list)]
train =df[df['Segment ID'].str.contains(test_list) == False]
train = train[train['Total Evaluation'].str.contains('neutral') == False]
train = train.reset_index(drop=True)
test = test.reset_index(drop=True)
train=train.dropna( subset=['Total Evaluation'])

In [None]:
num_labels = 6
input_column = "wav_dir"
output_column = "Total Evaluation"

model_name_or_path = "facebook/wav2vec2-base-960h"
pooling_mode = "mean"

audio_config = AutoConfig.from_pretrained(
    model_name_or_path,
    num_labels=num_labels,
    finetuning_task="wav2vec2_clf",
)
setattr(audio_config, 'pooling_mode', pooling_mode)

In [None]:
processor = Wav2Vec2Processor.from_pretrained(model_name_or_path,)
target_sampling_rate = processor.feature_extractor.sampling_rate
print(f"The target sampling rate: {target_sampling_rate}")

In [None]:
def speech_file_to_array_fn(path):
    speech_array, sampling_rate = torchaudio.load(path)
    resampler = torchaudio.transforms.Resample(sampling_rate, target_sampling_rate)
    speech = resampler(speech_array).squeeze().numpy()
    input_values = processor(speech, sampling_rate=sampling_rate, return_tensors="pt").input_values
    return input_values

In [None]:
class SentimentDataset(Dataset):
  def __init__(self, subjects, df, tokenizer, max_len,wav_dir,max_wv_len):
    self.subjects = subjects
    self.df = df
    self.tokenizer = tokenizer
    self.max_len = max_len
    self.wav_dir = wav_dir
    self.max_wv_len = max_wv_len
  def __len__(self):
    return len(self.subjects)
  def __getitem__(self, item):
    subject = str(self.subjects[item])
    target = self.df.iloc[item,5:-2].values.astype('float')
    encoding = self.tokenizer.encode_plus(
      subject,
      add_special_tokens=True,
      max_length=self.max_len,
      return_token_type_ids=False,
      padding = 'max_length',
      truncation = True,
      return_attention_mask=True,
      return_tensors='pt',
    )

    wav_data = speech_file_to_array_fn(self.wav_dir[item])
    if wav_data.size(-1) > self.max_wv_len:
      wav_data = wav_data[:, :self.max_wv_len]
    else:
      k = self.max_wv_len // wav_data.size(-1)
      tmp = torch.zeros(self.max_wv_len - k * wav_data.size(-1)).unsqueeze(0)
      tmp2 = wav_data
      for i in range(k-1):
        wav_data = torch.cat([wav_data,tmp2], dim=1) 
      wav_data = torch.cat([wav_data,tmp], dim=1) 

    return {
      'subject_text': subject,
      'input_ids': encoding['input_ids'].flatten(),
      'attention_mask': encoding['attention_mask'].flatten(),
      'targets': torch.tensor(target, dtype=torch.float32),
      'wav_data': wav_data.flatten(),
    }
def create_data_loader(df, tokenizer, max_len, max_wv_len, batch_size, shuffle_=False, valid=False):
  ds = SentimentDataset(
    subjects=df.text.to_numpy(),
    df=df,
    tokenizer=tokenizer,
    max_len=max_len,
    wav_dir=df.wav_dir.to_numpy(),
    max_wv_len = max_wv_len,
  )
  return DataLoader(
    ds,
    batch_size=batch_size,
    num_workers=4,
    shuffle = shuffle_
  )

In [None]:
tokenizer = AutoTokenizer.from_pretrained(model_name)
train_data_loader = create_data_loader(train, tokenizer, MAX_LEN, MAX_WV_LEN, BATCH_SIZE, shuffle_=True)
test_data_loader = create_data_loader(test, tokenizer, MAX_LEN, MAX_WV_LEN, 1, valid=True)

In [None]:
class SentimentClassifier(nn.Module):
  def __init__(self, n_classes,audio_config):
    super(SentimentClassifier, self).__init__()
    self.bert = AutoModel.from_pretrained(model_name)
    self.drop = nn.Dropout(p=0.1)
    self.audio_config = audio_config
    self.pooling_mode = audio_config.pooling_mode
    self.wav2vec2 = Wav2Vec2Model(audio_config)
    def get_cls(target_size= n_classes):
      return nn.Sequential(
          nn.Linear(self.bert.config.hidden_size + self.audio_config.hidden_size, self.bert.config.hidden_size + self.audio_config.hidden_size),
          nn.LayerNorm(self.bert.config.hidden_size + self.audio_config.hidden_size),
          nn.Dropout(p = 0.1),
          nn.ReLU(),
          nn.Linear(self.bert.config.hidden_size + self.audio_config.hidden_size, target_size),
      )  
    self.cls = get_cls(n_classes)



  def freeze_feature_extractor(self):
      self.wav2vec2.feature_extractor._freeze_parameters()

  def merged_strategy(
          self,
          hidden_states,
          mode="mean"
  ):
      if mode == "mean":
          outputs = torch.mean(hidden_states, dim=1)
      elif mode == "sum":
          outputs = torch.sum(hidden_states, dim=1)
      elif mode == "max":
          outputs = torch.max(hidden_states, dim=1)[0]
      else:
          raise Exception(
              "The pooling method hasn't been defined! Your pooling mode must be one of these ['mean', 'sum', 'max']")

      return outputs

  def forward(self, input_ids, attention_mask,input_values,
            audio_attention_mask=None,
            output_attentions=None,
            output_hidden_states=None,
            return_dict=None,):
    _, pooled_output = self.bert(
      input_ids=input_ids,
      attention_mask=attention_mask,
      return_dict=False
    )
    output = self.drop(pooled_output)

    return_dict = return_dict if return_dict is not None else self.audio_config.use_return_dict
    output2 = self.wav2vec2(
            input_values,
            attention_mask=audio_attention_mask,
            output_attentions=output_attentions,
            output_hidden_states=output_hidden_states,
            return_dict=return_dict,
        )
    hidden_states = output2[0]
    hidden_states = self.merged_strategy(hidden_states, mode=self.pooling_mode)

    output2 = self.drop(hidden_states)

    output = torch.cat([output,output2],1) 
    out = self.cls(output)

    return out

In [None]:
device = torch.device("cuda")

model = SentimentClassifier(n_classes=6,audio_config = audio_config).to(device)
optimizer = optim.AdamW(model.parameters(), lr=1e-5)
total_steps = len(train_data_loader) * EPOCHS
scheduler = get_cosine_schedule_with_warmup(
  optimizer,
  num_warmup_steps=int(total_steps*0.1),
  num_training_steps=total_steps
)

targets = [
    'angry',
    'disgust',
    'fear',
    'happy',
    'sad',
    'surprise']

nSamples = train.target.value_counts().sort_index().tolist()
num = 0
for target in targets:
    nSamples[num] *=train[target].mean()
    num +=1
    
normedWeights = [1 - (x / sum(nSamples)) for x in nSamples]
normedWeights = torch.FloatTensor(normedWeights).to(device)


loss_fn = nn.MultiLabelSoftMarginLoss(weight=normedWeights).to(device)

In [None]:
for epoch in range(EPOCHS):
  print('-' * 10)
  print(f'Epoch {epoch}/{EPOCHS-1}')
  print('-' * 10)
  train_acc, train_loss = train_epoch(
    model,
    train_data_loader,
    loss_fn,
    optimizer, 
    device,
    scheduler,
    len(train)
  )
  print(f'Train loss {train_loss} accuracy {train_acc}')
  print("")
  print("")

In [None]:
validate_acc, validate_loss, outputs_arr, preds_arr, targets_max_arr= validate(
    model,
    test_data_loader,
    loss_fn,
    optimizer,
    device,
    scheduler,
    len(test)
)

In [None]:
tempdf2 = pd.DataFrame()
k = 0
for i in preds_arr:
    tempdf2.loc[k,'pred'] = i[0]
    k +=1

for i in range(len(tempdf)):
    if tempdf.loc[i,'pred'] == 1:
        tempdf.loc[i,'pred'] = tempdf2.loc[i,"pred"] + 1
mapping_info = {"neutral":0,"angry":1,"disgust":2,"fear":3,"happy":4,"sad":5,"surprise":6}
df['target'] = df['Total Evaluation'].map(mapping_info)

test_list = '|'.join(folds[fold])
train =df[df['Segment ID'].str.contains(test_list) == False]
valid = df[df['Segment ID'].str.contains(test_list)]

X = valid['target'].to_list()
y = tempdf['pred'].to_list()

In [None]:
from sklearn.metrics import f1_score
f1_score(y, X, average='macro')


In [None]:
f1_score(y, X, average='micro')

In [37]:
from sklearn.metrics import accuracy_score
accuracy_score(y,X)

0.752659724176752

In [38]:
from sklearn.metrics import balanced_accuracy_score
balanced_accuracy_score(y,X)

0.8522727272727273