# TER using a pretrained BERT transformer model
## Morgan Sandler (sandle20@msu.edu)

In [38]:
import pandas as pd
import numpy as np
from tqdm import tqdm

### Load the pretrained model from Hugging Face Transformers

In [2]:
from transformers import pipeline

# Text-classification pipeline backed by the
# 'bhadresh-savani/bert-base-uncased-emotion' checkpoint.
# return_all_scores=True yields a score for every label (nested list output),
# which is the shape the rest of this notebook indexes into.
classifier = pipeline(
    "text-classification",
    model='bhadresh-savani/bert-base-uncased-emotion',
    return_all_scores=True,
)

# Smoke test on one sentence to confirm the model loads and to show the output format.
prediction = classifier("I love using transformers. The best part is wide range of support and its easy to use")
print(prediction)

2022-11-14 16:20:58.144483: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  AVX2 FMA
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.
The cache for model files in Transformers v4.22.0 has been updated. Migrating your old cache. This is a one-time only operation. You can interrupt this and resume the migration later on by calling `transformers.utils.move_cache()`.


Moving 5 files to the new cache system


  0%|          | 0/5 [00:00<?, ?it/s]

Downloading:   0%|          | 0.00/935 [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/438M [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/285 [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/232k [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/466k [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/112 [00:00<?, ?B/s]



[[{'label': 'sadness', 'score': 0.0005138238193467259}, {'label': 'joy', 'score': 0.9972521662712097}, {'label': 'love', 'score': 0.000744332792237401}, {'label': 'anger', 'score': 0.000740493240300566}, {'label': 'fear', 'score': 0.00032938597723841667}, {'label': 'surprise', 'score': 0.00041974912164732814}]]


### Load IEMOCAP text data from CSV

In [63]:
# Read the IEMOCAP transcripts with their emotion labels; the path is relative
# to the directory the notebook is run from.
iemo_df = pd.read_csv(filepath_or_buffer='data/iemocap-text-label.csv')
iemo_df.head(n=5)

Unnamed: 0.1,Unnamed: 0,file_ids,start_times,stop_times,texts,labels
0,0,Ses01F_impro01_F000,6.2901,8.2357,Excuse me.,neu
1,1,Ses01F_impro01_F001,10.01,11.3925,Do you have your forms?,neu
2,2,Ses01F_impro01_F002,14.8872,18.0175,Yeah.,neu
3,3,Ses01F_impro01_F003,19.29,20.7875,Let me see them.,xxx
4,4,Ses01F_impro01_F004,21.3257,24.74,Is there a problem?,xxx


In [35]:
def argmax(prediction):
    """Return the highest-scoring label entry from a classifier prediction.

    Parameters
    ----------
    prediction : sequence
        Classifier output for a single text: a sequence whose first element
        is a sequence of ``{'label': ..., 'score': ...}`` dicts.

    Returns
    -------
    dict
        The entry with the largest ``'score'``. On ties the earliest entry wins.

    Raises
    ------
    IndexError
        If ``prediction`` is empty or its first element holds no entries.
    KeyError
        If an entry has no ``'score'`` key.
    """
    label_scores = prediction[0]
    if len(label_scores) == 0:
        # Keep the IndexError the index-based implementation raised, with a clearer message.
        raise IndexError('prediction contains no label scores')
    # max() keeps the first maximal element, matching a strict `>` scan.
    return max(label_scores, key=lambda entry: entry['score'])

In [45]:
# Spot-check classifier + argmax on a single utterance taken from the dataset.
argmax(classifier('Do you have your forms?'))

{'label': 'anger', 'score': 0.7170816659927368}

In [79]:
# Predict one emotion label per utterance. 'xxx' marks rows for which the
# classifier could not produce a prediction.
preds = []
for text in tqdm(iemo_df['texts']):
    try:
        pred_label = argmax(classifier(text))['label']
    except Exception:
        # Best-effort: a row the model cannot handle gets the fallback label.
        # Catching Exception (not a bare `except:`) lets KeyboardInterrupt and
        # SystemExit still stop this multi-minute loop.
        pred_label = 'xxx'
    preds.append(pred_label)

9985it [06:18, 26.37it/s]


In [80]:
# Attach the predictions as a new column, one per row in dataframe order.
iemo_df['pred'] = preds

In [84]:
# Distinct predicted labels (model labels plus the 'xxx' fallback).
np.unique(preds)

array(['anger', 'fear', 'joy', 'love', 'sadness', 'surprise', 'xxx'],
      dtype='<U8')

In [85]:
# Distinct ground-truth labels, for comparison with the predicted label set.
np.unique(iemo_df.labels)

array(['ang', 'dis', 'exc', 'fea', 'fru', 'hap', 'neu', 'oth', 'sad',
       'sur', 'xxx'], dtype=object)

In [86]:
# Map both label vocabularies onto 6 shared classes: ang, fea, hap, sad, sur, xxx.
# 'xxx' stands for "no comparable class".

# Text-model labels -> shared classes ('joy' and 'love' both count as 'hap').
mapping_txt_to_ser = {
    'anger':'ang',
    'fear':'fea',
    'joy':'hap',
    'love':'hap',
    'sadness':'sad',
    'surprise':'sur',
    'xxx':'xxx',
}
# IEMOCAP labels -> shared classes. 'exc' is merged into 'hap'; labels with no
# text-model counterpart ('dis', 'fru', 'neu', 'oth') collapse to 'xxx'.
mapping_ser_to_txt = {
    'ang':'ang',
    'dis':'xxx',
    'exc':'hap',
    'fea':'fea',
    'fru':'xxx',
    'hap':'hap',
    'neu':'xxx',
    'oth':'xxx',
    'sad':'sad',
    'sur':'sur',
    'xxx':'xxx'
}
# 1 where the mapped ground truth equals the mapped prediction, else 0.
# An unknown label on either side raises KeyError rather than being silently
# scored as wrong.
# NOTE(review): a row whose ground truth maps to 'xxx' is only counted correct
# when the prediction is also 'xxx', and the text model has no such class of
# its own, so those rows pull the score down. Confirm this is intended.
corr = [
    int(mapping_ser_to_txt[label] == mapping_txt_to_ser[pred])
    for label, pred in zip(iemo_df['labels'], iemo_df['pred'])
]

In [92]:
# Share of rows whose mapped prediction matches the mapped label, as a percentage.
# NOTE(review): this is the overall fraction correct across all rows, not a
# per-class average. Confirm that "unweighted" is the intended name for it.
accuracy_pct = round(sum(corr) / len(corr) * 100.0, 1)
print(accuracy_pct, '% unweighted acc')

16.2 % unweighted acc
