# **INSTALL LIBRARIES**

In [1]:
# !pip install transformers

# **LIBRARIES**

In [2]:
import json
import numpy as np
import torch
import torch.nn as nn
import os
# Dataset
from PIL import Image
from torchvision import transforms
from torchvision.io import read_video, read_image
from torch.utils.data import Dataset, DataLoader
# Model
from transformers import AutoModel
from transformers import AutoTokenizer, BertModel, BertTokenizer, BertGenerationDecoder

# Training parameter
from torch.optim import Adam
# Training process
from tqdm import tqdm
# Metrics
from sklearn.metrics import classification_report
from collections import OrderedDict

In [3]:
# from google.colab import drive
# drive.mount('/content/drive')

# **SETUP PARAMETERS**

In [4]:
FOLDER = '/data/ECF 2.0/'
save_checkpoint_dir = FOLDER + "checkpoints"
save_model_dir = FOLDER + "model"
save_report_dir = FOLDER + "report"
model_name = "BERT"
tokenizer_name = 'michellejieli/emotion_text_classifier'
encoder_name = 'michellejieli/emotion_text_classifier'
decoder_name = 'roberta-base'
optimizer_name = "BERT"
loss_fn_name = "BCE_Loss"
metrics = {}
output_json = FOLDER + "predict"
file_train = {"subtask_1_text_file": FOLDER + "train/Subtask_1_train.json",
              "subtask_2_text_file": FOLDER + "train/Subtask_2_train.json",
              "video_dir": FOLDER + "train/video_with_audio",
              "max_conversation_length": 32,
              "max_emotion_cause_pairs_length": 128}
file_trial = {"subtask_1_text_file": FOLDER + "trial/Subtask_1_trial.json",
              "subtask_2_text_file": FOLDER + "trial/Subtask_2_trial.json",
              "video_dir": FOLDER + "trial/video_with_audio",
              "max_conversation_length": 32,
              "max_emotion_cause_pairs_length": 128}
subtask = "subtask_1"
format = "ECF 2.0"

# Define token to find subtensor
bos_token_id = 101
eos_token_id = 102
max_length = 512

# **UTIL FUNCTIONS**

In [5]:
def read_json_file(file_path):
  with open(file_path, 'r') as f:
    data = json.load(f)
  return data
def find_sub_list_indices(main_list, sub_list):
  start_index = None
  end_index = None
  sub_list = sub_list[0]
  end_sublist = sub_list.index(eof_token)
  sub_list = sub_list[1:end_sublist]
  start_index = main_list.index(sub_list[0])
  end_index = start_index + len(sub_list)
  while main_list[start_index:end_index] != sub_list:
    start_index = main_list.index(sub_list[0], start_index + 1)
    end_index = start_index + len(sub_list)

  return start_index, end_index

# **LOAD MODEL**

In [6]:
tokenizer = AutoTokenizer.from_pretrained(tokenizer_name)
encoder = AutoModel.from_pretrained(encoder_name)

Some weights of RobertaModel were not initialized from the model checkpoint at michellejieli/emotion_text_classifier and are newly initialized: ['roberta.pooler.dense.bias', 'roberta.pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


# **TRAIN**

In [7]:
# prompt: create trainer class
class Trainer(object):
  def __init__(self, model, optimizer, loss_fn, metrics=None, epochs=10, save_checkpoint_dir='./', save_model_dir='./', save_report_dir='./'):
    self.model = model
    self.optimizer = optimizer
    self.loss_fn = loss_fn
    self.metrics = metrics

    self.epochs = epochs
    self.save_checkpoint_dir = save_checkpoint_dir
    self.save_model_dir = save_model_dir
    self.save_report_dir = save_report_dir

  def train(self, train_loader, val_loader, epochs):
    best_val_loss = np.inf
    for epoch in range(epochs):
      train_loss, train_metrics = self._train_epoch(train_loader)
      val_loss, val_metrics = self._val_epoch(val_loader)
      if val_loss < best_val_loss:
        best_val_loss = val_loss
        self.save_checkpoint(epoch)
      print(f"Epoch {epoch+1}: Train loss {train_loss:.4f}, Train {train_metrics}, Valid loss {val_loss:.4f}, Valid {val_metrics}")

  def _train_epoch(self, dataset):
    self.model.train()
    train_loss = 0.0
    train_metrics = {}
    for batch in dataset:
      outputs = self.model(batch)
      loss = self.loss_fn(outputs, batch["target"])
      self.optimizer.zero_grad()
      loss.backward()
      self.optimizer.step()
      train_loss += loss.item()
      for metric, fn in self.metrics.items():
        train_metrics[metric] = fn(outputs, batch["target"])
    return train_loss / len(dataset), train_metrics

  def _val_epoch(self, dataset):
    self.model.eval()
    test_loss = 0.0
    test_metrics = {}
    for batch in dataset:
      outputs = self.model(batch)
      loss = self.loss_fn(outputs, batch["target"])
      test_loss += loss.item()
      for metric, fn in self.metrics.items():
        test_metrics[metric] = fn(outputs, batch["target"])
    return test_loss / len(dataset), test_metrics
  def save_checkpoint(self, epoch):
    checkpoint_path = os.path.join(self.save_checkpoint_dir, 'checkpoint_{}.pth'.format(epoch))
    torch.save({
        'model_state_dict': self.model.state_dict(),
        'optimizer_state_dict': self.optimizer.state_dict(),
        'epoch': epoch
    }, checkpoint_path)

# **PREDICTION**

In [8]:
class Predictor(object):
  def __init__(self, model, output_json, subtask=None, batch_size=32):
    self.model = model
    self.output_json = output_json
    self.subtask = subtask
    self.batch_size = batch_size
    self.text_to_number_mapping = {"neutral": 0, 'surprise': 1, 'anger': 2, "disgust": 3, "fear": 4, "joy": 5, "sadness": 6}
    self.number_to_text_mapping = {0: "neutral", 1: 'surprise', 2: 'anger', 3: "disgust", 4: "fear", 5: "joy", 6: "sadness"}
    self.predicted_dataset = None

  def emotion_cause_pairs(self, casual_text, predicted_emotion, casual = True):
    text = str(predicted_emotion[0] + 1) + "_" + str(self.number_to_text_mapping[predicted_emotion[1]])
    if casual == True:
      cause = str(predicted_emotion[2] + 1) + "_" + str(casual_text[predicted_emotion[3]: predicted_emotion[4]])
    else:
      cause = str(predicted_emotion[2] + 1)
    return [text, cause]

  def predict(self, dataset):
    dataloader = DataLoader(dataset=dataset, batch_size=self.batch_size, shuffle=False)
    self.model.eval()
    for batch in dataloader:
      outputs = self.model.predict(batch) # (batch_size, max_utterance, 5)
      if self.subtask == "subtask_1":
        dataset=self.predict_subtask_1(batch, dataset, outputs)
      elif self.subtask == "subtask_2":
        dataset=self.predict_subtask_2(batch, dataset, outputs)

    self.save_prediction(dataset)
    
  def predict_subtask_1(self, batch, dataset, outputs):
    for index in range(len(outputs)):
      predicted_emotion = outputs[index] # (max_utterance, 5)
      # [sentence id, emotion, causal sentence id, casual start index, casual end index]
      conversation_ID = batch['conversation_ID'][index]
      dataset[conversation_ID-1]['emotion-cause_pairs'] = []
      for j in range(len(dataset[conversation_ID-1]['conversation'])):
        dataset[conversation_ID-1]['conversation'][j]['emotion'] = "neutral"

      for j in range(len(predicted_emotion)):
        casual_text = dataset[conversation_ID-1]['conversation'][predicted_emotion[2]]
        dataset[conversation_ID-1]['emotion-cause_pairs'].append(self.emotion_cause_pairs(casual_text, predicted_emotion))
    return dataset
  def predict_subtask_2(self, batch, dataset, outputs):
    for index in range(len(outputs)):
      predicted_emotion = outputs[index] # (max_utterance, 5)
      # [sentence id, emotion, causal sentence id, casual start index, casual end index]
      conversation_ID = batch['conversation_ID'][index]
      dataset[conversation_ID-1]['emotion-cause_pairs'] = []
      for j in range(len(dataset[conversation_ID-1]['conversation'])):
        dataset[conversation_ID-1]['conversation'][j]['emotion'] = "neutral"
        dataset[conversation_ID-1]['conversation'][j]['video_name'] = "dia" + str(conversation_ID) + "utt" + str(j+1)

      for j in range(len(predicted_emotion)):
        casual_text = dataset[conversation_ID-1]['conversation'][predicted_emotion[2]]
        dataset[conversation_ID-1]['emotion-cause_pairs'].append(self.emotion_cause_pairs(casual_text, predicted_emotion, casual=False))
    return dataset
  def save_prediction(self, dataset):
    with open(self.output_json, 'w') as f:
      json.dump(dataset, f)

# **DATASET**

In [9]:
class Dataset(Dataset):
  def __init__(self, file, format="ECF 2.0", subtask="subtask_1", tokenizer=None, max_length=512):
    self.format = format
    self.subtask = subtask
    self.left_padding_side = 6
    self.right_padding_side = 2
    if format == 'ECF 2.0':
      self.tokenizer = tokenizer
      self.max_conversation_length = file['max_conversation_length']
      self.max_emotion_cause_pairs_length = file['max_emotion_cause_pairs_length']
      if self.subtask == "subtask_1":
        self.text_file = file['subtask_1_text_file']
        self.text_data = json.load(open(self.text_file))
        self.max_length = max_length
        self.text_to_number_mapping = {"anger": 0, 
                                       "disgust": 1, 
                                       "joy": 2, 
                                       "neutral": 3, 
                                       "fear": 4, 
                                       "sadness": 5, 
                                       "surprise": 6}
        self.build()
  def get_data(self):
    return self.text_data
  def __len__(self):
    return len(self.dataset)
  def build(self):
    self.padding_utterance()
    self.dataset = []
    for conversation_index in range(len(self.text_data)):
      for i in range(len(self.text_data[conversation_index]['conversation']) - self.left_padding_side - self.right_padding_side):
        conversation_ID = self.text_data[conversation_index]['conversation_ID']
        utterance_ID = self.text_data[conversation_index]['conversation'][i + self.left_padding_side]['utterance_ID']
        text = ""
        for j in range(self.left_padding_side):
          text += self.text_data[conversation_index]['conversation'][i + j + 1]['text']
        # text_token = self.tokenizer(text, padding="max_length", max_length=self.max_length, truncation=True, return_tensors="pt")
        speaker = self.text_data[conversation_index]['conversation'][i + self.left_padding_side]['speaker']
        emotion = self.text_data[conversation_index]['conversation'][i + self.left_padding_side]['emotion']
        
        causual_sentence_list = []
        causual_sentence_index_list = []
        causual_sentence_pos_list = []
        sentence_list = []
        for j in range(len(self.text_data[conversation_index]['emotion-cause_pairs'])):
          split_sentence = self.text_data[conversation_index]['emotion-cause_pairs'][j][0].split("_")
          sentence_index, emotion_sentence = split_sentence[0], split_sentence[1]
          if sentence_index == str(utterance_ID):
            split_sentence = self.text_data[conversation_index]['emotion-cause_pairs'][j][1].split("_")
            causual_sentence_index, causual_sentence = int(split_sentence[0]), split_sentence[1]
            sentence = self.text_data[conversation_index]['conversation'][causual_sentence_index + self.left_padding_side - 1]['text']
            causual_sentence_pos = self.find_sub_list(causual_sentence, sentence)
            causual_sentence_index_list.append(causual_sentence_index)
            sentence_list.append(sentence)
            causual_sentence_pos_list.append(causual_sentence_pos)
            causual_sentence_list.append(causual_sentence)
        
        causual_sentence_index_list, sentence_list, causual_sentence_pos_list, causual_sentence_list = self.padding_causual_sentence(causual_sentence_index_list, sentence_list, causual_sentence_pos_list, causual_sentence_list)
        utter = {'conversation_ID': conversation_ID,
                 'utterance_ID': utterance_ID,
                 'text': text,
                #  'text_token': text_token,
                 'speaker': speaker,
                 'emotion': emotion,
                 'causual_sentence_index': causual_sentence_index_list,
                 'sentence': sentence_list,
                 'causual_sentence_pos': causual_sentence_pos_list,
                 'causual_sentence': causual_sentence_list}
        self.dataset.append(utter)
  def find_sub_list(self, sub_sentence,sentence):
    sentence = sentence.split()
    sub_sentence = sub_sentence.split()
    sll=len(sub_sentence)
    for ind in (i for i,e in enumerate(sentence) if e==sub_sentence[0]):
        if sentence[ind:ind+sll]==sub_sentence:
            return [ind,ind+sll]
  def padding_utterance(self):
    utter = {
                "utterance_ID": 0,
                "text": "",
                "speaker": "",
                "emotion": "neutral"
            }
    for i in range(len(self.text_data)):
      for j in range(self.left_padding_side):
        self.text_data[i]['conversation'].insert(0, utter)
      for j in range(self.right_padding_side):
        self.text_data[i]['conversation'].append(utter)      
  def padding_causual_sentence(self, causual_sentence_index_list, sentence_list, causual_sentence_pos_list, causual_sentence_list):
    padding_side = 8 - len(causual_sentence_index_list)
    for i in range(padding_side):
      causual_sentence_index_list.append(0)
      sentence_list.append('')
      causual_sentence_pos_list.append([0,0])
      causual_sentence_list.append('')
    return causual_sentence_index_list, sentence_list, causual_sentence_pos_list, causual_sentence_list
  def emotion_EDA(self):
    self.emotion_counts = {"neutral": 0, 'surprise': 0, 'anger': 0, "disgust": 0, "fear": 0, "joy": 0, "sadness": 0}
    print("DISTANCE")
    dictation = {}
    for index in range(len(self.text_data)):
      emotion_cause_pairs = self.text_data[index]['emotion-cause_pairs']
      # print("emotion_cause_pairs:" +str(emotion_cause_pairs))
      for i in range(len(emotion_cause_pairs)):
        emotion = int(emotion_cause_pairs[i][0].split("_")[0])
        causual_emotion = int(emotion_cause_pairs[i][1].split("_")[0])
        string = str(emotion-causual_emotion)
        if string in dictation.keys():
          dictation[string] += 1
        else:
          dictation[string] = 1
    dictation = sorted(dictation.items(), key=lambda x:x[1])
    print(dictation)
    print("COUNTING")
    for index in range(len(self.text_data)):
      conversation = self.text_data[index]['conversation']
      for i in range(len(conversation)):
        emotion = conversation[i]['emotion']
        self.emotion_counts[emotion] += 1
    return self.emotion_counts  
  def __getitem__(self, index):
    item = self.dataset[index]
    emotion = self.text_to_number_mapping[item['emotion']]
    emotion_binary_tensor = torch.zeros(len(self.text_to_number_mapping))
    emotion_binary_tensor[emotion] = 1
    return {'conversation_ID': item['conversation_ID'],
              'utterance_ID': item['utterance_ID'],
              'text': self.tokenizer(item['text'], padding="max_length", max_length=self.max_length, truncation=True, return_tensors="pt"),
              'speaker': item['speaker'],
              'emotion': emotion_binary_tensor,
              'causual_sentence_index': item['causual_sentence_index'],
              'sentence': item['sentence'],
              'causual_sentence_pos': torch.FloatTensor(item['causual_sentence_pos']),
              'causual_sentence': item['causual_sentence']}

In [10]:
train_dataset = Dataset(file=file_train, format="ECF 2.0", subtask="subtask_1", tokenizer=tokenizer, max_length=max_length)
# train_dataset.emotion_EDA()
# next(iter(train_loader))

In [16]:
train_dataset.dataset[0:4]

[{'conversation_ID': 1,
  'utterance_ID': 1,
  'text': 'Alright , so I am back in high school , I am standing in the middle of the cafeteria , and I realize I am totally naked .',
  'speaker': 'Chandler',
  'emotion': 'neutral',
  'causual_sentence_index': [0, 0, 0, 0, 0, 0, 0, 0],
  'sentence': ['', '', '', '', '', '', '', ''],
  'causual_sentence_pos': [[0, 0],
   [0, 0],
   [0, 0],
   [0, 0],
   [0, 0],
   [0, 0],
   [0, 0],
   [0, 0]],
  'causual_sentence': ['', '', '', '', '', '', '', '']},
 {'conversation_ID': 1,
  'utterance_ID': 2,
  'text': 'Alright , so I am back in high school , I am standing in the middle of the cafeteria , and I realize I am totally naked .Oh , yeah . Had that dream .',
  'speaker': 'All',
  'emotion': 'neutral',
  'causual_sentence_index': [0, 0, 0, 0, 0, 0, 0, 0],
  'sentence': ['', '', '', '', '', '', '', ''],
  'causual_sentence_pos': [[0, 0],
   [0, 0],
   [0, 0],
   [0, 0],
   [0, 0],
   [0, 0],
   [0, 0],
   [0, 0]],
  'causual_sentence': ['', '', '

# **MODEL**

In [12]:
class VH(nn.Module):
  def __init__(self, encoder, label_dim):
    super(VH, self).__init__()
    self.label_dim = label_dim
    self.build(encoder)
    
  def build(self, encoder):
    self.embeddings = encoder.embeddings
    self.encoder = encoder.encoder
    self.pooler = encoder.pooler
    self.fc = nn.Linear(768, self.label_dim)
    
  def forward(self, x):
    text = x['text']
    emotion = x['emotion']
    causual_sentence_index = x['causual_sentence_index']
    causual_sentence_pos = x['causual_sentence_pos']
    emotion = x['emotion']
    emotion = x['emotion']
      
    

# **TEST**

In [13]:
train_loader = DataLoader(train_dataset, batch_size=4)
next(iter(train_loader)).keys()
# next(iter(train_loader))

dict_keys(['conversation_ID', 'utterance_ID', 'text', 'speaker', 'emotion', 'causual_sentence_index', 'sentence', 'causual_sentence_pos', 'causual_sentence'])

In [14]:
next(iter(train_loader))['causual_sentence_pos']

tensor([[[ 0.,  0.],
         [ 0.,  0.],
         [ 0.,  0.],
         [ 0.,  0.],
         [ 0.,  0.],
         [ 0.,  0.],
         [ 0.,  0.],
         [ 0.,  0.]],

        [[ 0.,  0.],
         [ 0.,  0.],
         [ 0.,  0.],
         [ 0.,  0.],
         [ 0.,  0.],
         [ 0.,  0.],
         [ 0.,  0.],
         [ 0.,  0.]],

        [[21., 28.],
         [ 0., 15.],
         [ 0.,  0.],
         [ 0.,  0.],
         [ 0.,  0.],
         [ 0.,  0.],
         [ 0.,  0.],
         [ 0.,  0.]],

        [[21., 28.],
         [ 0., 15.],
         [ 0.,  3.],
         [ 0.,  0.],
         [ 0.,  0.],
         [ 0.,  0.],
         [ 0.,  0.],
         [ 0.,  0.]]])

In [15]:
model = VH(encoder, 6).to('cuda:0')
model(next(iter(train_loader))['text_conversation']['input_ids'].to('cuda:0'), next(iter(train_loader))['speaker_conversation']['input_ids'].to('cuda:0'))

KeyError: 'text_conversation'

In [None]:
next(iter(train_loader))['text_conversation']

In [None]:
key = next(iter(train_loader)).keys()
for i in key:
    print(i)
    print(next(iter(train_loader))[i])
    print(next(iter(train_loader))[i].shape)