In [5]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [None]:
import torch
import numpy as np
from torch.utils.data import Dataset, DataLoader
from torch.nn.utils.rnn import pack_sequence
import random
from sklearn.metrics import accuracy_score, precision_recall_fscore_support, confusion_matrix

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

class MELD_loader(Dataset):
    """Dataset of MELD utterances, each paired with a sliding window of dialogue context.

    The input is a tab-separated text file with one utterance per line
    (``speaker<TAB>utterance<TAB>emotion<TAB>sentiment``); dialogues are
    separated by blank lines and the first two lines of the file are skipped.

    Every utterance yields one entry in ``self.dialogs`` of the form
    ``[speaker_indices, context_utterances, emotion, sentiment]`` where the
    context holds the current utterance plus up to five preceding ones from
    the same dialogue, and speaker indices number the speakers in order of
    first appearance within that dialogue.

    Args:
        txt_file: path of the text file to read (opened as UTF-8).
        dataclass: ``'emotion'`` selects the emotion labels as ``labelList``;
            any other value selects the sentiment labels.
    """

    def __init__(self, txt_file, dataclass):
        self.dialogs = []

        # Context manager so the handle is closed even if reading raises.
        with open(txt_file, 'r', encoding='utf8') as f:
            dataset = f.readlines()

        temp_speakerList = []
        context = []
        context_speaker = []
        self.speakerNum = []
        # 'anger', 'disgust', 'fear', 'joy', 'neutral', 'sadness', 'surprise'
        # NOTE(review): emodict renames 'sadness' to 'sad' while sentidict below
        # still lists 'sadness' -- confirm which spelling consumers expect.
        emodict = {'anger': "anger", 'disgust': "disgust", 'fear': "fear", 'joy': "joy", 'neutral': "neutral", 'sadness': "sad", 'surprise': 'surprise'}
        self.sentidict = {'positive': ["joy"], 'negative': ["anger", "disgust", "fear", "sadness"], 'neutral': ["neutral", "surprise"]}
        self.emoSet = set()
        self.sentiSet = set()
        for i, data in enumerate(dataset):
            if i < 2:
                continue
            if data == '\n':
                # A blank line closes the current dialogue. Blank lines seen before
                # any utterance are skipped instead of falling through to the
                # tab-split below (which raised ValueError).
                if len(self.dialogs) > 0:
                    self.speakerNum.append(len(temp_speakerList))
                temp_speakerList = []
                context = []
                context_speaker = []
                continue
            if len(context) > 5:
                # Keep the window at six utterances once the new one is appended.
                context.pop(0)
                context_speaker.pop(0)
            speaker, utt, emo, senti = data.strip().split('\t')
            context.append(utt)
            if speaker not in temp_speakerList:
                temp_speakerList.append(speaker)
            speakerCLS = temp_speakerList.index(speaker)
            context_speaker.append(speakerCLS)

            # Store copies: the window lists keep being mutated on later lines.
            self.dialogs.append([context_speaker[:], context[:], emodict[emo], senti])
            self.emoSet.add(emodict[emo])
            self.sentiSet.add(senti)

        self.emoList = sorted(self.emoSet)
        self.sentiList = sorted(self.sentiSet)
        if dataclass == 'emotion':
            self.labelList = self.emoList
        else:
            self.labelList = self.sentiList
        # Speaker count of the final dialogue (no blank line is required after it).
        self.speakerNum.append(len(temp_speakerList))

    def __len__(self):
        """Number of utterance-level samples."""
        return len(self.dialogs)

    def __getitem__(self, idx):
        """Return ``(sample, labelList, sentidict)`` for the sample at ``idx``."""
        return self.dialogs[idx], self.labelList, self.sentidict

In [None]:
from transformers import RobertaTokenizer, RobertaModel
from transformers import BertTokenizer, BertModel
from transformers import GPT2Tokenizer, GPT2Model

In [None]:
# RoBERTa gave the best output of the encoders tried, so only its tokenizer is
# instantiated; the BERT / GPT-2 alternatives below are kept commented out.
# `roberta_tokenizer` is read as a module-level global by make_batch_roberta
# and roberta_emb.
roberta_tokenizer = RobertaTokenizer.from_pretrained('roberta-base')
# bert_tokenizer = BertTokenizer.from_pretrained('bert-large-uncased')
# gpt_tokenizer = GPT2Tokenizer.from_pretrained('gpt2-large')
# gpt_tokenizer.add_special_tokens({'cls_token': '[CLS]', 'pad_token': '[PAD]'})

def encode_right_truncated(text, tokenizer, max_length=511):
    """Encode ``text`` keeping only its trailing tokens, with the CLS id in front.

    Args:
        text: string to tokenize.
        tokenizer: object providing ``tokenize``, ``convert_tokens_to_ids``
            and ``cls_token_id``.
        max_length: number of tokens kept from the END of the token list.

    Returns:
        ``[cls_token_id]`` followed by the ids of the kept tokens.
    """
    # Negative slice start: older tokens are dropped, the most recent are kept.
    tail_tokens = tokenizer.tokenize(text)[-max_length:]
    tail_ids = tokenizer.convert_tokens_to_ids(tail_tokens)
    return [tokenizer.cls_token_id] + tail_ids

def tokenise_input(text, tokenizer):
  """Call ``tokenizer`` on ``text`` with padding and truncation on, print the result, return it.

  The tokenizer is asked for PyTorch tensors (``return_tensors='pt'``).
  """
  call_options = dict(padding=True, truncation=True, return_tensors='pt')
  encoding = tokenizer(text, **call_options)
  print(encoding)
  return encoding


def padding(ids_list, tokenizer):
    """Right-pad token-id sequences to a common length and stack them into one tensor.

    Args:
        ids_list: iterable whose items are either plain ``list`` objects of int
            ids, or dicts with an ``'input_ids'`` entry exposing ``.tolist()``.
            Items of any other type are skipped, so the result can have fewer
            rows than ``ids_list`` has items.
        tokenizer: object providing ``pad_token_id`` (the fill value).

    Returns:
        ``torch.Tensor`` with one row per accepted sequence, each padded on the
        right up to the longest accepted sequence.

    Raises:
        AssertionError: if a padded row contains a non-``int`` value.
    """
    # Normalise each entry exactly once (the original converted dict inputs
    # twice: once to measure the length and once more to pad).
    sequences = []
    for ids in ids_list:
        if isinstance(ids, dict) and 'input_ids' in ids:
            ids = ids['input_ids'].tolist()
        if isinstance(ids, list):
            sequences.append(ids)

    max_len = max((len(seq) for seq in sequences), default=0)

    # The per-row debug print was removed: it wrote every padded sequence to
    # stdout, flooding the notebook output on real datasets.
    pad_ids = []
    for seq in sequences:
        padded_sequence = seq + [tokenizer.pad_token_id] * (max_len - len(seq))
        assert all(isinstance(i, int) for i in padded_sequence), f"Non-integer value found in {padded_sequence}"
        pad_ids.append(padded_sequence)

    return torch.tensor(pad_ids)



def make_batch_roberta(sessions):
    """Turn dataset samples into RoBERTa token-id tensors plus a label tensor.

    Each session is indexed as ``session[0]`` = ``[context_speaker, context,
    emotion, sentiment]`` and ``session[1]`` = the label list. The context is
    rendered as ``"<sK> utterance"`` per turn (K is the speaker index plus one),
    joined with spaces, and encoded with the module-level ``roberta_tokenizer``
    (special tokens added, truncated to 512 ids).

    Returns:
        ``(batch_input, batch_labels)``: a list of 1-D id tensors (unpadded)
        and a tensor of label indices.
    """
    batch_input = []
    label_indices = []
    for session in sessions:
        context_speaker, context, emotion, sentiment = session[0]
        label_list = session[1]

        concat_string = ''.join(
            f'<s{speaker+1}> {utt} ' for speaker, utt in zip(context_speaker, context)
        ).strip()
        encoded = roberta_tokenizer.encode(concat_string, add_special_tokens=True,
                                           max_length=512, truncation=True)
        batch_input.append(torch.tensor(encoded))

        # More than three labels means the emotion label set is in use;
        # otherwise the label list is treated as the sentiment set.
        target = emotion if len(label_list) > 3 else sentiment
        label_indices.append(label_list.index(target))

    return batch_input, torch.tensor(label_indices)

The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


tokenizer_config.json:   0%|          | 0.00/25.0 [00:00<?, ?B/s]

vocab.json:   0%|          | 0.00/899k [00:00<?, ?B/s]

merges.txt:   0%|          | 0.00/456k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/1.36M [00:00<?, ?B/s]

config.json:   0%|          | 0.00/481 [00:00<?, ?B/s]

In [None]:
def roberta_emb(input_tokens):
    """Embed each token-id sequence as the position-0 hidden state of ``model``.

    Relies on three module-level names: ``device``, ``roberta_tokenizer`` (for
    its ``pad_token_id``) and ``model``. Sequences are processed one at a time
    with gradients disabled.

    Args:
        input_tokens: iterable of 1-D token-id tensors.

    Returns:
        List with one tensor per input: ``last_hidden_state[:, 0, :]`` with the
        batch dimension squeezed away. Tensors stay on ``device``.
    """
    cls_vectors = []
    for token_ids in input_tokens:
        token_ids = token_ids.to(device)
        # 1 wherever the id differs from the pad id, 0 elsewhere.
        not_pad = (token_ids != roberta_tokenizer.pad_token_id).long()

        with torch.no_grad():
            # unsqueeze(0) adds the leading batch dimension of size 1.
            output = model(input_ids=token_ids.unsqueeze(0),
                           attention_mask=not_pad.unsqueeze(0))
            first_position = output.last_hidden_state[:, 0, :]
            cls_vectors.append(first_position.squeeze(0))

    return cls_vectors

In [None]:
import torch
import pdb
# Build the training samples and load the RoBERTa encoder used by roberta_emb.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
train_path ='/content/drive/MyDrive/train.txt'
# 'emotion' makes MELD_loader expose the emotion label list as labelList.
cls='emotion'
X=MELD_loader(train_path, cls)
# The whole dataset is encoded in one call: `data` is the list of per-sample
# token-id tensors and `labels` the tensor of label indices.
data_list = make_batch_roberta(X)
data, labels = data_list
# `model` is the module-level name roberta_emb calls; it must be bound before
# roberta_emb(data) is run.
model =RobertaModel.from_pretrained('roberta-base')
model.to(device)
#input_tokens = [torch.tensor(input_token.input_ids).to(device) for input_token in data]

model.safetensors:   0%|          | 0.00/499M [00:00<?, ?B/s]

Some weights of RobertaModel were not initialized from the model checkpoint at roberta-base and are newly initialized: ['roberta.pooler.dense.bias', 'roberta.pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


RobertaModel(
  (embeddings): RobertaEmbeddings(
    (word_embeddings): Embedding(50265, 768, padding_idx=1)
    (position_embeddings): Embedding(514, 768, padding_idx=1)
    (token_type_embeddings): Embedding(1, 768)
    (LayerNorm): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
    (dropout): Dropout(p=0.1, inplace=False)
  )
  (encoder): RobertaEncoder(
    (layer): ModuleList(
      (0-11): 12 x RobertaLayer(
        (attention): RobertaAttention(
          (self): RobertaSdpaSelfAttention(
            (query): Linear(in_features=768, out_features=768, bias=True)
            (key): Linear(in_features=768, out_features=768, bias=True)
            (value): Linear(in_features=768, out_features=768, bias=True)
            (dropout): Dropout(p=0.1, inplace=False)
          )
          (output): RobertaSelfOutput(
            (dense): Linear(in_features=768, out_features=768, bias=True)
            (LayerNorm): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
            (dr

In [None]:
X[10]

([[1, 0, 1, 0, 1, 0],
  ['Now you’ll be heading a whole division, so you’ll have a lot of duties.',
   'I see.',
   'But there’ll be perhaps 30 people under you so you can dump a certain amount on them.',
   'Good to know.',
   'We can go into detail',
   'No don’t I beg of you!'],
  'fear',
  'negative'],
 ['anger', 'disgust', 'fear', 'joy', 'neutral', 'sad', 'surprise'],
 {'positive': ['joy'],
  'negative': ['anger', 'disgust', 'fear', 'sadness'],
  'neutral': ['neutral', 'surprise']})

In [None]:
X[19][0][1]

['But then who? The waitress I went out with last month?',
 'You know? Forget it!',
 'No-no-no-no, no! Who, who were you talking about?',
 "No, I-I-I-I don't, I actually don't know",
 'Ok!',
 'All right, well...']

In [None]:
embeddings = roberta_emb(data)

In [None]:
labels.max()

tensor(6)

In [None]:
len(embeddings[0])

In [None]:
import pickle
# Write under /content/drive: that is where this notebook mounts Google Drive
# (drive.mount('/content/drive')) and where the loading cell reads roberta.pkl
# back from. The previous '/content/gdrive/...' path is not mounted at this point.
with open('/content/drive/MyDrive/roberta.pkl', 'wb') as file:
  pickle.dump(embeddings, file)

In [None]:
import pickle
import torch
import io

class CPUUnpickler(pickle.Unpickler):
    """Unpickler that loads pickled torch storages onto the CPU.

    Only the lookup of ``torch.storage._load_from_bytes`` is intercepted; every
    other global is resolved by ``pickle.Unpickler`` as usual.
    """

    def find_class(self, module, name):
        """Return a CPU-mapping loader for torch storages, else defer to the base class."""
        is_storage_loader = (module, name) == ('torch.storage', '_load_from_bytes')
        if not is_storage_loader:
            return super().find_class(module, name)
        # Replacement loader: same bytes, but forced onto the CPU.
        return lambda b: torch.load(io.BytesIO(b), map_location='cpu')

with open('/content/drive/MyDrive/roberta.pkl', 'rb') as file:
    roberta = CPUUnpickler(file).load()

  return lambda b: torch.load(io.BytesIO(b), map_location='cpu')


In [None]:
import pickle
with open ('/content/drive/MyDrive/comet.pkl', 'rb') as file:
  comet = pickle.load(file)

In [None]:
len(roberta[0])

768

In [None]:
len(roberta)

9989

In [None]:
X[1]

([[0, 1],
  ['also I was the point person on my company’s transition from the KL-5 to GR-6 system.',
   'You must’ve had your hands full.'],
  'neutral',
  'neutral'],
 ['anger', 'disgust', 'fear', 'joy', 'neutral', 'sad', 'surprise'],
 {'positive': ['joy'],
  'negative': ['anger', 'disgust', 'fear', 'sadness'],
  'neutral': ['neutral', 'surprise']})

In [None]:
roberta[1]

tensor([-1.0733e-01,  1.0379e-01, -1.0700e-02, -9.3623e-02,  5.5355e-02,
        -5.6386e-02, -5.0469e-02,  1.7660e-02,  6.8255e-02, -6.1240e-02,
        -2.2596e-02, -1.3464e-02,  5.3519e-02, -9.0848e-03,  7.2193e-02,
         5.3061e-03, -5.0987e-02,  4.5855e-02, -4.7445e-02, -8.7637e-02,
        -4.8871e-02,  3.8680e-03, -1.8068e-02,  1.2191e-01,  1.4900e-02,
         3.9484e-02,  1.3491e-01,  1.2517e-01, -7.5517e-02, -1.2825e-02,
        -1.4039e-02, -3.2195e-02,  3.0559e-02,  2.3856e-02,  8.5759e-03,
         4.4768e-02,  1.2527e-02, -1.7395e-02, -6.6627e-02, -1.4470e-02,
         9.5134e-03,  1.6128e-01,  1.2669e-02, -1.7411e-02,  6.1191e-02,
         4.2819e-02,  3.2348e-02,  3.9785e-03, -2.6467e-02, -3.7983e-02,
        -2.5653e-02,  9.0578e-02, -1.0527e-02,  7.2799e-02, -1.2642e-01,
         4.3179e-02, -2.3828e-03,  7.9977e-03,  2.2333e-02, -8.5790e-02,
        -4.5859e-02, -1.9609e-01, -8.0044e-02, -6.7979e-02,  2.3984e-02,
        -6.0266e-02, -4.0124e-02, -1.2590e-03,  1.3

In [None]:
len(comet)

9

In [None]:
len(comet[3][4][2])

768

In [None]:
comet_avg = [[[[0]]]]

In [None]:
comet = np.array(comet)

In [None]:
comet_avg = np.array(comet_avg)

In [None]:
comet.shape

(9,)

In [None]:
# Average the COMET relation embeddings element-wise for every sentence.
# `comet` is indexed as comet[relation][conversation][sentence] -> vector; the
# result keeps the conversation -> sentence nesting and stores each averaged
# vector as a plain list of Python floats.
com_embed = 0  # relation whose conversation/sentence layout is used as the template
num_relations = len(comet)  # was a hard-coded 9

# np.mean over the stacked relation vectors replaces the per-scalar Python loop;
# the values are the same up to floating-point rounding.
comet_avg = [
    [
        np.mean(
            [np.asarray(comet[relation][i][j], dtype=float) for relation in range(num_relations)],
            axis=0,
        ).tolist()
        for j in range(len(comet[com_embed][i]))
    ]
    for i in range(len(comet[com_embed]))
]

In [None]:
comet_avg

Buffered data was truncated after reaching the output size limit.

In [None]:
import pickle
with open('/content/drive/MyDrive/comet_avg.pkl', 'wb') as file:
  pickle.dump(comet_avg, file)

In [None]:
import pickle
with open ('/content/drive/MyDrive/comet_avg.pkl', 'rb') as file:
  comet_avg = pickle.load(file)

In [None]:
print(type(comet_avg), type(comet_avg[0]), type(comet_avg[0][0]), type(comet_avg[0][0][0]))


<class 'list'> <class 'list'> <class 'list'> <class 'float'>


In [None]:
len(comet_avg[2])

13

In [None]:
def reshape_conv(comet_avg):
    """Flatten conversation -> sentence nesting into one array with a row per sentence.

    Sentences keep their order: all sentences of the first conversation, then
    the second, and so on.
    """
    flat_sentences = []
    for conversation in comet_avg:
        flat_sentences.extend(conversation)
    return np.array(flat_sentences)

comet_flattened = reshape_conv(comet_avg)

In [None]:
len(comet_flattened)

9971

In [None]:
comet_flattened = comet_flattened.tolist()

## Comet Features

In [None]:
!pwd

In [None]:
%cd /content/gdrive/MyDrive/comet_data/conv-emotion/COSMIC/feature-extraction/comet-commonsense

In [None]:
!bash scripts/setup/get_atomic_data.sh
!bash scripts/setup/get_conceptnet_data.sh
!bash scripts/setup/get_model_files.sh

In [None]:
!python '/content/gdrive/MyDrive/comet_data/conv-emotion/COSMIC/feature-extraction/comet-commonsense/scripts/data/make_atomic_data_loader.py'
!python '/content/gdrive/MyDrive/comet_data/conv-emotion/COSMIC/feature-extraction/comet-commonsense/scripts/data/make_conceptnet_data_loader.py'

In [None]:
!python '/content/gdrive/MyDrive/comet_data/conv-emotion/COSMIC/feature-extraction/comet-commonsense/src/main.py' --experiment_type atomic --experiment_num 0

In [None]:
train_path = '/content/train.txt'
Utterances = read_data(train_path)
%cd /content/gdrive/MyDrive/comet_data/conv-emotion/COSMIC/feature-extraction/
import pickle, numpy as np
from comet.csk_feature_extract import CSKFeatureExtractor

extractor = CSKFeatureExtractor()
feaures = extractor.extract(Utterances)
with open('comet.pkl', 'wb') as file:
  pickle.dump(features, file)


## node2vec embedding generation

In [1]:
!pip install node2vec

Collecting node2vec
  Downloading node2vec-0.5.0-py3-none-any.whl.metadata (849 bytes)
Downloading node2vec-0.5.0-py3-none-any.whl (7.2 kB)
Installing collected packages: node2vec
Successfully installed node2vec-0.5.0


In [2]:
import networkx as nx
from node2vec import Node2Vec
import pandas as pd

# Build a weighted graph from adjacency_list.txt, train node2vec on it and
# export one embedding per node to node2vec_embeddings.csv.
edges = []
with open("adjacency_list.txt", "r") as file:
    for line in file:
        # Each line must hold exactly three whitespace-separated integers
        # (u, v, weight); anything else, including a blank line, raises here.
        u, v, weight = map(int, line.split())

        # Ensure all weights are positive
        if weight == 0:
            weight = 1e-5  # Add a small positive value to avoid zero weights

        edges.append((u, v, weight))

G = nx.Graph()
G.add_weighted_edges_from(edges)

# NOTE(review): no seed is passed; the two embeddings recorded for node "1" in
# this notebook differ, so results are presumably not reproducible across runs.
node2vec = Node2Vec(G, dimensions=256, walk_length=30, num_walks=200, workers=4)

# NOTE(review): this rebinds the module-level name `model`, which roberta_emb
# also reads -- re-running roberta_emb after this cell would call this object.
model = node2vec.fit(window=10, min_count=1, batch_words=4)

# Embeddings are looked up (and stored) under the string form of the node id.
node_embeddings = {str(node): model.wv[str(node)] for node in G.nodes()}

embeddings_df = pd.DataFrame.from_dict(node_embeddings, orient='index')
embeddings_df.to_csv("node2vec_embeddings.csv", index_label="node")

print("Embedding for node 1:", node_embeddings["1"])


Computing transition probabilities:   0%|          | 0/260 [00:00<?, ?it/s]

Embedding for node 1: [-0.11329231 -0.02507848  0.05791118  0.05539483 -0.03425768  0.05780483
  0.14925615 -0.02155535 -0.0768605   0.00321206  0.05661148  0.00379956
  0.07784081 -0.06131534  0.00717829 -0.07593372  0.05114025  0.18640564
  0.01687528 -0.00077234  0.03335235  0.02741437 -0.01921835 -0.07338201
 -0.1615158  -0.02114637 -0.11975408 -0.0803207  -0.00945576 -0.06507736
 -0.11780559  0.02390758 -0.12654272 -0.05911248  0.0644909   0.07372091
  0.02411652 -0.01689165 -0.0110836  -0.03035076  0.01350853  0.12957563
 -0.00468777  0.0039326   0.19246536  0.07109601 -0.00249231 -0.15525152
  0.01451668  0.08058274  0.10284132  0.04219837  0.10714588 -0.01850704
  0.02476186 -0.04952521 -0.05359496  0.05884115  0.02328234 -0.00350043
 -0.05043707  0.07613693  0.06368523  0.09784085 -0.0090045   0.03074879
 -0.05217281  0.0227589   0.03171485 -0.12912609  0.07850457 -0.11920341
 -0.08215088 -0.04618284  0.04577246  0.05024264  0.0064846   0.0531562
 -0.0048541   0.12698764  0.09

In [None]:
node_embeddings

{'1': array([-0.04480776, -0.06275161,  0.12161338,  0.06563406,  0.14394191,
        -0.01283717, -0.10289611, -0.07727303, -0.08102733,  0.0377893 ,
         0.1628543 , -0.00278657, -0.0640544 , -0.01578132, -0.01974966,
         0.12160113, -0.01472939, -0.01261692, -0.25355953,  0.16217974,
         0.14175287, -0.11960623,  0.09170107,  0.00039366, -0.05396302,
         0.11965813, -0.10116038,  0.08742916,  0.02307168, -0.18275002,
        -0.08603552, -0.0304649 ,  0.07593535,  0.23576449,  0.23492728,
         0.04276075,  0.0256041 , -0.07478667, -0.03180512, -0.04299409,
        -0.02734961, -0.01089524,  0.07761526, -0.10769063,  0.10001687,
         0.03529314,  0.11985807,  0.05002313, -0.0343921 ,  0.06442742,
         0.02802101,  0.09182487,  0.13421951,  0.05639591,  0.13035734,
         0.13892794,  0.17067458, -0.02056159, -0.04375862,  0.14778678,
         0.12285899,  0.16125661,  0.08026356,  0.05717991,  0.05658103,
         0.05298231,  0.11664865,  0.10829915,

## Dataframe Creation

In [None]:
for i in range(len(comet_flattened)):
  comet_flattened[i] = torch.Tensor(comet_flattened[i])

In [None]:
type(comet_flattened[0])

torch.Tensor

In [None]:
import pandas as pd

# Assemble one row per sample: context utterances, emotion label, RoBERTa
# embedding and averaged COMET embedding, matched purely by position i.
sentences = []
emotions = []
roberta_embeddings = []
comet_embeddings = []

# NOTE(review): 9970 is a hard-coded row count. Earlier outputs in this notebook
# report len(roberta) == 9989 and len(comet_flattened) == 9971, so the three
# sources differ in length -- confirm that row i refers to the same utterance in
# all of them. The later df_speaker[1:9971] slice depends on this number too.
for i in range(9970):
    # X[i][0] is [speaker_indices, context_utterances, emotion, sentiment], so
    # `sentence` is the whole context window (a list), not a single utterance.
    sentence = X[i][0][1]
    emotion = X[i][0][2]


    sentences.append(sentence)
    emotions.append(emotion)
    roberta_embeddings.append(roberta[i].cpu().numpy())
    comet_embeddings.append(comet_flattened[i].cpu().numpy())

df = pd.DataFrame({
    'sentence': sentences,
    'emotion': emotions,
    'roberta_embedding': roberta_embeddings,
    'comet_embedding': comet_embeddings
})

df.head(20)

Unnamed: 0,sentence,emotion,roberta_embedding,comet_embedding
0,[also I was the point person on my company’s t...,neutral,"[-0.08579077, 0.11388449, -0.015393346, -0.071...","[-0.3154785, -0.53731066, 0.106992655, -0.5735..."
1,[also I was the point person on my company’s t...,neutral,"[-0.10732669, 0.10379219, -0.010700445, -0.093...","[0.4939643, 0.7265146, -0.029198915, 0.0210089..."
2,[also I was the point person on my company’s t...,neutral,"[-0.10429555, 0.11667142, -0.02193544, -0.0824...","[-0.03479957, 0.07146503, -0.18242458, -0.4558..."
3,[also I was the point person on my company’s t...,neutral,"[-0.10901403, 0.12433116, -0.008018338, -0.084...","[0.61617833, 0.19367002, 0.08130319, -0.983244..."
4,[also I was the point person on my company’s t...,surprise,"[-0.11464099, 0.11366032, -0.005524586, -0.074...","[0.32257935, -0.06760353, -0.058628332, -0.486..."
5,[also I was the point person on my company’s t...,neutral,"[-0.108649455, 0.11062101, -0.0026165054, -0.0...","[0.53397405, -0.26545075, 0.120881915, -0.2953..."
6,"[You must’ve had your hands full., That I did....",neutral,"[-0.13622855, 0.08976007, 0.011292699, -0.1088...","[-0.008280452, 0.01097959, 0.32643604, -0.7241..."
7,"[That I did. That I did., So let’s talk a litt...",neutral,"[-0.118643306, 0.09314076, 0.0028970956, -0.10...","[0.18741038, -0.07223858, -0.5301217, -0.17733..."
8,[So let’s talk a little bit about your duties....,neutral,"[-0.11838071, 0.09147424, 0.0065669017, -0.112...","[0.0531141, 0.04498022, 0.08057419, -0.7608909..."
9,"[My duties? All right., Now you’ll be heading...",neutral,"[-0.10618267, 0.08756316, 0.00035558897, -0.11...","[0.642974, 0.08882743, 0.5323374, -0.63755155,..."


In [None]:
df.to_pickle("dataframe_embeddings_combined.pkl")

In [14]:
df = pd.read_pickle("dataframe_embeddings_combined.pkl")
df

Unnamed: 0,sentence,emotion,roberta_embedding,comet_embedding
0,[also I was the point person on my company’s t...,neutral,"[-0.08579077, 0.11388449, -0.015393346, -0.071...","[-0.3154785, -0.53731066, 0.106992655, -0.5735..."
1,[also I was the point person on my company’s t...,neutral,"[-0.10732669, 0.10379219, -0.010700445, -0.093...","[0.4939643, 0.7265146, -0.029198915, 0.0210089..."
2,[also I was the point person on my company’s t...,neutral,"[-0.10429555, 0.11667142, -0.02193544, -0.0824...","[-0.03479957, 0.07146503, -0.18242458, -0.4558..."
3,[also I was the point person on my company’s t...,neutral,"[-0.10901403, 0.12433116, -0.008018338, -0.084...","[0.61617833, 0.19367002, 0.08130319, -0.983244..."
4,[also I was the point person on my company’s t...,surprise,"[-0.11464099, 0.11366032, -0.005524586, -0.074...","[0.32257935, -0.06760353, -0.058628332, -0.486..."
...,...,...,...,...
9965,"[People will always wanna invest in movies!, H...",neutral,"[-0.15505923, 0.04556754, 0.009690387, -0.1450...","[0.17808683, 0.054896764, -0.06825176, -0.4330..."
9966,"[Hey, you're not rich are ya?, No!, Eh, worth ...",neutral,"[-0.16726404, 0.04252459, 0.021378098, -0.1234...","[0.1081354, 0.32667288, -0.06419178, -0.001261..."
9967,"[No!, Eh, worth a shot. Look Joey, let me kno...",sad,"[-0.13662772, 0.057296306, 0.010685312, -0.107...","[-0.013356431, 0.06027376, 0.0328802, -0.31809..."
9968,"[Rachel, do you have any muffins left?]",neutral,"[-0.07507874, 0.077730484, 0.00032047942, -0.1...","[0.41771066, 0.37602848, -0.026077485, 0.10542..."


In [4]:
df.to_csv("dataframe_embeddings_combined.csv")

In [15]:
import pandas as pd

# Re-read the raw training file to recover the speaker name of every utterance.
file_path = "/content/drive/MyDrive/train.txt"

speakers = []
utterances = []
emotions = []
sentiments = []

# encoding='utf8' matches how MELD_loader opens this same file; without it the
# platform default encoding is used and non-ASCII characters (e.g. curly
# apostrophes in the utterances) can be mis-decoded.
with open(file_path, 'r', encoding='utf8') as file:
    for line in file:
        data = line.strip().split('\t')
        # Keep only lines with exactly four tab-separated fields; blank separator
        # lines are dropped. The header line also has four fields, so it is kept
        # as row 0.
        if len(data) == 4:
            speakers.append(data[0])
            utterances.append(data[1])
            emotions.append(data[2])
            sentiments.append(data[3])

df_speaker = pd.DataFrame({
    'Speaker': speakers,
    'Utterance': utterances,
    'Emotion': emotions,
    'Sentiment': sentiments
})

df_speaker

Unnamed: 0,Speaker,Utterance,Emotion,Sentiment
0,Speaker,Utterance,Emotion,Sentiment
1,Chandler,also I was the point person on my company’s tr...,neutral,neutral
2,The Interviewer,You must’ve had your hands full.,neutral,neutral
3,Chandler,That I did. That I did.,neutral,neutral
4,The Interviewer,So let’s talk a little bit about your duties.,neutral,neutral
...,...,...,...,...
9985,Chandler,You or me?,neutral,neutral
9986,Ross,"I got it. Uh, Joey, women don't have Adam's ap...",neutral,neutral
9987,Joey,"You guys are messing with me, right?",surprise,positive
9988,All,Yeah.,neutral,neutral


In [16]:
df_speaker = df_speaker[1:9971]
df_speaker

Unnamed: 0,Speaker,Utterance,Emotion,Sentiment
1,Chandler,also I was the point person on my company’s tr...,neutral,neutral
2,The Interviewer,You must’ve had your hands full.,neutral,neutral
3,Chandler,That I did. That I did.,neutral,neutral
4,The Interviewer,So let’s talk a little bit about your duties.,neutral,neutral
5,Chandler,My duties? All right.,surprise,positive
...,...,...,...,...
9966,Joey,Yeah.,neutral,neutral
9967,The Grip,These got left for ya.,neutral,neutral
9968,Joey,Thanks. Congratulations on your big break.,sadness,negative
9969,Julie,"Rachel, do you have any muffins left?",neutral,neutral


In [18]:
# Attach the speaker names to df by position. `.values` drops df_speaker's
# index, so rows are matched purely by order; the length check is the only
# guard, and on a mismatch df is left without a 'Speaker' column.
if len(df) == len(df_speaker):
    df['Speaker'] = df_speaker['Speaker'].values
else:
    print("DataFrames do not have the same number of rows, cannot merge sequentially.")

df


Unnamed: 0,sentence,emotion,roberta_embedding,comet_embedding,Speaker
0,[also I was the point person on my company’s t...,neutral,"[-0.08579077, 0.11388449, -0.015393346, -0.071...","[-0.3154785, -0.53731066, 0.106992655, -0.5735...",Chandler
1,[also I was the point person on my company’s t...,neutral,"[-0.10732669, 0.10379219, -0.010700445, -0.093...","[0.4939643, 0.7265146, -0.029198915, 0.0210089...",The Interviewer
2,[also I was the point person on my company’s t...,neutral,"[-0.10429555, 0.11667142, -0.02193544, -0.0824...","[-0.03479957, 0.07146503, -0.18242458, -0.4558...",Chandler
3,[also I was the point person on my company’s t...,neutral,"[-0.10901403, 0.12433116, -0.008018338, -0.084...","[0.61617833, 0.19367002, 0.08130319, -0.983244...",The Interviewer
4,[also I was the point person on my company’s t...,surprise,"[-0.11464099, 0.11366032, -0.005524586, -0.074...","[0.32257935, -0.06760353, -0.058628332, -0.486...",Chandler
...,...,...,...,...,...
9965,"[People will always wanna invest in movies!, H...",neutral,"[-0.15505923, 0.04556754, 0.009690387, -0.1450...","[0.17808683, 0.054896764, -0.06825176, -0.4330...",Joey
9966,"[Hey, you're not rich are ya?, No!, Eh, worth ...",neutral,"[-0.16726404, 0.04252459, 0.021378098, -0.1234...","[0.1081354, 0.32667288, -0.06419178, -0.001261...",The Grip
9967,"[No!, Eh, worth a shot. Look Joey, let me kno...",sad,"[-0.13662772, 0.057296306, 0.010685312, -0.107...","[-0.013356431, 0.06027376, 0.0328802, -0.31809...",Joey
9968,"[Rachel, do you have any muffins left?]",neutral,"[-0.07507874, 0.077730484, 0.00032047942, -0.1...","[0.41771066, 0.37602848, -0.026077485, 0.10542...",Julie


In [19]:
df.to_pickle("dataframe_embeddings_combined_speaker.pkl")

In [21]:
# Map speaker name -> node number (kept as a string) from metadata.txt.
metadata_mapping = {}
with open("metadata.txt", "r") as f:
    for line in f:
        line = line.strip()
        if line:
            # Split on the first space only, so speaker names may contain spaces.
            # A non-empty line without any space raises ValueError here.
            node_number, speaker = line.split(" ", 1)
            # A speaker listed more than once keeps the last node number seen.
            metadata_mapping[speaker] = node_number

In [23]:
def get_node_embedding(speaker):
    """Return the node2vec vector for ``speaker``, or ``None`` when unavailable.

    The speaker is translated to a node number through the module-level
    ``metadata_mapping`` and that number is looked up in ``node_embeddings``.
    ``None`` is returned when the speaker has no (non-empty) node number or the
    node has no embedding.
    """
    node_number = metadata_mapping.get(speaker)
    if not node_number or node_number not in node_embeddings:
        return None
    return node_embeddings[node_number]


df['node_embedding'] = df['Speaker'].apply(get_node_embedding)

df

Unnamed: 0,sentence,emotion,roberta_embedding,comet_embedding,Speaker,node_embedding
0,[also I was the point person on my company’s t...,neutral,"[-0.08579077, 0.11388449, -0.015393346, -0.071...","[-0.3154785, -0.53731066, 0.106992655, -0.5735...",Chandler,"[-0.113292314, -0.025078477, 0.05791118, 0.055..."
1,[also I was the point person on my company’s t...,neutral,"[-0.10732669, 0.10379219, -0.010700445, -0.093...","[0.4939643, 0.7265146, -0.029198915, 0.0210089...",The Interviewer,"[-0.10751053, -0.086833104, 0.076948896, 0.091..."
2,[also I was the point person on my company’s t...,neutral,"[-0.10429555, 0.11667142, -0.02193544, -0.0824...","[-0.03479957, 0.07146503, -0.18242458, -0.4558...",Chandler,"[-0.113292314, -0.025078477, 0.05791118, 0.055..."
3,[also I was the point person on my company’s t...,neutral,"[-0.10901403, 0.12433116, -0.008018338, -0.084...","[0.61617833, 0.19367002, 0.08130319, -0.983244...",The Interviewer,"[-0.10751053, -0.086833104, 0.076948896, 0.091..."
4,[also I was the point person on my company’s t...,surprise,"[-0.11464099, 0.11366032, -0.005524586, -0.074...","[0.32257935, -0.06760353, -0.058628332, -0.486...",Chandler,"[-0.113292314, -0.025078477, 0.05791118, 0.055..."
...,...,...,...,...,...,...
9965,"[People will always wanna invest in movies!, H...",neutral,"[-0.15505923, 0.04556754, 0.009690387, -0.1450...","[0.17808683, 0.054896764, -0.06825176, -0.4330...",Joey,"[-0.017869603, -0.067539334, 0.0696035, -0.069..."
9966,"[Hey, you're not rich are ya?, No!, Eh, worth ...",neutral,"[-0.16726404, 0.04252459, 0.021378098, -0.1234...","[0.1081354, 0.32667288, -0.06419178, -0.001261...",The Grip,"[-0.044668414, -0.08820224, 0.055806573, -0.05..."
9967,"[No!, Eh, worth a shot. Look Joey, let me kno...",sad,"[-0.13662772, 0.057296306, 0.010685312, -0.107...","[-0.013356431, 0.06027376, 0.0328802, -0.31809...",Joey,"[-0.017869603, -0.067539334, 0.0696035, -0.069..."
9968,"[Rachel, do you have any muffins left?]",neutral,"[-0.07507874, 0.077730484, 0.00032047942, -0.1...","[0.41771066, 0.37602848, -0.026077485, 0.10542...",Julie,"[-0.008373053, -0.08706177, 0.01026787, 0.0089..."


In [24]:
df.to_pickle("dataframe_embeddings_combined_speaker_node2vec.pkl")

# Comet Features 2

In [None]:
from google.colab import drive
drive.mount('/content/gdrive')

In [None]:
!conda install tensorflow
!pip install ftfy==5.1
!conda install -c conda-forge spacy
!python -m spacy download en
!pip install tensorboardX
!pip install tqdm
!pip install pandas
!pip install ipython

In [None]:
!pip install nltk
import nltk
nltk.download('punkt')
nltk.download('wordnet')
nltk.download('omw-1.4')

In [None]:
def read_data(tran_path):
  """Read a MELD-style text file into ``{conversation_index: [utterance, ...]}``.

  The first two lines of the file are skipped. Every following non-blank line
  must be ``speaker<TAB>utterance<TAB>emotion<TAB>sentiment``; only the
  utterance is kept. A blank line ends the current conversation.

  Args:
    tran_path: path of the file to read (opened as UTF-8). The parameter name
      is kept as-is so existing keyword callers keep working.

  Returns:
    Dict mapping 0-based conversation index to its list of utterances.
  """
  # Fixed: read from the argument. The body used to open the global
  # `train_path`, silently ignoring whatever path the caller passed in.
  with open(tran_path, 'r', encoding='utf8') as f:
    dataset = f.readlines()
  Utterances = {}
  Utterance = []
  j = 0
  for i, data in enumerate(dataset):
    if i < 2:
      continue
    if data == '\n':
      Utterances[j] = Utterance
      Utterance = []
      j += 1
    else:
      speaker, utt, emo, senti = data.strip().split('\t')
      Utterance.append(utt)
  # Fixed: keep the final conversation when the file does not end with a blank
  # line; previously its utterances were dropped.
  if Utterance:
    Utterances[j] = Utterance
  return Utterances

In [None]:
Utterances = {}
#%cd /content/gdrive/MyDrive/phdproject/CoMPM/dataset/MELD/multi
train_path = 'train.txt'
Utterances = read_data(train_path)

In [None]:
Utterances[0]

In [None]:
with open('meld_utterancedCOMPM.pkl', 'wb') as file:
  pickle.dump(Utterances, file)

In [None]:
%cd /content/gdrive/MyDrive/comet_data/conv-emotion/COSMIC/feature-extraction
import pickle, numpy as np
from comet.csk_feature_extract import CSKFeatureExtractor

extractor = CSKFeatureExtractor()
feaures = extractor.extract(Utterances)

In [None]:
len(feaures[1][1037][10])

In [None]:
with open('/content/drive/MyDrive/comet.pkl', 'wb') as file:
  pickle.dump(feaures, file)

In [None]:
%cd /content/gdrive/MyDrive/Common_sense/conv-emotion/COSMIC/feature-extraction
for dataset in ['meld']:
    print ('Extracting features in', dataset)
    sentences = pickle.load(open(dataset + '/' + dataset + '_sentences.pkl', 'rb'))
    print(sentences.keys())
    break

In [None]:
import pickle, numpy as np
from comet.csk_feature_extract import CSKFeatureExtractor

extractor = CSKFeatureExtractor()

# For each dataset folder: load <name>/<name>_sentences.pkl, extract the COMET
# features and write them to <name>/<name>_features_comet.pkl.
for dataset in ['meld']:
    print ('Extracting features in', dataset)
    # `with` closes both files deterministically; the bare open() calls used
    # before left the handles open (and the written file possibly unflushed).
    with open(dataset + '/' + dataset + '_sentences.pkl', 'rb') as sentences_file:
        sentences = pickle.load(sentences_file)
    feaures = extractor.extract(sentences)
    with open(dataset + '/' + dataset + '_features_comet.pkl', 'wb') as features_file:
        pickle.dump(feaures, features_file)

print ('Done!')

In [None]:
sentences

In [None]:
for key , value in sentences.items():
  print(f'{key} : {value}' )

In [None]:
import pickle
with open('comet.pkl', 'rb') as f:
    comet = pickle.load(f)

In [None]:
len(comet[0])

# Data Setup for Training and Inference

In [2]:
import pandas as pd
df = pd.read_pickle("/content/dataframe_embeddings_combined_speaker_node2vec.pkl")
df

Unnamed: 0,sentence,emotion,roberta_embedding,comet_embedding,Speaker,node_embedding
0,[also I was the point person on my company’s t...,neutral,"[-0.08579077, 0.11388449, -0.015393346, -0.071...","[-0.3154785, -0.53731066, 0.106992655, -0.5735...",Chandler,"[-0.113292314, -0.025078477, 0.05791118, 0.055..."
1,[also I was the point person on my company’s t...,neutral,"[-0.10732669, 0.10379219, -0.010700445, -0.093...","[0.4939643, 0.7265146, -0.029198915, 0.0210089...",The Interviewer,"[-0.10751053, -0.086833104, 0.076948896, 0.091..."
2,[also I was the point person on my company’s t...,neutral,"[-0.10429555, 0.11667142, -0.02193544, -0.0824...","[-0.03479957, 0.07146503, -0.18242458, -0.4558...",Chandler,"[-0.113292314, -0.025078477, 0.05791118, 0.055..."
3,[also I was the point person on my company’s t...,neutral,"[-0.10901403, 0.12433116, -0.008018338, -0.084...","[0.61617833, 0.19367002, 0.08130319, -0.983244...",The Interviewer,"[-0.10751053, -0.086833104, 0.076948896, 0.091..."
4,[also I was the point person on my company’s t...,surprise,"[-0.11464099, 0.11366032, -0.005524586, -0.074...","[0.32257935, -0.06760353, -0.058628332, -0.486...",Chandler,"[-0.113292314, -0.025078477, 0.05791118, 0.055..."
...,...,...,...,...,...,...
9965,"[People will always wanna invest in movies!, H...",neutral,"[-0.15505923, 0.04556754, 0.009690387, -0.1450...","[0.17808683, 0.054896764, -0.06825176, -0.4330...",Joey,"[-0.017869603, -0.067539334, 0.0696035, -0.069..."
9966,"[Hey, you're not rich are ya?, No!, Eh, worth ...",neutral,"[-0.16726404, 0.04252459, 0.021378098, -0.1234...","[0.1081354, 0.32667288, -0.06419178, -0.001261...",The Grip,"[-0.044668414, -0.08820224, 0.055806573, -0.05..."
9967,"[No!, Eh, worth a shot. Look Joey, let me kno...",sad,"[-0.13662772, 0.057296306, 0.010685312, -0.107...","[-0.013356431, 0.06027376, 0.0328802, -0.31809...",Joey,"[-0.017869603, -0.067539334, 0.0696035, -0.069..."
9968,"[Rachel, do you have any muffins left?]",neutral,"[-0.07507874, 0.077730484, 0.00032047942, -0.1...","[0.41771066, 0.37602848, -0.026077485, 0.10542...",Julie,"[-0.008373053, -0.08706177, 0.01026787, 0.0089..."


In [3]:
import numpy as np
from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import train_test_split

# Encode the emotion labels as integer class ids.
# NOTE(review): df['emotion'] is overwritten in place, so re-running this cell
# refits the encoder on the already-encoded integers — confirm this is intended.
label_encoder = LabelEncoder()
df['emotion'] = label_encoder.fit_transform(df['emotion'])

# Stack each per-row embedding column into a 2-D feature matrix.
X_roberta = np.stack(df['roberta_embedding'].values)    # (num_samples, 768)
X_comet = np.stack(df['comet_embedding'].values)        # (num_samples, 768)
X_node2vec = np.stack(df['node_embedding'].values)      # (num_samples, 256)

# Feature-wise concatenations used by the fused models.
X_combined_roberta_comet = np.concatenate(
    [X_roberta, X_comet], axis=1)                       # (num_samples, 1536)
X_combined_all = np.concatenate(
    [X_roberta, X_comet, X_node2vec], axis=1)           # (num_samples, 1792)
y = df['emotion'].values

# One split call for every array: all feature sets and the labels share the
# exact same 80/20 row partition (seeded with random_state=42).
(
    X_roberta_train, X_roberta_test,
    X_comet_train, X_comet_test,
    X_combined_roberta_comet_train, X_combined_roberta_comet_test,
    X_combined_all_train, X_combined_all_test,
    y_train, y_test,
) = train_test_split(
    X_roberta,
    X_comet,
    X_combined_roberta_comet,
    X_combined_all,
    y,
    test_size=0.2,
    random_state=42,
)


# RoBERTa Model

In [98]:
import tensorflow as tf
from tensorflow.keras import layers, models, regularizers
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.callbacks import ReduceLROnPlateau

def build_roberta_model():
    """Build and compile the MLP classifier fed with 768-d RoBERTa embeddings.

    The network has three hidden stages (768 -> 512 -> 256 units). Each stage is
    Dense (he_uniform init, L2 penalty 0.01) -> BatchNormalization ->
    LeakyReLU(0.1) -> Dropout, and the head is a 7-unit softmax layer.

    Returns:
        A compiled ``tf.keras.Model`` using AdamW, sparse categorical
        cross-entropy loss and the accuracy metric.
    """
    # (units, dropout rate) of each hidden stage, in forward order.
    hidden_stages = ((768, 0.3), (512, 0.4), (256, 0.3))

    inputs = layers.Input(shape=(768,))
    x = inputs
    for units, drop_rate in hidden_stages:
        x = layers.Dense(
            units,
            kernel_initializer='he_uniform',
            kernel_regularizer=tf.keras.regularizers.l2(0.01),
        )(x)
        x = layers.BatchNormalization()(x)
        x = layers.LeakyReLU(alpha=0.1)(x)
        x = layers.Dropout(drop_rate)(x)

    # Classification head: one probability per emotion class.
    outputs = layers.Dense(7, activation='softmax')(x)
    model = tf.keras.Model(inputs=inputs, outputs=outputs)

    adamw = tf.keras.optimizers.AdamW(
        learning_rate=2e-4,
        weight_decay=0.01,
        beta_1=0.9,
        beta_2=0.999,
        epsilon=1e-7,
    )
    # Sparse loss: targets are integer class ids, not one-hot vectors.
    model.compile(
        optimizer=adamw,
        loss='sparse_categorical_crossentropy',
        metrics=['accuracy'],
    )
    return model


# COMET Model

In [99]:
def build_comet_model():
    """Build and compile the MLP classifier fed with 768-d COMET embeddings.

    Three hidden stages (1024 -> 512 -> 256 units), each BatchNormalization ->
    Dense (he_normal init, L2 penalty 0.01) -> LeakyReLU(0.1) -> Dropout,
    followed by a 7-unit softmax layer.

    Returns:
        A compiled Keras ``Sequential`` model using Adam (AMSGrad variant),
        sparse categorical cross-entropy loss and the accuracy metric.
    """
    # `tf.keras.Sequential` is used instead of `models.Sequential` on purpose:
    # the training cell rebinds the global name `models` to a dict of
    # experiments, so looking the class up through `models` raises
    # AttributeError whenever this builder is called again afterwards.
    model = tf.keras.Sequential([
        # Input layer
        layers.Input(shape=(768,)),

        # First block
        layers.BatchNormalization(),
        layers.Dense(1024, kernel_initializer='he_normal', kernel_regularizer=tf.keras.regularizers.l2(0.01)),
        layers.LeakyReLU(alpha=0.1),
        layers.Dropout(0.5),

        # Second block
        layers.BatchNormalization(),
        layers.Dense(512, kernel_initializer='he_normal', kernel_regularizer=tf.keras.regularizers.l2(0.01)),
        layers.LeakyReLU(alpha=0.1),
        layers.Dropout(0.4),

        # Third block
        layers.BatchNormalization(),
        layers.Dense(256, kernel_initializer='he_normal', kernel_regularizer=tf.keras.regularizers.l2(0.01)),
        layers.LeakyReLU(alpha=0.1),
        layers.Dropout(0.3),

        # Output layer: one probability per emotion class
        layers.Dense(7, activation='softmax')
    ])

    optimizer = tf.keras.optimizers.Adam(
        learning_rate=5e-4,
        beta_1=0.9,
        beta_2=0.999,
        epsilon=1e-07,
        amsgrad=True
    )

    # Sparse loss: targets are integer class ids, not one-hot vectors.
    model.compile(
        optimizer=optimizer,
        loss='sparse_categorical_crossentropy',
        metrics=['accuracy']
    )

    return model

# RoBERTa + COMET Model

In [103]:
def build_roberta_comet_model():
    """Build and compile the MLP for concatenated RoBERTa + COMET features.

    The 1536-d input is batch-normalized, then passed through four hidden
    stages (1024 -> 512 -> 256 -> 128 units), each Dense (L2 penalty 1e-4) ->
    LeakyReLU(0.1) -> BatchNormalization -> Dropout, and finally a 7-unit
    softmax layer.

    Returns:
        A compiled ``tf.keras.Model`` using AdamW (AMSGrad variant) driven by a
        cosine-decay-with-restarts learning-rate schedule, sparse categorical
        cross-entropy loss and the accuracy metric.
    """
    weight_penalty = 1e-4

    inputs = layers.Input(shape=(1536,))
    x = layers.BatchNormalization()(inputs)

    # The first three stages share the he_normal initializer: (units, dropout).
    for units, drop_rate in ((1024, 0.45), (512, 0.4), (256, 0.35)):
        x = layers.Dense(
            units,
            kernel_initializer='he_normal',
            kernel_regularizer=regularizers.l2(weight_penalty),
        )(x)
        x = layers.LeakyReLU(alpha=0.1)(x)
        x = layers.BatchNormalization()(x)
        x = layers.Dropout(drop_rate)(x)

    # Fourth stage: same layout, but Dense keeps the Keras default initializer.
    x = layers.Dense(128, kernel_regularizer=regularizers.l2(weight_penalty))(x)
    x = layers.LeakyReLU(alpha=0.1)(x)
    x = layers.BatchNormalization()(x)
    x = layers.Dropout(0.35)(x)

    # Classification head: one probability per emotion class.
    outputs = layers.Dense(7, activation='softmax')(x)
    model = tf.keras.Model(inputs=inputs, outputs=outputs)

    # Cosine decay with warm restarts; `alpha` is the floor expressed as a
    # fraction of the initial learning rate.
    initial_learning_rate = 1e-3
    min_lr = 1e-6
    lr_schedule = tf.keras.optimizers.schedules.CosineDecayRestarts(
        initial_learning_rate,
        first_decay_steps=800,
        t_mul=1.5,
        m_mul=0.9,
        alpha=min_lr / initial_learning_rate,
    )

    adamw = tf.keras.optimizers.AdamW(
        learning_rate=lr_schedule,
        weight_decay=0.01,
        beta_1=0.9,
        beta_2=0.999,
        epsilon=1e-7,
        amsgrad=True,
    )

    # Sparse loss: targets are integer class ids, not one-hot vectors.
    model.compile(
        optimizer=adamw,
        loss='sparse_categorical_crossentropy',
        metrics=['accuracy'],
    )
    return model

# RoBERTa + COMET + node2vec Model

In [104]:
def build_roberta_comet_node2vec_model():
    """Build and compile the MLP for RoBERTa + COMET + node2vec features.

    The 1792-d input is batch-normalized, then passed through four hidden
    stages (2048 -> 1024 -> 512 -> 256 units), each Dense (he_normal init,
    L2 penalty 2e-4) -> LeakyReLU(0.1) -> BatchNormalization -> Dropout, and
    finally a 7-unit softmax layer.

    Returns:
        A compiled ``tf.keras.Model`` using AdamW (AMSGrad variant) driven by a
        cosine-decay-with-restarts learning-rate schedule, sparse categorical
        cross-entropy loss and the accuracy metric.
    """
    # (units, dropout rate) of each hidden stage, in forward order.
    hidden_stages = ((2048, 0.45), (1024, 0.4), (512, 0.35), (256, 0.3))

    # Input layer followed by an initial batch normalization
    inputs = layers.Input(shape=(1792,))
    x = layers.BatchNormalization()(inputs)

    for units, drop_rate in hidden_stages:
        x = layers.Dense(units, kernel_initializer='he_normal',
                         kernel_regularizer=regularizers.l2(2e-4))(x)
        x = layers.LeakyReLU(alpha=0.1)(x)
        x = layers.BatchNormalization()(x)
        x = layers.Dropout(drop_rate)(x)

    # Output layer: one probability per emotion class
    outputs = layers.Dense(7, activation='softmax')(x)

    # Create model
    model = tf.keras.Model(inputs=inputs, outputs=outputs)

    initial_learning_rate = 1e-3
    total_steps = 1000  # Length of the first cosine cycle; adjust to the training setup

    # Cosine decay with warm restarts (no warm-up phase is configured).
    lr_schedule = tf.keras.optimizers.schedules.CosineDecayRestarts(
        initial_learning_rate=initial_learning_rate,
        first_decay_steps=total_steps,
        t_mul=2.0,
        m_mul=0.9,
        alpha=1e-5
    )

    optimizer = tf.keras.optimizers.AdamW(
        learning_rate=lr_schedule,
        weight_decay=0.01,
        beta_1=0.9,
        beta_2=0.999,
        epsilon=1e-7,
        amsgrad=True
    )

    # Sparse loss: targets are integer class ids, not one-hot vectors.
    # No gradient clipping is applied by this optimizer configuration.
    model.compile(
        optimizer=optimizer,
        loss='sparse_categorical_crossentropy',
        metrics=['accuracy']
    )

    return model


# Training

In [105]:
# Experiment registry: name -> (compiled model, train features, test features).
# NOTE: this dict reuses the name `models`, which shadows the
# `tensorflow.keras.models` module imported earlier in the notebook.
models = {
    'Model_Roberta': (
        build_roberta_model(), X_roberta_train, X_roberta_test),
    'Model_Comet': (
        build_comet_model(), X_comet_train, X_comet_test),
    'Model_Roberta_Comet': (
        build_roberta_comet_model(),
        X_combined_roberta_comet_train, X_combined_roberta_comet_test),
    'Model_Roberta_Comet_Node2Vec': (
        build_roberta_comet_node2vec_model(),
        X_combined_all_train, X_combined_all_test),
}

# Train each model in turn and keep its Keras History object by name.
history_data = {}
for model_name in models:
    model, X_train, X_test = models[model_name]
    print(f"\nTraining {model_name}...")

    # Fresh callback per model: stop once val_loss has not improved for 10
    # epochs and roll back to the best weights seen.
    early_stopping = tf.keras.callbacks.EarlyStopping(
        monitor='val_loss',
        patience=10,
        restore_best_weights=True,
    )

    # validation_split holds out 20% of the training rows for validation.
    history = model.fit(
        X_train,
        y_train,
        validation_split=0.2,
        epochs=20,
        batch_size=128,
        callbacks=[early_stopping],
        verbose=1,
    )
    history_data[model_name] = history



Training Model_Roberta...
Epoch 1/20
[1m50/50[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m13s[0m 114ms/step - accuracy: 0.2292 - loss: 31.9651 - val_accuracy: 0.4712 - val_loss: 28.7811
Epoch 2/20
[1m50/50[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step - accuracy: 0.4201 - loss: 27.8792 - val_accuracy: 0.4712 - val_loss: 25.4290
Epoch 3/20
[1m50/50[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 5ms/step - accuracy: 0.4758 - loss: 24.4991 - val_accuracy: 0.4712 - val_loss: 22.3939
Epoch 4/20
[1m50/50[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 5ms/step - accuracy: 0.4810 - loss: 21.5351 - val_accuracy: 0.4712 - val_loss: 19.7474
Epoch 5/20
[1m50/50[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step - accuracy: 0.4975 - loss: 18.9682 - val_accuracy: 0.4718 - val_loss: 17.4624
Epoch 6/20
[1m50/50[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step - accuracy: 0.5014 - loss: 16.7500 - val_accuracy: 0.4799 - val_loss: 15.449

# Evaluation

In [107]:
from sklearn.metrics import classification_report, confusion_matrix

# Explicit class ids (0 .. n_classes-1, the encoding produced by LabelEncoder).
# Passing them keeps the report rows and the confusion-matrix shape fixed even
# if a rare class is absent from both y_test and a model's predictions;
# otherwise the inferred labels would no longer line up with `target_names`.
class_ids = list(range(len(label_encoder.classes_)))

# Report per-class metrics and the confusion matrix for every trained model.
for model_name, (model, _, X_test) in models.items():
    print(f"\nEvaluation for {model_name}:")
    # Predicted class = index of the largest softmax probability.
    y_pred = model.predict(X_test).argmax(axis=1)
    print(classification_report(y_test, y_pred, labels=class_ids, target_names=label_encoder.classes_))
    print(confusion_matrix(y_test, y_pred, labels=class_ids))



Evaluation for Model_Roberta:
[1m250/250[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step

Classification Report:
              precision    recall  f1-score   support

       anger       0.54      0.39      0.45       886
     disgust       0.13      0.60      0.22       215
        fear       0.42      0.11      0.18       224
         joy       0.42      0.67      0.51      1399
     neutral       0.79      0.47      0.59      3734
         sad       0.29      0.55      0.38       564
    surprise       0.48      0.37      0.42       954

    accuracy                           0.48      7976
   macro avg       0.44      0.45      0.39      7976
weighted avg       0.59      0.48      0.50      7976


Confusion Matrix:
[[ 348  119    4  146   95  118   56]
 [  10  130    0   26   15   23   11]
 [  22   34   25   28   41   53   21]
 [  45   97    5  942  143   96   71]
 [ 137  416   19  830 1747  385  200]
 [  39   59    2   63   70  312   19]
 [  49  137    4  227  111