In [None]:
# Mount Google Drive inside the Colab VM so the dataset files under
# /content/drive can be read like local files.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [None]:
# Switch the working directory to the root of the mounted Drive.
%cd /content/drive/MyDrive

In [None]:
# Sanity check: show the current working directory.
%pwd

'/content/drive/MyDrive'

In [None]:
!pip install transformers
!pip install pytorch-lightning

Collecting pytorch-lightning
  Downloading pytorch_lightning-2.2.0.post0-py3-none-any.whl (800 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m800.9/800.9 kB[0m [31m9.0 MB/s[0m eta [36m0:00:00[0m
Collecting torchmetrics>=0.7.0 (from pytorch-lightning)
  Downloading torchmetrics-1.3.1-py3-none-any.whl (840 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m840.4/840.4 kB[0m [31m13.8 MB/s[0m eta [36m0:00:00[0m
Collecting lightning-utilities>=0.8.0 (from pytorch-lightning)
  Downloading lightning_utilities-0.10.1-py3-none-any.whl (24 kB)
Installing collected packages: lightning-utilities, torchmetrics, pytorch-lightning
Successfully installed lightning-utilities-0.10.1 pytorch-lightning-2.2.0.post0 torchmetrics-1.3.1


In [None]:
import transformers
from transformers import BertModel, BertTokenizer, AdamW, get_linear_schedule_with_warmup
import torch

import numpy as np
import pandas as pd
import seaborn as sns
from pylab import rcParams
import matplotlib.pyplot as plt
from matplotlib import rc
from sklearn.model_selection import train_test_split

import copy
from tqdm import tqdm

from sklearn.metrics import confusion_matrix, classification_report
from collections import defaultdict
from textwrap import wrap
from torch import nn, optim
from torch.utils.data import Dataset, DataLoader

# Render matplotlib figures inline, at high-DPI ("retina") resolution.
%matplotlib inline
%config InlineBackend.figure_format='retina'

# Global seaborn theme for every plot in the notebook.
sns.set(style='whitegrid', palette='muted', font_scale=1.2)

# Custom colour cycle; replaces the 'muted' palette selected just above.
HAPPY_COLORS_PALETTE = ["#01BEFE", "#FFDD00", "#FF7D00", "#FF006D", "#ADFF02", "#8F00FF"]

sns.set_palette(sns.color_palette(HAPPY_COLORS_PALETTE))

# Default figure size (width, height) in inches.
rcParams['figure.figsize'] = 8, 6

# Seed the NumPy and PyTorch random number generators.
RANDOM_SEED = 42
np.random.seed(RANDOM_SEED)
torch.manual_seed(RANDOM_SEED)

<torch._C.Generator at 0x7ca4d215c4b0>

In [None]:
# Prefer the first CUDA GPU when one is visible; otherwise fall back to the CPU.
device = torch.device("cuda:0") if torch.cuda.is_available() else torch.device("cpu")

In [None]:
# Directory on the mounted Drive that holds the MELD CSV splits. Keeping it in
# one place means relocating the dataset needs a single edit.
MELD_DIR = '/content/drive/MyDrive/MELD'

# One CSV per split; MELD names its validation split "dev".
train_path = f'{MELD_DIR}/train_sent_emo.csv'
test_path = f'{MELD_DIR}/test_sent_emo.csv'
val_path = f'{MELD_DIR}/dev_sent_emo.csv'

In [None]:
# Load the three splits, in train / test / validation order, into DataFrames.
train_data, test_data, val_data = (
    pd.read_csv(csv_path) for csv_path in (train_path, test_path, val_path)
)

In [None]:
def modify_df(dtst):
    """Build (four previous emotions -> current emotion) rows for each dialogue.

    For every utterance whose ``Utterance_ID`` is not 0, one output row is
    produced holding the emotions of the four preceding utterance IDs of the
    same dialogue (oldest first) followed by the utterance's own emotion.

    Args:
        dtst: DataFrame with at least the columns ``Dialogue_ID``,
            ``Utterance_ID`` and ``Emotion``.

    Returns:
        DataFrame with columns ``emo1``..``emo4`` (emotions of utterance IDs
        ``i-4``..``i-1``) and ``current`` (emotion of utterance ``i``).
        A context slot is ``'neutral'`` whenever the corresponding utterance ID
        is negative or absent from the dialogue. An input without qualifying
        utterances yields an empty frame with the same columns.
    """
    columns = ["emo1", "emo2", "emo3", "emo4", "current"]
    context_size = len(columns) - 1
    records = []

    # sort=False keeps dialogues in order of first appearance.
    for _, dialogue_df in dtst.groupby('Dialogue_ID', sort=False):
        utterance_ids = dialogue_df['Utterance_ID'].tolist()
        emotions = dialogue_df['Emotion'].tolist()

        # Utterance ID -> emotion lookup for this dialogue. setdefault keeps the
        # first occurrence if an ID is duplicated.
        emotion_by_id = {}
        for utterance_id, emotion in zip(utterance_ids, emotions):
            emotion_by_id.setdefault(utterance_id, emotion)

        for utterance_id in utterance_ids:
            # The opening utterance has no preceding context, so it is skipped.
            if utterance_id == 0:
                continue

            # Rebuild the context from scratch for every utterance so that a
            # missing utterance ID falls back to 'neutral' instead of inheriting
            # a stale emotion from an earlier row.
            context = []
            for offset in range(context_size, 0, -1):
                previous_id = utterance_id - offset
                if previous_id >= 0:
                    context.append(emotion_by_id.get(previous_id, 'neutral'))
                else:
                    context.append('neutral')

            records.append(context + [emotion_by_id[utterance_id]])

    # Build the frame once instead of growing it row by row.
    return pd.DataFrame(records, columns=columns)

In [None]:
# Turn each raw split into its (previous four emotions -> current emotion) frame.
train_df, test_df, valid_df = map(modify_df, (train_data, test_data, val_data))

In [None]:
# Inspect the context/target frame produced by modify_df for the training split.
train_df

Unnamed: 0,emo1,emo2,emo3,emo4,current
0,neutral,neutral,neutral,neutral,neutral
1,neutral,neutral,neutral,neutral,neutral
2,neutral,neutral,neutral,neutral,neutral
3,neutral,neutral,neutral,neutral,surprise
4,neutral,neutral,neutral,surprise,neutral
...,...,...,...,...,...
8956,neutral,neutral,disgust,disgust,neutral
8957,neutral,disgust,disgust,neutral,neutral
8958,disgust,disgust,neutral,neutral,surprise
8959,disgust,neutral,neutral,surprise,neutral


In [None]:
# anger, disgust, fear, joy, sadness, surprise, and neutral

def to_score(emotion):
    """Map an emotion name to its integer class index.

    The index is the position of the name in the sequence
    anger, disgust, fear, joy, sadness, surprise, neutral (0 through 6).

    Returns:
        The class index, or None when ``emotion`` matches none of the names.
    """
    ordered_emotions = (
        'anger',
        'disgust',
        'fear',
        'joy',
        'sadness',
        'surprise',
        'neutral',
    )
    for score, name in enumerate(ordered_emotions):
        if emotion == name:
            return score
    return None

In [None]:
# Add an integer "<column>_label" companion for every emotion column of each split.
for df in (train_df, test_df, valid_df):
    for emotion_column in ('emo1', 'emo2', 'emo3', 'emo4', 'current'):
        df[f'{emotion_column}_label'] = df[emotion_column].apply(to_score)

In [None]:
# Inspect the training frame again now that the integer label columns exist.
train_df

Unnamed: 0,emo1,emo2,emo3,emo4,current,emo1_label,emo2_label,emo3_label,emo4_label,current_label
0,neutral,neutral,neutral,neutral,neutral,6,6,6,6,6
1,neutral,neutral,neutral,neutral,neutral,6,6,6,6,6
2,neutral,neutral,neutral,neutral,neutral,6,6,6,6,6
3,neutral,neutral,neutral,neutral,surprise,6,6,6,6,5
4,neutral,neutral,neutral,surprise,neutral,6,6,6,5,6
...,...,...,...,...,...,...,...,...,...,...
8956,neutral,neutral,disgust,disgust,neutral,6,6,1,1,6
8957,neutral,disgust,disgust,neutral,neutral,6,1,1,6,6
8958,disgust,disgust,neutral,neutral,surprise,1,1,6,6,5
8959,disgust,neutral,neutral,surprise,neutral,1,6,6,5,6


In [None]:
# Check the dtype of the encoded target column on the validation split.
print("Data Types:", valid_df['current_label'].dtype)

Data Types: int64


In [None]:
# pandas, numpy and train_test_split are already imported in the notebook's
# import cell, so only the names first needed here are imported.
from sklearn.preprocessing import LabelEncoder
import keras
from keras.models import Sequential
from keras.layers import Dense, Embedding, LSTM, Flatten

# NOTE(review): label_encoder is never fitted or used in the visible cells; the
# name is kept only so any other cell that refers to it still finds it.
label_encoder = LabelEncoder()

# Model inputs are the four previous emotion labels; the target is the current one.
FEATURE_COLUMNS = ['emo1_label', 'emo2_label', 'emo3_label', 'emo4_label']
TARGET_COLUMN = 'current_label'

NUM_EMOTIONS = 7    # labels 0..6 as produced by to_score
EMBEDDING_DIM = 32
LSTM_UNITS = 32
EPOCHS = 10
BATCH_SIZE = 32

# The label columns were created by the earlier labelling cell. to_score returns
# None for an unknown emotion, so fail early with a clear message rather than
# feeding missing values into the embedding layer.
for split_name, split_df in (('train', train_df), ('test', test_df), ('valid', valid_df)):
    has_missing = split_df[FEATURE_COLUMNS + [TARGET_COLUMN]].isna().any().any()
    assert not has_missing, f"{split_name} split contains emotions that to_score could not encode"

X_train = train_df[FEATURE_COLUMNS]
y_train = train_df[TARGET_COLUMN]

X_test = test_df[FEATURE_COLUMNS]
y_test = test_df[TARGET_COLUMN]

X_valid = valid_df[FEATURE_COLUMNS]
y_valid = valid_df[TARGET_COLUMN]

# Seed Python, NumPy and the Keras backend so that weight initialisation and
# batch shuffling (and therefore the reported accuracy) are repeatable.
keras.utils.set_random_seed(RANDOM_SEED)

# Embed each previous-emotion label, run the four-step sequence through an LSTM,
# flatten the per-step outputs and classify into one of the emotion classes.
model = Sequential()
model.add(Embedding(input_dim=NUM_EMOTIONS, output_dim=EMBEDDING_DIM, input_length=len(FEATURE_COLUMNS)))
model.add(LSTM(LSTM_UNITS, return_sequences=True))
model.add(Flatten())
model.add(Dense(NUM_EMOTIONS, activation='softmax'))

# Integer targets, hence the sparse variant of categorical cross-entropy.
model.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])

# Train, monitoring the validation split after every epoch.
model.fit(
    X_train.to_numpy(),
    y_train.to_numpy(),
    epochs=EPOCHS,
    batch_size=BATCH_SIZE,
    validation_data=(X_valid.to_numpy(), y_valid.to_numpy()),
)

# Evaluate once on the held-out test split.
_, accuracy_test_nn = model.evaluate(X_test.to_numpy(), y_test.to_numpy())
print(f"Accuracy on the test set: {accuracy_test_nn}")


Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
Accuracy on the test set: 0.485836923122406


In [None]:
# Class probabilities for every test example (one row per example).
predictions = model.predict(np.array(X_test))

# Show at most the first 10 predicted classes next to their true labels.
# Capping by the number of predictions avoids an IndexError on a test split
# with fewer than 10 rows.
preview_count = min(10, len(predictions))
for i in range(preview_count):
    print(f"Prediction: {np.argmax(predictions[i])}, True Label: {y_test.iloc[i]}")

In [None]:
# Hand-built context of four already-encoded emotion labels
# (2=fear, 3=joy, 1=disgust, 6=neutral under to_score), oldest first.
sample_input = pd.DataFrame(
    [[2, 3, 1, 6]],
    columns=['emo1_label', 'emo2_label', 'emo3_label', 'emo4_label'],
)

# Class probabilities for that single context row.
predictions = model.predict(sample_input.to_numpy())




In [None]:
# Display the predicted class probabilities for the sample input.
predictions

array([[0.142199  , 0.09338027, 0.07095202, 0.15570614, 0.09051296,
        0.13513054, 0.3121191 ]], dtype=float32)

In [None]:
# Each row of `predictions` is a probability distribution over the classes;
# take the index of the largest probability in the first (only) row.
predicted_class = predictions.argmax(axis=1)[0]
predicted_class

6