In [1]:
import numpy as np
import pandas as pd

import warnings

# Ignore all warnings
warnings.filterwarnings('ignore')

# **Loading The Dataset**

In [2]:
train = pd.read_csv(r'cleaned_train_emoji.csv')
train.head()

Unnamed: 0,Text,Label
0,never talk to me again,3
1,I am proud of your achievements,2
2,It is the worst day in my life,3
3,Miss you so much,0
4,food is life,4


In [3]:
test = pd.read_csv(r'cleaned_test_emoji.csv')
test.head()

Unnamed: 0,Text,Label
0,I want to eat\t,4
1,he did not answer\t,3
2,he got a raise\t,2
3,she got me a present\t,0
4,ha ha ha it was so funny\t,2


# **Emoji's**

In [4]:
import emoji

In [5]:
print(type(emoji.EMOJI_DATA))
list(emoji.EMOJI_DATA.keys())[:10]

<class 'dict'>


['🥇', '🥈', '🥉', '🆎', '🏧', '🅰️', '🅰', '🇦🇫', '🇦🇱', '🇩🇿']

In [6]:
emoji.EMOJI_DATA

{'🥇': {'en': ':1st_place_medal:',
  'status': 2,
  'E': 3,
  'de': ':goldmedaille:',
  'es': ':medalla_de_oro:',
  'fr': ':médaille_d’or:',
  'ja': ':金メダル:',
  'ko': ':금메달:',
  'pt': ':medalha_de_ouro:',
  'it': ':medaglia_d’oro:',
  'fa': ':مدال_طلا:',
  'id': ':medali_emas:',
  'zh': ':金牌:',
  'ru': ':золотая_медаль:',
  'tr': ':birincilik_madalyası:',
  'ar': ':ميدالية_مركز_أول:'},
 '🥈': {'en': ':2nd_place_medal:',
  'status': 2,
  'E': 3,
  'de': ':silbermedaille:',
  'es': ':medalla_de_plata:',
  'fr': ':médaille_d’argent:',
  'ja': ':銀メダル:',
  'ko': ':은메달:',
  'pt': ':medalha_de_prata:',
  'it': ':medaglia_d’argento:',
  'fa': ':مدال_نقره:',
  'id': ':medali_perak:',
  'zh': ':银牌:',
  'ru': ':серебряная_медаль:',
  'tr': ':ikincilik_madalyası:',
  'ar': ':ميدالية_مركز_ثان:'},
 '🥉': {'en': ':3rd_place_medal:',
  'status': 2,
  'E': 3,
  'de': ':bronzemedaille:',
  'es': ':medalla_de_bronce:',
  'fr': ':médaille_de_bronze:',
  'ja': ':銅メダル:',
  'ko': ':동메달:',
  'pt': ':medalha_de_b

In [7]:
emoji.emojize(':crown:', language = 'en')

'👑'

In [8]:
emoji.emojize(':books:', language = 'en')

'📚'

In [9]:
emoji_dict = {
    0: ':beating_heart:',
    1: ':baseball:',
    2: ':face_with_tears_of_joy:',
    3: ':face_with_head-bandage:',
    4: ':bread:'
}

In [10]:
for e in emoji_dict.values():
  print(emoji.emojize(e,), end = " ")

💓 ⚾ 😂 🤕 🍞 

# **Text PreProcessing**

In [11]:
X_train = train['Text']
X_train[:10]

0                never talk to me again
1       I am proud of your achievements
2        It is the worst day in my life
3                      Miss you so much
4                          food is life
5                        I love you mum
6                  Stop saying bullshit
7    congratulations on your acceptance
8           The assignment is too long 
9                     I want to go play
Name: Text, dtype: object

In [12]:
Y_train = train['Label']
Y_train[:10]

0    3
1    2
2    3
3    0
4    4
5    0
6    3
7    2
8    3
9    1
Name: Label, dtype: int64

In [13]:
X_train.shape, Y_train.shape

((132,), (132,))

# **Get Embeddings**

In [14]:
f = open('glove.6B.50d.txt', encoding = 'utf8', mode = 'r')

In [15]:
embedding_matrix = {}
for line in f:
    values = line.split()
    word = values[0]
    emb = np.array(values[1:], dtype = 'float')
    embedding_matrix[word] = emb

In [16]:
embedding_matrix['the'].shape

(50,)

In [17]:
def get_embedding_matrix_for_data(data, maxLen = 10, emb_dim = 50):
    embedding_data = np.zeros((len(data), maxLen, emb_dim))

    for ix in range(data.shape[0]):
        words_in_sentence = data[ix].split()

        for jx in range(len(words_in_sentence)):
            if embedding_matrix.get(words_in_sentence[jx].lower()) is not None:
                embedding_data[ix][jx] = embedding_matrix[words_in_sentence[jx].lower()]
    return embedding_data

In [18]:
X_train = get_embedding_matrix_for_data(X_train)
X_train.shape

(132, 10, 50)

In [19]:
from keras.utils import to_categorical

In [20]:
Y_train = to_categorical(Y_train)
Y_train.shape

(132, 5)

# **Model Building**

In [21]:
from keras.models import Sequential
from keras.layers import Input, Dense, LSTM, Dropout

In [22]:
# Define the model
model = Sequential()

# Add an Input layer as the first layer
model.add(Input(shape=(10, 50)))

# Add LSTM and other layers
model.add(LSTM(units=64, return_sequences=True))
model.add(Dropout(0.3))

model.add(LSTM(units=32))
model.add(Dropout(0.2))

model.add(Dense(units=10, activation='relu'))
model.add(Dense(units=5, activation='softmax'))

# Print the model summary
model.summary()

In [23]:
# Comiling the model

model.compile(optimizer = 'adam', loss = 'categorical_crossentropy', metrics = ['accuracy'])

In [24]:
# Training the Model

model.fit(X_train, Y_train, validation_split = 0.1, batch_size = 32, epochs = 50)


Epoch 1/50
[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 46ms/step - accuracy: 0.2090 - loss: 1.6101 - val_accuracy: 0.0714 - val_loss: 1.6143
Epoch 2/50
[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 8ms/step - accuracy: 0.3174 - loss: 1.5900 - val_accuracy: 0.0714 - val_loss: 1.6059
Epoch 3/50
[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 7ms/step - accuracy: 0.3106 - loss: 1.5521 - val_accuracy: 0.0714 - val_loss: 1.5988
Epoch 4/50
[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 7ms/step - accuracy: 0.3518 - loss: 1.5298 - val_accuracy: 0.1429 - val_loss: 1.5840
Epoch 5/50
[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 7ms/step - accuracy: 0.3778 - loss: 1.4835 - val_accuracy: 0.0714 - val_loss: 1.5766
Epoch 6/50
[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 7ms/step - accuracy: 0.4479 - loss: 1.4372 - val_accuracy: 0.1429 - val_loss: 1.5486
Epoch 7/50
[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0

<keras.src.callbacks.history.History at 0x30b3422f0>

In [25]:
# Training data Model Accuracy

train_accuracy = round(model.evaluate(X_train, Y_train)[1] * 100, 2)
print(f"Training Data Accuracy: {train_accuracy}%")

[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step - accuracy: 0.9847 - loss: 0.1257 
Training Data Accuracy: 96.97%


In [26]:
# PreProcessing Test data

test['Text'] = test['Text'].apply(lambda x: x[:-1])

In [27]:
X_test = test['Text']
X_test[:10]

0                          I want to eat
1                      he did not answer
2                         he got a raise
3                   she got me a present
4               ha ha ha it was so funny
5                    he is a good friend
6                             I am upset
7    We had such a lovely dinner tonight
8                      where is the food
9         Stop making this joke ha ha ha
Name: Text, dtype: object

In [28]:
Y_test = test['Label']
Y_test[:10]

0    4
1    3
2    2
3    0
4    2
5    0
6    0
7    0
8    4
9    2
Name: Label, dtype: int64

In [29]:
X_test.shape, Y_test.shape

((56,), (56,))

In [30]:
X_test = get_embedding_matrix_for_data(X_test)
X_test.shape

(56, 10, 50)

In [31]:
Y_test = to_categorical(Y_test)
Y_test.shape

(56, 5)

In [32]:
# Testing data Model accuracy

test_accuracy = round(model.evaluate(X_test, Y_test)[1] * 100, 2)
print(f"Testing Data Accuracy: {test_accuracy}%")

[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - accuracy: 0.5923 - loss: 1.9329 
Testing Data Accuracy: 60.71%


In [33]:
Y_pred = model.predict(X_test)
Y_pred = (Y_pred > 0.5).astype(int)
print(Y_pred[:10])

[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 91ms/step
[[0 0 0 0 1]
 [0 0 0 1 0]
 [0 0 1 0 0]
 [0 0 1 0 0]
 [0 0 1 0 0]
 [0 0 1 0 0]
 [0 0 0 1 0]
 [0 0 1 0 0]
 [0 0 0 0 1]
 [0 0 1 0 0]]


In [34]:
from sklearn.metrics import confusion_matrix, classification_report, accuracy_score

# Convert multilabel-indicator to multiclass
Y_pred_classes = np.argmax(Y_pred, axis=1)
Y_test_classes = np.argmax(Y_test, axis=1)

print("Confusion Matrix:")
print(confusion_matrix(Y_test_classes, Y_pred_classes))

print("Classification Report:")
print(classification_report(Y_test_classes, Y_pred_classes))

print("Accuracy Score:", round(accuracy_score(Y_test_classes, Y_pred_classes)*100,2),'%')

Confusion Matrix:
[[ 6  0  5  1  0]
 [ 0  4  0  1  0]
 [ 3  1 12  1  1]
 [ 2  1  4  7  1]
 [ 1  0  1  0  4]]
Classification Report:
              precision    recall  f1-score   support

           0       0.50      0.50      0.50        12
           1       0.67      0.80      0.73         5
           2       0.55      0.67      0.60        18
           3       0.70      0.47      0.56        15
           4       0.67      0.67      0.67         6

    accuracy                           0.59        56
   macro avg       0.62      0.62      0.61        56
weighted avg       0.60      0.59      0.59        56

Accuracy Score: 58.93 %


In [35]:
# Predicting Result for test data

for t in range(len(test)):
    print("Message: ",test['Text'].iloc[t])

    # Assuming 'Label' is the name of the column containing the emoji labels
    print("Actual: ",emoji.emojize(emoji_dict[test['Label'].iloc[t]]))

    # Find the index where the prediction is 1
    predicted_index = np.argmax(Y_pred[t])

    print("Predicted: ",emoji.emojize(emoji_dict[predicted_index])) # Use predicted index to access emoji_dict
    print()

Message:  I want to eat
Actual:  🍞
Predicted:  🍞

Message:  he did not answer
Actual:  🤕
Predicted:  🤕

Message:  he got a raise
Actual:  😂
Predicted:  😂

Message:  she got me a present
Actual:  💓
Predicted:  😂

Message:  ha ha ha it was so funny
Actual:  😂
Predicted:  😂

Message:  he is a good friend
Actual:  💓
Predicted:  😂

Message:  I am upset
Actual:  💓
Predicted:  🤕

Message:  We had such a lovely dinner tonight
Actual:  💓
Predicted:  😂

Message:  where is the food
Actual:  🍞
Predicted:  🍞

Message:  Stop making this joke ha ha ha
Actual:  😂
Predicted:  😂

Message:  where is the ball
Actual:  ⚾
Predicted:  ⚾

Message:  work is hard
Actual:  🤕
Predicted:  😂

Message:  This girl is messing with me
Actual:  🤕
Predicted:  💓

Message:  are you serious ha ha
Actual:  😂
Predicted:  🤕

Message:  Let us go play baseball
Actual:  ⚾
Predicted:  ⚾

Message:  This stupid grader is not working 
Actual:  🤕
Predicted:  🤕

Message:  work is horrible
Actual:  🤕
Predicted:  😂

Message:  Congratulat