In [144]:
import numpy as np
import pandas as pd
import emoji

from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout, Embedding, Input, LSTM, SimpleRNN
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.utils import to_categorical

In [145]:
# The CSV has no header row, so pandas assigns integer column labels (0, 1, ...).
data = pd.read_csv("emoji_data.csv", header=None)
data.head()

Unnamed: 0,0,1
0,French macaroon is so tasty,4
1,work is horrible,3
2,I am upset,3
3,throw the ball,1
4,Good joke,2


In [146]:
# Class label -> emoji shortcode; the position in the list is the label.
emoji_dict = dict(enumerate([
    ":red_heart:",
    ":baseball:",
    ":grinning_face_with_big_eyes:",
    ":disappointed_face:",
    ":fork_and_knife_with_plate:",
]))

def label_to_emoji(label):
    """Return the rendered emoji for a class label.

    `label` is used as a key into the module-level `emoji_dict`; the
    shortcode stored there is passed to `emoji.emojize`. A label that is
    not a key of `emoji_dict` raises `KeyError`.
    """
    shortcode = emoji_dict[label]
    return emoji.emojize(shortcode)
    

In [147]:
# Column 0 holds the sentences, column 1 the integer emoji labels.
X, Y = data[0].values, data[1].values

In [148]:
# Read every line of the pre-trained GloVe file into memory.
# The `with` block closes the handle even if reading raises part-way through.
with open('glove.6B.100d.txt', 'r', encoding='utf8') as file:
    content = file.readlines()

In [149]:
# Each line is "<token> <v1> <v2> ..."; map the token to its vector of floats.
# If a token occurs more than once, the last occurrence wins.
embeddings = {
    fields[0]: np.array(fields[1:], dtype=float)
    for fields in map(str.split, content)
}

In [150]:
# Fit a Keras Tokenizer on the training sentences and keep its word -> index
# mapping; the same tokenizer is reused later to encode the test sentences.
tokenizer = Tokenizer()
tokenizer.fit_on_texts(X)
# NOTE(review): the embedding matrix is sized len(wordToIndex)+1, which assumes
# these indices start at 1 (index 0 left for padding) — confirm against Keras docs.
wordToIndex = tokenizer.word_index

In [151]:
def get_maxlen(data):
    """Return the length of the longest item in `data`, or 0 if `data` is empty.

    The sentences have different lengths, so this length is used as the
    target when padding them all to the same size.
    """
    return max((len(sent) for sent in data), default=0)
# Tokenise here instead of reading `Xtokens`: that name is only assigned in a
# later cell, so on a fresh kernel (Restart & Run All) it would be undefined.
maxlen = get_maxlen(tokenizer.texts_to_sequences(X))


In [152]:
# Encode each sentence as a list of word indices, then pad/truncate at the end
# so every row has exactly `maxlen` entries.
Xtokens = tokenizer.texts_to_sequences(X)
Xtrain = pad_sequences(
    Xtokens,
    maxlen=maxlen,
    padding='post',
    truncating='post',
)

In [153]:
# One-hot encode the labels. `num_classes` is pinned to the number of emoji
# classes so the width always matches the softmax output, even if the highest
# label happens to be absent from Y.
Ytrain = to_categorical(Y, num_classes=len(emoji_dict))

In [154]:
# Dimensionality of the GloVe vectors loaded from glove.6B.100d.txt.
embed_size = 100
# Row i holds the vector for the word whose tokenizer index is i; the extra
# row accounts for index 0, which no word in `wordToIndex` is expected to use.
embedding_matrix = np.zeros((len(wordToIndex) + 1, embed_size))
for word, i in wordToIndex.items():
    embed_vector = embeddings.get(word)
    # Words missing from GloVe keep their all-zero row instead of raising KeyError.
    if embed_vector is not None:
        embedding_matrix[i] = embed_vector

In [155]:

# Two stacked LSTMs on top of frozen pre-trained embeddings, ending in a
# softmax over the emoji classes.
model = Sequential()
# Declare the input shape with an explicit Input object; passing `input_shape`
# to the first layer of a Sequential model is deprecated and emits a warning.
model.add(Input(shape=(maxlen,)))
embedding_layer = Embedding(input_dim=len(wordToIndex) + 1,
                            output_dim=embed_size)
model.add(embedding_layer)
# First LSTM returns the full sequence so the second LSTM can consume it.
model.add(LSTM(units=16, return_sequences=True))
model.add(Dropout(0.5))
model.add(LSTM(units=4))
model.add(Dropout(0.5))

# One output unit per emoji class.
model.add(Dense(len(emoji_dict), activation='softmax'))

# Load the pre-trained vectors and freeze them before compiling, so training
# does not update the embeddings. The layer is addressed directly rather than
# through model.layers[0] so this does not depend on layer ordering.
embedding_layer.set_weights([embedding_matrix])
embedding_layer.trainable = False

model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])
model.summary()

  super().__init__(**kwargs)


In [156]:
# Train on the full dataset; no validation split is held out here.
model.fit(x=Xtrain, y=Ytrain, epochs=100)

Epoch 1/100
[1m6/6[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 12ms/step - accuracy: 0.1872 - loss: 1.6076
Epoch 2/100
[1m6/6[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 8ms/step - accuracy: 0.1590 - loss: 1.5978 
Epoch 3/100
[1m6/6[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 8ms/step - accuracy: 0.2482 - loss: 1.5931 
Epoch 4/100
[1m6/6[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 8ms/step - accuracy: 0.2608 - loss: 1.5996 
Epoch 5/100
[1m6/6[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 8ms/step - accuracy: 0.1773 - loss: 1.5818 
Epoch 6/100
[1m6/6[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 8ms/step - accuracy: 0.2826 - loss: 1.5786 
Epoch 7/100
[1m6/6[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 9ms/step - accuracy: 0.3054 - loss: 1.5710 
Epoch 8/100
[1m6/6[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 9ms/step - accuracy: 0.3219 - loss: 1.5298 
Epoch 9/100
[1m6/6[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[3

<keras.src.callbacks.history.History at 0x27e45006080>

In [157]:
# Encode a few unseen sentences exactly like the training data, then print
# each sentence next to the emoji of its highest-probability class.
test = ["I feel good", "I feel very bad", "I am Hungry"]
test_seq = tokenizer.texts_to_sequences(test)
Xtest = pad_sequences(test_seq, maxlen=maxlen, padding='post', truncating='post')
y_predict = np.argmax(model.predict(Xtest), axis=1)

for sentence, label in zip(test, y_predict):
    print(sentence, label_to_emoji(label))

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 401ms/step
I feel good 😃
I feel very bad 😞
I am Hungry 🍽️
