<a href="https://colab.research.google.com/github/Keshav13dev/Aptos_project/blob/main/EmojiPrediction.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [11]:
import pickle

import emoji
import numpy as np
import pandas as pd

from keras.layers import Dense,LSTM, SimpleRNN, Embedding, Dropout
from keras.models import Sequential

from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.utils import to_categorical



# Load the labelled sentences. With header=None every line of the file is read
# as data, so the two columns are named by hand: Text (sentence), Label (class id).
data_raw = pd.read_csv('/content/sample_data/emoji_data.csv', header=None)
data_raw.columns = ['Text', 'Label']

# Parse the labels numerically rather than testing str(label).isdigit():
# pandas stores a numeric column that contains blanks as float64, so a valid
# label reads back as 2.0, "2.0".isdigit() is False, and every row was dropped.
# Anything that is not a number (e.g. a stray header line) becomes NaN here.
label_numeric = pd.to_numeric(data_raw['Label'], errors='coerce')
has_valid_label = (label_numeric >= 0) & (label_numeric % 1 == 0)  # NaN compares False

# Keep rows with no missing cell and a non-negative whole-number label.
keep_row = data_raw.notna().all(axis=1) & has_valid_label
data = data_raw.loc[keep_row].assign(Label=label_numeric.loc[keep_row].astype(int))

if data.empty:
    # Fail here with a clear message instead of later at max() of an empty sequence.
    raise ValueError("No rows with a valid integer label were found in emoji_data.csv")

# X: lower-cased sentences (list of str); Y: integer class ids (numpy array).
X = [str(text).lower() for text in data['Text']]
Y = data['Label'].to_numpy(dtype=int)

# Class id -> emoji alias string that `emoji.emojize` turns into a glyph.
emoji_dict = {
    0: ":red_heart:",
    1: ":baseball:",
    2: ":grinning_face_with_big_eyes:",
    3: ":disappointed_face:",
    4: ":fork_and_knife_with_plate:",
}


def label_to_emoji(label):
    """Return the emoji for a class id.

    Args:
        label: A key of ``emoji_dict``.

    Returns:
        The result of ``emoji.emojize`` applied to the alias stored for ``label``.

    Raises:
        KeyError: If ``label`` is not a key of ``emoji_dict``.
    """
    alias = emoji_dict[label]
    return emoji.emojize(alias)


# ---------------------------------------------------------------------------
# Embeddings: read the pre-trained GloVe vectors into a {token: vector} dict.
# ---------------------------------------------------------------------------
GLOVE_PATH = '/content/sample_data/glove.6B.100d.txt'
embed_size = 100  # length of each row written into embedding_matrix below

embeddings = {}
# Only the parsing lives inside the `with` block, so the file handle is closed
# before tokenisation and training start (previously the whole pipeline was
# nested in it and the file stayed open until the cell finished).
with open(GLOVE_PATH, 'r', encoding='utf8') as file:
    for line in file:
        split_line = line.split()
        if not split_line:
            continue  # a blank line would otherwise raise IndexError
        word = split_line[0]
        embeddings[word] = np.array(split_line[1:], dtype=float)

# ---------------------------------------------------------------------------
# Tokenise the sentences and build the padded training tensors.
# ---------------------------------------------------------------------------
tokenizer = Tokenizer()
tokenizer.fit_on_texts(X)
word2index = tokenizer.word_index
Xtokens = tokenizer.texts_to_sequences(X)

# Pad every sequence (with trailing zeros) to the longest training sentence.
maxlen = max(len(sent) for sent in Xtokens)
Xtrain = pad_sequences(Xtokens, maxlen=maxlen, padding='post', truncating='post')

# Fix the one-hot width to the number of known emoji classes. Without
# num_classes, to_categorical infers it from the largest label present, which
# can disagree with the size of the softmax layer.
num_classes = len(emoji_dict)
Ytrain = to_categorical(Y, num_classes=num_classes)

# Check dataset
print("Label distribution:\n", data['Label'].value_counts())

# ---------------------------------------------------------------------------
# Model
# ---------------------------------------------------------------------------
# Row i holds the GloVe vector of the word with tokenizer index i. Row 0 (the
# padding index) and words missing from GloVe stay all-zero.
embedding_matrix = np.zeros((len(word2index) + 1, embed_size))
for word, i in word2index.items():
    embed_vector = embeddings.get(word)
    if embed_vector is not None:
        embedding_matrix[i] = embed_vector

model = Sequential([
    Embedding(input_dim=len(word2index) + 1,
              output_dim=embed_size,
              input_length=maxlen,
              weights=[embedding_matrix],
              trainable=False),  # keep the pre-trained vectors frozen
    LSTM(64, return_sequences=True),
    Dropout(0.5),
    LSTM(32),
    Dropout(0.5),
    Dense(num_classes, activation='softmax'),
])

model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])
model.summary()

# ---------------------------------------------------------------------------
# Train, then persist the model and the fitted tokenizer.
# ---------------------------------------------------------------------------
model.fit(Xtrain, Ytrain, epochs=30, batch_size=32, validation_split=0.2)

model.save('emoji_model.h5')

with open('tokenizer.pkl', 'wb') as f:
    pickle.dump(tokenizer, f)

# ---------------------------------------------------------------------------
# Test on a few hand-written sentences.
# ---------------------------------------------------------------------------
test_sentences = ["I feel good", "I feel upset", "let's eat dinner"]
test_sentences = [s.lower() for s in test_sentences]

# Same preprocessing as the training data: same tokenizer, same maxlen.
test_tokens = tokenizer.texts_to_sequences(test_sentences)
Xtest = pad_sequences(test_tokens, maxlen=maxlen, padding='post', truncating='post')

predictions = model.predict(Xtest)
predicted_labels = np.argmax(predictions, axis=1)

# Output predictions
for sentence, predicted_label in zip(test_sentences, predicted_labels):
    print(f"Input: {sentence} | Predicted Emoji: {label_to_emoji(predicted_label)}")



Label distribution:
 Label
2    55
3    43
0    28
4    22
1    18
Name: count, dtype: int64




Epoch 1/30
[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 173ms/step - accuracy: 0.2708 - loss: 1.6051 - val_accuracy: 0.3529 - val_loss: 1.5162
Epoch 2/30
[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 40ms/step - accuracy: 0.3389 - loss: 1.5405 - val_accuracy: 0.3529 - val_loss: 1.4914
Epoch 3/30
[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 34ms/step - accuracy: 0.3493 - loss: 1.5026 - val_accuracy: 0.3529 - val_loss: 1.4744
Epoch 4/30
[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 30ms/step - accuracy: 0.3806 - loss: 1.4574 - val_accuracy: 0.3529 - val_loss: 1.4615
Epoch 5/30
[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 32ms/step - accuracy: 0.3804 - loss: 1.4386 - val_accuracy: 0.3235 - val_loss: 1.4535
Epoch 6/30
[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 35ms/step - accuracy: 0.4489 - loss: 1.3733 - val_accuracy: 0.2647 - val_loss: 1.4334
Epoch 7/30
[1m5/5[0m [32m━━━━━━━━━━━━━━━━━



[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 367ms/step
Input: i feel good | Predicted Emoji: 😃
Input: i feel upset | Predicted Emoji: 😃
Input: let's eat dinner | Predicted Emoji: 🍽️


In [1]:
!pip install emoji


Collecting emoji
  Downloading emoji-2.14.1-py3-none-any.whl.metadata (5.7 kB)
Downloading emoji-2.14.1-py3-none-any.whl (590 kB)
[?25l   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/590.6 kB[0m [31m?[0m eta [36m-:--:--[0m[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m590.6/590.6 kB[0m [31m25.2 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: emoji
Successfully installed emoji-2.14.1
