In [None]:
import pandas as pd
import numpy as np
import re
from keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences
from keras.utils import to_categorical
from keras.models import load_model
import urllib.request
import zipfile
import os
from keras.models import Sequential
from keras.layers import Embedding,Bidirectional,LSTM,GRU,Dense
import nltk
from nltk.tokenize import word_tokenize
import warnings
import tensorflow as tf
# word_tokenize needs the Punkt sentence-tokenizer data. Older NLTK releases
# read the 'punkt' package; newer releases look for 'punkt_tab' instead, so
# fetch both to keep the notebook runnable on either.
nltk.download('punkt')
nltk.download('punkt_tab')
# Silence all Python warnings for the rest of the session (keeps cell output short).
warnings.filterwarnings('ignore')

[nltk_data] Downloading package punkt to /root/nltk_data...
[nltk_data]   Package punkt is already up-to-date!


In [None]:
def _read_split(path):
    """Read one ``text;label`` file and return ``(texts, labels)`` as two lists.

    Every non-blank line is split on its LAST ``;`` so that a semicolon inside
    the text cannot be mistaken for the separator. The text part is kept
    verbatim; the label part is whitespace-stripped.

    Raises:
        ValueError: if a non-blank line contains no ``;`` at all.
    """
    texts, labels = [], []
    # The context manager closes the handle even if a line fails to parse.
    with open(path, 'r', encoding='utf-8') as handle:
        for line in handle:
            if not line.strip():
                continue  # tolerate blank / trailing empty lines
            text, label = line.rsplit(';', 1)
            texts.append(text)
            labels.append(label.strip())
    return texts, labels


x_train, y_train = _read_split('/content/train.txt')
# Naming note: val.txt feeds the *_test variables (passed as validation data
# when the model is fitted), while test.txt feeds the *_real_test variables.
x_test, y_test = _read_split('/content/val.txt')
x_real_test, y_real_test = _read_split('/content/test.txt')

data_train = pd.DataFrame({'Text': x_train, 'Emotion': y_train})
data_test = pd.DataFrame({'Text': x_test, 'Emotion': y_test})
data_real_test = pd.DataFrame({'Text': x_real_test, 'Emotion': y_real_test})

# DataFrame.append was removed in pandas 2.0; pd.concat is its replacement.
# The previous version also appended data_real_test to data_train and threw the
# result away (a no-op), so `data` is, as before, train + validation rows only.
data = pd.concat([data_train, data_test], ignore_index=True)

In [None]:
def clean_text(data):
    """Remove hashtags and @-mentions from ``data`` and return its word tokens.

    A hashtag/mention is ``#`` or ``@`` followed by one or more word
    characters, digits or dots. What is left of the string is handed to
    ``word_tokenize`` and the resulting token list is returned.
    """
    # Same two substitutions as before, applied in the same order:
    # hashtags first, then mentions.
    for pattern in (r"(#[\d\w\.]+)", r"(@[\d\w\.]+)"):
        data = re.sub(pattern, '', data)
    return word_tokenize(data)
# Clean every corpus the same way: tokenise with clean_text, then re-join the
# tokens with single spaces so the Keras Tokenizer sees one string per sample.
# Order of the targets matches the order of the source collections.
texts, texts_train, texts_test, texts_real_test = (
    [' '.join(clean_text(sample)) for sample in corpus]
    for corpus in (data.Text, x_train, x_test, x_real_test)
)

In [None]:
# Learn the word -> integer vocabulary from the combined corpus in `texts`.
tokenizer = Tokenizer()
tokenizer.fit_on_texts(texts)

# Encode each split as lists of integer word ids using that vocabulary.
sequence_train, sequence_test, sequence_real_test = (
    tokenizer.texts_to_sequences(split)
    for split in (texts_train, texts_test, texts_real_test)
)

# One extra slot on top of the number of known words.
index_of_words = tokenizer.word_index
vocab_size = 1 + len(index_of_words)

In [None]:
# Peek at a few cleaned test samples instead of echoing the whole list,
# which floods the cell output and bloats the saved notebook.
texts_real_test[:5]

In [None]:
# Class labels in index order; the position in this list is the class id.
class_names=['anger','sadness','fear','joy','surprise','love']
num_classes=len(class_names)
embed_num_dims=300
max_seq_len=500

# Pad/truncate every encoded sample to exactly max_seq_len ids.
X_train_pad=pad_sequences(sequence_train,maxlen=max_seq_len)
X_test_pad=pad_sequences(sequence_test,maxlen=max_seq_len)
X_real_test_pad=pad_sequences(sequence_real_test,maxlen=max_seq_len)

# Derive the label -> id mapping from class_names so the two cannot drift apart
# (yields the same mapping as the hand-written dict it replaces).
encoding={name:idx for idx,name in enumerate(class_names)}

# Pass num_classes explicitly: without it to_categorical sizes the one-hot
# matrix from the largest id present, so a split that happens to contain no
# sample of the last class would come out narrower than the model's output.
y_train=to_categorical([encoding[x] for x in data_train.Emotion],num_classes=num_classes)
y_test=to_categorical([encoding[x] for x in data_test.Emotion],num_classes=num_classes)
y_real_test=to_categorical([encoding[x] for x in data_real_test.Emotion],num_classes=num_classes)

In [None]:
def create_embedding_matrix(filepath,word_index,embedding_dim):
    """Build an embedding matrix from a text-format word-vector file.

    Each usable line of ``filepath`` is ``<word> <v1> <v2> ...`` separated by
    whitespace. For every word that appears in ``word_index`` the first
    ``embedding_dim`` values are copied into row ``word_index[word]``.

    Args:
        filepath: path to the vectors file (read as UTF-8).
        word_index: mapping of word -> integer row index.
        embedding_dim: number of vector components to keep per word.

    Returns:
        ``numpy.ndarray`` of shape ``(len(word_index) + 1, embedding_dim)``;
        rows for words not found in the file stay all-zero.
    """
    vocab_size=len(word_index)+1
    embedding_matrix=np.zeros((vocab_size,embedding_dim))
    with open(filepath,encoding='utf-8') as f:
        for line in f:
            parts=line.split()
            # Skip blank lines, the optional "<count> <dim>" header and any
            # line with fewer than embedding_dim values. Previously a short
            # line whose first token was in the vocabulary was broadcast
            # across the whole row (or raised on a shape mismatch).
            if len(parts)<=embedding_dim:
                continue
            word=parts[0]
            if word in word_index:
                idx=word_index[word]
                embedding_matrix[idx]=np.asarray(parts[1:embedding_dim+1],dtype=np.float32)
    return embedding_matrix
fname='/content/drive/MyDrive/prism/wiki-news-300d-1M.vec'
# The vectors file lives on Google Drive, but the notebook's drive.mount cell
# comes after this one. Mount here when the file is not visible yet so that
# Restart & Run All does not fail with FileNotFoundError.
if not os.path.exists(fname):
    from google.colab import drive
    drive.mount('/content/drive')
# Rows of the result line up with the tokenizer's word indices.
embedd_matrix=create_embedding_matrix(fname,index_of_words,embed_num_dims)

In [None]:
# Make Google Drive available under /content/drive (Colab only).
# NOTE(review): the word-vector file read by the embedding-matrix cell is
# located under this mount point.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [None]:
gru_output_size=128
bidirectional=True

# Embedding layer initialised from the pre-built matrix and kept frozen.
embedd_layer=Embedding(
    vocab_size,
    embed_num_dims,
    input_length=max_seq_len,
    weights=[embedd_matrix],
    trainable=False,
)

# Embedding -> bidirectional GRU -> softmax over the emotion classes.
model=Sequential([
    embedd_layer,
    Bidirectional(GRU(units=gru_output_size,dropout=0.2,recurrent_dropout=0.2)),
    Dense(num_classes,activation='softmax'),
])
model.compile(
    optimizer='adam',
    loss='categorical_crossentropy',
    metrics=['accuracy'],
)

In [None]:
# Training hyper-parameters.
batch_size=128
epochs=4

# Fit on the padded training split; the padded val.txt split is used for
# per-epoch validation. The returned History object is kept in `hist`.
hist=model.fit(
    X_train_pad,
    y_train,
    validation_data=(X_test_pad,y_test),
    epochs=epochs,
    batch_size=batch_size,
)

Epoch 1/4
Epoch 2/4
Epoch 3/4
Epoch 4/4


In [None]:
# Persist the trained model to an HDF5 file in the current working directory.
model.save(filepath="fin_model.h5")

In [None]:
# `emoji` is only needed by the optional emoji demo below, and the notebook
# installs it in a later cell; a missing package must not break this cell on a
# fresh Restart & Run All.
try:
    import emoji
except ImportError:
    emoji = None
#message=[emoji.demojize('😨😰😱').replace(":"," ")]
message=["im sad"]
# Run the message through the same cleaning/tokenising step that the training
# texts went through, so its tokens match the vocabulary the tokenizer was
# fitted on.
cleaned=[' '.join(clean_text(text)) for text in message]
seq=tokenizer.texts_to_sequences(cleaned)
padded=pad_sequences(seq,maxlen=max_seq_len)
pred=model.predict(padded)
#poda = model.evaluate(X_real_test_pad,y_real_test, verbose=0)
print('Message:'+str(message))
# argmax over the class probabilities gives the index into class_names.
print('Emotion:',class_names[np.argmax(pred)])

Message:['im happy']
Emotion: joy


In [None]:
# Export the trained model (optimizer state included) to the 'textmodel' path,
# replacing any previous export there.
tf.keras.models.save_model(
    model,
    'textmodel',
    overwrite=True,
    include_optimizer=True,
    save_format=None,
    signatures=None,
    options=None,
)



In [None]:
!pip install emoji


Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Collecting emoji
  Downloading emoji-2.2.0.tar.gz (240 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m240.9/240.9 KB[0m [31m5.1 MB/s[0m eta [36m0:00:00[0m
[?25h  Preparing metadata (setup.py) ... [?25l[?25hdone
Building wheels for collected packages: emoji
  Building wheel for emoji (setup.py) ... [?25l[?25hdone
  Created wheel for emoji: filename=emoji-2.2.0-py3-none-any.whl size=234926 sha256=6081d91b9685425bd8b5a613a9dfa0a71e0b9cde93e66ac20bac7c77994da82b
  Stored in directory: /root/.cache/pip/wheels/9a/b8/0f/f580817231cbf59f6ade9fd132ff60ada1de9f7dc85521f857
Successfully built emoji
Installing collected packages: emoji
Successfully installed emoji-2.2.0


In [None]:
# Reload the HDF5 model from disk and check that it still predicts.
models = tf.keras.models.load_model("/content/fin_model.h5")
# load=load_model("/content/fin_model.h5")
message = ["i am sad"]
# Same preprocessing as the training texts, so the tokens match the
# tokenizer's vocabulary.
cleaned = [' '.join(clean_text(text)) for text in message]
seq = tokenizer.texts_to_sequences(cleaned)
# Reuse max_seq_len rather than repeating the literal, so the padded length
# always matches what the model was built with.
padded = pad_sequences(seq, maxlen=max_seq_len)
pred = models.predict(padded)
print(class_names[np.argmax(pred)])

sadness
