In [27]:
import tensorflow as tf
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import time

In [122]:
class EmojiTextClassifier:
    """Sentence-to-emoji classifier built on averaged pre-trained word vectors.

    A sentence is represented by the mean of the vectors of its words and is
    classified by a single softmax layer into one of ``NUM_CLASSES`` labels,
    each of which maps to an emoji in ``EMOJIS``.
    """

    # Number of target classes; must equal len(EMOJIS).
    NUM_CLASSES = 5
    # Emoji displayed for each integer label (tuple index == label).
    EMOJIS = ('❤️', '⚾', '😊', '😞', '🍴')

    def __init__(self, vector_shape):
        """Create a classifier for word vectors of a given dimensionality.

        Args:
            vector_shape: Length of each word vector (e.g. 50 when the
                vectors come from a 50-dimensional embedding file).
        """
        self.vector_shape = vector_shape

    def load_dataset(self, dataset_path):
        """Read a CSV with ``sentence`` and ``label`` columns.

        The frame and both arrays are also stored on the instance as
        ``df``, ``X`` and ``Y``.

        Args:
            dataset_path: Path to the CSV file.

        Returns:
            Tuple ``(X, Y)``: object array of sentences, int array of labels.
        """
        self.df = pd.read_csv(dataset_path)
        self.X = np.array(self.df['sentence'], dtype=object)
        self.Y = np.array(self.df['label'], dtype=int)
        return self.X, self.Y

    def load_features_vector(self, file_txt_path):
        """Load word vectors from a whitespace-separated text file.

        Every non-empty line is expected to hold a token followed by its
        float components.

        Args:
            file_txt_path: Path to the UTF-8 encoded vectors file.

        Returns:
            Dict mapping each token to a ``float64`` numpy vector (also
            stored as ``self.word_vectors``).
        """
        self.word_vectors = {}
        # Context manager guarantees the (potentially very large) file is
        # closed even if parsing fails part-way through.
        with open(file_txt_path, encoding='utf-8') as vectors_file:
            for raw_line in vectors_file:
                parts = raw_line.strip().split()
                if not parts:
                    # Tolerate blank lines instead of failing on parts[0].
                    continue
                self.word_vectors[parts[0]] = np.array(parts[1:], dtype=np.float64)

        return self.word_vectors

    def sentence_to_feature_vectors_avg(self, sentence):
        """Average the word vectors of a sentence.

        The sentence is lower-cased and split on any run of whitespace, so
        leading/trailing blanks, tabs and repeated spaces never produce empty
        tokens. Words missing from ``self.word_vectors`` are skipped rather
        than raising ``KeyError``; the mean is taken over the words that were
        found.

        Args:
            sentence: Raw sentence text.

        Returns:
            Numpy vector of length ``vector_shape``; all zeros when no word
            of the sentence is in the vocabulary.
        """
        self.sentence = sentence.lower()
        words = self.sentence.split()
        sum_vectors = np.zeros((self.vector_shape,))
        known_words = 0
        for word in words:
            vector = self.word_vectors.get(word)
            if vector is None:
                continue
            sum_vectors += vector
            known_words += 1

        if known_words == 0:
            # Avoid a 0/0 division; an all-zero vector is a neutral input.
            return sum_vectors

        return sum_vectors / known_words

    def preprocess(self, X, Y):
        """Turn sentences and integer labels into model-ready arrays.

        Also stores ``X``, ``Y``, ``X_avg`` and ``Y_one_hot`` on the instance.

        Args:
            X: Iterable of sentences.
            Y: Integer labels in ``range(NUM_CLASSES)``.

        Returns:
            Tuple ``(X_avg, Y_one_hot)``: float32 array of averaged sentence
            vectors with one row per sentence, and the one-hot labels.
        """
        self.X = X
        self.Y = Y
        averaged = [self.sentence_to_feature_vectors_avg(x) for x in X]
        # reshape keeps the array 2-D (0, vector_shape) even for empty input.
        self.X_avg = np.array(averaged, dtype='float32').reshape(-1, self.vector_shape)
        self.Y_one_hot = tf.keras.utils.to_categorical(self.Y, num_classes=self.NUM_CLASSES)

        return self.X_avg, self.Y_one_hot

    def load_model(self):
        """Build the (uncompiled) network: input dropout + softmax layer.

        The input shape is declared with an explicit ``Input`` entry; an
        ``input_shape`` argument on a non-first layer is ignored by
        ``Sequential``, which left the original model unbuilt until ``fit``.

        Returns:
            A ``tf.keras`` Sequential model.
        """
        model = tf.keras.models.Sequential([
            tf.keras.Input(shape=(self.vector_shape,)),
            tf.keras.layers.Dropout(0.1),
            tf.keras.layers.Dense(self.NUM_CLASSES, activation='softmax'),
        ])
        return model

    def train(self, X_train, Y_train, epochs, fit=False):
        """Prepare the training data and build + compile the model.

        By default the model is NOT fitted here (the caller is expected to
        call ``model.fit`` itself, as the notebook does); pass ``fit=True`` to
        fit for ``epochs`` epochs inside this method.

        The prepared arrays are kept as ``self.X_train_avg`` and
        ``self.Y_train_one_hot`` and the model as ``self.model``.

        Args:
            X_train: Iterable of training sentences.
            Y_train: Integer training labels.
            epochs: Number of epochs; only used when ``fit`` is true.
            fit: When true, fit the compiled model before returning it.

        Returns:
            The compiled (and, if requested, fitted) Keras model.
        """
        self.X_train_avg, self.Y_train_one_hot = self.preprocess(X_train, Y_train)
        print(self.X_train_avg.shape, self.Y_train_one_hot.shape)
        self.model = self.load_model()
        self.model.compile(
            optimizer=tf.keras.optimizers.Adam(),
            loss='categorical_crossentropy',
            metrics=['accuracy']
        )
        if fit:
            # epochs must be passed by keyword: the third positional
            # argument of Model.fit is batch_size, not epochs.
            self.model.fit(self.X_train_avg, self.Y_train_one_hot, epochs=epochs)
        return self.model

    def test(self, model, X_test, Y_test, only_tab_terminated=True):
        """Evaluate ``model`` on test sentences.

        Stores the evaluated arrays as ``self.X_avg``, ``self.new_Y_test`` and
        ``self.Y_one_hot`` (overwriting any training arrays of the same name).

        Args:
            model: Compiled Keras model with a single ``accuracy`` metric.
            X_test: Iterable of test sentences.
            Y_test: Integer test labels aligned with ``X_test``.
            only_tab_terminated: When true (default, the original behavior)
                only sentences ending with a tab character are evaluated and
                the rest are silently dropped. NOTE(review): presumably a
                workaround for how rows are written in the test CSV; confirm
                against the data and pass ``False`` to evaluate every row.

        Returns:
            Tuple ``(loss, accuracy)``, the order in which
            ``model.evaluate`` reports them.

        Raises:
            ValueError: If no sentence is selected for evaluation.
        """
        self.X_test = X_test
        self.Y_test = Y_test

        features = []
        labels = []
        for sentence, label in zip(self.X_test, self.Y_test):
            if only_tab_terminated and not sentence.endswith('\t'):
                continue
            features.append(self.sentence_to_feature_vectors_avg(sentence))
            labels.append(label)

        if not features:
            raise ValueError('no test sentences selected for evaluation')

        self.X_avg = np.array(features)
        self.new_Y_test = np.array(labels)
        self.Y_one_hot = tf.keras.utils.to_categorical(self.new_Y_test, num_classes=self.NUM_CLASSES)
        print(np.shape(self.X_avg), np.shape(self.Y_one_hot))

        # Keras returns the loss first, then the compiled metrics.
        loss, accuracy = model.evaluate(self.X_avg, self.Y_one_hot)

        return loss, accuracy

    def label_to_emoji(self, label):
        """Map an integer class label to its emoji.

        Args:
            label: Integer index into ``EMOJIS``.

        Returns:
            The emoji string for ``label``.
        """
        self.label = label
        return self.EMOJIS[self.label]

    def predict(self, sentece_test):
        """Predict the emoji for one sentence using ``self.model``.

        ``train`` must have been called first so that ``self.model`` exists.

        Args:
            sentece_test: Raw sentence text.

        Returns:
            Tuple ``(emoji, elapsed_seconds)`` where the elapsed time covers
            both vectorizing the sentence and running the model.
        """
        # perf_counter is monotonic, so the measured duration cannot be
        # skewed by system clock adjustments.
        start_time = time.perf_counter()
        self.sentence_test = sentece_test
        self.my_test_avg = self.sentence_to_feature_vectors_avg(self.sentence_test)
        # Wrap in a batch of size one for Model.predict.
        self.my_test_avg = np.array([self.my_test_avg])
        self.result = self.model.predict(self.my_test_avg)
        y_pred = int(np.argmax(self.result))

        return self.label_to_emoji(y_pred), time.perf_counter() - start_time

## 50 Dimensions

In [101]:
# Classifier that averages 50-dimensional word vectors.
emoji_text_50d = EmojiTextClassifier(vector_shape=50)

In [None]:
X_train, Y_train = emoji_text_50d.load_dataset('/content/drive/MyDrive/Emoji_Text_Classification/train.csv')
X_test, Y_test = emoji_text_50d.load_dataset('/content/drive/MyDrive/Emoji_Text_Classification/test.csv')

emoji_text_50d.load_features_vector('/content/drive/MyDrive/Emoji_Text_Classification/glove-6B/glove.6B.50d.txt')

# train() already runs preprocess() and keeps the averaged sentence vectors and
# one-hot labels on the instance, so reuse them instead of preprocessing twice.
model = emoji_text_50d.train(X_train, Y_train, epochs=300)
X_train_avg, Y_train_one_hot = emoji_text_50d.X_train_avg, emoji_text_50d.Y_train_one_hot

# train() only builds and compiles the network; the actual fitting happens here.
model.fit(X_train_avg, Y_train_one_hot, epochs=300)

In [103]:
# Model.evaluate reports the loss first and the accuracy second, so the
# result must be unpacked in that order (it was previously bound swapped).
loss, accuracy = emoji_text_50d.test(model, X_test, Y_test)



In [104]:
# Single-sentence inference with the 50-d model; shows (emoji, seconds taken).
infrence = 'the Weather is Sunny'
emoji, timer = emoji_text_50d.predict(infrence)
(emoji, timer)



('😊', 0.18008732795715332)

## 100 Dimensions

In [123]:
# Classifier that averages 100-dimensional word vectors.
emoji_text_100d = EmojiTextClassifier(vector_shape=100)

In [124]:
# Load the train and test splits (sentences + integer labels).
X_train, Y_train = emoji_text_100d.load_dataset(
    '/content/drive/MyDrive/Emoji_Text_Classification/train.csv'
)
X_test, Y_test = emoji_text_100d.load_dataset(
    '/content/drive/MyDrive/Emoji_Text_Classification/test.csv'
)

In [None]:
emoji_text_100d.load_features_vector('/content/drive/MyDrive/Emoji_Text_Classification/glove-6B/glove.6B.100d.txt')

# train() already runs preprocess() and keeps the averaged sentence vectors and
# one-hot labels on the instance, so reuse them instead of preprocessing twice.
model_100 = emoji_text_100d.train(X_train, Y_train, epochs=300)
X_train_avg, Y_train_one_hot = emoji_text_100d.X_train_avg, emoji_text_100d.Y_train_one_hot

# train() only builds and compiles the network; the actual fitting happens here.
model_100.fit(X_train_avg, Y_train_one_hot, epochs=300)

In [127]:
# Evaluate the 100-d model on the test split and display (loss, accuracy).
loss, accuracy = emoji_text_100d.test(model=model_100, X_test=X_test, Y_test=Y_test)
(loss, accuracy)

(49, 100) (49, 5)


(0.6888834834098816, 0.8163265585899353)

In [131]:
# Single-sentence inference with the 100-d model; shows (emoji, seconds taken).
infrence = 'I hate Cloudy weather'
emoji, timer = emoji_text_100d.predict(infrence)
(emoji, timer)



('😞', 14.203638792037964)

## 200 Dimensions

In [116]:
# Classifier that averages 200-dimensional word vectors.
emoji_text_200d = EmojiTextClassifier(vector_shape=200)

In [117]:
# Load through the 200-d instance: the previous copy-pasted call went through
# emoji_text_100d, which made this section depend on (and overwrite the
# df/X/Y state of) an unrelated classifier.
X_train, Y_train = emoji_text_200d.load_dataset('/content/drive/MyDrive/Emoji_Text_Classification/train.csv')
X_test, Y_test = emoji_text_200d.load_dataset('/content/drive/MyDrive/Emoji_Text_Classification/test.csv')

In [None]:
emoji_text_200d.load_features_vector('/content/drive/MyDrive/Emoji_Text_Classification/glove-6B/glove.6B.200d.txt')

# train() already runs preprocess() and keeps the averaged sentence vectors and
# one-hot labels on the instance, so reuse them instead of preprocessing twice.
model_200 = emoji_text_200d.train(X_train, Y_train, epochs=300)
X_train_avg, Y_train_one_hot = emoji_text_200d.X_train_avg, emoji_text_200d.Y_train_one_hot

# train() only builds and compiles the network; the actual fitting happens here.
model_200.fit(X_train_avg, Y_train_one_hot, epochs=300)

In [129]:
# Evaluate the 200-d model on the test split and display (loss, accuracy).
loss, accuracy = emoji_text_200d.test(model=model_200, X_test=X_test, Y_test=Y_test)
(loss, accuracy)





(0.5232657194137573, 0.8163265585899353)

In [133]:
# Single-sentence inference with the 200-d model; shows (emoji, seconds taken).
infrence = 'I love rainy weather'
emoji, timer = emoji_text_200d.predict(infrence)
(emoji, timer)



('❤️', 0.11483097076416016)

## 300 Dimensions

In [134]:
# Classifier that averages 300-dimensional word vectors.
emoji_text_300d = EmojiTextClassifier(vector_shape=300)

In [135]:
# Load through the 300-d instance: the previous copy-pasted call went through
# emoji_text_100d, which made this section depend on (and overwrite the
# df/X/Y state of) an unrelated classifier.
X_train, Y_train = emoji_text_300d.load_dataset('/content/drive/MyDrive/Emoji_Text_Classification/train.csv')
X_test, Y_test = emoji_text_300d.load_dataset('/content/drive/MyDrive/Emoji_Text_Classification/test.csv')

In [137]:
emoji_text_300d.load_features_vector('/content/drive/MyDrive/Emoji_Text_Classification/glove-6B/glove.6B.300d.txt')

# train() already runs preprocess() and keeps the averaged sentence vectors and
# one-hot labels on the instance, so reuse them instead of preprocessing twice.
model_300 = emoji_text_300d.train(X_train, Y_train, epochs=300)
X_train_avg, Y_train_one_hot = emoji_text_300d.X_train_avg, emoji_text_300d.Y_train_one_hot

# train() only builds and compiles the network; the actual fitting happens here.
model_300.fit(X_train_avg, Y_train_one_hot, epochs=300)

(132, 300) (132, 5)
Epoch 1/300
Epoch 2/300
Epoch 3/300
Epoch 4/300
Epoch 5/300
Epoch 6/300
Epoch 7/300
Epoch 8/300
Epoch 9/300
Epoch 10/300
Epoch 11/300
Epoch 12/300
Epoch 13/300
Epoch 14/300
Epoch 15/300
Epoch 16/300
Epoch 17/300
Epoch 18/300
Epoch 19/300
Epoch 20/300
Epoch 21/300
Epoch 22/300
Epoch 23/300
Epoch 24/300
Epoch 25/300
Epoch 26/300
Epoch 27/300
Epoch 28/300
Epoch 29/300
Epoch 30/300
Epoch 31/300
Epoch 32/300
Epoch 33/300
Epoch 34/300
Epoch 35/300
Epoch 36/300
Epoch 37/300
Epoch 38/300
Epoch 39/300
Epoch 40/300
Epoch 41/300
Epoch 42/300
Epoch 43/300
Epoch 44/300
Epoch 45/300
Epoch 46/300
Epoch 47/300
Epoch 48/300
Epoch 49/300
Epoch 50/300
Epoch 51/300
Epoch 52/300
Epoch 53/300
Epoch 54/300
Epoch 55/300
Epoch 56/300
Epoch 57/300
Epoch 58/300
Epoch 59/300
Epoch 60/300
Epoch 61/300
Epoch 62/300
Epoch 63/300
Epoch 64/300
Epoch 65/300
Epoch 66/300
Epoch 67/300
Epoch 68/300
Epoch 69/300
Epoch 70/300
Epoch 71/300
Epoch 72/300
Epoch 73/300
Epoch 74/300
Epoch 75/300
Epoch 76/300
E

<keras.src.callbacks.History at 0x7f91e3c8a2f0>

In [138]:
# Evaluate the 300-d model on the test split and display (loss, accuracy).
loss, accuracy = emoji_text_300d.test(model=model_300, X_test=X_test, Y_test=Y_test)
(loss, accuracy)

(49, 300) (49, 5)






(0.4915929436683655, 0.8775510191917419)

In [139]:
# Single-sentence inference with the 300-d model; shows (emoji, seconds taken).
infrence = 'I workout in gym'
emoji, timer = emoji_text_300d.predict(infrence)
(emoji, timer)



('⚾', 0.127455472946167)