In [1]:
import tensorflow as tf
import numpy as np
import pandas as pd


In [2]:
def read_csv(file_path):
  """Read a sentence/label CSV and return both columns as NumPy arrays.

  Args:
    file_path: Path of a CSV file that has "sentence" and "label" columns.

  Returns:
    A tuple (X, Y) where X holds the values of the "sentence" column and
    Y holds the values of the "label" column converted to int.
  """
  frame = pd.read_csv(file_path)
  sentences, labels = frame["sentence"], frame["label"]
  return np.array(sentences), np.array(labels, dtype=int)


In [3]:
# Load the train and test CSV files as (sentences, labels) array pairs.
# NOTE(review): absolute /content/... paths — presumably a Colab runtime;
# confirm the location before running this anywhere else.
X_train, Y_train = read_csv("/content/dataset/train.csv")
X_test, Y_test = read_csv("/content/dataset/test.csv")

In [5]:
# Peek at one training example: the raw sentence and its integer label.
index = 7
X_train[index], Y_train[index]

('congratulations on your acceptance', 2)

In [4]:
def label_to_emoji(label):
  """Look up the emoji that belongs to an integer class label.

  Args:
    label: Integer position in the emoji table (the table has five entries).

  Returns:
    The emoji string stored at position `label`.
  """
  emoji_table = [
      "💚",
      "⚽️",
      "😍",
      "😞",
      "🍴",
  ]
  return emoji_table[label]


In [6]:
# Show the same training example again, with its label rendered as an emoji.
index = 7
X_train[index], label_to_emoji(Y_train[index])

('congratulations on your acceptance', '😍')

In [10]:
# Class balance of the training labels: the distinct label values and how
# many times each one occurs.
unique, counts = np.unique(Y_train, return_counts=True)
print(unique)
print(counts)

[0 1 2 3 4]
[22 19 38 36 17]


In [11]:
# Training sentence with the largest len(), i.e. the most characters.
max(X_train, key=len)

'I am so impressed by your dedication to this project'

In [12]:
# The same sentence, split on single spaces into tokens.
max(X_train, key=len).split(" ")

['I',
 'am',
 'so',
 'impressed',
 'by',
 'your',
 'dedication',
 'to',
 'this',
 'project']

In [16]:
# Largest number of words found in any training sentence. The count is taken
# per sentence: the sentence with the most characters is not necessarily the
# one with the most words, so every sentence is tokenized before comparing.
# split() without arguments also ignores repeated/trailing whitespace.
max_len = max(len(sentence.split()) for sentence in X_train)
max_len

10

In [17]:
!wget https://nlp.stanford.edu/data/glove.6B.zip  

--2024-01-26 22:03:51--  https://nlp.stanford.edu/data/glove.6B.zip
Resolving nlp.stanford.edu (nlp.stanford.edu)... 171.64.67.140
Connecting to nlp.stanford.edu (nlp.stanford.edu)|171.64.67.140|:443... connected.
HTTP request sent, awaiting response... 301 Moved Permanently
Location: https://downloads.cs.stanford.edu/nlp/data/glove.6B.zip [following]
--2024-01-26 22:03:52--  https://downloads.cs.stanford.edu/nlp/data/glove.6B.zip
Resolving downloads.cs.stanford.edu (downloads.cs.stanford.edu)... 171.64.64.22
Connecting to downloads.cs.stanford.edu (downloads.cs.stanford.edu)|171.64.64.22|:443... connected.
HTTP request sent, awaiting response... 200 OK
Length: 862182613 (822M) [application/zip]
Saving to: ‘glove.6B.zip’


2024-01-26 22:06:31 (5.18 MB/s) - ‘glove.6B.zip’ saved [862182613/862182613]



In [18]:
!unzip -q glove.6B -d glove.6B 

In [28]:
# Open the GloVe 50d text file for reading as UTF-8.
# NOTE(review): this handle is not closed anywhere in the visible cells.
f = open("/content/glove.6B/glove.6B.50d.txt", encoding="utf-8")

In [29]:
# Parse the GloVe text file into a {word: vector} dictionary. Each line holds
# a word followed by its vector components, separated by whitespace.
# The file is opened inside a `with` block so the handle is always closed and
# the cell can be re-run safely; iterating a handle that has already been
# consumed would silently produce an empty dictionary.
word_vectors = {}

with open("/content/glove.6B/glove.6B.50d.txt", encoding="utf-8") as glove_file:
  for line in glove_file:
    parts = line.strip().split()
    if not parts:
      # Skip blank lines instead of failing on parts[0].
      continue
    word_vectors[parts[0]] = np.array(parts[1:], dtype=np.float64)

In [31]:
# Sanity check: display the vector stored for a single word.
word_vectors["snake"]

array([ 0.49251 , -0.24279 , -0.49748 ,  0.28443 ,  0.16984 ,  0.61016 ,
        0.20294 , -0.19734 ,  0.93474 , -0.11809 , -0.26342 ,  0.97142 ,
        1.0427  ,  0.60017 , -0.46936 ,  0.10087 ,  0.60649 ,  1.1277  ,
       -1.1823  , -0.29334 , -0.72885 , -0.46904 ,  1.1104  ,  0.27504 ,
        0.48043 , -1.3031  , -0.58713 ,  0.90264 ,  0.089552, -0.60348 ,
        1.1117  , -0.85367 , -0.13902 ,  0.87767 , -0.19307 ,  0.10299 ,
       -0.83688 , -0.87202 ,  0.46529 , -0.22325 , -0.49207 ,  0.33727 ,
       -0.49699 ,  0.95006 ,  0.75007 , -0.21252 ,  0.47244 , -1.4552  ,
        0.11704 , -0.61483 ])

In [40]:
def sentence_to_avg(sentence, embeddings=None):
  """Average the word vectors of all words in a sentence.

  The sentence is lower-cased and split on whitespace; the vectors of the
  resulting words are summed and divided by the number of words.

  Args:
    sentence: Text to embed.
    embeddings: Optional mapping from word to vector. When omitted, the
      module-level `word_vectors` dictionary is used.

  Returns:
    The averaged vector as a NumPy array, or None (after printing a short
    failure message) when `sentence` is not a string, contains no words, or
    contains a word that is missing from the embeddings.
  """
  if embeddings is None:
    embeddings = word_vectors

  if not isinstance(sentence, str):
    print("نتوانستم")
    return None

  # split() without arguments collapses repeated spaces, so no empty-string
  # "words" are produced that could never be found in the embeddings.
  words = sentence.lower().split()
  if not words:
    # Avoid dividing by zero for empty / whitespace-only input.
    print("نتوانستم")
    return None

  try:
    vectors = [embeddings[word] for word in words]
  except KeyError:
    # Only a missing word is an expected failure; anything else propagates.
    print("نتوانستم")
    return None

  # Accumulator takes its shape from the embeddings instead of assuming 50.
  sum_vectors = np.zeros(np.shape(vectors[0]))
  for vector in vectors:
    sum_vectors += vector

  return sum_vectors / len(words)



In [41]:
# Try the averaging helper on a sample sentence.
sentence_to_avg("i love programming too much")

array([ 0.157256  ,  0.1669862 , -0.188816  , -0.27733   ,  0.316686  ,
        0.2276832 , -0.442016  , -0.19912792, -0.528786  ,  0.58095151,
       -0.099434  ,  0.543644  , -0.400818  , -0.0196456 ,  0.6553478 ,
        0.357032  ,  0.110736  ,  0.3904726 , -0.015683  , -0.5693798 ,
       -0.11808   ,  0.662674  ,  0.50238   ,  0.380024  ,  0.798924  ,
       -1.542946  , -0.8024376 ,  0.265944  ,  0.838432  , -0.497945  ,
        3.32178   ,  0.516902  , -0.0628614 , -0.236668  , -0.1666506 ,
       -0.104354  , -0.144238  ,  0.238698  , -0.116562  , -0.335334  ,
        0.00965334,  0.2257584 , -0.261848  ,  0.44801   , -0.2448288 ,
        0.1346956 ,  0.0192128 , -0.21776632, -0.28343   ,  0.698352  ])

In [43]:
# Embed every training sentence with sentence_to_avg and collect the results
# into a single NumPy array.
X_train_avg = np.array([sentence_to_avg(sentence) for sentence in X_train])

X_train_avg.shape

(132, 50)

In [None]:
# One-hot encode the integer training labels into 5 columns.
# NOTE(review): the last line displays the entire array.
Y_train_one_hot = tf.keras.utils.to_categorical(Y_train, num_classes=5)
Y_train_one_hot

In [47]:
# Classifier: one dense softmax layer mapping a 50-value input vector to
# 5 output probabilities.
model = tf.keras.models.Sequential()
model.add(tf.keras.layers.Dense(5, input_shape=(50,), activation="softmax"))

In [48]:
# Configure training: Adam with its default settings, categorical
# cross-entropy as the loss, and accuracy reported as a metric.
model.compile(
    optimizer=tf.keras.optimizers.Adam(),
    loss="categorical_crossentropy",
    metrics=["accuracy"]
)

In [50]:
# Train for 250 epochs on the averaged sentence vectors and one-hot labels.
model.fit(X_train_avg, Y_train_one_hot, epochs=250)

Epoch 1/250
Epoch 2/250
Epoch 3/250
Epoch 4/250
Epoch 5/250
Epoch 6/250
Epoch 7/250
Epoch 8/250
Epoch 9/250
Epoch 10/250
Epoch 11/250
Epoch 12/250
Epoch 13/250
Epoch 14/250
Epoch 15/250
Epoch 16/250
Epoch 17/250
Epoch 18/250
Epoch 19/250
Epoch 20/250
Epoch 21/250
Epoch 22/250
Epoch 23/250
Epoch 24/250
Epoch 25/250
Epoch 26/250
Epoch 27/250
Epoch 28/250
Epoch 29/250
Epoch 30/250
Epoch 31/250
Epoch 32/250
Epoch 33/250
Epoch 34/250
Epoch 35/250
Epoch 36/250
Epoch 37/250
Epoch 38/250
Epoch 39/250
Epoch 40/250
Epoch 41/250
Epoch 42/250
Epoch 43/250
Epoch 44/250
Epoch 45/250
Epoch 46/250
Epoch 47/250
Epoch 48/250
Epoch 49/250
Epoch 50/250
Epoch 51/250
Epoch 52/250
Epoch 53/250
Epoch 54/250
Epoch 55/250
Epoch 56/250
Epoch 57/250
Epoch 58/250
Epoch 59/250
Epoch 60/250
Epoch 61/250
Epoch 62/250
Epoch 63/250
Epoch 64/250
Epoch 65/250
Epoch 66/250
Epoch 67/250
Epoch 68/250
Epoch 69/250
Epoch 70/250
Epoch 71/250
Epoch 72/250
Epoch 73/250
Epoch 74/250
Epoch 75/250
Epoch 76/250
Epoch 77/250
Epoch 78

<keras.src.callbacks.History at 0x793b87f756f0>

In [56]:
# Embed a hand-written sentence to try the trained model on.
my_test = "I like watching football"

my_test_avg = sentence_to_avg(my_test)
my_test_avg

array([-0.33470468,  0.296985  , -0.32625075, -0.12892675,  0.289635  ,
       -0.4978425 , -0.870515  ,  0.074885  , -0.5578725 , -0.00857761,
        0.0234025 ,  0.4729155 , -0.615535  ,  0.07134   ,  1.013365  ,
        0.326581  ,  0.31782425,  0.61534   , -0.62832425, -0.5111925 ,
       -0.4746425 ,  0.47326   ,  0.430055  ,  0.5530325 ,  0.3975425 ,
       -1.845075  , -0.72657975,  0.3544825 ,  0.3717075 , -0.82037   ,
        2.85      ,  0.8758275 , -0.19117675, -0.2705945 , -0.130911  ,
        0.2750675 , -0.0068475 ,  0.249615  ,  0.0608925 , -0.4966    ,
       -0.24339108,  0.2379205 , -0.3379743 ,  0.4391425 ,  0.1994765 ,
        0.36621225, -0.067645  , -0.2157725 , -0.26459225,  0.2223975 ])

In [57]:
# Give the single sentence vector a leading batch axis before predicting.
# reshape(1, -1) is idempotent: re-running this cell keeps one batch axis
# instead of wrapping the array in yet another dimension each time.
my_test_avg = np.reshape(my_test_avg, (1, -1))

result = model.predict(my_test_avg)
result



array([[0.11939   , 0.69525236, 0.15358767, 0.01698845, 0.01478158]],
      dtype=float32)

In [58]:
# Position of the largest value in the prediction array.
y_pred = result.argmax()
y_pred

1

In [59]:
# Render the predicted class index as its emoji.
label_to_emoji(y_pred)

'⚽️'