# 1 Integer Encoding

In [127]:
# Toy corpus: eight short phrases used to demonstrate tokenization,
# integer encoding and padding.
docs = [
    "go india",
    "india india",
    "congrats guys",
    "dhoni dhoni",
    "king kohli",
    "sachin sachin",
    "chennai super kings",
    "no corruption",
]

In [128]:
from tensorflow.keras.preprocessing.text import Tokenizer
from keras.utils import pad_sequences

In [129]:
# Build the vocabulary: fit_on_texts scans `docs` and stores the word
# statistics (counts, indices, document count) on the tokenizer object.
tokenizer = Tokenizer()
tokenizer.fit_on_texts(docs)

In [130]:
# How many times each word occurs across all of `docs`.
tokenizer.word_counts

OrderedDict([('go', 1),
             ('india', 3),
             ('congrats', 1),
             ('guys', 1),
             ('dhoni', 2),
             ('king', 1),
             ('kohli', 1),
             ('sachin', 2),
             ('chennai', 1),
             ('super', 1),
             ('kings', 1),
             ('no', 1),
             ('corruption', 1)])

In [131]:
# Number of texts the tokenizer was fitted on (one per entry of `docs`).
tokenizer.document_count

8

In [132]:
# Word -> integer id mapping. Ids start at 1, so 0 stays free for padding.
tokenizer.word_index

{'india': 1,
 'dhoni': 2,
 'sachin': 3,
 'go': 4,
 'congrats': 5,
 'guys': 6,
 'king': 7,
 'kohli': 8,
 'chennai': 9,
 'super': 10,
 'kings': 11,
 'no': 12,
 'corruption': 13}

In [133]:
# Replace every word of every document by its integer id from word_index.
sequence = tokenizer.texts_to_sequences(docs)

In [134]:
# Integer-encoded documents; the lists still have different lengths here.
sequence

[[4, 1], [1, 1], [5, 6], [2, 2], [7, 8], [3, 3], [9, 10, 11], [12, 13]]

In [135]:
# Pad every encoded document with trailing zeros ('post') up to the length of
# the longest one, so the result is a rectangular array.
# NOTE: this rebinds `sequence` from a list of lists to a 2-D array.
sequence = pad_sequences(sequence, padding = 'post')
sequence

array([[ 4,  1,  0],
       [ 1,  1,  0],
       [ 5,  6,  0],
       [ 2,  2,  0],
       [ 7,  8,  0],
       [ 3,  3,  0],
       [ 9, 10, 11],
       [12, 13,  0]])

# 2 RNN Architecture

In [136]:
from keras import Sequential
from keras.layers import Dense, Input, SimpleRNN

In [137]:
# Minimal recurrent classifier: one SimpleRNN layer (32 units) feeding a
# single sigmoid unit.
model = Sequential()
# Declare the input with an explicit Input layer instead of `input_shape=` on
# the first layer (recent Keras versions warn about that form).
# The time dimension is None so the model is not tied to one hard-coded
# sequence length: the previous value (2494) did not match the 2315-step
# padding applied to the IMDB data later in this notebook.
# Each timestep carries one feature, the integer word id.
# NOTE(review): data is later passed as a 2-D (samples, timesteps) array; this
# relies on Keras adding the trailing feature axis, as the original did - confirm.
model.add(Input(shape = (None, 1)))
model.add(SimpleRNN(32))
model.add(Dense(1,activation = 'sigmoid'))
model.summary()

  super().__init__(**kwargs)


# 3 Sentiment Analysis Using Integer Encoding

In [138]:
from keras.datasets import imdb

In [139]:
# Load the IMDB review dataset as two (inputs, labels) pairs: train and test.
# No arguments are passed, so the loader's default settings apply.
(X_train,y_train),(X_test,y_test) = imdb.load_data()

In [140]:
# Shape before padding: a 1-D array with one entry per review.
X_train.shape

(25000,)

In [141]:
# One label per training review.
y_train.shape

(25000,)

In [142]:
# Bring every review to a common length of 2315 so both splits become
# rectangular (samples, 2315) arrays; shorter reviews get zeros appended
# after their tokens ('post' padding).
X_train = pad_sequences(
    X_train,
    maxlen=2315,
    padding='post',
)
X_test = pad_sequences(
    X_test,
    maxlen=2315,
    padding='post',
)
X_train.shape


(25000, 2315)

In [143]:
# Training configuration: Adam optimizer, binary cross-entropy loss,
# accuracy reported as the metric.
model.compile(
    loss='binary_crossentropy',
    optimizer='adam',
    metrics=['accuracy'],
)
# Two epochs in mini-batches of 32; the test split is evaluated after each
# epoch as validation data.
model.fit(
    X_train,
    y_train,
    batch_size=32,
    epochs=2,
    validation_data=(X_test, y_test),
)

Epoch 1/2
[1m782/782[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m394s[0m 498ms/step - accuracy: 0.5129 - loss: 0.6947 - val_accuracy: 0.4958 - val_loss: 0.6954
Epoch 2/2
[1m782/782[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m404s[0m 517ms/step - accuracy: 0.5032 - loss: 0.6938 - val_accuracy: 0.5036 - val_loss: 0.6933


<keras.src.callbacks.history.History at 0x12df2d652d0>

# 4 Embedding

In [144]:
from keras.layers import Embedding

In [145]:
# Vocabulary size for the toy corpus: one row per known word plus one extra
# row for id 0, which the padded sequences use as the padding value.
toy_vocab_size = len(tokenizer.word_index) + 1

model1 = Sequential()
# Map each integer word id to a trainable 4-dimensional vector.
# The deprecated `input_length` argument is dropped; the sequence length is
# taken from the data passed to the model.
model1.add(Embedding(input_dim = toy_vocab_size, output_dim = 4))



In [146]:
# Compile with the same optimizer / loss / metric settings used elsewhere in
# this notebook, then run the embedding layer (never fitted) on the padded toy
# sequences. The result holds one 4-value vector per token position.
model1.compile(
    loss='binary_crossentropy',
    optimizer='adam',
    metrics=['accuracy'],
)
model1.predict(sequence)

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 350ms/step


array([[[-0.04728732,  0.01989973,  0.0152255 ,  0.02513253],
        [ 0.01133859, -0.01196128,  0.01031534, -0.03696275],
        [-0.02579745,  0.01625777,  0.01198266, -0.04251304]],

       [[ 0.01133859, -0.01196128,  0.01031534, -0.03696275],
        [ 0.01133859, -0.01196128,  0.01031534, -0.03696275],
        [-0.02579745,  0.01625777,  0.01198266, -0.04251304]],

       [[ 0.03798148, -0.0278366 , -0.00570631,  0.00241654],
        [-0.02320976, -0.02201616, -0.00112246,  0.02417388],
        [-0.02579745,  0.01625777,  0.01198266, -0.04251304]],

       [[-0.03731575,  0.03542266, -0.03403046,  0.03401487],
        [-0.03731575,  0.03542266, -0.03403046,  0.03401487],
        [-0.02579745,  0.01625777,  0.01198266, -0.04251304]],

       [[ 0.00680568,  0.03514956,  0.02597681,  0.02591247],
        [ 0.04716495,  0.04195935, -0.01259819, -0.04165198],
        [-0.02579745,  0.01625777,  0.01198266, -0.04251304]],

       [[ 0.02324079,  0.03156341,  0.03642539, -0.03746182]

In [147]:
# Print the layer-by-layer summary of the embedding-only model.
model1.summary()

# 5 Sentiment Analysis Using Embedding

In [149]:
# Size the embedding table from the data instead of a guessed upper bound
# (previously 500000): largest word id present in either split, plus one row
# for id 0.
# Assumes X_train / X_test are already the padded integer arrays.
imdb_vocab_size = int(max(X_train.max(), X_test.max())) + 1

model2 = Sequential()
# 2-dimensional learned embedding per word id. mask_zero=True marks the
# 0-valued padding positions so the recurrent layer skips them; without it the
# RNN's final state is computed after a long run of trailing padding steps.
model2.add(Embedding(imdb_vocab_size, 2, mask_zero = True))
model2.add(SimpleRNN(32))
model2.add(Dense(1,activation  = 'sigmoid'))
model2.summary()

In [150]:
# Training configuration: Adam optimizer, binary cross-entropy loss,
# accuracy reported as the metric.
model2.compile(
    loss='binary_crossentropy',
    optimizer='adam',
    metrics=['accuracy'],
)
# Two epochs with the default batch size; the test split is evaluated after
# each epoch as validation data.
model2.fit(
    X_train,
    y_train,
    epochs=2,
    validation_data=(X_test, y_test),
)

Epoch 1/2
[1m371/782[0m [32m━━━━━━━━━[0m[37m━━━━━━━━━━━[0m [1m6:15[0m 913ms/step - accuracy: 0.5026 - loss: 0.6941

In [None]:
# Re-display the model2 summary (also printed in the cell that defines model2).
model2.summary()

In [None]:
# Sanity check: padded training inputs, (samples, sequence length).
X_train.shape

(25000, 2315)

In [None]:
# Sanity check: one label per training sample.
y_train.shape

(25000,)

In [None]:
# Sanity check: padded test inputs, (samples, sequence length).
X_test.shape

(25000, 2315)

In [None]:
# Sanity check: one label per test sample.
y_test.shape

(25000,)