In [38]:
import numpy as np
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Embedding, SimpleRNN, Dense
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences

In [39]:
# Sample text dataset
texts = [
    "I love this movie, it was amazing",
    "This film is terrible and boring",
    "Fantastic storyline and great acting",
    "I hated the movie, worst experience",
    "An excellent movie with brilliant performances"
]

# Corresponding labels (1 = positive, 0 = negative)
labels = np.array([1, 0, 1, 0, 1])


In [71]:
tokenizer = Tokenizer(num_words=10000, oov_token='<nothing>')
tokenizer.fit_on_texts(texts)

In [41]:
sequences = tokenizer.texts_to_sequences(texts)

In [42]:
max_length = 10 
padded_sequences = pad_sequences(sequences, maxlen=max_length, padding='post')

print("Word Index:\n", tokenizer.word_index)
print("Padded Sequences:\n", padded_sequences)

Word Index:
 {'<nothing>': 1, 'movie': 2, 'i': 3, 'this': 4, 'and': 5, 'love': 6, 'it': 7, 'was': 8, 'amazing': 9, 'film': 10, 'is': 11, 'terrible': 12, 'boring': 13, 'fantastic': 14, 'storyline': 15, 'great': 16, 'acting': 17, 'hated': 18, 'the': 19, 'worst': 20, 'experience': 21, 'an': 22, 'excellent': 23, 'with': 24, 'brilliant': 25, 'performances': 26}
Padded Sequences:
 [[ 3  6  4  2  7  8  9  0  0  0]
 [ 4 10 11 12  5 13  0  0  0  0]
 [14 15  5 16 17  0  0  0  0  0]
 [ 3 18 19  2 20 21  0  0  0  0]
 [22 23  2 24 25 26  0  0  0  0]]


In [43]:
model = Sequential([
    Embedding(input_dim=10000, output_dim=16, input_length=max_length),  # Embedding Layer
    SimpleRNN(32, return_sequences=False),  # RNN Layer
    Dense(1, activation='sigmoid')  # Output layer for binary classification
])

In [44]:
model.build(input_shape=(None, 10))
model.summary()

In [45]:
model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])

In [46]:
model.fit(padded_sequences, labels, epochs=7)

Epoch 1/7
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 3s/step - accuracy: 0.6000 - loss: 0.6858
Epoch 2/7
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 75ms/step - accuracy: 0.8000 - loss: 0.6660
Epoch 3/7
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 52ms/step - accuracy: 0.8000 - loss: 0.6466
Epoch 4/7
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 53ms/step - accuracy: 1.0000 - loss: 0.6272
Epoch 5/7
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 53ms/step - accuracy: 1.0000 - loss: 0.6076
Epoch 6/7
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 55ms/step - accuracy: 1.0000 - loss: 0.5875
Epoch 7/7
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 55ms/step - accuracy: 1.0000 - loss: 0.5665


<keras.src.callbacks.history.History at 0x167c3fc1880>

In [58]:
new_review = ["The movie was fantastic"]
new_seq = tokenizer.texts_to_sequences(new_review)
new_padded_seq = pad_sequences(new_seq, maxlen=max_length, padding='post')

In [59]:
prediction = model.predict(new_padded_seq)
sentiment = "Positive" if prediction>0.5 else "Negative"
print(f'Model Prediction is {sentiment}')

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 34ms/step
Model Prediction is Positive


In [83]:
from keras.datasets import imdb

In [94]:
(X_train, y_train), (X_test, y_test) = imdb.load_data(num_words=10000)

In [95]:
X_train = pad_sequences(X_train, padding='post', maxlen=50, truncating='post')
X_test = pad_sequences(X_test, padding='post', maxlen=50, truncating='post')

In [97]:
X_train[0]

array([   1,   14,   22,   16,   43,  530,  973, 1622, 1385,   65,  458,
       4468,   66, 3941,    4,  173,   36,  256,    5,   25,  100,   43,
        838,  112,   50,  670,    2,    9,   35,  480,  284,    5,  150,
          4,  172,  112,  167,    2,  336,  385,   39,    4,  172, 4536,
       1111,   17,  546,   38,   13,  447])

In [98]:
print("Max word index in X_train:", max(max(seq) for seq in X_train if len(seq) > 0))


Max word index in X_train: 9999


In [99]:
X_train.shape

(25000, 50)

In [100]:
model = Sequential([
    Embedding(input_dim=10000, output_dim=2, input_length=50),  # Embedding Layer
    SimpleRNN(32, return_sequences=False),  # RNN Layer
    Dense(1, activation='sigmoid')  # Output layer for binary classification
])



In [101]:
model.build(input_shape=(None, 50))
model.summary()

In [104]:
model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['acc'])
history = model.fit(X_train, y_train,epochs=10,validation_data=(X_test,y_test))

Epoch 1/10
[1m782/782[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m11s[0m 10ms/step - acc: 0.8598 - loss: 0.3344 - val_acc: 0.7357 - val_loss: 0.6078
Epoch 2/10
[1m782/782[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m7s[0m 9ms/step - acc: 0.8794 - loss: 0.2909 - val_acc: 0.7144 - val_loss: 0.6209
Epoch 3/10
[1m782/782[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m7s[0m 9ms/step - acc: 0.9044 - loss: 0.2547 - val_acc: 0.7254 - val_loss: 0.6705
Epoch 4/10
[1m782/782[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m7s[0m 9ms/step - acc: 0.9176 - loss: 0.2191 - val_acc: 0.7212 - val_loss: 0.7590
Epoch 5/10
[1m782/782[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m8s[0m 10ms/step - acc: 0.9334 - loss: 0.1850 - val_acc: 0.7022 - val_loss: 0.7952
Epoch 6/10
[1m782/782[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m7s[0m 10ms/step - acc: 0.9398 - loss: 0.1707 - val_acc: 0.7002 - val_loss: 0.8394
Epoch 7/10
[1m782/782[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m8s[0m 10ms/st

In [105]:
loss, acc = model.evaluate(X_test, y_test)
print(f'Accuracy is {acc:.4f}')

[1m782/782[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 3ms/step - acc: 0.7106 - loss: 1.1252
Accuracy is 0.7064
