<a href="https://colab.research.google.com/github/Niraj-631/RNN/blob/main/Sentiment_Analysis1.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
# Sample text data
texts = [
    "I love this movie",          # Positive
    "This film was great",        # Positive
    "Amazing experience",         # Positive
    "I hated this movie",         # Negative
    "This movie was terrible",    # Negative
    "Worst film ever"             # Negative
]

# Labels: 1 = Positive, 0 = Negative
labels = [1, 1, 1, 0, 0, 0]


In [None]:
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences

# Initialize tokenizer
tokenizer = Tokenizer()
tokenizer.fit_on_texts(texts)

# Convert texts to sequences of integers
sequences = tokenizer.texts_to_sequences(texts)

# Pad the sequences to the same length
padded_sequences = pad_sequences(sequences, padding='post')

# Show result
print("Word Index:", tokenizer.word_index)
print("Sequences:", sequences)
print("Padded Sequences:\n", padded_sequences)


Word Index: {'this': 1, 'movie': 2, 'i': 3, 'film': 4, 'was': 5, 'love': 6, 'great': 7, 'amazing': 8, 'experience': 9, 'hated': 10, 'terrible': 11, 'worst': 12, 'ever': 13}
Sequences: [[3, 6, 1, 2], [1, 4, 5, 7], [8, 9], [3, 10, 1, 2], [1, 2, 5, 11], [12, 4, 13]]
Padded Sequences:
 [[ 3  6  1  2]
 [ 1  4  5  7]
 [ 8  9  0  0]
 [ 3 10  1  2]
 [ 1  2  5 11]
 [12  4 13  0]]


In [None]:
tokenizer.document_count

6

In [None]:
tokenizer.word_counts

OrderedDict([('i', 2),
             ('love', 1),
             ('this', 4),
             ('movie', 3),
             ('film', 2),
             ('was', 2),
             ('great', 1),
             ('amazing', 1),
             ('experience', 1),
             ('hated', 1),
             ('terrible', 1),
             ('worst', 1),
             ('ever', 1)])

In [None]:
sequences = tokenizer.texts_to_sequences(texts)
sequences

[[3, 6, 1, 2], [1, 4, 5, 7], [8, 9], [3, 10, 1, 2], [1, 2, 5, 11], [12, 4, 13]]

In [None]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Embedding, Flatten, Dense

# Build the model
model = Sequential()
model.add(Embedding(input_dim=20, output_dim=8, input_length=padded_sequences.shape[1]))
model.add(Flatten())
model.add(Dense(1, activation='sigmoid'))  # Binary classification: 0 or 1

# Compile the model
model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])

# Show model summary
model.summary()




In [None]:
import numpy as np

# Convert labels to NumPy array
labels_array = np.array(labels)

# Train the model
model.fit(padded_sequences, labels_array, epochs=10, verbose=1)


Epoch 1/10
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 3s/step - accuracy: 0.5000 - loss: 0.6953
Epoch 2/10
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 995ms/step - accuracy: 0.5000 - loss: 0.6937
Epoch 3/10
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 152ms/step - accuracy: 0.5000 - loss: 0.6922
Epoch 4/10
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 285ms/step - accuracy: 0.6667 - loss: 0.6907
Epoch 5/10
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 311ms/step - accuracy: 0.6667 - loss: 0.6892
Epoch 6/10
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 91ms/step - accuracy: 0.8333 - loss: 0.6876
Epoch 7/10
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 154ms/step - accuracy: 0.8333 - loss: 0.6861
Epoch 8/10
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 135ms/step - accuracy: 0.8333 - loss: 0.6846
Epoch 9/10
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0

<keras.src.callbacks.history.History at 0x7beeb45affd0>

In [44]:
# New test sentences
test_sentences = [
    "I really love this film",      # Positive
    "This movie was awful",         # Negative
    "What a great experience",      # Positive
    "I will never watch it again"   # Negative
]

# Convert to sequences
test_sequences = tokenizer.texts_to_sequences(test_sentences)
test_padded = pad_sequences(test_sequences, maxlen=padded_sequences.shape[1], padding='post')

# Make predictions
predictions = model.predict(test_padded)

# Show results
for sentence, pred in zip(test_sentences, predictions):
    sentiment = "Positive" if pred > 0.5 else "Negative"
    print(f"{sentence} --> {sentiment} (Confidence: {pred[0]:.2f})")


[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 369ms/step
I really love this film --> Negative (Confidence: 0.45)
This movie was awful --> Positive (Confidence: 0.52)
What a great experience --> Negative (Confidence: 0.43)
I will never watch it again --> Negative (Confidence: 0.41)


In [45]:
#  1. Your own custom test sentences
test_sentences = [
    "I enjoyed the show",          # Expected: Positive
    "It was a boring movie",       # Expected: Negative
    "Fantastic story and acting",  # Positive
    "I regret watching it"         # Negative
]

#  2. Convert to sequences and pad
test_sequences = tokenizer.texts_to_sequences(test_sentences)
test_padded = pad_sequences(test_sequences, maxlen=padded_sequences.shape[1], padding='post')

#  3. Predict sentiment
predictions = model.predict(test_padded)

#  4. Display results
for sentence, pred in zip(test_sentences, predictions):
    sentiment = "Positive " if pred > 0.5 else "Negative "
    print(f"{sentence} --> {sentiment} (Confidence: {pred[0]:.2f})")



[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 29ms/step
I enjoyed the show --> Negative  (Confidence: 0.41)
It was a boring movie --> Negative  (Confidence: 0.49)
Fantastic story and acting --> Negative  (Confidence: 0.49)
I regret watching it --> Negative  (Confidence: 0.41)


In [None]:
from keras.datasets import imdb
from keras import Sequential
from keras.layers import Dense, SimpleRNN, Embedding, Flatten

In [None]:
(X_train, y_train), (X_test, y_test) = imdb.load_data()

Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/imdb.npz
[1m17464789/17464789[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 0us/step


In [None]:
X_train.shape

(25000,)

In [None]:
X_test

array([list([1, 591, 202, 14, 31, 6, 717, 10, 10, 18142, 10698, 5, 4, 360, 7, 4, 177, 5760, 394, 354, 4, 123, 9, 1035, 1035, 1035, 10, 10, 13, 92, 124, 89, 488, 7944, 100, 28, 1668, 14, 31, 23, 27, 7479, 29, 220, 468, 8, 124, 14, 286, 170, 8, 157, 46, 5, 27, 239, 16, 179, 15387, 38, 32, 25, 7944, 451, 202, 14, 6, 717]),
       list([1, 14, 22, 3443, 6, 176, 7, 5063, 88, 12, 2679, 23, 1310, 5, 109, 943, 4, 114, 9, 55, 606, 5, 111, 7, 4, 139, 193, 273, 23, 4, 172, 270, 11, 7216, 10626, 4, 8463, 2801, 109, 1603, 21, 4, 22, 3861, 8, 6, 1193, 1330, 10, 10, 4, 105, 987, 35, 841, 16873, 19, 861, 1074, 5, 1987, 17975, 45, 55, 221, 15, 670, 5304, 526, 14, 1069, 4, 405, 5, 2438, 7, 27, 85, 108, 131, 4, 5045, 5304, 3884, 405, 9, 3523, 133, 5, 50, 13, 104, 51, 66, 166, 14, 22, 157, 9, 4, 530, 239, 34, 8463, 2801, 45, 407, 31, 7, 41, 3778, 105, 21, 59, 299, 12, 38, 950, 5, 4521, 15, 45, 629, 488, 2733, 127, 6, 52, 292, 17, 4, 6936, 185, 132, 1988, 5304, 1799, 488, 2693, 47, 6, 392, 173, 4, 21686, 4

In [None]:
X_train[0]

[1,
 14,
 22,
 16,
 43,
 530,
 973,
 1622,
 1385,
 65,
 458,
 4468,
 66,
 3941,
 4,
 173,
 36,
 256,
 5,
 25,
 100,
 43,
 838,
 112,
 50,
 670,
 22665,
 9,
 35,
 480,
 284,
 5,
 150,
 4,
 172,
 112,
 167,
 21631,
 336,
 385,
 39,
 4,
 172,
 4536,
 1111,
 17,
 546,
 38,
 13,
 447,
 4,
 192,
 50,
 16,
 6,
 147,
 2025,
 19,
 14,
 22,
 4,
 1920,
 4613,
 469,
 4,
 22,
 71,
 87,
 12,
 16,
 43,
 530,
 38,
 76,
 15,
 13,
 1247,
 4,
 22,
 17,
 515,
 17,
 12,
 16,
 626,
 18,
 19193,
 5,
 62,
 386,
 12,
 8,
 316,
 8,
 106,
 5,
 4,
 2223,
 5244,
 16,
 480,
 66,
 3785,
 33,
 4,
 130,
 12,
 16,
 38,
 619,
 5,
 25,
 124,
 51,
 36,
 135,
 48,
 25,
 1415,
 33,
 6,
 22,
 12,
 215,
 28,
 77,
 52,
 5,
 14,
 407,
 16,
 82,
 10311,
 8,
 4,
 107,
 117,
 5952,
 15,
 256,
 4,
 31050,
 7,
 3766,
 5,
 723,
 36,
 71,
 43,
 530,
 476,
 26,
 400,
 317,
 46,
 7,
 4,
 12118,
 1029,
 13,
 104,
 88,
 4,
 381,
 15,
 297,
 98,
 32,
 2071,
 56,
 26,
 141,
 6,
 194,
 7486,
 18,
 4,
 226,
 22,
 21,
 134,
 476,
 26,
 480,
 5

In [None]:
len(X_train[0])

218

In [None]:
len(X_train[1])

189

In [None]:
len(X_train[2])

141

In [None]:
X_train = pad_sequences(X_train, padding='post', maxlen=50)
X_test = pad_sequences(X_test, padding='post', maxlen=50)

In [None]:
X_train.shape

(25000, 50)

In [None]:
X_train

array([[ 2071,    56,    26, ...,    19,   178,    32],
       [ 8255,     5, 25249, ...,    16,   145,    95],
       [  215,    28,   610, ...,     7,   129,   113],
       ...,
       [    4,    65,   496, ...,     4,  3586, 22459],
       [   13,    18,    31, ...,    12,     9,    23],
       [ 7585,     8,  2197, ...,   204,   131,     9]], dtype=int32)

In [None]:
X_train[0]

array([2071,   56,   26,  141,    6,  194, 7486,   18,    4,  226,   22,
         21,  134,  476,   26,  480,    5,  144,   30, 5535,   18,   51,
         36,   28,  224,   92,   25,  104,    4,  226,   65,   16,   38,
       1334,   88,   12,   16,  283,    5,   16, 4472,  113,  103,   32,
         15,   16, 5345,   19,  178,   32], dtype=int32)

In [None]:
model = Sequential()

model.add(SimpleRNN(32,input_shape=(50,1),return_sequences=False))
model.add(Dense(1, activation='sigmoid'))
model.summary()

  super().__init__(**kwargs)


In [None]:
model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])
model.fit(X_train, y_train, epochs=5, validation_data=(X_test, y_test))

Epoch 1/5
[1m782/782[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m13s[0m 13ms/step - accuracy: 0.4964 - loss: 0.7005 - val_accuracy: 0.5012 - val_loss: 0.6942
Epoch 2/5
[1m782/782[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m15s[0m 7ms/step - accuracy: 0.5089 - loss: 0.6931 - val_accuracy: 0.5085 - val_loss: 0.6937
Epoch 3/5
[1m782/782[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m11s[0m 8ms/step - accuracy: 0.5044 - loss: 0.6924 - val_accuracy: 0.5036 - val_loss: 0.6954
Epoch 4/5
[1m782/782[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m9s[0m 7ms/step - accuracy: 0.5079 - loss: 0.6926 - val_accuracy: 0.5029 - val_loss: 0.6957
Epoch 5/5
[1m782/782[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 8ms/step - accuracy: 0.5046 - loss: 0.6929 - val_accuracy: 0.5057 - val_loss: 0.6939


<keras.src.callbacks.history.History at 0x7bed71a57fd0>

Embedding


In [None]:
# Sample text data
texts = [
    "I love this movie",          # Positive
    "This film was great",        # Positive
    "Amazing experience",         # Positive
    "I hated this movie",         # Negative
    "This movie was terrible",    # Negative
    "Worst film ever"             # Negative
]


In [None]:
from tensorflow.keras.preprocessing.text import Tokenizer
tokenizer = Tokenizer()
tokenizer.fit_on_texts(texts)

In [None]:
tokenizer.fit_on_texts(texts)

In [None]:
len(tokenizer.word_index)

13

In [None]:
sequences = tokenizer.texts_to_sequences(texts)
sequences

[[3, 6, 1, 2], [1, 4, 5, 7], [8, 9], [3, 10, 1, 2], [1, 2, 5, 11], [12, 4, 13]]

In [None]:
from tensorflow.keras.utils import pad_sequences
sequences = pad_sequences(sequences, padding='post')
sequences

array([[ 3,  6,  1,  2],
       [ 1,  4,  5,  7],
       [ 8,  9,  0,  0],
       [ 3, 10,  1,  2],
       [ 1,  2,  5, 11],
       [12,  4, 13,  0]], dtype=int32)

In [None]:
model = Sequential()
model.add(Embedding(13, output_dim=2, input_length=4))
model.summary()



In [None]:
model.compile('adam','accuracy')

In [None]:
pred = model.predict(sequences)
print(pred)

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 177ms/step
[[[ 0.02266419  0.01891928]
  [ 0.04894963  0.04758375]
  [ 0.0048544  -0.03133726]
  [-0.01637935  0.00893205]]

 [[ 0.0048544  -0.03133726]
  [ 0.03010913  0.02067629]
  [ 0.00412197  0.00985966]
  [-0.0403411   0.00541832]]

 [[-0.0006205  -0.01115737]
  [ 0.04907734  0.03571421]
  [-0.02960235 -0.01145168]
  [-0.02960235 -0.01145168]]

 [[ 0.02266419  0.01891928]
  [ 0.01581225 -0.03629338]
  [ 0.0048544  -0.03133726]
  [-0.01637935  0.00893205]]

 [[ 0.0048544  -0.03133726]
  [-0.01637935  0.00893205]
  [ 0.00412197  0.00985966]
  [ 0.01409776 -0.00776644]]

 [[-0.02235299  0.03414389]
  [ 0.03010913  0.02067629]
  [-0.02235299  0.03414389]
  [-0.02960235 -0.01145168]]]


In [None]:
from keras.datasets import imdb
from tensorflow.keras.preprocessing.text import Tokenizer
from keras.utils import pad_sequences
from keras import Sequential
from keras.layers import Dense, SimpleRNN, Embedding, Flatten

In [None]:
(X_train, y_train), (X_test, y_test) = imdb.load_data()

In [None]:
X_train = pad_sequences(X_train, padding='post', maxlen=50)
X_test = pad_sequences(X_test, padding='post', maxlen=50)

In [None]:
X_train.shape

(25000, 50)

In [None]:
model = Sequential()
model.add(Embedding(input_dim=10000, output_dim=2, input_length=50))
model.add(SimpleRNN(32, return_sequences=False))
model.add(Dense(1, activation='sigmoid'))

model.summary()



In [None]:
model.build(input_shape=(None, 50))  # batch_size is None (any), 50 is sequence length
model.summary()


In [None]:
model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])
history = model.fit(X_train, y_train, epochs=5, validation_data=(X_test, y_test))

Epoch 1/5
[1m782/782[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 9ms/step - accuracy: 0.5505 - loss: 0.6777 - val_accuracy: 0.7752 - val_loss: 0.4805
Epoch 2/5
[1m782/782[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m8s[0m 8ms/step - accuracy: 0.8106 - loss: 0.4271 - val_accuracy: 0.7957 - val_loss: 0.4448
Epoch 3/5
[1m782/782[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m7s[0m 8ms/step - accuracy: 0.8532 - loss: 0.3553 - val_accuracy: 0.7970 - val_loss: 0.4579
Epoch 4/5
[1m782/782[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 8ms/step - accuracy: 0.8683 - loss: 0.3200 - val_accuracy: 0.7878 - val_loss: 0.4696
Epoch 5/5
[1m782/782[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 7ms/step - accuracy: 0.8747 - loss: 0.3154 - val_accuracy: 0.7746 - val_loss: 0.4866
