In [None]:
!pip install tensorflow-datasets


In [15]:
from tensorflow.keras.layers import Dense, Embedding, Flatten, Input
from tensorflow.keras.models import Sequential
from tensorflow.keras.preprocessing.sequence import pad_sequences

In [7]:
# List the dataset builders registered with TensorFlow Datasets.
import tensorflow_datasets as tfds

all_datasets = tfds.list_builders()

# Show the total count, then a preview of the first 20 builder names.
dataset_count = len(all_datasets)
print("Total datasets available:", dataset_count)
dataset_preview = all_datasets[:20]
print("First 20 datasets:\n", dataset_preview)

Total datasets available: 438
First 20 datasets:
 ['abstract_reasoning', 'accentdb', 'aeslc', 'aflw2k3d', 'ag_news_subset', 'ai2_arc', 'ai2_arc_with_ir', 'ai2dcaption', 'aloha_mobile', 'amazon_us_reviews', 'anli', 'answer_equivalence', 'arc', 'asimov_dilemmas_auto_val', 'asimov_dilemmas_scifi_train', 'asimov_dilemmas_scifi_val', 'asimov_injury_val', 'asimov_multimodal_auto_val', 'asimov_multimodal_manual_val', 'asqa']


In [9]:
from tensorflow.keras.datasets import imdb #Import the imdb dataset

In [10]:
# Vocabulary cap passed to the loader: keep only the 10,000 most frequent words.
num_words = 10000

# load_data returns a (train, test) pair, each of which is an (inputs, labels) pair.
train_split, test_split = imdb.load_data(num_words=num_words)
x_train, y_train = train_split
x_test, y_test = test_split

In [14]:
# Report how many reviews each split contains.
for split_name, split_data in (("Training samples:", x_train), ("Test samples:", x_test)):
    print(split_name, len(split_data))

Training samples: 25000
Test samples: 25000


In [16]:
# Preprocessing: bring every review to the same length so the network
# receives fixed-size inputs.
maxlen = 500  # Maximum review length in words

# Apply identical padding settings to the training and the test reviews.
x_train, x_test = (
    pad_sequences(reviews, maxlen=maxlen) for reviews in (x_train, x_test)
)

In [17]:
# Build the Deep Neural Network model.
# The input shape is declared with an explicit Input layer rather than the
# Embedding `input_length` argument, which current Keras releases deprecate.
# The layer stack itself is unchanged.
model = Sequential([
    Input(shape=(maxlen,), dtype='int32'),  # One padded review: `maxlen` integer word indices
    Embedding(input_dim=num_words, output_dim=32),  # Word embedding layer (32-dim vector per word)
    Flatten(),  # Flatten to feed into dense layers
    Dense(128, activation='relu'),  # First hidden layer
    Dense(64, activation='relu'),   # Second hidden layer
    Dense(1, activation='sigmoid')  # Output layer for binary classification
])




In [18]:
# Configure training: Adam optimizer, binary cross-entropy loss (matches the
# single sigmoid output), and accuracy reported as the metric.
model.compile(
    optimizer='adam',
    loss='binary_crossentropy',
    metrics=['accuracy'],
)





In [19]:
# Train for 5 epochs in mini-batches of 64 reviews; the test split is passed
# as validation data, so its loss/accuracy is reported after every epoch.
# The call is deliberately left as the cell's last expression so the returned
# History object is displayed.
model.fit(
    x_train,
    y_train,
    epochs=5,
    batch_size=64,
    validation_data=(x_test, y_test),
)

Epoch 1/5






Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5


<keras.src.callbacks.History at 0x1d330e0f710>

In [20]:
# Evaluate the trained model on the test split.
# With the single 'accuracy' metric configured at compile time, evaluate()
# yields the pair [loss, accuracy].
test_metrics = model.evaluate(x_test, y_test)
loss, accuracy = test_metrics
print(f"\nTest Accuracy: {accuracy:.4f}")


Test Accuracy: 0.8512


In [24]:
# Show predictions on a few test samples: lookup tables for decoding reviews.

# Word -> index mapping used by the IMDB dataset.
word_index = imdb.get_word_index()

# Inverted mapping (index -> word), used to turn encoded reviews back into text.
reverse_word_index = dict((index, word) for word, index in word_index.items())

def decode_review(encoded_review):
    """Turn a sequence of encoded word indices back into a readable string.

    Indices below 3 (the reserved special tokens 0, 1 and 2) are skipped.
    Every other index is shifted down by 3 and looked up in
    ``reverse_word_index``; indices with no entry are rendered as '?'.

    Args:
        encoded_review: iterable of integer word indices.

    Returns:
        The decoded words joined by single spaces.
    """
    decoded_words = []
    for token_id in encoded_review:
        if token_id >= 3:  # skip the reserved special-token indices
            decoded_words.append(reverse_word_index.get(token_id - 3, '?'))
    return ' '.join(decoded_words)


In [47]:
# Print the model's verdict next to the true label for test reviews #6-#10.
print("\nSample Predictions:\n")

first, last = 5, 10  # zero-based range of x_test rows to display

# Score the selected reviews inside this cell so the loop does not rely on a
# `prediction` variable left over from an earlier (possibly deleted) cell.
# predict() returns one sigmoid output per review; ravel() flattens the
# (n, 1) result into n scalars.
sample_probs = model.predict(x_test[first:last], verbose=0).ravel()

for i, prediction in zip(range(first, last), sample_probs):
    review_text = decode_review(x_test[i])
    # Outputs of 0.5 or more are reported as the positive class.
    predicted_label = "Positive" if prediction >= 0.5 else "Negative"
    print(f"--- Review #{i+1} ---")
    print("Predicted:", predicted_label)
    print("Actual:   ", "Positive" if y_test[i] == 1 else "Negative")
    print("Review:", review_text[:300], "...")  # Print first 300 characters
    print("-" * 80)


Sample Predictions:

--- Review #6 ---
Predicted: Positive
Actual:    Positive
Review: i'm absolutely disgusted this movie isn't being sold all who love this movie should email disney and increase the demand for it they'd eventually have to sell it then i'd buy copies for everybody i know everything and everybody in this movie did a good job and i haven't figured out why disney hasn't ...
--------------------------------------------------------------------------------
--- Review #7 ---
Predicted: Positive
Actual:    Positive
Review: later used by frank in mr deeds goes to town and meet john but in no one individual is cast as a hero or heroine the story is told through a series of scenes that are combined in a special effect known as montage the editing and selection of short segments to produce a desired effect on the viewer d ...
--------------------------------------------------------------------------------
--- Review #8 ---
Predicted: Positive
Actual:    Negative
Review: the rich

In [48]:
# Alternative data source: if the dataset is available as a CSV file, replace
# the imdb.load_data() step with this cell.
# The code expects a 'review' text column and a 'sentiment' column holding
# the labels 'positive' / 'negative'.
# NOTE: this cell overwrites x_train, x_test, y_train, y_test and num_words
# from the IMDB section above.

# Imports needed only by this variant. train_test_split comes from
# scikit-learn, which this cell already relied on without importing it.
import pandas as pd
from sklearn.model_selection import train_test_split
from tensorflow.keras.preprocessing.text import Tokenizer

df = pd.read_csv('imdb_reviews.csv')
df['sentiment'] = df['sentiment'].map({'positive': 1, 'negative': 0})

# .map() yields NaN for any value outside the mapping; fail fast instead of
# silently passing NaN labels on to training.
if df['sentiment'].isna().any():
    raise ValueError("Column 'sentiment' contains values other than 'positive'/'negative'")

# Split the data into training and test sets (80% / 20%, fixed seed for reproducibility)
x_train, x_test, y_train, y_test = train_test_split(df['review'], df['sentiment'], test_size=0.2, random_state=42)

# Tokenize the text
num_words = 10000  # Vocabulary size
tokenizer = Tokenizer(num_words=num_words, oov_token='<OOV>')
tokenizer.fit_on_texts(x_train)  # Build the vocabulary from the training texts only

x_train_seq = tokenizer.texts_to_sequences(x_train)
x_test_seq = tokenizer.texts_to_sequences(x_test)

# After that, follow the same steps as above: pad x_train_seq / x_test_seq
# with pad_sequences, then build, compile, fit and evaluate the model.
