In [17]:
#Install Required Libraries
!pip install tensorflow keras nltk




In [19]:
#Load and Preprocess "Moby Dick" Text
# All imports are grouped first (and each module is imported once) so the cell's
# dependencies are visible before any download or file access happens.
import nltk
import numpy as np
import tensorflow as tf
from nltk.tokenize import sent_tokenize
from tensorflow.keras.layers import Embedding, LSTM, Dense
from tensorflow.keras.models import Sequential
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.preprocessing.text import Tokenizer

# Fetch the sentence-tokenizer data. Both resource names used by the original
# cell are still requested. force=True is intentionally not passed, so a package
# that is already installed and up to date is not downloaded again on re-runs.
nltk.download("punkt_tab")
nltk.download("punkt")

# Load Moby Dick text
# NOTE(review): absolute path under /content — presumably a Colab upload; adjust
# when running elsewhere.
file_path = "/content/moby_dick_four_chapters.txt"
with open(file_path, "r", encoding="utf-8") as file:
    text = file.read()

# Tokenize text into sentences
sentences = sent_tokenize(text)

# Print sample sentences
print("Sample Sentences:", sentences[:5])


[nltk_data] Downloading package punkt_tab to /root/nltk_data...
[nltk_data]   Unzipping tokenizers/punkt_tab.zip.
[nltk_data] Downloading package punkt to /root/nltk_data...
[nltk_data]   Unzipping tokenizers/punkt.zip.


Sample Sentences: ['Call me Ishmael.', 'Some years ago--never mind how long\nprecisely--having little or no money in my purse, and nothing\nparticular to interest me on shore, I thought I would sail about a\nlittle and see the watery part of the world.', 'It is a way I have of\ndriving off the spleen and regulating the circulation.', "Whenever I\nfind myself growing grim about the mouth; whenever it is a damp,\ndrizzly November in my soul; whenever I find myself involuntarily\npausing before coffin warehouses, and bringing up the rear of every\nfuneral I meet; and especially whenever my hypos get such an upper\nhand of me, that it requires a strong moral principle to prevent me\nfrom deliberately stepping into the street, and methodically knocking\npeople's hats off--then, I account it high time to get to sea as soon\nas I can.", 'This is my substitute for pistol and ball.']


In [20]:
#Tokenize and Prepare the Data
# Fixed length that every encoded sentence is brought to.
max_length = 50

# Fit a word index on the sentences; the vocabulary is capped through num_words
# and words outside it are encoded with the "<OOV>" token.
tokenizer = Tokenizer(oov_token="<OOV>", num_words=5000)
tokenizer.fit_on_texts(sentences)

# Encode each sentence as a list of integer word ids.
sequences = tokenizer.texts_to_sequences(sentences)

# Bring every sequence to max_length; shorter ones get zeros appended at the end
# (padding="post").
padded_sequences = pad_sequences(
    sequences,
    padding="post",
    maxlen=max_length,
)

# Show the first sentence before and after padding.
print("\nTokenized Example:", sequences[0])
print("Padded Example:", padded_sequences[0])



Tokenized Example: [559, 15, 221]
Padded Example: [559  15 221   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0
   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0
   0   0   0   0   0   0   0   0   0   0   0   0   0   0]


In [21]:
#Define and Train the Sentiment Analysis Model
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Embedding, LSTM, Dense

# Seed the Python, NumPy and TensorFlow random generators so that the placeholder
# labels and the model's random initialisation are repeatable when the notebook
# is re-run from a fresh kernel.
SEED = 42
tf.keras.utils.set_random_seed(SEED)

# Generate fake labels (0 = Negative, 1 = Positive)
# WARNING: these labels are random noise with no relation to the text. The model
# can only memorise them, so the training accuracy below and any prediction made
# with this model carry no real sentiment information. Replace with real
# sentiment labels from a labelled dataset before drawing conclusions.
labels = np.random.randint(0, 2, len(padded_sequences))

# Build the LSTM Model
model = Sequential([
    # input_dim is taken from the fitted tokenizer so the embedding table always
    # matches the vocabulary cap instead of repeating it as a magic number.
    # The deprecated input_length argument is omitted; the sequence length is
    # inferred from the data passed to fit().
    Embedding(input_dim=tokenizer.num_words, output_dim=64),
    LSTM(64, return_sequences=True),  # emits the full sequence for the next LSTM
    LSTM(32),
    Dense(10, activation="relu"),
    Dense(1, activation="sigmoid")  # Sigmoid for binary sentiment classification
])

# Compile the Model
model.compile(loss="binary_crossentropy", optimizer="adam", metrics=["accuracy"])

# Train the Model
model.fit(padded_sequences, labels, epochs=5, batch_size=8)

print("\nModel Training Completed!")




Epoch 1/5
61/61 ━━━━━━━━━━━━━━━━━━━━ 8s 40ms/step - accuracy: 0.5311 - loss: 0.6939
Epoch 2/5
61/61 ━━━━━━━━━━━━━━━━━━━━ 4s 59ms/step - accuracy: 0.5522 - loss: 0.6927
Epoch 3/5
61/61 ━━━━━━━━━━━━━━━━━━━━ 3s 41ms/step - accuracy: 0.5631 - loss: 0.6893
Epoch 4/5
61/61 ━━━━━━━━━━━━━━━━━━━━ 2s 39ms/step - accuracy: 0.6297 - loss: 0.6585
Epoch 5/5
61/61 ━━━━━━━━━━━━━━━━━━━━ 2s 40ms/step - accuracy: 0.6210 - loss: 0.6320

Model Training Completed!


In [22]:
#Predict Sentiment for New Sentences
# Sentences to score
test_sentences = [
    "The great white whale is a magnificent creature.",
    "I feel terrified and lost at sea."
]

# Encode with the already-fitted tokenizer and pad the same way as the training data
test_sequences = tokenizer.texts_to_sequences(test_sentences)
test_padded = pad_sequences(test_sequences, padding="post", maxlen=max_length)

# Run the model on the padded batch
predictions = model.predict(test_padded)

# Report each sentence with its label: outputs above 0.5 count as Positive,
# everything else as Negative
for sentence, score in zip(test_sentences, predictions):
    if score > 0.5:
        sentiment = "Positive"
    else:
        sentiment = "Negative"
    print(f"\nSentence: {sentence}")
    print(f"Predicted Sentiment: {sentiment}")


1/1 ━━━━━━━━━━━━━━━━━━━━ 1s 507ms/step

Sentence: The great white whale is a magnificent creature.
Predicted Sentiment: Positive

Sentence: I feel terrified and lost at sea.
Predicted Sentiment: Positive
