In [None]:
import tensorflow as tf
from datasets import load_dataset
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences
import numpy as np


# Load Dataset

In [None]:

# Load the "sentiment" configuration of the tweet_eval dataset.
dataset = load_dataset("tweet_eval", "sentiment")

# Bind each split once, then unpack its text and label columns.
train_split = dataset['train']
test_split = dataset['test']

train_texts, train_labels = train_split['text'], train_split['label']
test_texts, test_labels = test_split['text'], test_split['label']

# Preprocess (Tokenize & Pad)

In [None]:
# Tokenization / padding settings shared by the cells below.
vocab_size = 10000     # passed to Tokenizer(num_words=...) and to the Embedding layer
max_len = 100          # every sequence is padded or truncated to this many token ids
oov_token = "<OOV>"    # stand-in token for words outside the fitted vocabulary

# The vocabulary is fitted on the training texts only.
tokenizer = Tokenizer(num_words=vocab_size, oov_token=oov_token)
tokenizer.fit_on_texts(train_texts)


def _to_padded_ids(texts):
    """Turn raw texts into a fixed-width array of token ids (post-padded, post-truncated)."""
    sequences = tokenizer.texts_to_sequences(texts)
    return pad_sequences(sequences, maxlen=max_len, padding='post', truncating='post')


x_train = _to_padded_ids(train_texts)
x_test = _to_padded_ids(test_texts)

# Labels as NumPy arrays so they can be fed straight to model.fit / model.evaluate.
y_train, y_test = np.array(train_labels), np.array(test_labels)



# Build LSTM Model

In [None]:
# Bidirectional-LSTM classifier over the padded token-id sequences.
model = tf.keras.Sequential([
    # mask_zero=True: the inputs are post-padded with 0 (pad_sequences' fill value,
    # which the Keras Tokenizer never assigns to a word), so the recurrent layer
    # must skip those timesteps. Without the mask the forward LSTM's final state
    # is computed after a long run of padding rather than at the last real token.
    # NOTE(review): a row that tokenizes to nothing is fully masked; confirm your
    # backend handles fully-masked sequences if such rows occur in the data.
    tf.keras.layers.Embedding(input_dim=vocab_size, output_dim=64, mask_zero=True),
    tf.keras.layers.Bidirectional(tf.keras.layers.LSTM(64)),
    tf.keras.layers.Dense(64, activation='relu'),
    tf.keras.layers.Dropout(0.5),
    # Three output units with softmax: one probability per class.
    tf.keras.layers.Dense(3, activation='softmax'),
])


# Compile and Train

In [None]:
# Sparse categorical cross-entropy: labels are passed as class indices, not one-hot vectors.
model.compile(loss='sparse_categorical_crossentropy', optimizer='adam', metrics=['accuracy'])

# Monitor training on the dataset's dedicated validation split instead of the
# test split, so the final test evaluation remains an unbiased hold-out score.
# The split is encoded here with the same tokenizer and padding settings that
# were used for the training data.
val_split = dataset['validation']
x_val = pad_sequences(
    tokenizer.texts_to_sequences(val_split['text']),
    maxlen=max_len,
    padding='post',
    truncating='post',
)
y_val = np.array(val_split['label'])

history = model.fit(
    x_train,
    y_train,
    epochs=5,
    batch_size=64,
    validation_data=(x_val, y_val),
)


# Evaluate

In [None]:
# Score the model on the test arrays. The model is compiled with a single
# 'accuracy' metric, so the result unpacks into exactly (loss, accuracy).
test_metrics = model.evaluate(x_test, y_test)
loss, acc = test_metrics
print(f"Test Accuracy: {acc * 100:.2f}%")


In [None]:
# Interactive single-text prediction.
sample_text = input("Enter the text: ")
sample_seq = tokenizer.texts_to_sequences([sample_text])

# Empty or punctuation-only input yields no token ids; the model would then be
# classifying pure padding, so tell the user instead of presenting it silently.
if not sample_seq[0]:
    print("Input contains no tokens after preprocessing; the prediction is not meaningful.")

# truncating='post' mirrors the training-time preprocessing. pad_sequences
# defaults to truncating='pre', which would keep the END of an over-long input
# while the model was trained on the START of over-long texts.
sample_pad = pad_sequences(sample_seq, maxlen=max_len, padding='post', truncating='post')

pred = model.predict(sample_pad)
class_names = ['negative', 'neutral', 'positive']

# pred holds one row of class probabilities per input text; take the argmax of
# the single row explicitly rather than of the flattened batch.
predicted_index = int(np.argmax(pred[0]))
print(f"{sample_text} => {class_names[predicted_index]}")
