In [4]:
from dataclasses import dataclass
from typing import List, Tuple, Any

import numpy as np
import pandas as pd
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Embedding, LSTM, Dense
from sklearn.model_selection import train_test_split

@dataclass
class ModelConfig:
    max_words: int = 1000
    max_len: int = 100
    embedding_dim: int = 32
    lstm_units_1: int = 64
    lstm_units_2: int = 32
    dense_units: int = 16
    test_size: float = 0.2
    random_state: int = 42

def preprocess_text_data(texts: List[str], labels: List[int], config: ModelConfig) -> Tuple[np.ndarray, np.ndarray]:

    tokenizer = Tokenizer(num_words=config.max_words)
    tokenizer.fit_on_texts(texts)
    sequences = tokenizer.texts_to_sequences(texts)
    X = pad_sequences(sequences, maxlen=config.max_len)
    y = np.array(labels)
    return X, y

def create_sentiment_model(config: ModelConfig) -> Sequential:

    model = Sequential([
        Embedding(config.max_words, config.embedding_dim, input_length=config.max_len),
        LSTM(config.lstm_units_1, return_sequences=True),
        LSTM(config.lstm_units_2),
        Dense(config.dense_units, activation='relu'),
        Dense(1, activation='sigmoid')
    ])
    
    model.compile(
        optimizer='adam',
        loss='binary_crossentropy',
        metrics=['accuracy']
    )
    return model

def train_sentiment_model(texts: List[str], labels: List[int], config: ModelConfig) -> Tuple[Sequential, Any]:

    # Preprocess data
    X, y = preprocess_text_data(texts, labels, config)
    
    # Split data
    X_train, X_test, y_train, y_test = train_test_split(
        X, y, 
        test_size=config.test_size, 
        random_state=config.random_state
    )
    
    # Create and train model
    model = create_sentiment_model(config)
    history = model.fit(
        X_train, y_train,
        epochs=10,
        batch_size=32,
        validation_split=0.2
    )
    
    # Evaluate model
    loss, accuracy = model.evaluate(X_test, y_test)
    print(f"Test accuracy: {accuracy:.4f}")
    print(f"Test loss: {loss:.4f}")
    
    return model, history

In [5]:
# Sample usage
config = ModelConfig()
texts =[
        "This movie is great!",
        "Terrible experience",
        "I loved it",
        "Very disappointing",
        "Amazing film, highly recommended",
        "Waste of time",
        "Outstanding performance",
        "Boring and predictable",
        "Best movie ever",
        "Don't watch this"
    ]
labels = [1, 0, 1, 0,1,0,1,0,1,0]
model, history = train_sentiment_model(texts, labels, config)
model 
history


Epoch 1/10


[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 5s/step - accuracy: 0.5000 - loss: 0.6930 - val_accuracy: 0.5000 - val_loss: 0.6931
Epoch 2/10
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 175ms/step - accuracy: 0.5000 - loss: 0.6926 - val_accuracy: 0.5000 - val_loss: 0.6931
Epoch 3/10
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 170ms/step - accuracy: 0.5000 - loss: 0.6929 - val_accuracy: 0.5000 - val_loss: 0.6931
Epoch 4/10
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 158ms/step - accuracy: 0.5000 - loss: 0.6920 - val_accuracy: 0.5000 - val_loss: 0.6930
Epoch 5/10
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 159ms/step - accuracy: 0.5000 - loss: 0.6916 - val_accuracy: 0.5000 - val_loss: 0.6930
Epoch 6/10
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 160ms/step - accuracy: 0.5000 - loss: 0.6911 - val_accuracy: 0.5000 - val_loss: 0.6929
Epoch 7/10
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[

<keras.src.callbacks.history.History at 0x26a189cf020>

In [3]:
test_texts =["This movie is great!", "Terrible experience", "I loved it", "Very disappointing"]
    # Prepare test data
tokenizer = Tokenizer(num_words=config.max_words)
text_tokenizer = tokenizer.fit_on_texts(test_texts)
test_sequences = tokenizer.texts_to_sequences(test_texts)
test_data = pad_sequences(test_sequences, maxlen=config.max_len)
    
    # Make predictions
predictions = model.predict(test_data)
    
print("\nPredictions for new texts:")
for text, pred in zip(test_texts, predictions):
    sentiment = "Positive" if pred > 0.5 else "Negative"
    confidence = pred if pred > 0.5 else 1 - pred
    print(f"Text: '{text}'")
    print(f"Sentiment: {sentiment} (confidence: {float(confidence[0]):.4f})\n")


[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 350ms/step

Predictions for new texts:
Text: 'This movie is great!'
Sentiment: Positive (confidence: 0.5027)

Text: 'Terrible experience'
Sentiment: Negative (confidence: 0.5032)

Text: 'I loved it'
Sentiment: Positive (confidence: 0.5012)

Text: 'Very disappointing'
Sentiment: Negative (confidence: 0.5041)

