**The objective of this project is to build a simple language model (a small-scale, LLM-inspired text classifier) using TensorFlow and Keras for sentiment analysis. The model classifies text as positive or negative sentiment by leveraging deep learning techniques such as embeddings, attention mechanisms, and dropout for robust performance.**


In [None]:
!pip install tensorflow transformers




In [None]:
# Import necessary libraries
import tensorflow as tf  # TensorFlow for building the neural network model
from sklearn.model_selection import train_test_split  # For splitting the data into training and test sets
import numpy as np  # For numerical operations

# Example data (two sentences for each sentiment)
texts = [
    "I love this product!",  # Positive sentiment example
    "This is terrible.",     # Negative sentiment example
    "Fantastic service!",   # Positive sentiment example
    "I hate this experience."  # Negative sentiment example
]

# Corresponding labels: 1 for positive sentiment, 0 for negative sentiment
labels = [1, 0, 1, 0]

# Split the data into training and test sets
# `test_size=0.5`: 50% of the data is used for testing and 50% for training
# `random_state=42`: makes the shuffle (and therefore the split) reproducible
# `stratify=labels`: keeps the positive/negative ratio the same in both splits.
#   With only four samples an unstratified split can put both examples of one
#   class into the same split, leaving the model to train on a single class
#   and be evaluated only on the class it never saw.
X_train, X_test, y_train, y_test = train_test_split(
    texts,
    labels,
    test_size=0.5,
    random_state=42,
    stratify=labels,
)

# Define the model: averaged word embeddings feeding a small dense classifier,
# assembled layer by layer.
model = tf.keras.Sequential()

# Embedding layer for text
# `input_dim=100`: Vocabulary size (the maximum number of unique words considered)
# `output_dim=8`: Size of the dense vector that represents each word
# `input_length=5`: Length of the (padded or truncated) input sequences
model.add(tf.keras.layers.Embedding(input_dim=100, output_dim=8, input_length=5))

# Pooling layer: averages the embeddings across the sequence length,
# collapsing each sequence to a single vector
model.add(tf.keras.layers.GlobalAveragePooling1D())

# Hidden layer: fully connected, 10 neurons, ReLU to introduce non-linearity
model.add(tf.keras.layers.Dense(10, activation='relu'))

# Output layer: one neuron with a sigmoid, i.e. a probability between 0 and 1
# for binary classification
model.add(tf.keras.layers.Dense(1, activation='sigmoid'))

# Compile the model
# `loss='binary_crossentropy'`: Loss function for binary classification
# `optimizer='adam'`: Adam optimizer for adjusting weights
# `metrics=['accuracy']`: Accuracy metric to evaluate the model performance
model.compile(
    loss='binary_crossentropy',
    optimizer='adam',
    metrics=['accuracy'],
)

# Tokenize the data
# `num_words=100`: only the 100 most frequent word indices are kept
# `oov_token="<OOV>"`: words that were not seen while fitting are mapped to a
#   dedicated out-of-vocabulary token instead of being silently dropped, so
#   test / inference sentences keep their length and word positions.
tokenizer = tf.keras.preprocessing.text.Tokenizer(num_words=100, oov_token="<OOV>")
tokenizer.fit_on_texts(X_train)  # Fit on the training data only, so the test set stays unseen

# Convert text to sequences of integers
X_train_seq = tokenizer.texts_to_sequences(X_train)  # Training texts -> integer sequences
X_test_seq = tokenizer.texts_to_sequences(X_test)  # Test texts -> integer sequences

# Pad sequences
# `maxlen=5`: every sequence is padded or truncated to exactly 5 tokens
X_train_pad = tf.keras.preprocessing.sequence.pad_sequences(X_train_seq, maxlen=5)
X_test_pad = tf.keras.preprocessing.sequence.pad_sequences(X_test_seq, maxlen=5)

# Train the model
# The label lists are converted to NumPy arrays once and reused below.
y_train_arr = np.array(y_train)
y_test_arr = np.array(y_test)

# `X_train_pad` / `y_train_arr`: padded training sequences and their labels
# `epochs=5`: number of passes over the training data
# `validation_data`: held-out data evaluated after each epoch
history = model.fit(
    X_train_pad,
    y_train_arr,
    epochs=5,
    validation_data=(X_test_pad, y_test_arr),
)

# Evaluate the model on the test set
# `evaluate` returns the loss followed by the accuracy on the test data
results = model.evaluate(X_test_pad, y_test_arr)
test_loss = results[0]
test_accuracy = results[1]
print(f"Test Loss: {test_loss}")  # Print the test loss
print(f"Test Accuracy: {test_accuracy}")  # Print the test accuracy

# Define a function to predict sentiment
def predict_sentiment(text, tokenizer, model, max_len):
    """Classify a single piece of text as 'Positive' or 'Negative'.

    Args:
        text: Raw input string to classify.
        tokenizer: Fitted tokenizer exposing `texts_to_sequences`.
        model: Trained model exposing `predict`; its result is indexed as
            `[row][column]` and the first entry is read as the probability
            of the positive class.
        max_len: Length the integer sequence is padded/truncated to. This
            should match the sequence length the model was trained with.

    Returns:
        'Positive' if the predicted probability is greater than 0.5,
        otherwise 'Negative'.
    """
    seq = tokenizer.texts_to_sequences([text])  # Convert text to sequence of integers
    padded_seq = tf.keras.preprocessing.sequence.pad_sequences(seq, maxlen=max_len)  # Pad the sequence
    prediction = model.predict(padded_seq)  # One row per input text, one score per row
    # Pull out the single scalar explicitly instead of comparing a one-element
    # array to the threshold and relying on its truthiness.
    probability = float(prediction[0][0])
    return 'Positive' if probability > 0.5 else 'Negative'  # Convert probability to sentiment label

# Example usage: classify a sentence that is not part of the example data
text = "I am happy with the service."
sentiment = predict_sentiment(text, tokenizer, model, 5)  # 5 matches the padding length used above
print(f"Sentiment: {sentiment}")


Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
Test Loss: 0.7148082256317139
Test Accuracy: 0.0
Sentiment: Positive


In [None]:
import tensorflow as tf
import numpy as np
from tensorflow.keras.layers import Input, Embedding, MultiHeadAttention, LayerNormalization, Dropout, Dense, GlobalAveragePooling1D
from tensorflow.keras.models import Model
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.callbacks import EarlyStopping
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences

# Model parameters
# Defined before preprocessing so the tokenizer, the padding and the model all
# read the same values instead of repeating the literals 10000 and 100.
vocab_size = 10000    # Size of the vocabulary the tokenizer keeps / embedding input_dim
embedding_dim = 64    # Size of each token embedding
num_heads = 2         # Number of attention heads
ff_dim = 128          # Width of the feed-forward layer
dropout_rate = 0.3    # Dropout probability
max_len = 100         # Every sequence is padded/truncated to this length

# Sample data
sentences = [
    "I am not sure about this product.",
    "Absolutely fantastic!",
    "It’s an okay product.",
    "I would not recommend this to anyone.",
    "Very satisfied with my purchase."
]

# Labels as a NumPy array (0: Negative, 1: Positive), created in one step so
# `labels` is never rebound from a list to an array.
labels = np.array([0, 1, 0, 0, 1])

# Tokenizer for text preprocessing
tokenizer = Tokenizer(num_words=vocab_size)
tokenizer.fit_on_texts(sentences)
sequences = tokenizer.texts_to_sequences(sentences)
padded_sequences = pad_sequences(sequences, maxlen=max_len)

# Define the model: one Transformer-encoder-style block (self-attention +
# feed-forward, each with a residual connection and layer normalization)
# followed by average pooling and a sigmoid classifier.
inputs = Input(shape=(max_len,))
embedding_layer = Embedding(input_dim=vocab_size, output_dim=embedding_dim)(inputs)

# Self-attention sub-layer: the embeddings attend to themselves (query == value)
attention_output = MultiHeadAttention(num_heads=num_heads, key_dim=embedding_dim)(embedding_layer, embedding_layer)
x = LayerNormalization()(attention_output + embedding_layer)  # Residual connection around attention
x = Dropout(dropout_rate)(x)  # Dropout layer to prevent overfitting

# Feed-forward sub-layer: expand to ff_dim, then project back to embedding_dim
# so the result can be added to the sub-layer's input. Previously the residual
# added the dropped-out feed-forward output to itself, never to its input.
ffn = Dense(ff_dim, activation='relu')(x)
ffn = Dense(embedding_dim)(ffn)
ffn = Dropout(dropout_rate)(ffn)  # Dropout layer to prevent overfitting
x = LayerNormalization()(x + ffn)  # Residual connection around the feed-forward sub-layer

# Classifier head: average over the sequence, then a single sigmoid unit
x = GlobalAveragePooling1D()(x)
outputs = Dense(1, activation='sigmoid')(x)

# Build the Model from the graph above. Without this line, the following
# `model.compile(...)` / `model.fit(...)` would act on whatever `model` is
# left over from an earlier cell instead of this architecture.
model = Model(inputs=inputs, outputs=outputs)

# Training configuration: binary cross-entropy loss, Adam with a learning rate
# of 1e-4, and accuracy reported as the evaluation metric.
loss_fn = 'binary_crossentropy'
optimizer = Adam(learning_rate=1e-4)
model.compile(
    loss=loss_fn,
    optimizer=optimizer,
    metrics=['accuracy'],
)

# Show the layer-by-layer architecture and parameter counts
model.summary()

# Early stopping: watch the validation loss, stop after 2 epochs without
# improvement, and roll the weights back to the best epoch.
early_stopping = EarlyStopping(
    monitor='val_loss',
    patience=2,
    restore_best_weights=True,
)

# Fit for up to 10 epochs in batches of 2, holding out 20% of the data for
# validation so the early-stopping callback has a `val_loss` to monitor.
history = model.fit(
    x=padded_sequences,
    y=labels,
    batch_size=2,
    epochs=10,
    validation_split=0.2,
    callbacks=[early_stopping],
)

# Test the model
# NOTE: these are the same sentences the model was fitted on, so this is a
# sanity check of the trained model, not a measure of generalization.
test_sentences = [
    "I am not sure about this product.",
    "Absolutely fantastic!",
    "It’s an okay product.",
    "I would not recommend this to anyone.",
    "Very satisfied with my purchase."
]
# Preprocess exactly like the training data: same tokenizer, same padded length
test_sequences = tokenizer.texts_to_sequences(test_sentences)
test_padded_sequences = pad_sequences(test_sequences, maxlen=max_len)

predictions = model.predict(test_padded_sequences)
for sentence, row in zip(test_sentences, predictions):
    # Each row holds the single sigmoid output for one sentence; take that
    # scalar explicitly rather than comparing a one-element array to 0.5.
    prediction = float(row[0])
    sentiment = "Positive" if prediction > 0.5 else "Negative"
    print(f"Sentence: '{sentence}' - Sentiment: {sentiment}")


Model: "model_3"
__________________________________________________________________________________________________
 Layer (type)                Output Shape                 Param #   Connected to                  
 input_4 (InputLayer)        [(None, 100)]                0         []                            
                                                                                                  
 embedding_6 (Embedding)     (None, 100, 64)              640000    ['input_4[0][0]']             
                                                                                                  
 multi_head_attention_4 (Mu  (None, 100, 64)              33216     ['embedding_6[0][0]',         
 ltiHeadAttention)                                                   'embedding_6[0][0]']         
                                                                                                  
 tf.__operators__.add_4 (TF  (None, 100, 64)              0         ['multi_head_attention_4