In [2]:
import numpy as np
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Embedding, LSTM, Dense, Dropout
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.preprocessing.text import Tokenizer
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelBinarizer
import gensim.downloader as api

# Load the Gensim pre-trained word2vec model
gensim_model = api.load("word2vec-google-news-300")

# Sample dataset for demonstration
texts = [
    "I love machine learning and NLP",
    "Deep learning models are powerful",
    "I enjoy using TensorFlow for deep learning",
    "Word embeddings can capture semantic meanings",
    "Natural language processing is fascinating"
]
labels = ["positive", "positive", "positive", "neutral", "neutral"]

# Encode the labels
lb = LabelBinarizer()
labels = lb.fit_transform(labels)

# Tokenize the texts
tokenizer = Tokenizer()
tokenizer.fit_on_texts(texts)
sequences = tokenizer.texts_to_sequences(texts)

# Pad sequences
max_sequence_length = max(len(seq) for seq in sequences)
X_data = pad_sequences(sequences, maxlen=max_sequence_length)

# Split the dataset
X_train, X_val, y_train, y_val = train_test_split(X_data, labels, test_size=0.2, random_state=42)

# Prepare the embedding matrix
vocab_size = len(tokenizer.word_index) + 1
embedding_dim = gensim_model.vector_size
embedding_matrix = np.zeros((vocab_size, embedding_dim))

for word, i in tokenizer.word_index.items():
    if word in gensim_model.key_to_index:
        embedding_matrix[i] = gensim_model[word]

# Define the TensorFlow model with the Gensim embeddings
embedding_layer = Embedding(
    input_dim=vocab_size,
    output_dim=embedding_dim,
    weights=[embedding_matrix],
    input_length=max_sequence_length,
    trainable=False  # Set to True if you want to fine-tune the embeddings
)

model = Sequential([
    embedding_layer,
    LSTM(128, return_sequences=True),
    Dropout(0.2),
    LSTM(64),
    Dense(1, activation="sigmoid")  # Sigmoid for binary classification
])

model.compile(optimizer="adam", loss="binary_crossentropy", metrics=["accuracy"])

# Display the model summary
model.summary()

# Train the model
history = model.fit(X_train, y_train, epochs=10, batch_size=2, validation_data=(X_val, y_val))

# Evaluate the model on validation data
val_loss, val_accuracy = model.evaluate(X_val, y_val)
print(f"Validation Accuracy: {val_accuracy:.2f}")






Epoch 1/10
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 1s/step - accuracy: 0.6667 - loss: 0.6946 - val_accuracy: 0.0000e+00 - val_loss: 0.7095
Epoch 2/10
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 43ms/step - accuracy: 0.6667 - loss: 0.6404 - val_accuracy: 1.0000 - val_loss: 0.6862
Epoch 3/10
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 43ms/step - accuracy: 1.0000 - loss: 0.5808 - val_accuracy: 1.0000 - val_loss: 0.6603
Epoch 4/10
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 44ms/step - accuracy: 1.0000 - loss: 0.5118 - val_accuracy: 1.0000 - val_loss: 0.6289
Epoch 5/10
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 43ms/step - accuracy: 1.0000 - loss: 0.4234 - val_accuracy: 1.0000 - val_loss: 0.5853
Epoch 6/10
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 40ms/step - accuracy: 1.0000 - loss: 0.3316 - val_accuracy: 1.0000 - val_loss: 0.5437
Epoch 7/10
[1m2/2[0m [32m━━━━━━━━━━━━━━━━

In [3]:
# Function to preprocess the input text and make predictions
def predict_sentiment(text):
    # Convert text to sequence
    sequence = tokenizer.texts_to_sequences([text])
    # Pad the sequence to match the input shape of the model
    padded_sequence = pad_sequences(sequence, maxlen=max_sequence_length)
    # Get the prediction
    prediction = model.predict(padded_sequence)[0][0]
    
    # Interpret the result
    if prediction > 0.5:
        return "positive", prediction
    else:
        return "neutral", prediction

# Example inputs to test the function
test_sentences = [
    "I really enjoy learning about AI!",
    "This project was very challenging.",
    "I am not sure about this approach."
]

# Make predictions for the test inputs
for sentence in test_sentences:
    sentiment, confidence = predict_sentiment(sentence)
    print(f"Sentence: '{sentence}'")
    print(f"Predicted Sentiment: {sentiment}, Confidence: {confidence:.2f}")
    print("-" * 50)


[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 309ms/step
Sentence: 'I really enjoy learning about AI!'
Predicted Sentiment: positive, Confidence: 0.86
--------------------------------------------------
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 27ms/step
Sentence: 'This project was very challenging.'
Predicted Sentiment: neutral, Confidence: 0.48
--------------------------------------------------
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 43ms/step
Sentence: 'I am not sure about this approach.'
Predicted Sentiment: positive, Confidence: 0.55
--------------------------------------------------
