<a href="https://colab.research.google.com/github/BilawalBaloch/Tensorflow.NN/blob/main/NLP.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences
import numpy as np


In [3]:

# --- 1. Prepare a simple, hardcoded dataset ---
# Ten short sentences that make up the toy sentiment corpus. Their order
# matters: each one is paired by position with a label defined further down.
sentences = [
    "I love my dog",
    "I love my cat",
    "You are a great person",
    "I hate my dog",
    "You are a terrible person",
    "This is an amazing product",
    "The food was delicious",
    "The movie was terrible",
    "I feel so sad today",
    "It was a wonderful day",
]

In [4]:
# Corresponding labels (0 for negative, 1 for positive), one per entry in
# `sentences` and in the same order.
labels = [1, 1, 1, 0, 0, 1, 1, 0, 0, 1]

# Fail fast if the corpus and its labels drift out of step; nothing else in
# this cell would notice a missing or extra label.
assert len(sentences) == len(labels), (
    f"Expected one label per sentence, got {len(sentences)} sentences "
    f"and {len(labels)} labels"
)

# Convert lists to NumPy arrays
training_sentences = np.array(sentences)
training_labels = np.array(labels)

# --- 2. Tokenize the text data ---
# The tokenizer turns words into integer indices.
# `vocab_size` is passed as `num_words` to limit the vocabulary and is reused
# later as the Embedding layer's input dimension, so the two stay in sync.
vocab_size = 100
oov_token = "<oov>"  # Out-of-vocabulary token: stands in for unseen words
tokenizer = Tokenizer(num_words=vocab_size, oov_token=oov_token)
# Build the word -> index mapping from the training sentences only.
tokenizer.fit_on_texts(training_sentences)

In [5]:
# Look up the fitted vocabulary: a dictionary mapping each word to its integer
# index. Only the first ten entries are shown to keep the output short.
word_index = tokenizer.word_index
first_ten_entries = {word: index for word, index in list(word_index.items())[:10]}
print(f"Word Index (first 10): {first_ten_entries}\n")

# Encode every training sentence as a list of word indices.
sequences = tokenizer.texts_to_sequences(training_sentences)

# --- 3. Pad the sequences to ensure uniform length ---
# The network needs every input to have the same shape, so each sequence is
# brought to `max_length` entries.
max_length = 8
padding_type = "post"  # 'post' appends the zero padding after the real tokens
padded_sequences = pad_sequences(
    sequences,
    maxlen=max_length,
    padding=padding_type,
)

print(f"Padded Sequences:\n{padded_sequences}\n")

Word Index (first 10): {'<oov>': 1, 'i': 2, 'my': 3, 'a': 4, 'was': 5, 'love': 6, 'dog': 7, 'you': 8, 'are': 9, 'person': 10}

Padded Sequences:
[[ 2  6  3  7  0  0  0  0]
 [ 2  6  3 13  0  0  0  0]
 [ 8  9  4 14 10  0  0  0]
 [ 2 15  3  7  0  0  0  0]
 [ 8  9  4 11 10  0  0  0]
 [16 17 18 19 20  0  0  0]
 [12 21  5 22  0  0  0  0]
 [12 23  5 11  0  0  0  0]
 [ 2 24 25 26 27  0  0  0]
 [28  5  4 29 30  0  0  0]]



In [6]:
# --- 4. Build a simple neural network model ---
# Seed the Python, NumPy and backend random number generators so that weight
# initialisation (and the training run that follows) is repeatable after a
# kernel restart, as far as the backend allows.
SEED = 42
keras.utils.set_random_seed(SEED)

embedding_dim = 16
model = keras.Sequential([
    # Declare the input shape up front. This replaces the Embedding layer's
    # deprecated `input_length` argument and builds the model immediately, so
    # `model.summary()` can report output shapes and parameter counts.
    keras.Input(shape=(max_length,), dtype="int32"),
    # The Embedding layer learns a dense vector representation for each word
    keras.layers.Embedding(vocab_size, embedding_dim),
    # GlobalAveragePooling1D averages the vectors, reducing each sequence to a single vector
    keras.layers.GlobalAveragePooling1D(),
    # The Dense layer is a standard fully connected layer
    keras.layers.Dense(16, activation='relu'),
    # The final Dense layer with a single neuron and sigmoid activation for binary classification
    keras.layers.Dense(1, activation='sigmoid')
])




In [7]:
# Show the layer-by-layer architecture of the model.
model.summary()

# --- 5. Compile and train the model ---
# Binary cross-entropy matches the single sigmoid output unit; accuracy is
# tracked alongside the loss for each epoch.
model.compile(
    optimizer='adam',
    loss='binary_crossentropy',
    metrics=['accuracy'],
)
num_epochs = 100
history = model.fit(
    x=padded_sequences,
    y=training_labels,
    epochs=num_epochs,
    verbose=2,  # compact per-epoch log lines
)

# --- 6. Use the trained model to make predictions ---
print("\n--- Making Predictions ---")
# Unseen sentences to score once training has finished.
test_sentences = [
    "I am very happy today",
    "I'm feeling down",
    "that was great",
]


Epoch 1/100
1/1 - 1s - 1s/step - accuracy: 0.6000 - loss: 0.6924
Epoch 2/100
1/1 - 0s - 46ms/step - accuracy: 0.6000 - loss: 0.6917
Epoch 3/100
1/1 - 0s - 42ms/step - accuracy: 0.6000 - loss: 0.6910
Epoch 4/100
1/1 - 0s - 74ms/step - accuracy: 0.6000 - loss: 0.6904
Epoch 5/100
1/1 - 0s - 41ms/step - accuracy: 0.6000 - loss: 0.6897
Epoch 6/100
1/1 - 0s - 61ms/step - accuracy: 0.6000 - loss: 0.6891
Epoch 7/100
1/1 - 0s - 59ms/step - accuracy: 0.6000 - loss: 0.6884
Epoch 8/100
1/1 - 0s - 41ms/step - accuracy: 0.6000 - loss: 0.6877
Epoch 9/100
1/1 - 0s - 58ms/step - accuracy: 0.6000 - loss: 0.6869
Epoch 10/100
1/1 - 0s - 42ms/step - accuracy: 0.6000 - loss: 0.6862
Epoch 11/100
1/1 - 0s - 60ms/step - accuracy: 0.6000 - loss: 0.6855
Epoch 12/100
1/1 - 0s - 43ms/step - accuracy: 0.6000 - loss: 0.6849
Epoch 13/100
1/1 - 0s - 61ms/step - accuracy: 0.6000 - loss: 0.6842
Epoch 14/100
1/1 - 0s - 58ms/step - accuracy: 0.6000 - loss: 0.6835
Epoch 15/100
1/1 - 0s - 58ms/step - accuracy: 0.6000 - loss

In [8]:
# Run the new sentences through the same tokenizer and padding settings that
# were used for the training data.
test_sequences = tokenizer.texts_to_sequences(test_sentences)
test_padded_sequences = pad_sequences(
    test_sequences,
    maxlen=max_length,
    padding=padding_type,
)

# The sigmoid output gives one score per sentence between 0 and 1; the closer
# to 1, the more positive the predicted sentiment.
predictions = model.predict(test_padded_sequences)

# Report each sentence with its score and the label implied by a 0.5 cut-off.
for sentence, score in zip(test_sentences, predictions[:, 0]):
    if score > 0.5:
        sentiment = "Positive"
    else:
        sentiment = "Negative"
    print(f"'{sentence}' -> Prediction: {score:.4f}, Sentiment: {sentiment}")

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 74ms/step
'I am very happy today' -> Prediction: 0.5680, Sentiment: Positive
'I'm feeling down' -> Prediction: 0.6485, Sentiment: Positive
'that was great' -> Prediction: 0.6929, Sentiment: Positive
