In [1]:
import os
import urllib.request
import zipfile
import numpy as np

# Define the URL for GloVe embeddings and the target file path
glove_url = "http://nlp.stanford.edu/data/glove.6B.zip"
glove_zip = "glove.6B.zip"
glove_file = "glove.6B.50d.txt"

# Fetch and unpack only what is missing so that re-running the cell is cheap:
#   * the archive is downloaded only when neither the extracted file nor the
#     archive itself is already on disk;
#   * the download goes to a temporary name first and is renamed on success,
#     so an interrupted download is never mistaken for a complete archive;
#   * only the single member that is loaded below gets extracted.
if not os.path.exists(glove_file):
    if not os.path.exists(glove_zip):
        print("Downloading GloVe embeddings...")
        partial_zip = glove_zip + ".part"
        urllib.request.urlretrieve(glove_url, partial_zip)
        os.replace(partial_zip, glove_zip)
        print("Download completed. Extracting files...")
    else:
        print("Archive already present. Extracting files...")

    # Extract just the file we need instead of every member of the archive
    with zipfile.ZipFile(glove_zip, 'r') as zip_ref:
        zip_ref.extract(glove_file)
    print("Extraction completed.")

# Load the GloVe embeddings.
# Each line is "<word> <v1> <v2> ...", so the first field is the token and the
# remaining fields are parsed as its float32 vector.
print("Loading GloVe embeddings...")
embeddings = {}
with open(glove_file, 'r', encoding='utf-8') as f:
    for line in f:
        values = line.split()
        if not values:
            # Skip blank lines rather than failing on values[0]
            continue
        word = values[0]
        vector = np.asarray(values[1:], dtype='float32')
        embeddings[word] = vector

print(f"Loaded {len(embeddings)} word vectors.")

# Test the embeddings
print("Vector for 'king':", embeddings['king'])


Downloading GloVe embeddings...
Download completed. Extracting files...
Extraction completed.
Loading GloVe embeddings...
Loaded 400000 word vectors.
Vector for 'king': [ 0.50451   0.68607  -0.59517  -0.022801  0.60046  -0.13498  -0.08813
  0.47377  -0.61798  -0.31012  -0.076666  1.493    -0.034189 -0.98173
  0.68229   0.81722  -0.51874  -0.31503  -0.55809   0.66421   0.1961
 -0.13495  -0.11476  -0.30344   0.41177  -2.223    -1.0756   -1.0783
 -0.34354   0.33505   1.9927   -0.04234  -0.64319   0.71125   0.49159
  0.16754   0.34344  -0.25663  -0.8523    0.1661    0.40102   1.1685
 -1.0137   -0.21585  -0.15155   0.78321  -0.91241  -1.6106   -0.64426
 -0.51042 ]


In [2]:
import numpy as np
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Embedding, Dense

# Seed Python, NumPy and the Keras backend so that repeated runs of this cell
# start from the same initial weights and batch order.
from tensorflow.keras.utils import set_random_seed
set_random_seed(42)

# Extended dataset
texts = [
    "I love this product", "This is the worst experience",
    "Amazing quality and service", "Not worth the price",
    "I absolutely hate it", "The product is excellent",
    "Terrible and disappointing experience", "Highly recommend this item",
    "Not satisfied with the quality", "The service was wonderful",
    "I regret buying this", "Best purchase I've made",
    "It's okay, not great", "Superb quality, loved it",
    "Horrible experience, never buying again", "Will definitely buy again"
]
labels = np.array([1, 0, 1, 0, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1])  # 1: Positive, 0: Negative
assert len(texts) == len(labels), "every text needs exactly one label"

# Hyper-parameters shared by the tokenizer, the padding step and the model.
# Keeping them in one place prevents the Embedding layer from drifting out of
# sync with the preprocessing.
max_words = 1000  # tokenizer vocabulary cap == Embedding input_dim
max_len = 10      # every sequence is padded/truncated to this many tokens

# Tokenize and pad sequences
tokenizer = Tokenizer(num_words=max_words)
tokenizer.fit_on_texts(texts)
sequences = tokenizer.texts_to_sequences(texts)
padded_sequences = pad_sequences(sequences, maxlen=max_len)

# Build the LSTM model.
# `input_length` is intentionally not passed to Embedding: it is deprecated in
# Keras 3 and the sequence length is inferred from the data on the first call.
model = Sequential()
model.add(Embedding(input_dim=max_words, output_dim=32))
model.add(LSTM(64))  # Increased LSTM units for better learning
model.add(Dense(1, activation='sigmoid'))

# Compile the model
model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])

# Train the model
model.fit(padded_sequences, labels, epochs=50, batch_size=4, verbose=1)

# Test the model.
# NOTE: the tokenizer has no OOV token, so any word that did not occur in
# `texts` is silently dropped from a test sentence before prediction.
test_texts = [
    "I hate this product",  # Expected: Negative
    "I love this item",     # Expected: Positive
    "Terrible quality",     # Expected: Negative
    "Highly recommend it"   # Expected: Positive
]
test_sequences = tokenizer.texts_to_sequences(test_texts)
padded_test_sequences = pad_sequences(test_sequences, maxlen=max_len)

# Predict sentiment: the sigmoid output is one score per text; scores >= 0.5
# are reported as Positive.
predictions = model.predict(padded_test_sequences)
for text, score in zip(test_texts, predictions[:, 0]):
    sentiment = "Positive" if score >= 0.5 else "Negative"
    print(f"Text: '{text}' -> Sentiment: {sentiment} (Score: {score:.4f})")





Epoch 1/50
[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 12ms/step - accuracy: 0.4083 - loss: 0.6948
Epoch 2/50
[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 6ms/step - accuracy: 0.4917 - loss: 0.6908 
Epoch 3/50
[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 6ms/step - accuracy: 0.8250 - loss: 0.6871 
Epoch 4/50
[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 9ms/step - accuracy: 0.8417 - loss: 0.6855 
Epoch 5/50
[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 6ms/step - accuracy: 0.8833 - loss: 0.6813 
Epoch 6/50
[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 6ms/step - accuracy: 1.0000 - loss: 0.6747 
Epoch 7/50
[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 6ms/step - accuracy: 1.0000 - loss: 0.6677 
Epoch 8/50
[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 6ms/step - accuracy: 1.0000 - loss: 0.6587 
Epoch 9/50
[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [

In [3]:
import numpy as np
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Embedding, Dense

# Seed Python, NumPy and the Keras backend so that repeated runs of this cell
# start from the same initial weights.
from tensorflow.keras.utils import set_random_seed
set_random_seed(42)

# Input text
text = "the quick brown fox jumps over the lazy dog"

# Preprocessing the text
tokenizer = Tokenizer()
tokenizer.fit_on_texts([text])
word_index = tokenizer.word_index
# +1 leaves room for index 0, which the tokenizer never assigns to a word
# and which pad_sequences uses as the padding value.
vocab_size = len(word_index) + 1

# Convert text to sequences of integers
encoded_text = tokenizer.texts_to_sequences([text])[0]

# Generate input-output pairs: each window of `sequence_length` word ids is an
# input, and the word id that immediately follows it is the target.
sequence_length = 4  # Number of words in the input sequence
sequences = [
    encoded_text[start:start + sequence_length]
    for start in range(len(encoded_text) - sequence_length)
]
next_words = encoded_text[sequence_length:]
assert sequences, "text must contain more than `sequence_length` words"

# Convert to NumPy arrays
X = np.array(sequences)
y = np.array(next_words)

# Build the LSTM model.
# `input_length` is intentionally not passed to Embedding: it is deprecated in
# Keras 3 and the sequence length is inferred from the data on the first call.
model = Sequential()
model.add(Embedding(input_dim=vocab_size, output_dim=10))
model.add(LSTM(50, activation='tanh'))
model.add(Dense(vocab_size, activation='softmax'))

# Compile the model (targets are integer word ids, hence the sparse loss)
model.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])

# Train the model
model.fit(X, y, epochs=100, verbose=1)

# Make a prediction
test_input = ["quick", "brown", "fox", "jumps"]
# Feed the tokenizer a text string, which is the input form it documents
test_sequence = tokenizer.texts_to_sequences([" ".join(test_input)])[0]
test_sequence = pad_sequences([test_sequence], maxlen=sequence_length)

predicted_word_index = int(np.argmax(model.predict(test_sequence), axis=-1)[0])
# Class 0 is the padding slot and has no entry in index_word, so fall back to
# a placeholder instead of raising KeyError if the model ever predicts it.
predicted_word = tokenizer.index_word.get(predicted_word_index, "<unknown>")

print(f"Input sequence: {' '.join(test_input)}")
print(f"Predicted next word: {predicted_word}")


Epoch 1/100
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 3s/step - accuracy: 0.0000e+00 - loss: 2.1994
Epoch 2/100
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 124ms/step - accuracy: 0.0000e+00 - loss: 2.1962
Epoch 3/100
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 33ms/step - accuracy: 0.2000 - loss: 2.1930
Epoch 4/100
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 31ms/step - accuracy: 0.4000 - loss: 2.1898
Epoch 5/100
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 57ms/step - accuracy: 0.4000 - loss: 2.1867
Epoch 6/100
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 32ms/step - accuracy: 0.4000 - loss: 2.1835
Epoch 7/100
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 59ms/step - accuracy: 0.2000 - loss: 2.1803
Epoch 8/100
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 32ms/step - accuracy: 0.2000 - loss: 2.1770
Epoch 9/100
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━

In [4]:
import numpy as np
from keras.models import Sequential
from keras.layers import SimpleRNN, Dense, Embedding
from keras.utils import to_categorical

# Seed Python, NumPy and the Keras backend so that repeated runs of this cell
# start from the same initial weights and batch order.
from keras.utils import set_random_seed
set_random_seed(42)

# Input text
text = "natural language"
chars = sorted(set(text))  # Unique characters, in a stable order
char_to_index = {char: idx for idx, char in enumerate(chars)}
index_to_char = {idx: char for char, idx in char_to_index.items()}

print(char_to_index)
print(index_to_char)

# Slide a window of `seq_length` characters over the text: each window is an
# input and the character right after it is the target.
seq_length = 4  # Length of input sequences
sequences = [text[i:i + seq_length] for i in range(len(text) - seq_length)]
next_chars = list(text[seq_length:])

# Convert sequences to numerical representation
X = np.array([[char_to_index[char] for char in seq] for seq in sequences])
y = np.array([char_to_index[char] for char in next_chars])

# One-hot encode the output (required by categorical_crossentropy below)
y = to_categorical(y, num_classes=len(chars))

# Build the RNN model.
# `input_length` is intentionally not passed to Embedding: it is deprecated in
# Keras 3 and the sequence length is inferred from the data on the first call.
model = Sequential()
model.add(Embedding(input_dim=len(chars), output_dim=8))
model.add(SimpleRNN(32, activation='tanh'))
model.add(Dense(len(chars), activation='softmax'))

# Compile the model
model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])

# Train the model
model.fit(X, y, epochs=100, batch_size=2, verbose=1)

# Make a prediction.
# The model was trained on windows of exactly `seq_length` characters, so only
# the last `seq_length` characters of the prompt are fed to it; a longer
# prompt such as "natur" would otherwise not match the training input length.
input_sequence = "natur"
model_input = input_sequence[-seq_length:]
input_sequence_encoded = np.array([[char_to_index[char] for char in model_input]])
predicted_char_idx = int(np.argmax(model.predict(input_sequence_encoded)))
predicted_char = index_to_char[predicted_char_idx]

print(f"Input sequence: {input_sequence}")
print(f"Predicted next character: {predicted_char}")



{' ': 0, 'a': 1, 'e': 2, 'g': 3, 'l': 4, 'n': 5, 'r': 6, 't': 7, 'u': 8}
{0: ' ', 1: 'a', 2: 'e', 3: 'g', 4: 'l', 5: 'n', 6: 'r', 7: 't', 8: 'u'}
Epoch 1/100
[1m6/6[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 5ms/step - accuracy: 0.1798 - loss: 2.2072    
Epoch 2/100
[1m6/6[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 5ms/step - accuracy: 0.2655 - loss: 2.1696      
Epoch 3/100
[1m6/6[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 5ms/step - accuracy: 0.3524 - loss: 2.1526 
Epoch 4/100
[1m6/6[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 5ms/step - accuracy: 0.4762 - loss: 2.1470      
Epoch 5/100
[1m6/6[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 5ms/step - accuracy: 0.6869 - loss: 2.0912  
Epoch 6/100
[1m6/6[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 5ms/step - accuracy: 0.4905 - loss: 2.0532  
Epoch 7/100
[1m6/6[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 6ms/step - accuracy: 0.5143 - loss: 2.0208  
Epoch 8/100
