# From N-Gram to Feed-Forward Neural Network to RNN

In [None]:
%pip install nltk torch numpy scikit-learn

In [2]:
import nltk
import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
from collections import defaultdict, Counter
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
nltk.download('punkt')

[nltk_data] Downloading package punkt to /root/nltk_data...
[nltk_data]   Unzipping tokenizers/punkt.zip.


True

In [3]:
# Toy training corpus: one lower-case, punctuation-free sentence per line.
# NOTE(review): "certainity" in the last sentence looks like a misspelling of
# "certainty"; it is part of the training data, so it becomes its own vocabulary entry.
corpus = """
machine learning is a field of artificial intelligence that allows systems to learn from data
machine learning models improve automatically through experience
deep neural networks are a powerful type of machine learning model
recurrent neural networks are useful for sequential data processing
in natural language processing models need to understand long term dependencies
to understand long term dependencies models must remember earlier words in a sentence
if the weather is cold then wear a jacket
if the weather is hot then wear a hat
if there is not certainity of weather you can wear of your choice
"""

In [4]:
# N-gram based models: tokenise the corpus and build the vocabulary lookups.
import nltk
nltk.download('punkt_tab')
# The whole corpus becomes one flat token list, so later n-gram windows can
# span the boundary between two corpus lines.
tokens = nltk.word_tokenize(corpus.lower())

# Sorting the unique tokens gives every word a stable integer index.
vocab = sorted(set(tokens))
vocab_size = len(vocab)
ix_to_word = dict(enumerate(vocab))
word_to_ix = {word: ix for ix, word in ix_to_word.items()}

[nltk_data] Downloading package punkt_tab to /root/nltk_data...
[nltk_data]   Unzipping tokenizers/punkt_tab.zip.


In [5]:
# For every word, count how often each word directly follows it.
bigrams = list(nltk.bigrams(tokens))
bigram_freq = defaultdict(Counter)

for prev_word, next_word in bigrams:
    # Counter.update with a one-element iterable adds exactly one occurrence.
    bigram_freq[prev_word].update((next_word,))

In [6]:
def predict_ngram(word):
    """Return the most frequent successor of ``word`` in ``bigram_freq``.

    Args:
        word: The preceding word to look up.

    Returns:
        The follower with the highest bigram count, or ``None`` when ``word``
        has no entry in ``bigram_freq``.
    """
    # A membership test does not trigger the defaultdict factory, so unknown
    # words are not inserted into the table as a side effect.
    if word not in bigram_freq:
        return None
    top_followers = bigram_freq[word].most_common(1)
    return top_followers[0][0]

In [7]:
# Sanity check: the most frequent follower of "machine" in the bigram counts.
machine_follower = predict_ngram("machine")
print("N-Gram Prediction:", machine_follower)

N-Gram Prediction: learning


In [8]:
# Accuracy of the bigram model on the same bigrams it was counted from
# (i.e. training-set accuracy, not a held-out estimate).
total = len(bigrams)
correct = sum(
    1 for prev_word, next_word in bigrams if predict_ngram(prev_word) == next_word
)

ngram_accuracy = correct / total
print("N-Gram Accuracy:", ngram_accuracy)

N-Gram Accuracy: 0.7604166666666666


In [9]:
# Feed Forward Neural Network (NN): build (context, target) training pairs.
# Each example is `context_size` consecutive tokens plus the token that follows them.
context_size = 2
data = [
    (tokens[start:start + context_size], tokens[start + context_size])
    for start in range(len(tokens) - context_size)
]

In [10]:
# Convert to indices: one row of context word ids per example, one target id each.
X = torch.tensor([[word_to_ix[w] for w in context] for context, _ in data])
y = torch.tensor([word_to_ix[target] for _, target in data])

In [11]:
# Neural network definition
class FeedForwardNN(nn.Module):
    """Fixed-window next-word classifier.

    Embeds each context word, concatenates the embeddings, and passes them
    through one ReLU hidden layer of width 128 to produce one score per
    vocabulary entry.

    Args:
        vocab_size: Number of distinct word indices (embedding rows and output scores).
        embed_dim: Size of each word embedding.
        context_size: Number of context words per example.
    """

    def __init__(self, vocab_size, embed_dim, context_size):
        super().__init__()
        self.embedding = nn.Embedding(vocab_size, embed_dim)
        # The hidden layer consumes all context embeddings laid side by side.
        self.fc1 = nn.Linear(context_size * embed_dim, 128)
        self.fc2 = nn.Linear(128, vocab_size)

    def forward(self, x):
        """Return unnormalised vocabulary scores for a batch of index contexts.

        Args:
            x: Integer tensor whose first dimension is the batch; the remaining
                entries per row are the context word indices.

        Returns:
            Tensor with one row per batch element and ``vocab_size`` columns.
        """
        batch_size = x.shape[0]
        # Flatten the per-word embeddings of each example into a single vector.
        flat_embeds = self.embedding(x).view(batch_size, -1)
        hidden = torch.relu(self.fc1(flat_embeds))
        return self.fc2(hidden)

In [12]:
# Model training
# Seed PyTorch's RNG before the model is built so the random weight
# initialisation (and hence the reported loss and accuracy) is repeatable
# on every Restart & Run All.
torch.manual_seed(42)
model_nn = FeedForwardNN(len(vocab), 50, context_size)
loss_fn = nn.CrossEntropyLoss()
optimizer = optim.Adam(model_nn.parameters(), lr=0.01)

# Full-batch training: every epoch runs one forward/backward pass over all of X.
for epoch in range(100):
    optimizer.zero_grad()
    output = model_nn(X)
    loss = loss_fn(output, y)
    loss.backward()
    optimizer.step()

# `loss` was computed before the final optimizer step, so this is the loss of
# the second-to-last set of weights.
print("NN Training Loss:", loss.item())

NN Training Loss: 0.0933239683508873


In [13]:
# Prediction: score the feed-forward model on the examples it was trained on.
with torch.no_grad():
    output = model_nn(X)
    # Index of the highest score in each row is the predicted word id.
    preds = torch.max(output, dim=1).indices

nn_accuracy = accuracy_score(y.numpy(), preds.numpy())
print("Feed Forward NN Accuracy:", nn_accuracy)

Feed Forward NN Accuracy: 0.9368421052631579


In [14]:
def predict_nn(context_words):
    """Predict the next word from a fixed-size context with the feed-forward model.

    Args:
        context_words: Sequence of exactly ``context_size`` words, all present
            in ``word_to_ix``.

    Returns:
        The vocabulary word with the highest score from ``model_nn``.

    Raises:
        ValueError: If the number of context words differs from ``context_size``.
        KeyError: If a context word is not in the vocabulary.
    """
    # The first linear layer is sized for exactly `context_size` embeddings, so
    # fail early with a readable message instead of a shape error inside the model.
    if len(context_words) != context_size:
        raise ValueError(
            f"predict_nn expects {context_size} context words, got {len(context_words)}"
        )
    indices = torch.tensor([[word_to_ix[w] for w in context_words]])
    # Inference only: skip autograd bookkeeping, as the evaluation cells do.
    with torch.no_grad():
        scores = model_nn(indices)
    return ix_to_word[scores.argmax(dim=1).item()]

In [15]:
# RNN Model: prepare sequential data
# Each example is `sequence_length` consecutive tokens plus the token that follows them.
sequence_length = 3
sequences = [
    (tokens[start:start + sequence_length], tokens[start + sequence_length])
    for start in range(len(tokens) - sequence_length)
]

X_rnn = torch.tensor([[word_to_ix[w] for w in window] for window, _ in sequences])
y_rnn = torch.tensor([word_to_ix[next_word] for _, next_word in sequences])

In [16]:
class RNNModel(nn.Module):
    """Next-word classifier built on a single ``nn.RNN`` layer.

    Args:
        vocab_size: Number of distinct word indices (embedding rows and output scores).
        embed_dim: Size of each word embedding.
        hidden_dim: Size of the recurrent hidden state.
    """

    def __init__(self, vocab_size, embed_dim, hidden_dim):
        super().__init__()
        self.embedding = nn.Embedding(vocab_size, embed_dim)
        # batch_first=True: inputs and outputs are laid out as (batch, time, features).
        self.rnn = nn.RNN(embed_dim, hidden_dim, batch_first=True)
        self.fc = nn.Linear(hidden_dim, vocab_size)

    def forward(self, x):
        """Return unnormalised vocabulary scores for a batch of index sequences.

        Args:
            x: Integer tensor of word indices, one sequence per row.

        Returns:
            Tensor with one row per sequence and ``vocab_size`` columns.
        """
        step_outputs, _ = self.rnn(self.embedding(x))
        # Only the output at the final time step is used for the prediction.
        last_step = step_outputs[:, -1, :]
        return self.fc(last_step)

In [17]:
# Train RNN model
# Seed PyTorch's RNG before the model is built so the random weight
# initialisation (and hence the reported loss and accuracy) is repeatable
# on every Restart & Run All.
torch.manual_seed(42)
model_rnn = RNNModel(len(vocab), 50, 128)
# NOTE: `loss_fn` and `optimizer` are rebound here; from this cell on they
# belong to the RNN, not to the feed-forward model.
loss_fn = nn.CrossEntropyLoss()
optimizer = optim.Adam(model_rnn.parameters(), lr=0.01)

# Full-batch training: every epoch runs one forward/backward pass over all of X_rnn.
for epoch in range(150):
    optimizer.zero_grad()
    output = model_rnn(X_rnn)
    loss = loss_fn(output, y_rnn)
    loss.backward()
    optimizer.step()

# `loss` was computed before the final optimizer step, so this is the loss of
# the second-to-last set of weights.
print("RNN Final Loss:", loss.item())

RNN Final Loss: 0.0590660385787487


In [18]:
# Evaluate: score the RNN on the sequences it was trained on.
with torch.no_grad():
    output = model_rnn(X_rnn)
    # Index of the highest score in each row is the predicted word id.
    preds = torch.max(output, dim=1).indices

rnn_accuracy = accuracy_score(y_rnn.numpy(), preds.numpy())
print("Vanilla RNN Accuracy:", rnn_accuracy)

Vanilla RNN Accuracy: 0.9574468085106383


In [19]:
def predict_rnn(context_words):
    """Predict the next word from a sequence of context words with the RNN.

    The sequence is fed to ``model_rnn`` as a single-row batch; the model was
    trained on windows of ``sequence_length`` words.

    Args:
        context_words: Non-empty sequence of words, all present in ``word_to_ix``.

    Returns:
        The vocabulary word with the highest score from ``model_rnn``.

    Raises:
        ValueError: If ``context_words`` is empty.
        KeyError: If a context word is not in the vocabulary.
    """
    # An empty list would become a float tensor and fail inside the embedding
    # lookup with an unrelated dtype error; reject it up front instead.
    if len(context_words) == 0:
        raise ValueError("predict_rnn needs at least one context word")
    indices = torch.tensor([[word_to_ix[w] for w in context_words]])
    # Inference only: skip autograd bookkeeping, as the evaluation cells do.
    with torch.no_grad():
        scores = model_rnn(indices)
    return ix_to_word[scores.argmax(dim=1).item()]

In [20]:
# Comparison: all three figures are accuracies on the data each model was fit to,
# and each model is scored on its own example set (bigrams, 2-word and 3-word windows).
print("\nModel Comparison:")
print("------------------")
for label, accuracy in (
    ("N-Gram Accuracy      :", ngram_accuracy),
    ("FeedForward Accuracy :", nn_accuracy),
    ("Vanilla RNN Accuracy :", rnn_accuracy),
):
    print(label, accuracy)


Model Comparison:
------------------
N-Gram Accuracy      : 0.7604166666666666
FeedForward Accuracy : 0.9368421052631579
Vanilla RNN Accuracy : 0.9574468085106383


In [21]:
# Prediction of words: tokenise a prompt the same way as the training corpus.
# Every word must already be in the vocabulary, because the predict_* helpers
# look each word up in word_to_ix.
test_sentence = "deep neural networks allows machine learning"
lowered_sentence = test_sentence.lower()
test_tokens = nltk.word_tokenize(lowered_sentence)

In [22]:
# Give each model the trailing context it is called with here: 1, 2 and 5 words.
ngram_context = test_tokens[-1]
nn_context = test_tokens[-2:]
# NOTE(review): the RNN was trained on windows of `sequence_length` (3) words but
# receives 5 here - confirm that the longer context is intentional.
rnn_context = test_tokens[-5:]

print("N-Gram Prediction:", predict_ngram(ngram_context))
print("FeedForward Prediction:", predict_nn(nn_context))
print("RNN Prediction:", predict_rnn(rnn_context))

N-Gram Prediction: is
FeedForward Prediction: models
RNN Prediction: model
