In [61]:
import numpy as np
import nltk
from nltk.tokenize import word_tokenize
from collections import defaultdict

# Ensure you have the NLTK data downloaded
nltk.download('punkt')

class GRUCell:
    def __init__(self, input_size, hidden_size):
        self.input_size = input_size
        self.hidden_size = hidden_size
        
        self.W_z = np.random.randn(hidden_size, input_size)
        self.U_z = np.random.randn(hidden_size, hidden_size)
        self.b_z = np.zeros((hidden_size, 1))
        
        self.W_r = np.random.randn(hidden_size, input_size)
        self.U_r = np.random.randn(hidden_size, hidden_size)
        self.b_r = np.zeros((hidden_size, 1))
        
        self.W_h = np.random.randn(hidden_size, input_size)
        self.U_h = np.random.randn(hidden_size, hidden_size)
        self.b_h = np.zeros((hidden_size, 1))
    
    def sigmoid(self, x):
        return 1 / (1 + np.exp(-x))
    
    def tanh(self, x):
        return np.tanh(x)
    
    def forward(self, x, h_prev):
        z = self.sigmoid(self.W_z @ x + self.U_z @ h_prev + self.b_z)
        r = self.sigmoid(self.W_r @ x + self.U_r @ h_prev + self.b_r)
        h_tilde = self.tanh(self.W_h @ x + self.U_h @ (r * h_prev) + self.b_h)
        h = (1 - z) * h_prev + z * h_tilde
        return h, (h, z, r, h_tilde, h_prev, x)
    
    def backward(self, dh, cache):
        h, z, r, h_tilde, h_prev, x = cache
        
        dh_tilde = dh * z * (1 - h_tilde ** 2)
        dz = dh * (h_tilde - h_prev) * z * (1 - z)
        dr = (self.U_h.T @ dh_tilde) * h_prev * r * (1 - r)
        
        dW_z = dz @ x.T
        dU_z = dz @ h_prev.T
        db_z = dz.sum(axis=1, keepdims=True)
        
        dW_r = dr @ x.T
        dU_r = dr @ h_prev.T
        db_r = dr.sum(axis=1, keepdims=True)
        
        dW_h = dh_tilde @ x.T
        dU_h = dh_tilde @ (r * h_prev).T
        db_h = dh_tilde.sum(axis=1, keepdims=True)
        
        dx = self.W_z.T @ dz + self.W_r.T @ dr + self.W_h.T @ dh_tilde
        dh_prev = self.U_z.T @ dz + self.U_r.T @ dr + self.U_h.T @ (dh_tilde * r)
        
        return dx, dh_prev, dW_z, dU_z, db_z, dW_r, dU_r, db_r, dW_h, dU_h, db_h

class GRULayer:
    def __init__(self, input_size, hidden_size):
        self.input_size = input_size
        self.hidden_size = hidden_size
        self.cell = GRUCell(input_size, hidden_size)
    
    def forward(self, X):
        h = np.zeros((self.hidden_size, 1))
        h_seq = []
        caches = []
        for x in X:
            h, cache = self.cell.forward(x, h)
            h_seq.append(h)
            caches.append(cache)
        return h_seq, caches
    
    def backward(self, dh_seq, caches):
        dW_z, dU_z, db_z = np.zeros_like(self.cell.W_z), np.zeros_like(self.cell.U_z), np.zeros_like(self.cell.b_z)
        dW_r, dU_r, db_r = np.zeros_like(self.cell.W_r), np.zeros_like(self.cell.U_r), np.zeros_like(self.cell.b_r)
        dW_h, dU_h, db_h = np.zeros_like(self.cell.W_h), np.zeros_like(self.cell.U_h), np.zeros_like(self.cell.b_h)
        
        dh = np.zeros((self.hidden_size, 1))
        for t in reversed(range(len(dh_seq))):
            dh += dh_seq[t]
            dx, dh, dW_z_t, dU_z_t, db_z_t, dW_r_t, dU_r_t, db_r_t, dW_h_t, dU_h_t, db_h_t = self.cell.backward(dh, caches[t])
            dW_z += dW_z_t
            dU_z += dU_z_t
            db_z += db_z_t
            dW_r += dW_r_t
            dU_r += dU_r_t
            db_r += db_r_t
            dW_h += dW_h_t
            dU_h += dU_h_t
            db_h += db_h_t
        
        return dW_z, dU_z, db_z, dW_r, dU_r, db_r, dW_h, dU_h, db_h

class GRUModel:
    def __init__(self, input_size, hidden_size, output_size):
        self.hidden_size = hidden_size
        self.gru_layer = GRULayer(input_size, hidden_size)
        self.W_y = np.random.randn(output_size, hidden_size)
        self.b_y = np.zeros((output_size, 1))
    
    def forward(self, X):
        h_seq, caches = self.gru_layer.forward(X)
        y_seq = [self.W_y @ h + self.b_y for h in h_seq]
        return y_seq, caches
    
    def backward(self, dy_seq, caches):
        dh_seq = [self.W_y.T @ dy for dy in dy_seq]
        dW_y = sum(dy @ h.T for dy, (h, _, _, _, _, _) in zip(dy_seq, caches))
        db_y = sum(dy for dy in dy_seq)
        
        dW_z, dU_z, db_z, dW_r, dU_r, db_r, dW_h, dU_h, db_h = self.gru_layer.backward(dh_seq, caches)
        return dW_y, db_y, dW_z, dU_z, db_z, dW_r, dU_r, db_r, dW_h, dU_h, db_h
    
    def update(self, grads, lr):
        dW_y, db_y, dW_z, dU_z, db_z, dW_r, dU_r, db_r, dW_h, dU_h, db_h = grads
        self.W_y -= lr * dW_y
        self.b_y -= lr * db_y
        self.gru_layer.cell.W_z -= lr * dW_z
        self.gru_layer.cell.U_z -= lr * dU_z
        self.gru_layer.cell.b_z -= lr * db_z
        self.gru_layer.cell.W_r -= lr * dW_r
        self.gru_layer.cell.U_r -= lr * dU_r
        self.gru_layer.cell.b_r -= lr * db_r
        self.gru_layer.cell.W_h -= lr * dW_h
        self.gru_layer.cell.U_h -= lr * dU_h
        self.gru_layer.cell.b_h -= lr * db_h
    
    def train(self, X, Y, epochs, lr):
        for epoch in range(epochs):
            for x_seq, y_seq in zip(X, Y):
                y_pred, caches = self.forward(x_seq)
                loss = sum((y - y_pred) ** 2 for y, y_pred in zip(y_seq, y_pred))
                dy_seq = [2 * (y_pred - y) for y, y_pred in zip(y_seq, y_pred)]
                grads = self.backward(dy_seq, caches)
                self.update(grads, lr)
            print(f'Epoch {epoch + 1}, Loss: {loss}')

    def predict(self, X, h0=None):
        h = h0 if h0 is not None else np.zeros((self.hidden_size, 1))
        preds = []
        for x in X:
            h, _ = self.gru_layer.cell.forward(x, h)
            y = self.W_y @ h + self.b_y
            preds.append(y)
        return preds

[nltk_data] Downloading package punkt to
[nltk_data]     C:\Users\K-Gen\AppData\Roaming\nltk_data...
[nltk_data]   Package punkt is already up-to-date!


In [62]:
import regex as re
# Helper functions for text preprocessing
def tokenize(text):
    return word_tokenize(text.lower())

def build_vocab(tokenized_texts):
    vocab = defaultdict(lambda: len(vocab))
    for text in tokenized_texts:
        for token in text:
            vocab[token]
    return vocab

def text_to_sequence(text, vocab):
    return [vocab[token] for token in text]

def sequence_to_matrix(seq, vocab_size):
    return np.eye(vocab_size)[seq].T

def file_to_sentence_list(file_path): 
	with open(file_path, 'r') as file: 
		text = file.read() 

	# Splitting the text into sentences using 
	# delimiters like '.', '?', and '!' 
	sentences = [sentence.strip() for sentence in re.split( 
		r'(?<=[.!?])\s+', text) if sentence.strip()] 

	return sentences 

file_path = 'pizza.txt'
text_data = file_to_sentence_list(file_path) 

In [63]:
text_data

['Pizza, the delectable and iconic dish that has transcended borders and captivated taste buds worldwide, is a testament to the extraordinary fusion of flavors, creativity, and cultural significance.',
 'Originating from the sun-kissed lands of Italy, pizza has evolved into an art form that unites people from diverse backgrounds in a shared love for its mouthwatering combinations.',
 'Its history stretches back centuries, with roots tracing back to ancient civilizations like the Greeks, Romans, and Egyptians, who all had their versions of flatbreads adorned with various ingredients.',
 'However, it was the vibrant city of Naples, Italy, that birthed the pizza we know and adore today.',
 'With its soft and chewy Neapolitan crust, topped with the perfect balance of tomatoes, mozzarella cheese, and fresh basil, the Margherita pizza pays homage to Queen Margherita of Italy and embodies the colors of the Italian flag.',
 'As pizza migrated from the shores of Naples, it found its way to the 

In [64]:
# Example text data
text = " ".join(text_data)
tokenized_text = tokenize(text)
vocab = build_vocab([tokenized_text])
vocab_size = len(vocab)

input_size = vocab_size  # Size of the input is now the vocabulary size
hidden_size = 20  # Size of the GRU hidden state
output_size = vocab_size  # Size of the output is also the vocabulary size

# Convert text to input-output sequences
seq = text_to_sequence(tokenized_text, vocab)
X = [sequence_to_matrix([seq[i]], vocab_size) for i in range(len(seq)-1)]
Y = [sequence_to_matrix([seq[i+1]], vocab_size) for i in range(len(seq)-1)]

# Initialize and train the model
model = GRUModel(input_size, hidden_size, output_size)
model.train([X], [Y], epochs=10, lr=0.01)

# Make a prediction
preds = model.predict([X[0]])
pred_tokens = [max(enumerate(pred.flatten()), key=lambda x: x[1])[0] for pred in preds]
pred_text = ' '.join([list(vocab.keys())[list(vocab.values()).index(idx)] for idx in pred_tokens])

Epoch 1, Loss: [[22709.42216138]
 [12505.13130003]
 [14912.71058532]
 [14985.85703245]
 [19560.71644004]
 [18900.49603919]
 [ 9676.58614951]
 [18028.18046415]
 [ 7452.72200061]
 [12067.34620182]
 [ 8789.92505157]
 [16551.44542743]
 [24251.0816084 ]
 [11486.96895905]
 [15675.16330987]
 [20775.76139185]
 [40456.45332941]
 [38909.30892151]
 [10112.9162531 ]
 [50376.46732689]
 [16444.02102985]
 [16084.3258384 ]
 [13819.27376713]
 [10857.98420481]
 [13454.10846467]
 [20756.82004146]
 [ 8409.70823289]
 [ 9158.33560377]
 [ 4823.46603729]
 [13858.73810381]
 [19188.43163249]
 [11346.5789958 ]
 [11958.05561973]
 [27196.9920658 ]
 [18348.19865662]
 [ 8866.73201521]
 [10676.35857387]
 [13782.91175484]
 [ 8573.68359054]
 [17257.34301771]
 [12606.67052661]
 [ 9803.84369232]
 [14444.78580557]
 [12299.69039345]
 [38239.60069324]
 [17580.88813514]
 [21075.80359297]
 [10618.82097419]
 [17187.4731572 ]
 [25165.79230571]
 [ 8986.52279932]
 [ 7117.59487744]
 [14895.01127753]
 [21469.15582392]
 [10880.76888

  return 1 / (1 + np.exp(-x))


Epoch 2, Loss: [[1.35336832e+07]
 [3.63706275e+06]
 [5.48750180e+06]
 [5.99002622e+06]
 [2.60253527e+07]
 [6.33551965e+06]
 [1.38209081e+07]
 [1.90119846e+07]
 [1.24813464e+06]
 [4.71423334e+06]
 [3.88422070e+06]
 [1.00689752e+07]
 [6.69344583e+06]
 [1.02575954e+07]
 [8.77419118e+06]
 [1.74149105e+07]
 [8.63865836e+06]
 [7.30885651e+06]
 [4.00962214e+06]
 [9.20946334e+07]
 [3.71056160e+06]
 [3.17701024e+06]
 [1.75719496e+07]
 [8.83892854e+06]
 [1.84078847e+07]
 [3.19555761e+07]
 [1.29802776e+06]
 [1.13360972e+06]
 [1.93804523e+06]
 [6.82362830e+06]
 [3.55443906e+07]
 [1.24989438e+06]
 [3.66346783e+06]
 [2.59317732e+07]
 [1.33792460e+07]
 [1.25997525e+07]
 [2.12183492e+06]
 [5.69165387e+06]
 [1.94283971e+06]
 [1.35492676e+07]
 [4.79990330e+06]
 [1.30889081e+07]
 [5.88235137e+06]
 [4.85841569e+06]
 [6.43292646e+07]
 [4.41261679e+06]
 [5.40268589e+06]
 [1.20168467e+07]
 [7.18491603e+06]
 [2.59586156e+07]
 [2.20262888e+06]
 [1.11782549e+06]
 [1.37787524e+07]
 [5.52002478e+07]
 [4.81723903e

In [65]:
print(pred_text)

captured


In [68]:
preds = X[0]
pred_tokens = [max(enumerate(pred.flatten()), key=lambda x: x[1])[0] for pred in preds]
pred_text = ' '.join([list(vocab.keys())[list(vocab.values()).index(idx)] for idx in pred_tokens])


In [69]:
pred_text

'pizza pizza pizza pizza pizza pizza pizza pizza pizza pizza pizza pizza pizza pizza pizza pizza pizza pizza pizza pizza pizza pizza pizza pizza pizza pizza pizza pizza pizza pizza pizza pizza pizza pizza pizza pizza pizza pizza pizza pizza pizza pizza pizza pizza pizza pizza pizza pizza pizza pizza pizza pizza pizza pizza pizza pizza pizza pizza pizza pizza pizza pizza pizza pizza pizza pizza pizza pizza pizza pizza pizza pizza pizza pizza pizza pizza pizza pizza pizza pizza pizza pizza pizza pizza pizza pizza pizza pizza pizza pizza pizza pizza pizza pizza pizza pizza pizza pizza pizza pizza pizza pizza pizza pizza pizza pizza pizza pizza pizza pizza pizza pizza pizza pizza pizza pizza pizza pizza pizza pizza pizza pizza pizza pizza pizza pizza pizza pizza pizza pizza pizza pizza pizza pizza pizza pizza pizza pizza pizza pizza pizza pizza pizza pizza pizza pizza pizza pizza pizza pizza pizza pizza pizza pizza pizza pizza pizza pizza pizza pizza pizza pizza pizza pizza pizza pizza piz