In [18]:
import numpy as np

def sigmoid(x):
    return 1 / (1 + np.exp(-x))

def tanh(x):
    return np.tanh(x)

def sigmoid_derivative(x):
    return sigmoid(x) * (1 - sigmoid(x))

def tanh_derivative(x):
    return 1 - np.tanh(x)**2

def mse_loss(y_pred, y_true):
    return ((y_pred - y_true) ** 2).mean()

def mse_loss_derivative(y_pred, y_true):
    return 2 * (y_pred - y_true) / y_true.size

class GRUCell:
    def __init__(self, input_size, hidden_size, output_size, learning_rate=0.01):
        self.input_size = input_size
        self.hidden_size = hidden_size
        self.output_size = output_size
        self.learning_rate = learning_rate
        # Initialize weights
        self.init_weights()
        
    def init_weights(self):
        self.Wz = np.random.randn(self.hidden_size, self.input_size) * 0.1
        self.Uz = np.random.randn(self.hidden_size, self.hidden_size) * 0.1
        self.bz = np.zeros((self.hidden_size, 1))
        
        self.Wr = np.random.randn(self.hidden_size, self.input_size) * 0.1
        self.Ur = np.random.randn(self.hidden_size, self.hidden_size) * 0.1
        self.br = np.zeros((self.hidden_size, 1))
        
        self.Wh = np.random.randn(self.hidden_size, self.input_size) * 0.1
        self.Uh = np.random.randn(self.hidden_size, self.hidden_size) * 0.1
        self.bh = np.zeros((self.hidden_size, 1))
        
        self.Wy = np.random.randn(self.output_size, self.hidden_size) * 0.1
        self.by = np.zeros((self.output_size, 1))
        
    def forward(self, x, h_prev):
        # Store values for backpropagation
        self.x, self.h_prev = x, h_prev
        
        # Update gate
        self.z = sigmoid(np.dot(self.Wz, x) + np.dot(self.Uz, h_prev) + self.bz)
        
        # Reset gate
        self.r = sigmoid(np.dot(self.Wr, x) + np.dot(self.Ur, h_prev) + self.br)
        
        # Candidate hidden state
        self.h_tilde = tanh(np.dot(self.Wh, x) + np.dot(self.Uh, self.r * h_prev) + self.bh)
        
        # Final hidden state
        h_next = self.z * h_prev + (1 - self.z) * self.h_tilde
        
        # Output
        y_pred = np.dot(self.Wy, h_next) + self.by
        
        return y_pred, h_next

    def backward(self, d_y_pred, d_h_next):
        # Gradient of the output layer
        d_Wy = np.dot(d_y_pred, self.h_prev.T)
        d_by = d_y_pred.sum(axis=1, keepdims=True)
        d_h_next += np.dot(self.Wy.T, d_y_pred)
        
        # Derivative of final hidden state
        d_z = d_h_next * (self.h_prev - self.h_tilde)
        d_h_prev = d_h_next * self.z
        d_h_tilde = d_h_next * (1 - self.z)
        
        # Derivatives of the gates
        d_h_tilde_raw = d_h_tilde * tanh_derivative(self.h_tilde)
        d_r = np.dot(self.Uh.T, d_h_tilde_raw) * self.h_prev
        d_h_prev += np.dot(self.Uh.T, d_h_tilde_raw) * self.r
        
        # Update weights and biases
        self.Wh -= self.learning_rate * np.dot(d_h_tilde_raw, self.x.T)
        self.Uh -= self.learning_rate * np.dot(d_h_tilde_raw, (self.r * self.h_prev).T)
        self.bh -= self.learning_rate * d_h_tilde_raw.sum(axis=1, keepdims=True)
        
        self.Wr -= self.learning_rate * np.dot(d_r * sigmoid_derivative(self.r), self.x.T)
        self.Ur -= self.learning_rate * np.dot(d_r * sigmoid_derivative(self.r), self.h_prev.T)
        self.br -= self.learning_rate * (d_r * sigmoid_derivative(self.r)).sum(axis=1, keepdims=True)
        
        self.Wz -= self.learning_rate * np.dot(d_z * sigmoid_derivative(self.z), self.x.T)
        self.Uz -= self.learning_rate * np.dot(d_z * sigmoid_derivative(self.z), self.h_prev.T)
        self.bz -= self.learning_rate * (d_z * sigmoid_derivative(self.z)).sum(axis=1, keepdims=True)
        
        self.Wy -= self.learning_rate * d_Wy
        self.by -= self.learning_rate * d_by

        return d_h_prev

    def train(self, inputs, targets, epochs):
        h_prev = np.zeros((self.hidden_size, 1))

        for epoch in range(epochs):
            loss = 0
            for x, y_true in zip(inputs, targets):
                x = x.reshape(-1, 1)  # Reshape x to (input_size, 1)
                y_true = y_true.reshape(-1, 1)  # Reshape y_true if needed

                # Forward pass
                y_pred, h_next = self.forward(x, h_prev)

                # Calculate loss (for monitoring)
                loss += mse_loss(y_pred, y_true)

                # Backpropagate error
                d_loss = mse_loss_derivative(y_pred, y_true)
                d_h_next = self.backward(d_loss, h_next)
                h_prev = h_next  # update state
                
            loss /= len(inputs)
            print(f'Epoch {epoch + 1}/{epochs}, Loss: {loss}')

    def predict(self, inputs):
        h_prev = np.zeros((self.hidden_size, 1))
        y_pred, h_prev = self.forward(inputs, h_prev)
        return y_pred

In [19]:
import numpy as np
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Embedding, LSTM, Dense, GRU
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences
import regex as re
from gensim.models import FastText

def file_to_sentence_list(file_path):
    with open(file_path, 'r') as file:
        text = file.read()
    sentences = [sentence.strip() for sentence in re.split(r'(?<=[.!?])\s+', text) if sentence.strip()]
    return sentences

def load_fasttext_model(sentences, vector_size=50, window=10, min_count=2):
    fasttext_model = FastText(sentences=sentences, vector_size=vector_size, window=window, min_count=min_count)
    return fasttext_model

def create_embedding_matrix(word_index, fasttext_model):
    embedding_matrix = np.zeros((len(word_index) + 1, fasttext_model.vector_size))
    for word, i in word_index.items():
        if word in fasttext_model.wv:
            embedding_matrix[i] = fasttext_model.wv[word]
    return embedding_matrix

# Read and process the text data
file_path = 'pizza.txt'
text_data = file_to_sentence_list(file_path)

# Tokenize the text data
tokenizer = Tokenizer()
tokenizer.fit_on_texts(text_data)
word_index = tokenizer.word_index
total_words = len(word_index) + 1

# Create input sequences
input_sequences = []
for line in text_data:
    token_list = tokenizer.texts_to_sequences([line])[0]
    for i in range(1, len(token_list)):
        n_gram_sequence = token_list[:i+1]
        input_sequences.append(n_gram_sequence)

# Pad sequences and prepare input data
max_sequence_len = max([len(seq) for seq in input_sequences])
input_sequences = np.array(pad_sequences(input_sequences, maxlen=max_sequence_len, padding='pre'))
X, y = input_sequences[:, :-1], input_sequences[:, -1]

# Load or train FastText model
sentences = [text.split() for text in text_data]
fasttext_model = load_fasttext_model(sentences)

# Create an embedding matrix
embedding_matrix = create_embedding_matrix(tokenizer.word_index, fasttext_model)

In [20]:
# X = tf.keras.utils.to_categorical(X, num_classes=total_words) 
# # Convert target data to one-hot encoding 
# y = tf.keras.utils.to_categorical(y, num_classes=total_words) 

# gru_cell = GRUCell(687, 50, 1, 0.01)
# gru_cell.train(X, y, 10)

In [21]:
X[0][0]

0

In [33]:
input_size = 10
hidden_size = 10
output_size = 1  
epochs = 1000
learning_rate = 0.01

# Example synthetic data
inputs = [np.random.randn(input_size, 1) for _ in range(1000)]
targets = [np.random.randn(output_size, 1) for _ in range(1000)]
gru_cell = GRUCell(input_size, hidden_size, output_size, learning_rate)
gru_cell.train(inputs, targets, epochs)

Epoch 1/1000, Loss: 0.9962634010208418
Epoch 2/1000, Loss: 0.9930102674022204
Epoch 3/1000, Loss: 0.9927827072386674
Epoch 4/1000, Loss: 0.9927381201390651
Epoch 5/1000, Loss: 0.9927296444865114
Epoch 6/1000, Loss: 0.9927279354077234
Epoch 7/1000, Loss: 0.9927275678995211
Epoch 8/1000, Loss: 0.9927274850733313
Epoch 9/1000, Loss: 0.992727465828162
Epoch 10/1000, Loss: 0.9927274612683314
Epoch 11/1000, Loss: 0.9927274601738714
Epoch 12/1000, Loss: 0.9927274599087508
Epoch 13/1000, Loss: 0.992727459844075
Epoch 14/1000, Loss: 0.9927274598282079
Epoch 15/1000, Loss: 0.9927274598242933
Epoch 16/1000, Loss: 0.9927274598233254
Epoch 17/1000, Loss: 0.9927274598230846
Epoch 18/1000, Loss: 0.9927274598230236
Epoch 19/1000, Loss: 0.9927274598230101
Epoch 20/1000, Loss: 0.992727459823005
Epoch 21/1000, Loss: 0.9927274598230036
Epoch 22/1000, Loss: 0.9927274598230039
Epoch 23/1000, Loss: 0.9927274598230041
Epoch 24/1000, Loss: 0.9927274598230041
Epoch 25/1000, Loss: 0.9927274598230041
Epoch 26/100

In [32]:
for i in range(10):
    print(gru_cell.predict(inputs[i]))
    print(targets[i])

[[0.10697423]]
[[0.2797318]]
[[0.11363818]]
[[1.87875115]]
[[0.10856516]]
[[-0.46354534]]
[[0.10933387]]
[[-0.22320698]]
[[0.11786535]]
[[-2.97064795]]
[[0.10823431]]
[[1.80252715]]
[[0.11553784]]
[[-0.76841153]]
[[0.11073224]]
[[-0.48079582]]
[[0.10488125]]
[[0.64474239]]
[[0.11326006]]
[[-0.42607185]]


In [24]:


# Convert lists to numpy arrays for TensorFlow compatibility
inputs = np.array(inputs).reshape(-1, 1, input_size)  # Reshape to [batch, timesteps, feature]
targets = np.array(targets).reshape(-1, output_size)

# Define the GRU model
model = tf.keras.models.Sequential([
    tf.keras.layers.GRU(hidden_size, input_shape=(1, input_size)),
    tf.keras.layers.Dense(output_size)
])

# Compile the model
model.compile(optimizer=tf.keras.optimizers.Adam(learning_rate=learning_rate),
              loss='mse')  # Mean Squared Error for regression tasks

# Train the model
model.fit(inputs, targets, epochs=epochs)

# Optionally, you can print the model summary
print(model.summary())

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
Model: "sequential_1"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 gru_1 (GRU)                 (None, 10)                660       
                                                                 
 dense_1 (Dense)             (None, 1)                 11        
                                                                 
Total params: 671 (2.62 KB)
Trainable params: 671 (2.62 KB)
Non-trainable params: 0 (0.00 Byte)
_________________________________________________________________
None


In [25]:
from tensorflow.keras.preprocessing.text import Tokenizer 
from tensorflow.keras.preprocessing.sequence import pad_sequences 
import tensorflow as tf 
import regex as re

def file_to_sentence_list(file_path): 
	with open(file_path, 'r') as file: 
		text = file.read() 

	# Splitting the text into sentences using 
	# delimiters like '.', '?', and '!' 
	sentences = [sentence.strip() for sentence in re.split( 
		r'(?<=[.!?])\s+', text) if sentence.strip()] 

	return sentences 

file_path = 'pizza.txt'
text_data = file_to_sentence_list(file_path) 

# Tokenize the text data 
tokenizer = Tokenizer() 
tokenizer.fit_on_texts(text_data) 
total_words = len(tokenizer.word_index) + 1

# Create input sequences 
input_sequences = [] 
for line in text_data: 
	token_list = tokenizer.texts_to_sequences([line])[0] 
	for i in range(1, len(token_list)): 
		n_gram_sequence = token_list[:i+1] 
		input_sequences.append(n_gram_sequence) 

# Pad sequences and split into predictors and label 
max_sequence_len = max([len(seq) for seq in input_sequences]) 
input_sequences = np.array(pad_sequences( 
	input_sequences, maxlen=max_sequence_len, padding='pre')) 
X, y = input_sequences[:, :-1], input_sequences[:, -1] 

# Convert target data to one-hot encoding 
y = tf.keras.utils.to_categorical(y, num_classes=total_words) 

In [26]:
inputs = np.array(X)
inputs = np.array([input.reshape(39, 1) for input in inputs])

targets = np.array(y)
targets = np.array([target.reshape(687, 1) for target in targets])

input_size = 39
hidden_size = 39
output_size = 687
epochs = 10
learning_rate = 0.01

In [27]:
gru_cell = GRUCell(input_size, hidden_size, output_size, learning_rate)
gru_cell.train(inputs, targets, epochs)

  return 1 / (1 + np.exp(-x))


Epoch 1/10, Loss: 0.39602644654507496
Epoch 2/10, Loss: 0.37672297632235563
Epoch 3/10, Loss: 0.35795658234776323
Epoch 4/10, Loss: 0.34041299938129277
Epoch 5/10, Loss: 0.32202253456581315
Epoch 6/10, Loss: 0.307406330924317
Epoch 7/10, Loss: 0.29041442433325193
Epoch 8/10, Loss: 0.27713118354734867
Epoch 9/10, Loss: 0.26294894463282686
Epoch 10/10, Loss: 0.2502672092751405
