In [1]:
import numpy as np

def sigmoid(x):
    return 1 / (1 + np.exp(-x))

def tanh(x):
    return np.tanh(x)

def sigmoid_derivative(x):
    return sigmoid(x) * (1 - sigmoid(x))

def tanh_derivative(x):
    return 1 - np.tanh(x)**2

def mse_loss(y_pred, y_true):
    return ((y_pred - y_true) ** 2).mean()

def mse_loss_derivative(y_pred, y_true):
    return 2 * (y_pred - y_true) / y_true.size

class GRUCell:
    def __init__(self, input_size, hidden_size, output_size, learning_rate=0.01):
        self.input_size = input_size
        self.hidden_size = hidden_size
        self.output_size = output_size
        self.learning_rate = learning_rate
        # Initialize weights
        self.init_weights()
        
    def init_weights(self):
        self.Wz = np.random.randn(self.hidden_size, self.input_size) * 0.1
        self.Uz = np.random.randn(self.hidden_size, self.hidden_size) * 0.1
        self.bz = np.zeros((self.hidden_size, 1))
        
        self.Wr = np.random.randn(self.hidden_size, self.input_size) * 0.1
        self.Ur = np.random.randn(self.hidden_size, self.hidden_size) * 0.1
        self.br = np.zeros((self.hidden_size, 1))
        
        self.Wh = np.random.randn(self.hidden_size, self.input_size) * 0.1
        self.Uh = np.random.randn(self.hidden_size, self.hidden_size) * 0.1
        self.bh = np.zeros((self.hidden_size, 1))
        
        self.Wy = np.random.randn(self.output_size, self.hidden_size) * 0.1
        self.by = np.zeros((self.output_size, 1))
        
    def forward(self, x, h_prev):
        # Store values for backpropagation
        self.x, self.h_prev = x, h_prev
        
        # Update gate
        self.z = sigmoid(np.dot(self.Wz, x) + np.dot(self.Uz, h_prev) + self.bz)
        
        # Reset gate
        self.r = sigmoid(np.dot(self.Wr, x) + np.dot(self.Ur, h_prev) + self.br)
        
        # Candidate hidden state
        self.h_tilde = tanh(np.dot(self.Wh, x) + np.dot(self.Uh, self.r * h_prev) + self.bh)
        
        # Final hidden state
        h_next = self.z * h_prev + (1 - self.z) * self.h_tilde
        
        # Output
        y_pred = np.dot(self.Wy, h_next) + self.by
        
        return y_pred, h_next

    def backward(self, d_y_pred, d_h_next):
        # Gradient of the output layer
        d_Wy = np.dot(d_y_pred, self.h_prev.T)
        d_by = d_y_pred.sum(axis=1, keepdims=True)
        d_h_next += np.dot(self.Wy.T, d_y_pred)
        
        # Derivative of final hidden state
        d_z = d_h_next * (self.h_prev - self.h_tilde)
        d_h_prev = d_h_next * self.z
        d_h_tilde = d_h_next * (1 - self.z)
        
        # Derivatives of the gates
        d_h_tilde_raw = d_h_tilde * tanh_derivative(self.h_tilde)
        d_r = np.dot(self.Uh.T, d_h_tilde_raw) * self.h_prev
        d_h_prev += np.dot(self.Uh.T, d_h_tilde_raw) * self.r
        
        # Update weights and biases
        self.Wh -= self.learning_rate * np.dot(d_h_tilde_raw, self.x.T)
        self.Uh -= self.learning_rate * np.dot(d_h_tilde_raw, (self.r * self.h_prev).T)
        self.bh -= self.learning_rate * d_h_tilde_raw.sum(axis=1, keepdims=True)
        
        self.Wr -= self.learning_rate * np.dot(d_r * sigmoid_derivative(self.r), self.x.T)
        self.Ur -= self.learning_rate * np.dot(d_r * sigmoid_derivative(self.r), self.h_prev.T)
        self.br -= self.learning_rate * (d_r * sigmoid_derivative(self.r)).sum(axis=1, keepdims=True)
        
        self.Wz -= self.learning_rate * np.dot(d_z * sigmoid_derivative(self.z), self.x.T)
        self.Uz -= self.learning_rate * np.dot(d_z * sigmoid_derivative(self.z), self.h_prev.T)
        self.bz -= self.learning_rate * (d_z * sigmoid_derivative(self.z)).sum(axis=1, keepdims=True)
        
        self.Wy -= self.learning_rate * d_Wy
        self.by -= self.learning_rate * d_by

        return d_h_prev

    def train(self, inputs, targets, epochs):
        h_prev = np.zeros((self.hidden_size, 1))

        for epoch in range(epochs):
            loss = 0
            for x, y_true in zip(inputs, targets):
                x = x.reshape(-1, 1)  # Reshape x to (input_size, 1)
                y_true = y_true.reshape(-1, 1)  # Reshape y_true if needed

                # Forward pass
                y_pred, h_next = self.forward(x, h_prev)

                # Calculate loss (for monitoring)
                loss += mse_loss(y_pred, y_true)

                # Backpropagate error
                d_loss = mse_loss_derivative(y_pred, y_true)
                d_h_next = self.backward(d_loss, h_next)
                h_prev = h_next  # update state
                
            loss /= len(inputs)
            print(f'Epoch {epoch + 1}/{epochs}, Loss: {loss}')

    def predict(self, inputs):
        h_prev = np.zeros((self.hidden_size, 1))
        y_pred, h_prev = self.forward(inputs, h_prev)
        return y_pred

In [2]:
import numpy as np
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Embedding, LSTM, Dense, GRU
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences
import regex as re
from gensim.models import FastText

def file_to_sentence_list(file_path):
    with open(file_path, 'r') as file:
        text = file.read()
    sentences = [sentence.strip() for sentence in re.split(r'(?<=[.!?])\s+', text) if sentence.strip()]
    return sentences

def load_fasttext_model(sentences, vector_size=50, window=10, min_count=2):
    fasttext_model = FastText(sentences=sentences, vector_size=vector_size, window=window, min_count=min_count)
    return fasttext_model

def create_embedding_matrix(word_index, fasttext_model):
    embedding_matrix = np.zeros((len(word_index) + 1, fasttext_model.vector_size))
    for word, i in word_index.items():
        if word in fasttext_model.wv:
            embedding_matrix[i] = fasttext_model.wv[word]
    return embedding_matrix

# Read and process the text data
file_path = 'pizza.txt'
text_data = file_to_sentence_list(file_path)

# Tokenize the text data
tokenizer = Tokenizer()
tokenizer.fit_on_texts(text_data)
word_index = tokenizer.word_index
total_words = len(word_index) + 1

# Create input sequences
input_sequences = []
for line in text_data:
    token_list = tokenizer.texts_to_sequences([line])[0]
    for i in range(1, len(token_list)):
        n_gram_sequence = token_list[:i+1]
        input_sequences.append(n_gram_sequence)

# Pad sequences and prepare input data
max_sequence_len = max([len(seq) for seq in input_sequences])
input_sequences = np.array(pad_sequences(input_sequences, maxlen=max_sequence_len, padding='pre'))
X, y = input_sequences[:, :-1], input_sequences[:, -1]

# Load or train FastText model
sentences = [text.split() for text in text_data]
fasttext_model = load_fasttext_model(sentences)

# Create an embedding matrix
embedding_matrix = create_embedding_matrix(tokenizer.word_index, fasttext_model)




In [3]:
# X = tf.keras.utils.to_categorical(X, num_classes=total_words) 
# # Convert target data to one-hot encoding 
# y = tf.keras.utils.to_categorical(y, num_classes=total_words) 

# gru_cell = GRUCell(687, 50, 1, 0.01)
# gru_cell.train(X, y, 10)

In [4]:
X[0][0]

0

In [5]:
input_size = 10
hidden_size = 20
output_size = 1  
epochs = 100
learning_rate = 0.01

# Example synthetic data
inputs = np.random.randn(100, input_size)
targets = np.random.randn(100, output_size)
gru_cell = GRUCell(input_size, hidden_size, output_size, learning_rate)
gru_cell.train(inputs, targets, epochs)

Epoch 1/100, Loss: 0.8426334644902557
Epoch 2/100, Loss: 0.7734987152296402
Epoch 3/100, Loss: 0.7696668019041557
Epoch 4/100, Loss: 0.7694567880791351
Epoch 5/100, Loss: 0.7699106200920199
Epoch 6/100, Loss: 0.7706354511851271
Epoch 7/100, Loss: 0.7714825391675144
Epoch 8/100, Loss: 0.772346833238512
Epoch 9/100, Loss: 0.7731621399616222
Epoch 10/100, Loss: 0.7738947988854692
Epoch 11/100, Loss: 0.7745330967095606
Epoch 12/100, Loss: 0.7750780431292794
Epoch 13/100, Loss: 0.7755370549883354
Epoch 14/100, Loss: 0.7759201541022281
Epoch 15/100, Loss: 0.7762378748476428
Epoch 16/100, Loss: 0.7765002088310236
Epoch 17/100, Loss: 0.7767161376991887
Epoch 18/100, Loss: 0.7768934836738992
Epoch 19/100, Loss: 0.7770389233358357
Epoch 20/100, Loss: 0.7771580789579766
Epoch 21/100, Loss: 0.777255640807083
Epoch 22/100, Loss: 0.7773354957180022
Epoch 23/100, Loss: 0.7774008494374246
Epoch 24/100, Loss: 0.7774543370090153
Epoch 25/100, Loss: 0.7774981191999305
Epoch 26/100, Loss: 0.77753396496411

In [6]:
for i in range(5):
    print(inputs[i])
    print(gru_cell.predict(inputs[i]))
    print(targets[i])


[-0.52175082 -1.04291614  1.32487184 -0.81651301  0.67118204  1.66832641
 -0.64027934 -0.40204368  0.08868526  0.39260631]
[[0.06054527 0.06054527 0.06054527 0.06054527 0.06054527 0.06054527
  0.06054527 0.06054527 0.06054527 0.06054527 0.06054527 0.06054527
  0.06054527 0.06054527 0.06054527 0.06054527 0.06054527 0.06054527
  0.06054527 0.06054527]]
[1.31951572]
[-1.0020422  -0.62832827 -0.9667176  -1.82481963 -0.94748438 -0.45674069
 -1.18439951  0.83604301 -1.40795628 -1.07746102]
[[0.06054527 0.06054527 0.06054527 0.06054527 0.06054527 0.06054527
  0.06054527 0.06054527 0.06054527 0.06054527 0.06054527 0.06054527
  0.06054527 0.06054527 0.06054527 0.06054527 0.06054527 0.06054527
  0.06054527 0.06054527]]
[0.09298336]
[-0.98956853 -1.38083323  0.56325249 -1.48848276 -1.16414646 -0.24037772
  0.19020144 -0.47704574  0.45655185 -1.88914022]
[[0.06054527 0.06054527 0.06054527 0.06054527 0.06054527 0.06054527
  0.06054527 0.06054527 0.06054527 0.06054527 0.06054527 0.06054527
  0.06054

In [7]:


# Convert lists to numpy arrays for TensorFlow compatibility
inputs = np.array(inputs).reshape(-1, 1, input_size)  # Reshape to [batch, timesteps, feature]
targets = np.array(targets).reshape(-1, output_size)

# Define the GRU model
model = tf.keras.models.Sequential([
    tf.keras.layers.GRU(hidden_size, input_shape=(1, input_size)),
    tf.keras.layers.Dense(output_size)
])

# Compile the model
model.compile(optimizer=tf.keras.optimizers.Adam(learning_rate=learning_rate),
              loss='mse')  # Mean Squared Error for regression tasks

# Train the model
model.fit(inputs, targets, epochs=epochs)

# Optionally, you can print the model summary
print(model.summary())


Epoch 1/100

Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100
Epoch 26/100
Epoch 27/100
Epoch 28/100
Epoch 29/100
Epoch 30/100
Epoch 31/100
Epoch 32/100
Epoch 33/100
Epoch 34/100
Epoch 35/100
Epoch 36/100
Epoch 37/100
Epoch 38/100
Epoch 39/100
Epoch 40/100
Epoch 41/100
Epoch 42/100
Epoch 43/100
Epoch 44/100
Epoch 45/100
Epoch 46/100
Epoch 47/100
Epoch 48/100
Epoch 49/100
Epoch 50/100
Epoch 51/100
Epoch 52/100
Epoch 53/100
Epoch 54/100
Epoch 55/100
Epoch 56/100
Epoch 57/100
Epoch 58/100
Epoch 59/100
Epoch 60/100
Epoch 61/100
Epoch 62/100
Epoch 63/100
Epoch 64/100
Epoch 65/100
Epoch 66/100
Epoch 67/100
Epoch 68/100
Epoch 69/100
Epoch 70/100
Epoch 71/100
Epoch 72/100
Epoch 73/100
Epoch 74/100
Epoch 75/100
Epoch 76/100
Epoch 77/100
Epoch 

In [8]:
from tensorflow.keras.preprocessing.text import Tokenizer 
from tensorflow.keras.preprocessing.sequence import pad_sequences 
import tensorflow as tf 
import regex as re

def file_to_sentence_list(file_path): 
	with open(file_path, 'r') as file: 
		text = file.read() 

	# Splitting the text into sentences using 
	# delimiters like '.', '?', and '!' 
	sentences = [sentence.strip() for sentence in re.split( 
		r'(?<=[.!?])\s+', text) if sentence.strip()] 

	return sentences 

file_path = 'pizza.txt'
text_data = file_to_sentence_list(file_path) 

# Tokenize the text data 
tokenizer = Tokenizer() 
tokenizer.fit_on_texts(text_data) 
total_words = len(tokenizer.word_index) + 1

# Create input sequences 
input_sequences = [] 
for line in text_data: 
	token_list = tokenizer.texts_to_sequences([line])[0] 
	for i in range(1, len(token_list)): 
		n_gram_sequence = token_list[:i+1] 
		input_sequences.append(n_gram_sequence) 

# Pad sequences and split into predictors and label 
max_sequence_len = max([len(seq) for seq in input_sequences]) 
input_sequences = np.array(pad_sequences( 
	input_sequences, maxlen=max_sequence_len, padding='pre')) 
X, y = input_sequences[:, :-1], input_sequences[:, -1] 

# Convert target data to one-hot encoding 
y = tf.keras.utils.to_categorical(y, num_classes=total_words) 

In [9]:
# inputs = np.array(X)
# inputs = np.array([input.reshape(39, 1) for input in inputs])

# targets = np.array(y)
# targets = np.array([target.reshape(687, 1) for target in targets])

# input_size = 39
# hidden_size = 39
# output_size = 687
# epochs = 10
# learning_rate = 0.01

In [10]:
# gru_cell = GRUCell(input_size, hidden_size, output_size, learning_rate)
# gru_cell.train(inputs, targets, epochs)