In [1]:
from typing import Dict, List, Optional, Tuple

import matplotlib.pyplot as plt
import numpy as np

class RNN:
    """
    Simple (vanilla) recurrent network, forward pass only, for next-token prediction.

    At every timestep the hidden state is updated as
    ``h_t = act(W_xh @ x_t + W_hh @ h_{t-1} + b)`` and the output scores are
    ``o_t = W_hy @ h_t + by``. The scores are returned unnormalised (no softmax
    is applied). No training logic is implemented in this class.
    """

    def __init__(self, input_size: int, hidden_size: int, vocab_size: int,
                 activation: str = 'tanh', embedding_type: str = 'onehot',
                 seed: Optional[int] = None):
        """
        RNN for next token prediction with configurable activation and embedding options

        Args:
            input_size: Size of input features/embeddings
            hidden_size: Size of hidden state
            vocab_size: Size of the vocabulary for output prediction
            activation: Activation function ('tanh', 'relu')
            embedding_type: Text representation method ('onehot', 'random')
            seed: Optional seed for a private random generator used for the
                weights and random embeddings. When None (default) the global
                ``np.random`` state is used, exactly as before.

        Raises:
            ValueError: If ``embedding_type`` is unknown, or if 'onehot' is
                requested with ``input_size < vocab_size`` (some tokens could
                not be given a distinct one-hot vector).
        """
        self.input_size = input_size
        self.hidden_size = hidden_size
        self.vocab_size = vocab_size
        self.activation = activation
        self.embedding_type = embedding_type

        # Both the np.random module and a RandomState instance expose `randn`,
        # so either can serve as the source of random numbers.
        self._rng = np.random if seed is None else np.random.RandomState(seed)

        # Input to hidden
        self.W_xh = self._rng.randn(hidden_size, input_size) * 0.01
        # Hidden to hidden
        self.W_hh = self._rng.randn(hidden_size, hidden_size) * 0.01
        # Hidden to output
        self.W_hy = self._rng.randn(vocab_size, hidden_size) * 0.01
        self.b = np.zeros(hidden_size)
        self.by = np.zeros(vocab_size)

        # Initialize embeddings (drawn after the weights to keep the same
        # random-number consumption order as before)
        self.embeddings = self._init_embeddings()

    def _init_embeddings(self) -> np.ndarray:
        """
        Initialize text embedding matrix based on selected type.

        The matrix is indexed by token id in `forward`, so it has one row per
        vocabulary token and `input_size` columns.
        """
        if self.embedding_type == 'onehot':
            if self.input_size < self.vocab_size:
                raise ValueError(
                    "One-hot embeddings need input_size >= vocab_size "
                    f"(got input_size={self.input_size}, vocab_size={self.vocab_size})"
                )
            # Identical to np.eye(input_size) when input_size == vocab_size.
            return np.eye(self.vocab_size, self.input_size)
        elif self.embedding_type == 'random':
            # Rows = tokens, columns = features; the previous
            # (input_size, input_size) shape broke lookups whenever
            # input_size != vocab_size.
            return self._rng.randn(self.vocab_size, self.input_size) * 0.1
        else:
            raise ValueError(f"Unknown embedding type: {self.embedding_type}")

    def _get_activation(self, x: np.ndarray) -> np.ndarray:
        """Apply the configured activation element-wise; raises ValueError if unsupported."""
        if self.activation == 'tanh':
            return np.tanh(x)
        elif self.activation == 'relu':
            return np.maximum(0, x)
        else:
            raise ValueError(f"Unsupported activation: {self.activation}")

    def forward(self, sequence: List[int]) -> Tuple[np.ndarray, np.ndarray, np.ndarray]:
        """
        Process tokenized sequence through the RNN for prediction

        Args:
            sequence: List of indices representing the input text

        Returns:
            Tuple: (hidden_states, outputs, final_hidden_state)
                hidden_states: array of shape (len(sequence) + 1, hidden_size);
                    row 0 is the all-zero initial state, row t + 1 is the state
                    after consuming token t.
                outputs: array of shape (len(sequence), vocab_size) holding the
                    unnormalised output scores of every timestep.
                final_hidden_state: hidden state after the last token (all
                    zeros for an empty sequence).
        """
        embedded_seq = [self.embeddings[idx] for idx in sequence]

        timesteps = len(embedded_seq)
        H = np.zeros((timesteps + 1, self.hidden_size))  # +1 for initial state
        O = np.zeros((timesteps, self.vocab_size))
        h_prev = np.zeros(self.hidden_size)

        for t in range(timesteps):
            x_t = embedded_seq[t]
            h_t = self._get_activation(
                np.dot(self.W_xh, x_t) +
                np.dot(self.W_hh, h_prev) +
                self.b
            )
            H[t + 1] = h_t  # Store hidden state for t+1
            O[t] = np.dot(self.W_hy, h_t) + self.by
            h_prev = h_t

        return H, O, h_prev

    def predict_next_token(self, sequence: List[int]) -> int:
        """
        Predict the next token given a sequence.

        Returns the index with the highest output score at the last timestep,
        as a plain Python int.

        Raises:
            ValueError: If `sequence` is empty (there is no last timestep).
        """
        if len(sequence) == 0:
            raise ValueError("Cannot predict the next token of an empty sequence")
        _, outputs, _ = self.forward(sequence)
        # np.argmax returns a NumPy integer; convert to honour the `-> int` contract.
        return int(np.argmax(outputs[-1]))

def tokenize_and_build_vocab(corpus: str) -> Tuple[List[str], Dict[str, int], Dict[int, str]]:
    """
    Split a corpus into lowercase whitespace-separated tokens and index them.

    Punctuation is not stripped, so a word followed by a comma is a different
    token from the bare word.

    Args:
        corpus: Raw text to tokenize.

    Returns:
        Tuple: (tokens, vocab, inv_vocab)
            tokens: every token of the corpus, in order of appearance.
            vocab: maps each distinct token to its rank in sorted order.
            inv_vocab: the reverse mapping, from index back to token.
    """
    words = corpus.lower().split()

    word_to_index: Dict[str, int] = {}
    index_to_word: Dict[int, str] = {}
    # Sorting the distinct tokens makes the index assignment deterministic.
    for position, word in enumerate(sorted(set(words))):
        word_to_index[word] = position
        index_to_word[position] = word

    return words, word_to_index, index_to_word

d:\Python\APPS\Anaconda\lib\site-packages\numpy\.libs\libopenblas.EL2C6PLE4ZYW3ECEVIV3OXXGRN2NRFM2.gfortran-win_amd64.dll
d:\Python\APPS\Anaconda\lib\site-packages\numpy\.libs\libopenblas.FB5AE2TYXYH2IJRDKGDGQ3XBKLKTF43H.gfortran-win_amd64.dll


In [5]:
# Example usage
corpus = '''One day, a little girl named Lily found a needle in her room. She knew it was difficult to play with it because it was sharp. Lily wanted to share the needle with her mom, so she could sew a button on her shirt. Lily went to her mom and said, "Mom, I found this needle. Can you share it with me and sew my shirt?" Her mom smiled and said, "Yes, Lily, we can share the needle and fix your shirt." Together, they shared the needle and sewed the button on Lily's shirt. It was not difficult for them because they were sharing and helping each other. After they finished, Lily thanked her mom for sharing the needle and fixing her shirt. They both felt happy because they had shared and worked together. Once upon a time, there was a little car named Beep. Beep loved to go fast and play in the sun. Beep was a healthy car because he always had good fuel. Good fuel made Beep happy and strong. One day, Beep was driving in the park when he saw a big tree. The tree had many leaves that were falling. Beep liked how the leaves fall and wanted to play with them. Beep drove under the tree and watched the leaves fall on him. He laughed and beeped his horn. Beep played with the falling leaves all day. When it was time to go home, Beep knew he needed more fuel. He went to the fuel place and got more healthy fuel. Now, Beep was ready to go fast and play again the next day. And Beep lived happily ever after. One day, a little fish named Fin was swimming near the shore. He saw a big crab and wanted to be friends. "Hi, I am Fin. Do you want to play?" asked the little fish. The crab looked at Fin and said, "No, I don't want to play. I am cold and I don't feel fine." Fin felt sad but wanted to help the crab feel better. He swam away and thought of a plan. He remembered that the sun could make things warm. So, Fin swam to the top of the water and called to the sun, "Please, sun, help my new friend feel fine and not freeze!" 
The sun heard Fin's call and shone its warm light on the shore. The crab started to feel better and not so cold. He saw Fin and said, "Thank you, little fish, for making me feel fine. I don't feel like I will freeze now. Let's play together!" And so, Fin and the crab played and became good friends. Once upon a time, in a land full of trees, there was a little cherry tree. The cherry tree was very sad because it did not have any friends. All the other trees were big and strong, but the cherry tree was small and weak. The cherry tree was envious of the big trees. One day, the cherry tree felt a tickle in its branches. It was a little spring wind. The wind told the cherry tree not to be sad. The wind said, "You are special because you have sweet cherries that everyone loves." The cherry tree started to feel a little better. As time went on, the cherry tree grew more and more cherries. All the animals in the land came to eat the cherries and play under the cherry tree. The cherry tree was happy because it had many friends now. The cherry tree learned that being different can be a good thing. And they all lived happily ever after. Once upon a time, in a small town, there was a troubled little girl named Lily. She was always sad because she lost her favorite toy, a triangle. She looked everywhere in her house but could not find it. One sunny day, Lily went to the park to play. She saw a big puddle of water and thought her triangle might be there. She put her hand in the water to soak it and looked for her toy. She felt something at the bottom of the puddle. Lily pulled it out and saw that it was her triangle! She was so happy that she found it. From that day on, Lily was never troubled again. She played with her triangle every day and always kept it close to her. And when she saw puddles, she would smile and remember how she found her toy.'''
tokens, vocab, inv_vocab = tokenize_and_build_vocab(corpus)

# Seed NumPy's global random state before the model is built so the randomly
# initialised (untrained) weights, and therefore the prediction printed below,
# are the same on every Restart & Run All.
np.random.seed(42)

# Initialize RNN model
rnn = RNN(
    input_size=len(vocab),  # Size of vocabulary determines input size
    hidden_size=64,
    vocab_size=len(vocab),
    activation='tanh',
    embedding_type='random'
)

# Example sequence for prediction: the model predicts the token that follows
# the last prompt word. Words must exist in `vocab` (lowercase, as tokenized).
prompt_words = ["once"]
sequence = [vocab[word] for word in prompt_words]

# Predict next token
predicted_index = rnn.predict_next_token(sequence)
predicted_word = inv_vocab[predicted_index]

# Build the label from the actual prompt so the message cannot drift out of
# sync with the sequence that was fed to the model.
prompt_text = " ".join(prompt_words)
print(f"Predicted next word after '{prompt_text}': {predicted_word}")

# Note: Training loop would go here to actually train the model for better predictions

Predicted next word after 'the': let's
