In [1]:
import numpy as np

def viterbi(words, states, start_prob, trans_prob, emit_prob, vocab):
    """Decode the most likely hidden-state sequence for ``words`` under an HMM.

    The trellis is computed in log space: multiplying raw probabilities
    underflows to 0.0 on long inputs, after which every argmax collapses to
    state 0 and the decoded path is meaningless.

    Parameters
    ----------
    words : sequence of hashable
        Observed tokens.
    states : sequence
        State labels; position ``i`` corresponds to row ``i`` of the
        probability tables.
    start_prob : array-like, shape (n_states,)
        Probability of each state at the first position.
    trans_prob : array-like, shape (n_states, n_states)
        ``trans_prob[i, j]`` is the probability of moving from state ``i``
        to state ``j``.
    emit_prob : array-like, shape (n_states, n_symbols)
        ``emit_prob[s, k]`` is the probability of state ``s`` emitting the
        symbol with vocabulary index ``k``.
    vocab : mapping
        Maps a token to its column index in ``emit_prob``.  Tokens missing
        from ``vocab`` are looked up as index 0.

    Returns
    -------
    list
        One entry of ``states`` per word; ``[]`` when ``words`` is empty.
    """
    n_states, n_obs = len(states), len(words)
    if n_obs == 0:
        # Nothing to tag; the trellis below needs at least one column.
        return []

    # Out-of-vocabulary tokens fall back to emission column 0.
    obs = [vocab.get(word, 0) for word in words]

    # log(0) -> -inf is the intended encoding of an impossible event, so
    # silence the divide-by-zero warning NumPy would otherwise emit.
    with np.errstate(divide='ignore'):
        log_start = np.log(np.asarray(start_prob, dtype=float))
        log_trans = np.log(np.asarray(trans_prob, dtype=float))
        log_emit = np.log(np.asarray(emit_prob, dtype=float))

    V = np.empty((n_states, n_obs))
    backpointer = np.zeros((n_states, n_obs), dtype=int)

    # Initialization
    V[:, 0] = log_start + log_emit[:, obs[0]]

    # Recursion: scores[prev, cur] is the log-probability of the best path
    # that ends in `cur` at time t after being in `prev` at time t-1.
    for t in range(1, n_obs):
        scores = V[:, t - 1, None] + log_trans + log_emit[:, obs[t]]
        backpointer[:, t] = np.argmax(scores, axis=0)
        V[:, t] = np.max(scores, axis=0)

    # Termination: walk the backpointers from the best final state, collect
    # the path in reverse and flip it once (avoids repeated insert(0, ...)).
    state = int(np.argmax(V[:, -1]))
    best_path = [state]
    for t in range(n_obs - 1, 0, -1):
        state = int(backpointer[state, t])
        best_path.append(state)
    best_path.reverse()

    return [states[i] for i in best_path]

# Demo: tag a two-word sentence with a toy two-state HMM.
states = ['NOUN', 'VERB']
vocab = dict(dog=0, barks=1)

# Rows of every table follow the order of `states`.
start_prob = np.array([0.6, 0.4])
trans_prob = np.array([
    [0.7, 0.3],
    [0.4, 0.6],
])
# Columns follow the indices stored in `vocab`.
emit_prob = np.array([
    [0.9, 0.1],
    [0.2, 0.8],
])

sentence = ['dog', 'barks']
predicted_tags = viterbi(
    words=sentence,
    states=states,
    start_prob=start_prob,
    trans_prob=trans_prob,
    emit_prob=emit_prob,
    vocab=vocab,
)
print("Predicted POS tags:", predicted_tags)


Predicted POS tags: ['NOUN', 'VERB']


In [None]:
import numpy as np
# Define RNN class
class SimpleRNN:
    """Minimal many-to-one tanh RNN trained with per-sequence SGD.

    The network consumes a sequence one step at a time and produces a single
    output vector from the final hidden state:

        h_t = tanh(Wx @ x_t + Wh @ h_{t-1} + bh)
        y   = Wy @ h_T + by
    """

    def __init__(self, input_size, hidden_size, output_size, learning_rate=0.01):
        """Create the parameters.

        Parameters
        ----------
        input_size : int
            Number of features per time step.
        hidden_size : int
            Size of the hidden state.
        output_size : int
            Size of the output vector.
        learning_rate : float
            Step size used by ``train``.
        """
        self.input_size = input_size
        self.hidden_size = hidden_size
        self.output_size = output_size
        self.learning_rate = learning_rate

        # He-style scaling: standard deviation sqrt(2 / fan_in).
        self.Wx = np.random.randn(hidden_size, input_size) * np.sqrt(2.0 / input_size)
        self.Wh = np.random.randn(hidden_size, hidden_size) * np.sqrt(2.0 / hidden_size)
        self.Wy = np.random.randn(output_size, hidden_size) * np.sqrt(2.0 / hidden_size)

        # Bias terms
        self.bh = np.zeros((hidden_size, 1))
        self.by = np.zeros((output_size, 1))

    def forward(self, inputs):
        """Run the sequence through the network.

        ``inputs`` is iterated step by step; each step is reshaped to a
        column vector.  The inputs and every hidden state are cached on the
        instance for a subsequent ``backward`` call.

        Returns
        -------
        (y, h) : tuple of ndarray
            ``y`` has shape (output_size, 1) and ``h`` is the final hidden
            state with shape (hidden_size, 1).
        """
        h = np.zeros((self.hidden_size, 1))  # Initial hidden state
        self.last_inputs = inputs
        self.last_hs = {0: h}

        # Forward pass through time steps
        for t, x in enumerate(inputs):
            # asarray lets plain Python numbers through as well as ndarrays.
            x = np.asarray(x, dtype=float).reshape(-1, 1)
            h = np.tanh(np.dot(self.Wx, x) + np.dot(self.Wh, h) + self.bh)
            self.last_hs[t + 1] = h

        y = np.dot(self.Wy, h) + self.by
        return y, h

    def backward(self, d_y, lr=0.01):
        """Backpropagation Through Time followed by one SGD step.

        Must be called after ``forward``; it uses the inputs and hidden
        states cached there and updates all parameters in place.

        Parameters
        ----------
        d_y : array-like
            Gradient of the loss with respect to the output ``y``; reshaped
            to a column vector.
        lr : float
            Step size for this update.
        """
        n = len(self.last_inputs)
        d_y = np.asarray(d_y, dtype=float).reshape(-1, 1)

        # Gradients initialization
        dWx = np.zeros_like(self.Wx)
        dWh = np.zeros_like(self.Wh)
        dbh = np.zeros_like(self.bh)

        # The output layer reads only the final hidden state, so the output
        # gradient enters the recurrence exactly once, at the last step.
        dWy = np.dot(d_y, self.last_hs[n].T)
        dby = d_y.copy()
        dh = np.dot(self.Wy.T, d_y)

        for t in reversed(range(n)):
            x = np.asarray(self.last_inputs[t], dtype=float).reshape(-1, 1)
            h = self.last_hs[t + 1]

            dh_raw = (1 - h ** 2) * dh  # tanh derivative

            dbh += dh_raw
            dWx += np.dot(dh_raw, x.T)
            dWh += np.dot(dh_raw, self.last_hs[t].T)

            # Gradient flowing into the previous hidden state.
            dh = np.dot(self.Wh.T, dh_raw)

        # Gradient descent update (in-place on the parameter arrays)
        for param, dparam in zip([self.Wx, self.Wh, self.Wy, self.bh, self.by],
                                 [dWx, dWh, dWy, dbh, dby]):
            param -= lr * dparam

    def train(self, X_train, Y_train, epochs=100):
        """Train with one SGD update per sequence, using ``self.learning_rate``.

        Prints the summed squared error every 10 epochs.

        Parameters
        ----------
        X_train : iterable of sequences
            One input sequence per sample.
        Y_train : iterable
            One target per sample; each is reshaped to a column vector so it
            lines up with the (output_size, 1) prediction.
        epochs : int
            Number of passes over the data.
        """
        for epoch in range(epochs):
            loss = 0
            for x_seq, y_true in zip(X_train, Y_train):
                # Column-vector target: a flat (k,) target would broadcast
                # against the (k, 1) prediction into a (k, k) matrix.
                y_true = np.asarray(y_true, dtype=float).reshape(-1, 1)
                y_pred, _ = self.forward(x_seq)
                error = y_pred - y_true
                loss += np.square(error).sum()

                # `error` is the gradient of 0.5 * squared error w.r.t. y_pred.
                self.backward(error, lr=self.learning_rate)

            if epoch % 10 == 0:
                print(f"Epoch {epoch}: Loss = {loss:.4f}")

    def predict(self, X):
        """Return the network output for a single input sequence ``X``."""
        y_pred, _ = self.forward(X)
        return y_pred

# Toy time series: 100 samples of sin(x) for x evenly spaced on [0, 10].
np.random.seed(42)
data = np.sin(np.linspace(0, 10, 100))

# Cut the series into (5-step window -> next value) training pairs.
seq_length = 5
n_windows = len(data) - seq_length
X_train = np.array([data[start:start + seq_length] for start in range(n_windows)])
# The target for window `start` is the sample right after it, i.e. data[start + seq_length].
Y_train = data[seq_length:].copy().reshape(-1, 1)

# Build the network and fit it to the windows.
rnn = SimpleRNN(input_size=1, hidden_size=10, output_size=1, learning_rate=0.01)
rnn.train(X_train, Y_train, epochs=100)

# Sanity check on the first training window (this is not held-out data).
test_seq = X_train[0]
pred = rnn.predict(test_seq)
print(f"Predicted Value: {pred[0,0]:.4f}, Actual Value: {Y_train[0,0]:.4f}")
