<a href="https://colab.research.google.com/github/Sifatkhan-1915020/deeplearning-/blob/main/LSTM%20.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
import numpy as np

In [10]:
def sigmoid(x):
    """Return the logistic sigmoid 1 / (1 + exp(-x)), element-wise.

    The value is evaluated through exp(-|x|), which cannot overflow, so
    large-magnitude negative inputs do not trigger NumPy's overflow warning
    the way a direct ``np.exp(-x)`` does.

    Args:
        x: Real scalar or array-like.

    Returns:
        A NumPy scalar for scalar input, otherwise an array shaped like ``x``.
    """
    z = np.asarray(x)
    # exp(-|z|) lies in (0, 1]: it may underflow to 0 but never overflows.
    decay = np.exp(-1.0 * np.abs(z))
    # z >= 0 uses 1 / (1 + e^-z); z < 0 uses the equivalent e^z / (1 + e^z).
    result = np.where(z >= 0, 1.0 / (1.0 + decay), decay / (1.0 + decay))
    # Indexing with () unwraps a 0-d result to a scalar and leaves arrays as is.
    return result[()]

def tanh(x):
    """Element-wise hyperbolic tangent of ``x`` (thin wrapper over NumPy)."""
    activated = np.tanh(x)
    return activated

def sigmoid_derivative(x):
    """Derivative of the logistic sigmoid, evaluated at pre-activation ``x``.

    Uses the identity s'(x) = s(x) * (1 - s(x)). The argument is the raw
    pre-activation value: ``sigmoid`` is applied to it here, so passing an
    already-activated gate value does not give that gate's derivative.
    """
    s = sigmoid(x)
    return s * (1 - s)

def tanh_derivative(x):
    """Derivative of tanh, evaluated at pre-activation ``x``.

    Computes 1 - tanh(x)^2; ``x`` is the raw input, not an activation.
    """
    t = np.tanh(x)
    return 1 - t ** 2

In [3]:
def initialize_lstm_parameters(input_size, hidden_size):
    """Build the weight and bias dictionary for a single LSTM layer.

    For each gate suffix ``f``, ``i``, ``o``, ``c`` (in that order) the
    dictionary receives:

    * ``'W<suffix>'``: shape (hidden_size, input_size + hidden_size), drawn
      from ``np.random.randn`` and scaled by 0.01;
    * ``'b<suffix>'``: shape (hidden_size, 1), all zeros.

    Args:
        input_size: Number of input features per time step.
        hidden_size: Number of hidden units.

    Returns:
        dict mapping the eight parameter names to NumPy arrays.
    """
    concat_width = input_size + hidden_size
    parameters = {}
    for gate in ('f', 'i', 'o', 'c'):
        # Only the weight draw consumes random numbers, so the f, i, o, c
        # order fixes the result for a given global NumPy seed.
        parameters['W' + gate] = np.random.randn(hidden_size, concat_width) * 0.01
        parameters['b' + gate] = np.zeros((hidden_size, 1))
    return parameters


In [4]:
def lstm_cell_forward(xt, a_prev, c_prev, parameters):
    """Advance the LSTM cell by one time step.

    ``a_prev`` is stacked on top of ``xt`` along axis 0 before every matrix
    product, so the leading ``a_prev.shape[0]`` columns of each weight matrix
    act on the hidden state and the remaining columns on the input.

    Args:
        xt: Input for this time step.
        a_prev: Hidden state from the previous step.
        c_prev: Cell state from the previous step.
        parameters: dict holding 'Wf', 'bf', 'Wi', 'bi', 'Wo', 'bo', 'Wc', 'bc'.

    Returns:
        Tuple ``(a_next, c_next)``: the new hidden state and new cell state.
    """
    Wf, bf, Wi, bi, Wo, bo, Wc, bc = (
        parameters[key]
        for key in ('Wf', 'bf', 'Wi', 'bi', 'Wo', 'bo', 'Wc', 'bc')
    )

    stacked = np.concatenate((a_prev, xt), axis=0)

    # Three sigmoid gates plus the tanh candidate for the cell state.
    forget_gate = sigmoid(np.dot(Wf, stacked) + bf)
    input_gate = sigmoid(np.dot(Wi, stacked) + bi)
    output_gate = sigmoid(np.dot(Wo, stacked) + bo)
    candidate = tanh(np.dot(Wc, stacked) + bc)

    # Keep part of the old cell state, add part of the candidate, then
    # expose a gated, squashed view of the result as the hidden state.
    c_next = forget_gate * c_prev + input_gate * candidate
    a_next = output_gate * tanh(c_next)

    return a_next, c_next


In [5]:
def lstm_forward(x, a0, parameters):
    """Run the LSTM over every time step of ``x`` and record per-step caches.

    The callers in this file unpack three values and hand the third to
    ``lstm_backward``, so the per-step intermediates are returned alongside
    the state histories. The step is computed inline (same equations as
    ``lstm_cell_forward``) because the gate activations are needed for the
    cache and ``lstm_cell_forward`` returns only the two states.

    Args:
        x: Inputs of shape (n_x, m, T_x).
        a0: Initial hidden state of shape (n_a, m). The initial cell state is
            all zeros.
        parameters: dict holding 'Wf', 'bf', 'Wi', 'bi', 'Wo', 'bo', 'Wc', 'bc'.
            Each weight multiplies the hidden state stacked on top of the
            input, so its hidden-state columns come first.

    Returns:
        Tuple ``(a, c, caches)``:
            a: hidden states, shape (n_a, m, T_x).
            c: cell states, shape (n_a, m, T_x).
            caches: list of T_x tuples
                ``(a_next, c_next, a_prev, c_prev, ft, it, cct, ot, xt)``
                where ``ft``, ``it``, ``ot`` are the sigmoid gate activations
                and ``cct`` is the tanh candidate activation.

    Raises:
        ValueError: if ``x`` and ``a0`` disagree on the batch size ``m``.
    """
    _, m, T_x = x.shape
    n_a, m_a0 = a0.shape
    if m_a0 != m:
        raise ValueError(
            f"batch size mismatch: x has m={m} but a0 has m={m_a0}"
        )

    a = np.zeros((n_a, m, T_x))
    c = np.zeros((n_a, m, T_x))
    caches = []

    a_prev = a0
    c_prev = np.zeros((n_a, m))

    for t in range(T_x):
        xt = x[:, :, t]
        concat = np.concatenate((a_prev, xt), axis=0)

        ft = sigmoid(np.dot(parameters['Wf'], concat) + parameters['bf'])
        it = sigmoid(np.dot(parameters['Wi'], concat) + parameters['bi'])
        cct = tanh(np.dot(parameters['Wc'], concat) + parameters['bc'])
        ot = sigmoid(np.dot(parameters['Wo'], concat) + parameters['bo'])

        c_next = ft * c_prev + it * cct
        a_next = ot * tanh(c_next)

        a[:, :, t] = a_next
        c[:, :, t] = c_next
        # xt is stored too: the weight gradients need the full stacked input.
        caches.append((a_next, c_next, a_prev, c_prev, ft, it, cct, ot, xt))

        a_prev, c_prev = a_next, c_next

    return a, c, caches


In [9]:
def compute_loss(y, y_hat):
    """Sum of squared errors between ``y`` and ``y_hat``, divided by ``y.shape[1]``.

    Args:
        y: Target array; its second axis length is used as the divisor.
        y_hat: Prediction array, broadcast-compatible with ``y``.

    Returns:
        The scalar loss value.
    """
    residual = y - y_hat
    squared_error_total = np.sum(residual ** 2)
    divisor = y.shape[1]
    return squared_error_total / divisor


In [7]:
def lstm_backward(da, caches, parameters):
    (a_next, c_next, a, c, ft, it, cct, ot) = caches[-1]
    n_a, m, T_x = da.shape

    gradients = {}

    gradients['dWf'] = np.zeros_like(parameters['Wf'])
    gradients['dbf'] = np.zeros_like(parameters['bf'])
    gradients['dWi'] = np.zeros_like(parameters['Wi'])
    gradients['dbi'] = np.zeros_like(parameters['bi'])
    gradients['dWo'] = np.zeros_like(parameters['Wo'])
    gradients['dbo'] = np.zeros_like(parameters['bo'])
    gradients['dWc'] = np.zeros_like(parameters['Wc'])
    gradients['dbc'] = np.zeros_like(parameters['bc'])

    da_prev = np.zeros_like(a_next)
    dc_prev = np.zeros_like(c_next)

    for t in reversed(range(T_x)):
        a_next, c_next, a_prev, c_prev, ft, it, cct, ot = caches[t]

        dot = da[:,:,t] * tanh(c_next) * sigmoid_derivative(ot)
        dft = da[:,:,t] * dc_prev * c_prev * sigmoid_derivative(ft)
        dit = da[:,:,t] * dc_prev * cct * sigmoid_derivative(it)
        dcct = da[:,:,t] * dc_prev * it * tanh_derivative(cct)

        concat = np.concatenate((a_prev, a[:,:,t]), axis=0)

        gradients['dWf'] += np.dot(dft, concat.T)
        gradients['dbf'] += np.sum(dft, axis=1, keepdims=True)
        gradients['dWi'] += np.dot(dit, concat.T)
        gradients['dbi'] += np.sum(dit, axis=1, keepdims=True)
        gradients['dWo'] += np.dot(dot, concat.T)
        gradients['dbo'] += np.sum(dot, axis=1, keepdims=True)
        gradients['dWc'] += np.dot(dcct, concat.T)
        gradients['dbc'] += np.sum(dcct, axis=1, keepdims=True)

        da_prev = np.dot(parameters['Wf'][:, :n_a].T, dft) + np.dot(parameters['Wi'][:, :n_a].T, dit) + np.dot(parameters['Wo'][:, :n_a].T, dot) + np.dot(parameters['Wc'][:, :n_a].T, dcct)
        dc_prev = ft * dc_prev + it * dcct * sigmoid_derivative(cct)

    return gradients


In [8]:
def update_parameters(parameters, gradients, learning_rate):
    """Apply one gradient-descent step to the eight LSTM parameters, in place.

    For every parameter name ``P`` in Wf, bf, Wi, bi, Wo, bo, Wc, bc the
    update ``P -= learning_rate * gradients['d' + P]`` is applied to the
    array stored in ``parameters``.

    Args:
        parameters: dict of parameter arrays; modified in place.
        gradients: dict with one 'd'-prefixed entry per parameter.
        learning_rate: Step size multiplying each gradient.

    Returns:
        The same ``parameters`` dict that was passed in.
    """
    for name in ('Wf', 'bf', 'Wi', 'bi', 'Wo', 'bo', 'Wc', 'bc'):
        step = learning_rate * gradients['d' + name]
        parameters[name] -= step

    return parameters


In [13]:
def train_lstm(x_train, y_train, parameters, epochs, learning_rate):
    """Train the LSTM with full-batch gradient descent on squared error.

    Each epoch runs the forward pass from a zero hidden state, takes the
    first ``n_y`` hidden units as the prediction, records the loss, and
    applies one gradient step. This relies on ``lstm_forward`` returning
    ``(a, c, caches)`` and on ``lstm_backward`` accepting a gradient with the
    same shape as the hidden-state history ``a``.

    Args:
        x_train: Inputs of shape (n_x, m, T_x).
        y_train: Targets of shape (n_y, m, T_y) with the same ``m`` as
            ``x_train``, ``T_y == T_x`` and ``n_y`` no larger than the
            hidden size.
        parameters: LSTM parameter dict; it is passed to ``update_parameters``
            every epoch, which updates the arrays in place.
        epochs: Number of passes over the training data.
        learning_rate: Gradient-descent step size.

    Returns:
        Tuple ``(parameters, loss_history)`` where ``loss_history`` holds the
        loss measured before each epoch's update.

    Raises:
        ValueError: if the shapes of ``x_train``, ``y_train`` and the hidden
            size are inconsistent.
    """
    _, m_x, T_x = x_train.shape
    n_y, m, T_y = y_train.shape
    n_a = parameters['Wf'].shape[0]

    if m_x != m or T_x != T_y:
        raise ValueError(
            f"x_train has (m, T)=({m_x}, {T_x}) but y_train has ({m}, {T_y})"
        )
    if n_y > n_a:
        raise ValueError(
            f"target size n_y={n_y} exceeds hidden size n_a={n_a}"
        )

    loss_history = []

    for epoch in range(epochs):
        a0 = np.zeros((n_a, m))
        a, _, caches = lstm_forward(x_train, a0, parameters)
        y_hat = a[:n_y, :, :]

        loss = compute_loss(y_train, y_hat)
        loss_history.append(loss)

        # Gradient of sum((y - y_hat) ** 2) / m with respect to every hidden
        # unit: units that are not part of the prediction get zero.
        da = np.zeros_like(a)
        da[:n_y, :, :] = 2 * (y_hat - y_train) / m

        gradients = lstm_backward(da, caches, parameters)

        parameters = update_parameters(parameters, gradients, learning_rate)

        if epoch % 100 == 0:
            print(f"Epoch {epoch}, Loss: {loss}")

    return parameters, loss_history


In [12]:
def test_lstm(x_test, parameters, n_y=None):
    """Run the LSTM forward on ``x_test`` and return its leading hidden units.

    Args:
        x_test: Inputs of shape (n_x, m, T_x).
        parameters: LSTM parameter dict; the hidden size is read from the
            row count of ``parameters['Wf']``.
        n_y: Number of leading hidden units to return. Defaults to
            ``x_test.shape[0]`` (the input feature count), which is what this
            function returned before the argument existed.

    Returns:
        Array ``a[:n_y, :, :]`` sliced from the hidden-state history.
    """
    n_a = parameters['Wf'].shape[0]
    a0 = np.zeros((n_a, x_test.shape[1]))
    # Only the hidden states are needed. Taking element 0 works whether
    # lstm_forward returns (a, c) or (a, c, caches).
    a = lstm_forward(x_test, a0, parameters)[0]
    rows = x_test.shape[0] if n_y is None else n_y
    return a[:rows, :, :]
