In [None]:
import torch
import numpy as np
import matplotlib.pyplot as plt
# Seed both global RNGs: PyTorch's (used by torch.rand for weight initialisation)
# and NumPy's (used by np.random.* when generating data), so reruns are reproducible.
torch.manual_seed(0)
np.random.seed(0)

In [None]:
"""
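attributes:
    n: number of points to generate
    xdata: generated X points (each X is a list of 2-element tensors [x1, x2])
    ydata: generated Y points (each Y is the sum of x1 over the two positions where x2=1)

    -> For each point (X,Y), first randomizes the length of the input (4 to 9), then selects 2 distinct indices where x2=1 (x2=0 elsewhere), and appends X and Y to the lists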
"""
class Dataset:
    """Synthetic "add the two marked numbers" sequence dataset.

    Every sample is a variable-length sequence of 2-element tensors ``[x1, x2]``.
    ``x1`` is drawn from ``np.random.random()`` and ``x2`` is a marker that is 1
    at exactly two distinct positions and 0 elsewhere. The target is the sum of
    ``x1`` at the two marked positions.

    Attributes:
        n: number of samples generated by each ``create_data`` call.
        xdata: list of sequences; each sequence is a list of shape-(2,) tensors.
        ydata: list of 0-dim tensors holding the targets, aligned with ``xdata``.
    """

    def __init__(self,nop):
        self.n=nop
        self.xdata=[]
        self.ydata=[]

    def create_data(self):
        """Generate ``self.n`` samples and append them to ``xdata`` / ``ydata``.

        The NumPy RNG is consumed in a fixed order per sample (length, the two
        marker indices with rejection of duplicates, then one value per
        position), so results are reproducible under ``np.random.seed``.
        """
        for _ in range(self.n):
            # Sequence length is uniform over 4..9 (upper bound of randint is exclusive).
            seq_len=np.random.randint(4,10)
            first=np.random.randint(0,seq_len)
            second=np.random.randint(0,seq_len)
            # Redraw until the two marked positions differ.
            while second==first:
                second=np.random.randint(0,seq_len)
            marked=(first,second)
            sequence=[
                torch.tensor([np.random.random(),1 if pos in marked else 0])
                for pos in range(seq_len)
            ]
            # Adding the two marked x1 entries yields a fresh 0-dim tensor, so no
            # torch.tensor(<tensor>) copy (and its UserWarning) is needed.
            target=sequence[first][0]+sequence[second][0]
            self.xdata.append(sequence)
            self.ydata.append(target)




In [None]:
"""
    layer: number of layers 
    k: dimension of the hidden state
    U: matrix to multiply with input
    V: matrix to multiply with hidden state 
    W: matrix for getting output
    bh: bias for hidden state
    by:  bias for output 

    forward_pass-> function which takes X and outputs predicted Y
    tavgloss-> takes a test dataset and returns average loss(between predicted y and actual y) for all test data points
"""
class Elmon_RNN:
    """Single-layer recurrent network with a tanh hidden state and a linear read-out.

    Recurrence per time step ``t`` (inputs are 2-element vectors)::

        h_t = tanh(U x_t + V h_{t-1} + bh),   h_0 = 0
        y   = W h_T + by

    Attributes:
        layer: number of layers (always 1).
        k: dimension of the hidden state.
        U: (k, 2) input-to-hidden weights.
        V: (k, k) hidden-to-hidden weights.
        W: (1, k) hidden-to-output weights.
        bh: (k,) hidden bias.
        by: (1,) output bias.
    """

    def __init__(self,k):
        self.layer=1
        self.k=k
        # All parameters are leaf tensors drawn from torch.rand, created in this order.
        self.U=torch.rand(k,2,requires_grad=True)
        self.V=torch.rand(k,k,requires_grad=True)
        self.W=torch.rand(1,k,requires_grad=True)
        self.bh=torch.rand(k,requires_grad=True)
        self.by=torch.rand(1,requires_grad=True)

    def forward_pass(self,X):
        """Run the recurrence over sequence ``X`` and return the shape-(1,) prediction.

        Args:
            X: iterable of shape-(2,) tensors, one per time step.

        Returns:
            Tensor of shape (1,): ``W h_T + by`` for the last hidden state.
        """
        hidden=torch.zeros(self.k)
        for x_t in X:
            hidden=torch.tanh(torch.matmul(self.U,x_t)+torch.matmul(self.V,hidden)+self.bh)
        return torch.matmul(self.W,hidden)+self.by

    def tavgloss(self,test_dataset):
        """Return the mean MSE between predictions and targets over a dataset.

        Args:
            test_dataset: object exposing aligned ``xdata`` (sequences) and
                ``ydata`` (target tensors) lists.

        Returns:
            Python float: average per-sample MSE.

        Raises:
            ValueError: if the dataset contains no samples.
        """
        n_samples=len(test_dataset.xdata)
        if n_samples==0:
            raise ValueError("cannot compute the average loss of an empty dataset")
        loss=torch.nn.MSELoss()
        score=0.0
        # Evaluation only: skip building autograd graphs.
        with torch.no_grad():
            for X,y_target in zip(test_dataset.xdata,test_dataset.ydata):
                y_pred=self.forward_pass(X)
                # Expand the target to the prediction's shape explicitly, so MSELoss
                # does not fall back on (and warn about) implicit broadcasting.
                score+=loss(y_pred,y_target.expand_as(y_pred)).item()
        return score/n_samples

In [None]:
"creating training dataset of 5500 points"
# Build the 5500-sample set that the training loops in later cells iterate over as `data`.
data=Dataset(5500)
data.create_data()

In [None]:
""" 
Initializing an Elmon_RNN model and training it with the training data 
generated above using l.backward and optimizer.step  
"""
model=Elmon_RNN(8)
learning_rate=0.01
loss=torch.nn.MSELoss()
print(model.U)  # input weights before training
optimizer=torch.optim.Adam([model.U,model.V,model.W,model.bh,model.by],lr=learning_rate)
loss1=[]  # average loss on `data` after each epoch
n_iters=20
for epoch in range(n_iters):
    # Plain SGD-style loop: one optimizer step per sample.
    for X,y_target in zip(data.xdata,data.ydata):
        optimizer.zero_grad()
        y_pred=model.forward_pass(X)
        # Targets are 0-dim while predictions are 1-D; expand explicitly so MSELoss
        # does not rely on (and warn about) implicit broadcasting.
        step_loss=loss(y_pred,y_target.expand_as(y_pred))
        step_loss.backward()
        optimizer.step()
    loss1.append(model.tavgloss(data))
print(model.U)  # input weights after training

In [None]:
"""Creating a test data set to calculate avgloss by trained model"""
# Draw a separate 1000-sample set with the same generator and score the trained model on it.
test_data=Dataset(1000)
test_data.create_data()
# Bare last expression: the notebook displays the returned average loss.
model.tavgloss(test_data)
    

In [None]:
"""
Making classes for the gates used in LSTM
"""
class forget_gate:
    """Forget gate: sigmoid(Wf h + Uf x + bf) for a k-dim state and 2-dim input."""

    def __init__(self, k):
        # Parameters are created in the order Wf, Uf, bf (matters for seeded runs).
        self.Wf = torch.rand((k, k), requires_grad=True)
        self.Uf = torch.rand((k, 2), requires_grad=True)
        self.bf = torch.rand((k,), requires_grad=True)

    def forget(self, h, x):
        """Return the gate activation for hidden state ``h`` and input ``x``."""
        recurrent_term = self.Wf @ h
        input_term = self.Uf @ x
        return torch.sigmoid(recurrent_term + input_term + self.bf)

class read_gate:
    """Read (output) gate: sigmoid(Wo h + Uo x + bo) for a k-dim state and 2-dim input."""

    def __init__(self, k):
        # Parameters are created in the order Wo, Uo, bo (matters for seeded runs).
        self.Wo = torch.rand((k, k), requires_grad=True)
        self.Uo = torch.rand((k, 2), requires_grad=True)
        self.bo = torch.rand((k,), requires_grad=True)

    def read(self, h, x):
        """Return the gate activation for hidden state ``h`` and input ``x``."""
        recurrent_term = self.Wo @ h
        input_term = self.Uo @ x
        return torch.sigmoid(recurrent_term + input_term + self.bo)

class write_gate:
    """Write (input) gate: sigmoid(Wi h + Ui x + bi) for a k-dim state and 2-dim input."""

    def __init__(self, k):
        # Parameters are created in the order Wi, Ui, bi (matters for seeded runs).
        self.Wi = torch.rand((k, k), requires_grad=True)
        self.Ui = torch.rand((k, 2), requires_grad=True)
        self.bi = torch.rand((k,), requires_grad=True)

    def write(self, h, x):
        """Return the gate activation for hidden state ``h`` and input ``x``."""
        recurrent_term = self.Wi @ h
        input_term = self.Ui @ x
        return torch.sigmoid(recurrent_term + input_term + self.bi)
    
# One instance of each LSTM gate, sized for an 8-dimensional hidden state.
# The tuple is evaluated left to right, so the gates are still built in the
# order forget -> read -> write.
ft, ot, it = forget_gate(8), read_gate(8), write_gate(8)

In [None]:
"""
    layer: number of layers 
    k: dimension of the hidden state
    U: matrix to multiply with input
    V: matrix to multiply with hidden state 
    W: matrix for getting output
    bh: bias for hidden state
    ft: forget gate of the LSTM
    ot: read gate of the LSTM
    it: write gate of the LSTM

    forward_pass-> function which takes X and outputs predicted Y
    tavgloss-> takes a test dataset and returns average loss(between predicted y and actual y) for all test data points
"""
class LSTM:
    """Single-layer LSTM-style network with a linear read-out of the final hidden state.

    Per time step ``t`` (all gates are evaluated on ``h_{t-1}`` and ``x_t``)::

        c_t = sigmoid(U x_t + V h_{t-1} + bh)          # candidate state
        s_t = forget(h, x) * s_{t-1} + write(h, x) * c_t
        h_t = read(h, x) * sigmoid(s_t)
        y   = W h_T

    Attributes:
        layer: number of layers (always 1).
        k: dimension of the hidden and cell states.
        U: (k, 2) input weights for the candidate state.
        V: (k, k) recurrent weights for the candidate state.
        W: (1, k) hidden-to-output weights.
        bh: (k,) bias for the candidate state.
        ft: forget gate object exposing ``forget(h, x)``.
        ot: read gate object exposing ``read(h, x)``.
        it: write gate object exposing ``write(h, x)``.
    """

    def __init__(self,k,ft,ot,it):
        self.layer=1
        self.k=k
        self.U=torch.rand(k,2,requires_grad=True)
        self.V=torch.rand(k,k,requires_grad=True)
        self.W=torch.rand(1,k,requires_grad=True)
        self.bh=torch.rand(k,requires_grad=True)
        self.ft=ft
        self.ot=ot
        self.it=it

    def forward_pass(self,X):
        """Run the LSTM over sequence ``X`` and return the shape-(1,) prediction.

        Args:
            X: iterable of shape-(2,) tensors, one per time step.

        Returns:
            Tensor of shape (1,): ``W h_T`` for the final hidden state.
        """
        h_prev=torch.zeros(self.k)
        s_prev=torch.zeros(self.k)
        for x_t in X:
            candidate=torch.sigmoid(torch.matmul(self.U,x_t)+torch.matmul(self.V,h_prev)+self.bh)
            s_new=self.ft.forget(h_prev,x_t)*s_prev+self.it.write(h_prev,x_t)*candidate
            # The hidden state must read the cell state just computed for this
            # step, not the previous step's value.
            h_prev=self.ot.read(h_prev,x_t)*torch.sigmoid(s_new)
            s_prev=s_new
        # Project to a single output so W is actually used (and trained); the
        # raw k-dim cell state is an internal quantity, not the prediction.
        return torch.matmul(self.W,h_prev)

    def tavgloss(self,test_dataset):
        """Return the mean MSE between predictions and targets over a dataset.

        Args:
            test_dataset: object exposing aligned ``xdata`` (sequences) and
                ``ydata`` (target tensors) lists.

        Returns:
            Python float: average per-sample MSE.

        Raises:
            ValueError: if the dataset contains no samples.
        """
        n_samples=len(test_dataset.xdata)
        if n_samples==0:
            raise ValueError("cannot compute the average loss of an empty dataset")
        loss=torch.nn.MSELoss()
        score=0.0
        # Evaluation only: skip building autograd graphs.
        with torch.no_grad():
            for X,y_target in zip(test_dataset.xdata,test_dataset.ydata):
                y_pred=self.forward_pass(X)
                # Expand the target to the prediction's shape explicitly, so MSELoss
                # does not fall back on (and warn about) implicit broadcasting.
                score+=loss(y_pred,y_target.expand_as(y_pred)).item()
        return score/n_samples

In [None]:
""" 
Initializing an LSTM model and training it with the training data 
generated above using l.backward and optimizer.step  also storing avgloss after each epoch in a list
"""
model2=LSTM(8,ft,ot,it)
learning_rate=0.01
loss=torch.nn.MSELoss()
print(model2.U)  # this model's input weights before training
# Optimise the LSTM's own matrices together with every gate's weights and biases.
optimizer=torch.optim.Adam(
    [
        model2.U,model2.V,model2.W,model2.bh,
        model2.ft.Wf,model2.ft.Uf,
        model2.ot.Wo,model2.ot.Uo,
        model2.it.Wi,model2.it.Ui,
        model2.ot.bo,model2.it.bi,model2.ft.bf,
    ],
    lr=learning_rate,
)
n_iters=20
loss2=[]  # average loss on `data` after each epoch
for epoch in range(n_iters):
    # One optimizer step per sample.
    for X,y_target in zip(data.xdata,data.ydata):
        optimizer.zero_grad()
        y_pred=model2.forward_pass(X)
        # Targets are 0-dim; expand to the prediction's shape explicitly instead of
        # relying on (and being warned about) implicit broadcasting in MSELoss.
        step_loss=loss(y_pred,y_target.expand_as(y_pred))
        step_loss.backward()
        optimizer.step()
    loss2.append(model2.tavgloss(data))
print(model2.U)  # this model's input weights after training

In [None]:
"""
avgloss on testdata by trained model
"""
# Bare last expression: the notebook displays model2's average loss on test_data.
model2.tavgloss(test_data)

In [None]:
"""
Making classes for the gates used in GRU
"""
class z_gate:
    """GRU z gate: sigmoid(Wz h + Uz x + bz) for a k-dim state and 2-dim input."""

    def __init__(self, k):
        # Parameters are created in the order Wz, Uz, bz (matters for seeded runs).
        self.Wz = torch.rand((k, k), requires_grad=True)
        self.Uz = torch.rand((k, 2), requires_grad=True)
        self.bz = torch.rand((k,), requires_grad=True)

    def zfun(self, h, x):
        """Return the gate activation for hidden state ``h`` and input ``x``."""
        recurrent_term = self.Wz @ h
        input_term = self.Uz @ x
        return torch.sigmoid(recurrent_term + input_term + self.bz)

class r_gate:
    """GRU r gate: sigmoid(Wr h + Ur x + br) for a k-dim state and 2-dim input."""

    def __init__(self, k):
        # Parameters are created in the order Wr, Ur, br (matters for seeded runs).
        self.Wr = torch.rand((k, k), requires_grad=True)
        self.Ur = torch.rand((k, 2), requires_grad=True)
        self.br = torch.rand((k,), requires_grad=True)

    def rfun(self, h, x):
        """Return the gate activation for hidden state ``h`` and input ``x``."""
        recurrent_term = self.Wr @ h
        input_term = self.Ur @ x
        return torch.sigmoid(recurrent_term + input_term + self.br)

In [None]:
"""
    layer: number of layers 
    k: dimension of the hidden state
    U: matrix to multiply with input
    V: matrix to multiply with hidden state 
    W: matrix for getting output
    bh: bias for hidden state
    zt: z gate of GRU
    rt: r gate of GRU

    forward_pass-> function which takes X and outputs predicted Y
    tavgloss-> takes a test dataset and returns average loss(between predicted y and actual y) for all test data points
"""
class GRU:
    """Single-layer GRU-style network with a linear read-out of the final hidden state.

    Per time step ``t`` (both gates are evaluated on ``h_{t-1}`` and ``x_t``)::

        z_t = zfun(h_{t-1}, x_t)
        r_t = rfun(h_{t-1}, x_t)
        c_t = tanh(U x_t + V (h_{t-1} * r_t) + bh)      # candidate state
        h_t = (1 - z_t) * h_{t-1} + z_t * c_t
        y   = W h_T

    Attributes:
        layer: number of layers (always 1).
        k: dimension of the hidden state.
        U: (k, 2) input weights for the candidate state.
        V: (k, k) recurrent weights for the candidate state.
        W: (1, k) hidden-to-output weights.
        bh: (k,) bias for the candidate state.
        zt: z gate object exposing ``zfun(h, x)``.
        rt: r gate object exposing ``rfun(h, x)``.
    """

    def __init__(self,k,zt,rt):
        self.layer=1
        self.k=k
        self.U=torch.rand(k,2,requires_grad=True)
        self.V=torch.rand(k,k,requires_grad=True)
        self.W=torch.rand(1,k,requires_grad=True)
        self.bh=torch.rand(k,requires_grad=True)
        self.zt=zt
        self.rt=rt

    def forward_pass(self,X):
        """Run the GRU over sequence ``X`` and return the shape-(1,) prediction.

        Args:
            X: iterable of shape-(2,) tensors, one per time step.

        Returns:
            Tensor of shape (1,): ``W h_T`` for the final hidden state.
        """
        h_prev=torch.zeros(self.k)
        for x_t in X:
            z_t=self.zt.zfun(h_prev,x_t)
            r_t=self.rt.rfun(h_prev,x_t)
            candidate=torch.tanh(torch.matmul(self.U,x_t)+torch.matmul(self.V,h_prev*r_t)+self.bh)
            h_prev=(1-z_t)*h_prev+z_t*candidate
        # The hidden state is a convex mix of tanh outputs, so it stays inside
        # (-1, 1); project it with W so the prediction is a single unbounded
        # value and W actually receives gradients.
        return torch.matmul(self.W,h_prev)

    def tavgloss(self,test_dataset):
        """Return the mean MSE between predictions and targets over a dataset.

        Args:
            test_dataset: object exposing aligned ``xdata`` (sequences) and
                ``ydata`` (target tensors) lists.

        Returns:
            Python float: average per-sample MSE.

        Raises:
            ValueError: if the dataset contains no samples.
        """
        n_samples=len(test_dataset.xdata)
        if n_samples==0:
            raise ValueError("cannot compute the average loss of an empty dataset")
        loss=torch.nn.MSELoss()
        score=0.0
        # Evaluation only: skip building autograd graphs.
        with torch.no_grad():
            for X,y_target in zip(test_dataset.xdata,test_dataset.ydata):
                y_pred=self.forward_pass(X)
                # Expand the target to the prediction's shape explicitly, so MSELoss
                # does not fall back on (and warn about) implicit broadcasting.
                score+=loss(y_pred,y_target.expand_as(y_pred)).item()
        return score/n_samples

In [None]:
""" 
Initializing a GRU model and training it with the training data 
generated above using l.backward and optimizer.step  also storing avgloss after each epoch in a list
"""
zt=z_gate(8)
rt=r_gate(8)
model3=GRU(8,zt,rt)
learning_rate=0.01
loss=torch.nn.MSELoss()
print(model3.U)  # this model's input weights before training
# Optimise the GRU's own matrices together with both gates' weights and biases.
optimizer=torch.optim.Adam(
    [
        model3.U,model3.V,model3.W,model3.bh,
        model3.rt.Wr,model3.rt.Ur,
        model3.zt.Wz,model3.zt.Uz,
        model3.zt.bz,model3.rt.br,
    ],
    lr=learning_rate,
)
n_iters=20
loss3=[]  # average loss on `data` after each epoch
for epoch in range(n_iters):
    # One optimizer step per sample.
    for X,y_target in zip(data.xdata,data.ydata):
        optimizer.zero_grad()
        y_pred=model3.forward_pass(X)
        # Targets are 0-dim; expand to the prediction's shape explicitly instead of
        # relying on (and being warned about) implicit broadcasting in MSELoss.
        step_loss=loss(y_pred,y_target.expand_as(y_pred))
        step_loss.backward()
        optimizer.step()
    loss3.append(model3.tavgloss(data))
print(model3.U)  # this model's input weights after training

In [None]:
"""
avgloss on testdata by trained model
"""
# Bare last expression: the notebook displays model3's average loss on test_data.
model3.tavgloss(test_data)

In [None]:
"""
Plotting loss after each epoch of each model
"""
# x positions are the 0-based epoch indices; note `ax` is a plain range here,
# not a matplotlib Axes object.
ax=range(n_iters)
# (loss history, plot keyword arguments) for each model, drawn in this order.
curves=(
    (loss1,{'label':'Elmon_RNN'}),
    (loss2,{'color':'r','label':'LSTM'}),
    (loss3,{'color':'orange','label':'GRU'}),
)
for history,line_kwargs in curves:
    plt.plot(ax,history,**line_kwargs)
plt.xlabel('number of epochs')
plt.ylabel('loss after each epoch')
plt.legend()
plt.grid()
plt.show()