In [19]:
import hazm
import numpy as np
import pandas as pd
from hazm import sent_tokenize, word_tokenize
import nltk
from nltk.tokenize import word_tokenize as eng_tokenize
import pickle
import json
import torch
import torch.nn as nn
from torch.utils.data import DataLoader
from torch.utils.data import Dataset
import torch.nn.functional as F
from tensorflow.keras.utils import pad_sequences
import keras

In [5]:
class dataset(Dataset):
    def __init__(self,en_data,fa_data,en_index,fa_index):
        super(dataset,self).__init__()
        for seq in en_data:
            for i in range(len(seq)):
                seq[i]=en_index[seq[i]]
        for seq in fa_data:
            for i in range(len(seq)):
                seq[i]=fa_index[seq[i]]
        self.en_data=torch.from_numpy(pad_sequences(en_data,padding="post",maxlen=10,value=1))
        self.fa_data=torch.from_numpy(pad_sequences(fa_data,padding="post",maxlen=10,value=1))
    def __len__(self):
        return len(self.en_data)
    def __getitem__(self,idx):
        return self.en_data[idx],self.fa_data[idx]

In [3]:
with open("./hugg_preprocessed/vocab_en", "rb") as fp:   # Unpickling
    vocab_en = pickle.load(fp)
with open("./hugg_preprocessed/vocab_fa", "rb") as fp:   # Unpickling
    vocab_fp = pickle.load(fp)
with open("./hugg_preprocessed/dataset_en", "rb") as fp:   # Unpickling
    dataset_en = pickle.load(fp)
with open("./hugg_preprocessed/dataset_fa", "rb") as fp:   # Unpickling
    dataset_fa = pickle.load(fp)
with open("./hugg_preprocessed/en_index.json", "r") as fp:   #Pickling
    en_index=json.load(fp)
with open("./hugg_preprocessed/fa_index.json", "r") as fp:   #Pickling
    fa_index=json.load(fp)

In [6]:
ds=dataset(dataset_en,dataset_fa,en_index,fa_index)

In [249]:
class LSTM(nn.Module):
    def __init__(self,hidden_size,embeding_size,vocab_size):
        super(LSTM,self).__init__()
        self.input_net=nn.Embedding(num_embeddings=len(vocab),embedding_dim=embeding_dim)
        
        self.WF=nn.Parameter(torch.rand(hidden_size+embeding_size,hidden_size))
        self.BF=nn.Parameter(torch.rand(1,hidden_size))
        self.sigF=nn.Sigmoid()
        
        self.WI1=nn.Parameter(torch.rand(hidden_size+embeding_size,hidden_size))
        self.BI1=nn.Parameter(torch.rand(1,hidden_size))
        self.sigI=nn.Sigmoid()
        self.WI2=nn.Parameter(torch.rand(hidden_size+embeding_size,hidden_size))
        self.BI2=nn.Parameter(torch.rand(1,hidden_size))
        self.tanhI=nn.Tanh()
        
        self.WO=nn.Parameter(torch.rand(hidden_size+embeding_size,hidden_size))
        self.BO=nn.Parameter(torch.rand(1,hidden_size))
        self.tanhO=nn.Tanh()
        self.sigO=nn.Sigmoid()
        
        self.output_network=nn.ParameterList([
            nn.Linear(in_features=hidden_size+hidden_size,out_features=200),
            nn.ReLU(),
            nn.BatchNorm1d(200),
            nn.Linear(in_features=200,out_features=vocab_size),
            nn.Softmax(dim=1),
            nn.Dropout(0.2)
        ])
        
    def forward(self,x_batch,short_memory,long_memory):
        """
        x_batch = (batch_size,embeding_size)
        short_memory =(batch_size,hidden_size)
        long_memory =(batch_size,hidden_size)
        """
        emb_batch=self.input_net(x_batch)
        scaler=emb_batch.shape[0]
        #Forget gate
        new_batch=torch.concat((short_memory,emb_batch),dim=1) #(batch_size,hidden_size+embeding_size)
        zF=torch.matmul(new_batch,self.WF)/scaler +self.BF #batch_size,hidden_size
        aF=self.sigF(zF)
        
        #Input gate
        zI1=torch.matmul(new_batch,self.WI1)/scaler + self.BI1 #batch_size,hidden_size
        aI1=self.sigI(zI1)
        
        zI2=torch.matmul(new_batch,self.WI2)/scaler +self.BI2 #batch_size,hidden_size
        aI2=self.sigI(zI2)
        aI=aI1*aI2
        
        #Output gate
        long_memory=(long_memory*aF)+(long_memory*aI) #batch_size,hidden_size
        
        zO1=torch.matmul(new_batch,self.WO)/scaler +self.BO #batch_size,hidden_size
        aO1=self.sigO(zO1)
        
        aO2=self.tanhO(long_memory)
        
        short_memory=aO1*aO2
        
        x=torch.concat((short_memory,long_memory),dim=1)
        for l in self.output_network:
            x=l(x)
        return short_memory,long_memory,x
        

In [237]:
docs=["my name is parsa","hi , nice to meet you","i am very sad.","its nice to see you again"]

In [238]:
vectorizer=keras.layers.TextVectorization()
vectorizer.adapt(docs)
vocab=vectorizer.get_vocabulary()
dataset=torch.from_numpy(vectorizer(docs).numpy())

In [239]:
hidden_size=256
embeding_dim=128
batch_size=2
layer_number=2
vocab_size=len(vocab)
epochs=50

In [200]:
torch.autograd.set_detect_anomaly(True)

<torch.autograd.anomaly_mode.set_detect_anomaly at 0x7f08ba325300>

In [250]:
model=LSTM(hidden_size=hidden_size,embeding_size=embeding_dim,vocab_size=vocab_size)
oprtimizer=torch.optim.Adam(params=model.parameters(),lr=0.001)
loss_fn=torch.nn.CrossEntropyLoss()
model.train()
for ep in range(epochs):
    loss_list=[]
    short_memory=torch.rand(batch.shape[0],hidden_size)
    long_memory=torch.rand(batch.shape[0],hidden_size)
    for i in range(0,len(dataset),batch_size):
        batch=dataset[i:i+batch_size]
        
        for i in range(batch.shape[1]-1):
            x_batch=batch[:,i]
            new_short,new_long,y_pred=model(x_batch,short_memory,long_memory)
            loss=loss_fn(input=y_pred,target=batch[:,i+1])
            loss.backward()
            oprtimizer.step()
            oprtimizer.zero_grad()
            short_memory=new_short.detach()
            long_memory=new_long.detach()
            loss_list.append(loss.detach().item())
    print(np.mean(loss_list))

2.883557677268982
2.8524888515472413
2.8958346128463743
2.8897704124450683
2.891349744796753
2.849331021308899
2.908386993408203
2.8767438650131227
2.8703625917434694
2.854630160331726
2.8640942335128785
2.861625337600708
2.866396594047546
2.865682768821716
2.8760165214538573
2.8612011671066284
2.8107142448425293
2.807585668563843
2.7391353607177735
2.786743998527527
2.794482398033142
2.7813695907592773
2.814767861366272
2.8705655574798583
2.652857446670532
2.8766693592071535
2.640303540229797
2.765291619300842
2.8045185089111326
2.7939653158187867
2.797621965408325
2.785860705375671
2.8570302724838257
2.69811532497406
2.6587684392929076
2.871030831336975
2.7634974360466003
2.6939253568649293
2.842756915092468
2.7862711191177367
2.715727925300598
2.750377523899078
2.7981619358062746
2.773526334762573
2.8170146703720094
2.7043723464012146
2.8026283264160154
2.841095495223999
2.679882764816284
2.816312074661255


In [235]:
nn.Parameter(torch.Tensor(hidden_size+2,hidden_size)).shape

torch.Size([258, 256])

In [146]:
ls=torch.tensor([[1,2,10,4,5],[3,4,5,4,5],[10,7,9,4,5]])
emb=nn.Embedding(num_embeddings=11,embedding_dim=4)
emb(ls[:,1]).shape

torch.Size([3, 4])