In [2]:
import pandas as pd
import numpy as np
import torch 
import torch.nn as nn
import torch.nn.functional as F

import os

import random

In [3]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [4]:
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

In [5]:
def get_voc(df):
    english = " ".join(df["english"].tolist()).split(" ")
    eng_voc = list(dict.fromkeys(english))
    french  = " ".join(df["french"].tolist()).split(" ")
    fre_voc = list(dict.fromkeys(french))
    return {"eng_voc":eng_voc, "fre_voc":fre_voc} 

In [6]:
MAX_LENGTH = 40

## The encoder:

In [7]:
class Encoder_model(nn.Module):
    def __init__(self, input_size, hidden_size, vocabulary_size):
        super(Encoder_model, self).__init__()
        self.input_size = input_size
        self.hidden_size = hidden_size
        self.embd = nn.Embedding(vocabulary_size, hidden_size)
        self.gru = nn.GRU(hidden_size, hidden_size)
        
    def forward(self, x, hidden):
        x = self.embd(torch.tensor(x).long())
        x, hidden = self.gru(x.view(1,1,-1), hidden)  
        x, hidden = self.gru(x.view(1,1,-1), hidden) 
        x, hidden = self.gru(x.view(1,1,-1), hidden) 
        x, hidden = self.gru(x.view(1,1,-1), hidden) 
            
        return x, hidden

    def inithidden(self):
        return torch.zeros(1,1,self.hidden_size).float().to(device=device)
            
            


## The Decoder:

In [8]:
class Attention_decoder(nn.Module):
    def __init__(self, hidden_size, output_size, dropout_p=0.1, max_length=MAX_LENGTH):
        super(Attention_decoder, self).__init__()
        self.hidden_size = hidden_size
        self.output_size = output_size
        self.dropout_p = dropout_p
        self.max_length = max_length

        self.embedding = nn.Embedding(self.output_size, self.hidden_size)
        self.attn = nn.Linear(self.hidden_size * 2, self.max_length)
        self.attn_combine = nn.Linear(self.hidden_size * 2, self.hidden_size)
        self.dropout = nn.Dropout(self.dropout_p)
        self.gru = nn.GRU(self.hidden_size, self.hidden_size)
        #self.lstm = nn.LSTM(hidden_size, hidden_size)
        self.out = nn.Linear(self.hidden_size, self.output_size)
        
    def forward(self, input, hidden, encoder_outputs):
        embedded = self.embedding(input)
        embedded = self.dropout(embedded)
        
        CONCAT = torch.cat((embedded, hidden[0,0]), 0)
        ATTN = self.attn(CONCAT)
       
        attn_weights = F.softmax(ATTN, dim=0)
       
        attn_applied = torch.bmm(attn_weights.view(1,1,MAX_LENGTH),
                                 encoder_outputs.view(1,MAX_LENGTH,-1))
        
        output = self.attn_combine(torch.cat((embedded, attn_applied[0,0]), 0))
        output = F.relu(output)
        
        output, hidden = self.gru(output.view(1,1,-1), hidden)
        output, hidden = self.gru(output.view(1,1,-1), hidden)
        output, hidden = self.gru(output.view(1,1,-1), hidden)
        output, hidden = self.gru(output.view(1,1,-1), hidden)
        
        output = F.log_softmax(self.out(output[0]), dim=1)
        return output, hidden, attn_weights
        
        
        return x, hidden

In [9]:
class data_loader():
    def __init__(self, dataset_path):

        
        df_ = pd.read_csv(dataset_path)
        df = pd.DataFrame()
        df["inputs"] = df_["inputs"]
        df["outputs"] = df_["outputs"]

        self.dataframe = df

        self.len = len(self.dataframe)
        
        
        
        vocab = []
        for line in df_["outputs"]:
            for word in line.split(" "):
                vocab.append(word)
        for line in df_["inputs"]:
            for word in line.split(" "):
                vocab.append(word)
        
        vocab = list(dict.fromkeys(vocab))
        vocab.sort()
        self.vocab = vocab
        self.len_vocab = len(self.vocab)
        
    def __len__(self):
        return self.len
    
    def __getitem__(self, idx):
        info = self.dataframe.iloc[idx]
        return {"input":info["inputs"], "output":info["outputs"]}
    
    def input2tensor(self, sentence):
        vocab = self.vocab
        
        idx_list = []
        words = sentence.strip().split(" ")

        for word in words:
            idx_list.append(vocab.index(word))
            
        return torch.tensor(data=idx_list).long()
    
    def get_random_indexes(self):
        L = list(range(0,self.len))
        random.shuffle(L)
        return L

## The training yaaas:

In [10]:
def train(input, output, optim1, optim2, criterion):
    input = input.to(device=device)
    output = output.to(device=device)

    optim1.zero_grad()
    optim2.zero_grad()
    hidden = encoder.inithidden()
    loss = 0
    
    #The encoding phase
    encoder_outputs = torch.zeros(MAX_LENGTH, encoder.hidden_size).to(device=device)
    
    for i in range(len(input)):
        out , hidden = encoder(input[i], hidden)
        encoder_outputs[i] = out[0,0]
        
    luck = random.randint(0,1)

    if luck: #EASY TEACHER
        idx = output[0]    
        for i in range(len(output)-1):
            out, hidden, decoder_attention = decoder(idx, hidden, encoder_outputs)
            idx = torch.argmax(out)
            out = out.view(1,-1)
            y = output[i+1].view(1) 
            loss += criterion(out, y)
    else:    #ROUGH TEACHER
        for i in range(len(output)-1):
            out, hidden, decoder_attention = decoder(output[i], hidden, encoder_outputs)
            out = out.view(1,-1)
            y = output[i+1].view(1) 
            loss += criterion(out, y)

    
        
    loss.backward()
    optim1.step()
    optim2.step()
        
        
    return loss/len(output)
        
        
    

In [11]:
def evaluate(sentence): 
    vocab = sentences_dataset.vocab
    idxs = sentences_dataset.input2tensor(sentence).to(device=device)
    hidden = encoder.inithidden()
    encoder_outputs = torch.zeros(MAX_LENGTH, encoder.hidden_size).to(device=device)

    for i in range(len(idxs)):
        out, hidden = encoder(idxs[i], hidden)
        encoder_outputs[i] = out[0,0]

        
    idx = torch.tensor(13).long().to(device=device)
    for _ in range(10):
        out, hidden, __ = decoder(idx, hidden, encoder_outputs)
        idx = torch.argmax(out)
        if vocab[idx] == "<eos>":
            break
        print(vocab[idx], end=" ")
        
    print("")

In [13]:
sentences_dataset = data_loader(dataset_path="drive/My Drive/data/chatbot.csv")

In [14]:
len_vocab = sentences_dataset.len_vocab

encoder = Encoder_model(1, 256, len_vocab).to(device=device)
decoder = Attention_decoder( 256, len_vocab).to(device=device)


criterion   = nn.CrossEntropyLoss()
optim1      = torch.optim.Adam(encoder.parameters())
optim2      = torch.optim.Adam(decoder.parameters())

epochs      = 3
print_step  = 100
losses      = []

In [None]:
for epoch in range(1, epochs + 1):
    print("[EPOCH] Epoch number:", epoch)
    
    #get the list of indexes
    indexes = sentences_dataset.get_random_indexes()
    #loop over all indexes ( all sentences )
    for (i, idx) in enumerate(indexes):

        inp_out = sentences_dataset[idx]
        #get english and french sentences
        input  = inp_out["input"]
        output  = inp_out["output"]
        #print(input)
        #print(output)
        #creat arrays of tensors
        input = sentences_dataset.input2tensor(input)
        output = sentences_dataset.input2tensor(output)

        if len(input) <= MAX_LENGTH:
            loss = train(input, output, optim1, optim2, criterion)
            loss = loss.item()
            losses.append(loss)
            
            if i % 20 == 0:
                print("     [LOSS] Loss in step",i,"=", loss)
            if i % 100 == 0:
                evaluate("how are you doing")

            if i % 1000 == 0:
                torch.save(encoder.state_dict(), "drive/My Drive/data/chatbot_encoder")
                torch.save(decoder.state_dict(), "drive/My Drive/data/chatbot_decoder")


[EPOCH] Epoch number: 1
     [LOSS] Loss in step 0 = 5.250875473022461
I 


  # Remove the CWD from sys.path while we load stuff.


     [LOSS] Loss in step 20 = 4.533743381500244
     [LOSS] Loss in step 40 = 3.4902899265289307
     [LOSS] Loss in step 60 = 4.215512752532959
     [LOSS] Loss in step 80 = 5.537115573883057
     [LOSS] Loss in step 100 = 5.7540411949157715

     [LOSS] Loss in step 120 = 6.377120494842529
     [LOSS] Loss in step 140 = 3.1568763256073
     [LOSS] Loss in step 160 = 4.6624321937561035
     [LOSS] Loss in step 180 = 4.99078893661499
     [LOSS] Loss in step 200 = 5.610987186431885
I can see the way 
     [LOSS] Loss in step 220 = 4.901833534240723
     [LOSS] Loss in step 240 = 5.118227481842041
     [LOSS] Loss in step 260 = 4.078178882598877
     [LOSS] Loss in step 280 = 6.608697891235352
     [LOSS] Loss in step 300 = 5.306408882141113
I dont know 
     [LOSS] Loss in step 320 = 2.6803653240203857
     [LOSS] Loss in step 340 = 6.167900085449219
     [LOSS] Loss in step 360 = 1.9387190341949463
     [LOSS] Loss in step 380 = 5.644211769104004
     [LOSS] Loss in step 400 = 5.17358

In [16]:
evaluate("I love you please")

I 


  # Remove the CWD from sys.path while we load stuff.
