In [1]:
from __future__ import unicode_literals, print_function, division
from io import open
import unicodedata
import string
import re
import random

import torch
import torch.nn as nn
from torch import optim
import torch.nn.functional as F

# Pick the compute device once for the whole notebook: the GPU when CUDA is
# usable in this environment, otherwise the CPU.
if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")

In [3]:
# Integer ids of the two special marker tokens (EOS and SOS).
EOS_token, SOS_token = 0, 1

class Lang:
    """Vocabulary for one language: word <-> index maps plus word counts.

    Indices 0 and 1 are pre-assigned to the 'EOS' and 'SOS' markers, so the
    first real word receives index 2.

    Attributes:
        name: Label given to this vocabulary.
        word2idx: Maps each known word to its integer index.
        word2count: Maps each known word to how many times it was added.
        idx2word: Inverse of ``word2idx``; also holds the two marker entries.
        num_words: Size of ``idx2word``, i.e. the next free index.
    """

    def __init__(self, name):
        self.name = name
        self.word2idx = {}
        self.word2count = {}
        self.idx2word = {0: 'EOS', 1: 'SOS'}
        # Two slots are already taken by the markers above.
        self.num_words = 2

    def add_setence(self, sentence):
        """Register every token of ``sentence``, splitting on single spaces.

        The (misspelled) name is kept so existing callers keep working;
        ``add_sentence`` is the same method under the correct spelling.
        """
        for word in sentence.split(' '):
            self.add_word(word)

    # Correctly spelled alias for ``add_setence``.
    add_sentence = add_setence

    def add_word(self, word):
        """Assign the next free index to a new word, or bump a known word's count."""
        if word not in self.word2idx:
            self.word2idx[word] = self.num_words
            self.word2count[word] = 1
            self.idx2word[self.num_words] = word
            self.num_words += 1
        else:
            # Increment this word's counter; the previous code tried to add 1
            # to the whole dict, which raises TypeError.
            self.word2count[word] += 1
            

In [4]:
# Build a vocabulary object labelled 'france'; no sentences are added here.
france = Lang('france')

In [5]:
# Bare expression: as the last line of the cell it is echoed as the cell output.
france

<__main__.Lang at 0x11a5ec128>

In [6]:
#Helper functions!
def unicodeToAscii(s):
    """Strip combining marks (accents) from ``s``.

    The string is decomposed with NFD so that accented letters become a base
    letter followed by combining marks, and every character whose Unicode
    category is 'Mn' is then dropped. Characters that have no such
    decomposition are returned unchanged, even if they are not ASCII.
    """
    decomposed = unicodedata.normalize('NFD', s)
    kept = [ch for ch in decomposed if unicodedata.category(ch) != 'Mn']
    return ''.join(kept)

# Lowercase, trim, and remove non-letter characters
def normalizeString(s):
    """Return a lowercased, accent-free, trimmed, letters-and-punctuation form of ``s``.

    Steps, in order:
      1. lowercase, strip surrounding whitespace, remove accents via
         ``unicodeToAscii``;
      2. put a space in front of each ``.``, ``!`` or ``?`` so it becomes its
         own token;
      3. collapse every run of characters other than letters and ``.!?`` into
         a single space;
      4. strip again, because step 3 turns leading/trailing non-letter runs
         (digits, quotes, ...) into a space that would otherwise produce
         empty tokens when the sentence is later split on ``' '``.
    """
    s = unicodeToAscii(s.lower().strip())
    s = re.sub(r"([.!?])", r" \1", s)
    s = re.sub(r"[^a-zA-Z.!?]+", r" ", s)
    return s.strip()

# Read Data

In [7]:
def read_lang(path, lang1, lang2, reverse=False):
    """Load a tab-separated sentence-pair file and create two empty vocabularies.

    Reads ``data/<lang1>-<lang2>.txt`` as UTF-8, splits it into lines, splits
    each line on tabs and passes every field through ``normalizeString``.

    Args:
        path: Currently unused -- the file location is built from ``lang1``
            and ``lang2``. The parameter is kept so existing calls still work.
        lang1: Name of the language in the first column of the file.
        lang2: Name of the language in the second column of the file.
        reverse: When true, each pair is reversed and the roles of the two
            languages are swapped.

    Returns:
        ``(input_lang, output_lang, pairs)``: two freshly constructed ``Lang``
        objects (no words are added here) and the list of normalized pairs.
    """
    print('Starting ! ! !')
    # The context manager closes the file handle even if reading fails.
    with open('data/%s-%s.txt' % (lang1, lang2), encoding='utf-8') as corpus:
        lines = corpus.read().strip().split('\n')

    pairs = [[normalizeString(s) for s in l.split('\t')] for l in lines]

    # Reverse pairs, make Lang instances
    if reverse:
        pairs = [list(reversed(p)) for p in pairs]
        input_lang = Lang(lang2)
        output_lang = Lang(lang1)
    else:
        input_lang = Lang(lang1)
        output_lang = Lang(lang2)

    return input_lang, output_lang, pairs

In [11]:
class EncodeRNN(nn.Module):
    """GRU encoder that processes one token index per call.

    Args:
        input_size: Number of rows in the embedding table.
        hidden_size: Width of the embedding vectors and of the GRU state.
    """

    def __init__(self, input_size, hidden_size):
        super(EncodeRNN, self).__init__()
        self.hidden = hidden_size

        self.emb = nn.Embedding(input_size, hidden_size)
        self.gru = nn.GRU(hidden_size, hidden_size)

    def forward(self, x, hidden):
        """Embed ``x`` and advance the GRU by one step.

        Args:
            x: Index tensor; its embedding is reshaped to ``(1, 1, -1)``, so
                it is expected to hold a single token index.
            hidden: Previous GRU state, e.g. the result of ``initHidden()``.

        Returns:
            ``(output, hidden)`` exactly as returned by the GRU.
        """
        # The layers live on this module; ``torch.nn`` has no ``emb``/``gru``
        # attributes, so the previous ``nn.emb`` / ``nn.gru`` calls raised
        # AttributeError.
        embed = self.emb(x).view(1, 1, -1)
        output, hidden = self.gru(embed, hidden)
        return output, hidden

    def initHidden(self):
        """Return an all-zero initial state of shape ``(1, 1, hidden_size)``.

        The tensor is created on the device that holds this module's
        parameters, so it always matches the model.
        """
        return torch.zeros(1, 1, self.hidden, device=self.emb.weight.device)
        

In [12]:
class DecodeRNN(nn.Module):
    """GRU decoder that emits log-probabilities over the output vocabulary.

    Args:
        hidden: Width of the embedding vectors and of the GRU state.
        output: Output vocabulary size (embedding rows and logits produced).
    """

    def __init__(self, hidden, output):
        super(DecodeRNN, self).__init__()
        self.hidden = hidden

        self.emb = nn.Embedding(output, hidden)
        self.gru = nn.GRU(hidden, hidden)
        self.fc = nn.Linear(hidden, output)

    def forward(self, x, hidden):
        """Run one decoding step.

        Args:
            x: Index tensor; its embedding is reshaped to ``(1, 1, -1)``, so
                it is expected to hold a single token index.
            hidden: Previous GRU state, e.g. the result of ``initHidden()``.

        Returns:
            ``(output, hidden)`` where ``output`` has shape ``(1, output)``
            and contains log-probabilities, and ``hidden`` is the new GRU
            state.
        """
        embed = self.emb(x).view(1, 1, -1)
        # Use this module's own GRU; ``torch.nn`` has no ``gru`` attribute.
        output, hidden = self.gru(embed, hidden)
        # Drop the length-1 sequence axis, project to vocabulary size with the
        # linear layer, then normalise over the vocabulary axis. Previously
        # ``self.fc`` was never applied and the softmax ran over a size-1
        # axis, which can only yield zeros.
        output = F.log_softmax(self.fc(output[0]), dim=1)
        return output, hidden

    def initHidden(self):
        """Return an all-zero initial state of shape ``(1, 1, hidden)``.

        The tensor is created on the device that holds this module's
        parameters, so it always matches the model.
        """
        # The width is stored as ``self.hidden``; ``self.hidden_size`` was
        # never set on this class.
        return torch.zeros(1, 1, self.hidden, device=self.emb.weight.device)