In [1]:

import glob
import os
from io import open

def findFiles(path):
    """Return the list of filesystem paths that match the glob pattern ``path``."""
    matches = glob.glob(path)
    return matches

# Show which name files were found under data/names/.
print(findFiles('data/names/*.txt'))

import unicodedata
import string

# Alphabet the model works with: ASCII letters plus a few punctuation marks.
all_letters = string.ascii_letters + " .,;'"
n_letters = len(all_letters)

def unicodeToAscii(s):
    """Reduce a Unicode string to the characters contained in ``all_letters``.

    The string is NFD-normalized so that accented letters split into a base
    letter plus combining marks; combining marks (category ``Mn``) and any
    character outside ``all_letters`` are then dropped.
    Approach taken from http://stackoverflow.com/a/518232/2809427
    """
    kept = []
    for ch in unicodedata.normalize('NFD', s):
        if unicodedata.category(ch) == 'Mn':
            continue
        if ch in all_letters:
            kept.append(ch)
    return ''.join(kept)

# Sanity check: the accented input should print as plain ASCII.
print(unicodeToAscii('Ślusàrski'))

# category_lines maps each category name to the list of names read from its file;
# all_categories keeps the category names in the order the files are read.
category_lines = {}
all_categories = []

def readLines(filename):
    """Read a UTF-8 text file and return its lines converted to plain ASCII.

    args: filename: path of the file to read
    returns: list of strings, one per line, each passed through
             ``unicodeToAscii``. Lines that are empty after conversion are
             skipped, because an empty name would later become a zero-length
             tensor that the training step cannot process.
    """
    # `with` closes the handle deterministically instead of leaking it.
    with open(filename, encoding='utf-8') as f:
        raw_lines = f.read().strip().split('\n')
    ascii_lines = (unicodeToAscii(line) for line in raw_lines)
    return [line for line in ascii_lines if line]

# Each matched file contributes one category, labelled by the part of the
# file name before the first dot.
for filename in findFiles('data/names/*.txt'):
    # os.path.basename honours the platform's path separator; splitting on '/'
    # leaves the directory in the label when glob returns backslashes (Windows).
    category = os.path.basename(filename).split('.')[0]
    all_categories.append(category)
    lines = readLines(filename)
    category_lines[category] = lines

n_categories = len(all_categories)

['data/names/Korean.txt', 'data/names/Spanish.txt', 'data/names/Scottish.txt', 'data/names/Russian.txt', 'data/names/Japanese.txt', 'data/names/Polish.txt', 'data/names/Czech.txt', 'data/names/French.txt', 'data/names/Dutch.txt', 'data/names/Irish.txt', 'data/names/German.txt', 'data/names/Portuguese.txt', 'data/names/Vietnamese.txt', 'data/names/English.txt', 'data/names/Italian.txt', 'data/names/Greek.txt', 'data/names/Arabic.txt', 'data/names/Chinese.txt']
Slusarski


In [2]:
import torch

def letterToIndex(letter):
    """Return the position of ``letter`` in ``all_letters``, e.g. "a" -> 0.

    Returns -1 when ``letter`` does not occur in ``all_letters``; callers that
    use the result as an index will then address the last position.
    """
    try:
        return all_letters.index(letter)
    except ValueError:
        return -1

def letterToTensor(letter):
    """One-hot encode a single letter as a <1 x n_letters> tensor (demonstration helper)."""
    one_hot = torch.zeros(1, n_letters)
    column = letterToIndex(letter)
    one_hot[0, column] = 1
    return one_hot

def lineToTensor(line):
    """Encode a line as a <line_length x 1 x n_letters> tensor.

    Row ``k`` is the one-hot vector of the k-th letter of ``line``.
    """
    columns = [letterToIndex(ch) for ch in line]
    encoded = torch.zeros(len(line), 1, n_letters)
    for position, column in enumerate(columns):
        encoded[position, 0, column] = 1
    return encoded

# Demo: the one-hot row for a single letter.
print(letterToTensor('J'))

# Demo: a whole name becomes one one-hot row per letter.
print(lineToTensor('Jones').size()) # <line_length x 1 x n_letters>



Columns 0 to 12 
    0     0     0     0     0     0     0     0     0     0     0     0     0

Columns 13 to 25 
    0     0     0     0     0     0     0     0     0     0     0     0     0

Columns 26 to 38 
    0     0     0     0     0     0     0     0     0     1     0     0     0

Columns 39 to 51 
    0     0     0     0     0     0     0     0     0     0     0     0     0

Columns 52 to 56 
    0     0     0     0     0
[torch.FloatTensor of size 1x57]

torch.Size([5, 1, 57])


In [93]:
import numpy as np
import torch.nn as nn
from torch.autograd import Variable


class SRU(nn.Module):
    r"""Recurrent unit whose state is a set of exponential moving averages.

    For every decay rate alpha in ``A`` the unit keeps a running average
    mu^alpha of a feature vector phi. The averages for all alphas are
    concatenated into a single state vector mu of size ``phi_dim * len(A)``.
    """

    def __init__(self, x_dim, phi_dim, r_dim, o_dim, A):
        r"""
        args: x_dim:   dimension of the input x
              phi_dim: dimension of phi (also the dimension of each mu^alpha)
              r_dim:   dimension of r
              o_dim:   dimension of the output o
              A:       1-D tensor of decay rates {alpha_1, alpha_2, ..., alpha_m}
        """

        super(SRU, self).__init__()

        n_alpha      = len(A)
        self.n_alpha = n_alpha
        self.A       = A
        self.phi_dim = phi_dim
        # dimension of mu = dimension of phi * number of alphas
        mu_dim = phi_dim * n_alpha
        self.mu_dim = mu_dim

        # Layer definitions
        self.mu2r    = nn.Linear(mu_dim, r_dim)
        self.xr2phi  = nn.Linear(x_dim + r_dim, phi_dim)
        self.mu2o    = nn.Linear(mu_dim, o_dim)
        # Normalize over the class dimension explicitly; the implicit-dim form
        # is deprecated.
        self.softmax = nn.LogSoftmax(dim=1)

    def forward(self, x, mu):
        """Advance the unit by one time step.

        args: x:  input of shape <batch x x_dim>
              mu: previous state of shape <batch x mu_dim>
        returns: (o, mu) -- log-probabilities of shape <batch x o_dim> and the
                 updated state of shape <batch x mu_dim>
        """
        r = self.mu2r(mu)
        phi = self.xr2phi(torch.cat((x, r), 1))
        mu = self.muphi2mu(mu, phi)
        o = self.softmax(self.mu2o(mu))
        return o, mu

    def muphi2mu(self, mu, phi):
        r"""Apply mu_t^alpha = alpha * mu_{t-1}^alpha + (1 - alpha) * phi_t for all alphas at once.

        mask:     Kronecker product of (A, ones(phi_dim))
        phi_tile: Kronecker product of (ones(n_alpha), phi), row by row

        args: mu:  previous state, <batch x mu_dim>
              phi: current features, <batch x phi_dim>
        returns: updated state, <batch x mu_dim>
        """
        # Use the instance's own phi_dim; reading a module-level `phi_dim` only
        # works while that global happens to match the constructor argument.
        mask = self.A.contiguous().view(-1, 1).expand(self.n_alpha, self.phi_dim)
        mask = mask.contiguous().view(1, -1)
        # The mask is a constant, so it must not receive gradients.
        mask = Variable(mask, requires_grad=False)

        mu_prev = mu.view(-1, self.mu_dim)
        # Repeat phi once per alpha so it lines up with the layout of mu.
        phi_tile = phi.view(-1, self.phi_dim).repeat(1, self.n_alpha)
        # The <1 x mu_dim> mask broadcasts over the batch dimension.
        return torch.add(torch.mul(mask, mu_prev), torch.mul(1 - mask, phi_tile))

    def initMu(self, batch_size=1):
        """Return an all-zero initial state of shape <batch_size x mu_dim>."""
        return Variable(torch.zeros(batch_size, self.mu_dim))
    

phi_dim = 128  # size of phi, and of each per-alpha moving average
r_dim = 60  # size of r, which is computed from mu and concatenated with the input
A = torch.Tensor([0.0, 0.5, 0.9, 0.99])  # decay rates alpha, one moving average per entry

# Input size is one one-hot letter (n_letters); output size is one score per category.
sru = SRU(n_letters, phi_dim, r_dim, n_categories, A)


In [None]:
# Smoke test of SRU.forward: push one letter through the freshly built network.
# NOTE: `input` shadows the builtin of the same name; the name is kept because
# other cells may refer to it.
input = Variable(letterToTensor('A'))
# Let the model build its own initial state so the shape cannot drift from
# the model's mu_dim if the phi_dim / A globals are rebound.
mu_0 = sru.initMu()
output, mu_t = sru(input, mu_0)

In [104]:
def categoryFromOutput(output):
    """Return ``(category_name, category_index)`` for the highest-scoring category.

    args: output: <1 x n_categories> score tensor (or Variable) from the network
    returns: tuple of the category name looked up in ``all_categories`` and its
             index as a plain Python int
    """
    _, top_i = output.data.topk(1) # Tensor out of Variable with .data
    # On PyTorch >= 0.4 indexing yields a 0-dim tensor rather than a number;
    # int() gives a plain int on every version.
    category_i = int(top_i[0][0])
    return all_categories[category_i], category_i

# Uses `output` produced by the forward check cell above, so that cell must run first.
print(categoryFromOutput(output))

import random

def randomChoice(l):
    """Return one element of the sequence ``l`` picked at a random index."""
    last = len(l) - 1
    position = random.randint(0, last)
    return l[position]

def randomTrainingExample():
    """Draw a random category and a random name from it, with tensor encodings.

    returns: (category, line, category_tensor, line_tensor) where
             category_tensor is a LongTensor holding the category's index in
             ``all_categories`` and line_tensor is ``lineToTensor(line)``,
             both wrapped in ``Variable``.
    """
    category = randomChoice(all_categories)
    line = randomChoice(category_lines[category])
    label = torch.LongTensor([all_categories.index(category)])
    encoded = lineToTensor(line)
    return category, line, Variable(label), Variable(encoded)

# Print ten random samples to eyeball the data pipeline.
for i in range(10):
    category, line, category_tensor, line_tensor = randomTrainingExample()
    print('category =', category, '/ line =', line)

('Scottish', 2)
category = French / line = Armistead
category = Irish / line = Finn
category = Russian / line = Holodilov
category = Italian / line = Tumicelli
category = Japanese / line = Seo
category = Portuguese / line = Soares
category = Polish / line = Kijek
category = Korean / line = Kwak
category = Scottish / line = Johnstone
category = Greek / line = Close


In [105]:
# Negative log-likelihood loss; SRU.forward already applies LogSoftmax to its output.
criterion = nn.NLLLoss()

In [106]:
learning_rate = 0.005 # If you set this too high, it might explode. If too low, it might not learn

def train(category_tensor, line_tensor):
    """Run one plain-SGD step on a single (name, category) example.

    args: category_tensor: LongTensor holding the target category index
          line_tensor:     <line_length x 1 x n_letters> encoded name
    returns: (o, loss) -- the network output after the last letter and the
             loss value as a Python float
    raises: ValueError if ``line_tensor`` contains no letters
    """
    n_steps = line_tensor.size()[0]
    if n_steps == 0:
        # Without at least one step there is no output to compute a loss on.
        raise ValueError('line_tensor must contain at least one letter')

    mu = sru.initMu()

    sru.zero_grad()

    for i in range(n_steps):
        o, mu = sru(line_tensor[i], mu)

    loss = criterion(o, category_tensor)
    loss.backward()

    # Plain SGD update: p <- p - learning_rate * grad
    for p in sru.parameters():
        if p.grad is None:
            # Parameter did not take part in this forward pass.
            continue
        p.data.sub_(learning_rate * p.grad.data)

    # On PyTorch >= 0.4 the loss is a 0-dim tensor and loss.data[0] raises;
    # fall back to the old indexing only where .item() does not exist.
    loss_value = loss.item() if hasattr(loss, 'item') else loss.data[0]
    return o, loss_value

In [None]:
import time
import math

n_iters = 100000  # number of training examples to draw
print_every = 5000  # print a progress line every this many iterations
plot_every = 1000  # record an averaged loss every this many iterations



# Keep track of losses for plotting
current_loss = 0  # sum of losses since the last recorded average
all_losses = []  # one averaged loss per plot_every iterations

def timeSince(since):
    """Format the time elapsed since the timestamp ``since`` as ``'<m>m <s>s'``."""
    elapsed = time.time() - since
    minutes = math.floor(elapsed / 60)
    seconds = elapsed - minutes * 60
    return '%dm %ds' % (minutes, seconds)

start = time.time()

# The counter is called `iteration` rather than `iter` so the builtin iter()
# stays usable in the rest of the kernel session.
for iteration in range(1, n_iters + 1):
    category, line, category_tensor, line_tensor = randomTrainingExample()
    output, loss = train(category_tensor, line_tensor)
    current_loss += loss

    # Print iteration number, loss, name and guess
    if iteration % print_every == 0:
        guess, guess_i = categoryFromOutput(output)
        correct = '✓' if guess == category else '✗ (%s)' % category
        print('%d %d%% (%s) %.4f %s / %s %s' % (iteration, iteration / n_iters * 100, timeSince(start), loss, line, guess, correct))

    # Add current loss avg to list of losses
    if iteration % plot_every == 0:
        all_losses.append(current_loss / plot_every)
        current_loss = 0

5000 5% (0m 57s) 0.2801 Tchajengin / Russian ✓
10000 10% (1m 56s) 0.2744 Moon / Korean ✓
15000 15% (2m 55s) 3.1425 Shaw / Chinese ✗ (Scottish)
20000 20% (3m 53s) 0.7539 Ebner / German ✓
