In [2]:
import random
import time

import torch
import torch.nn as nn
from torch import optim
import torch.nn.functional as F

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

%load_ext autoreload
%autoreload 2

import utils
import rnn_rating

MAX_LENGTH = rnn_rating.MAX_LENGTH
SOS_token = rnn_rating.SOS_token
EOS_token = rnn_rating.EOS_token

In [4]:
text, lines = rnn_rating.prepareData('attn_input.txt', 'attn_target.txt')
print(random.choice(lines))

test_lines = lines[-200:]
lines = lines[:-200]
print(len(test_lines), len(lines))

Read 3668 sentence lines
Trimmed to 1427 sentence lines
Counting words...
Counted words:
2590
('dope shop', 5.0)
200 1227


In [3]:
def train(input_tensor, target_tensor, encoder, decoder, encoder_optimizer, decoder_optimizer, \
          criterion, max_length=MAX_LENGTH):
    encoder_hidden = encoder.initHidden()

    encoder_optimizer.zero_grad()
    decoder_optimizer.zero_grad()

    input_length = input_tensor.size(0)
    
    encoder_outputs = torch.zeros(max_length, encoder.hidden_size, device=device)

    loss = 0

    for ei in range(input_length):
        encoder_output, encoder_hidden = encoder(
            input_tensor[ei], encoder_hidden)
        encoder_outputs[ei] = encoder_output[0, 0]

    decoder_input = torch.tensor([[SOS_token]], device=device)

    decoder_hidden = encoder_hidden


    decoder_output, decoder_hidden, decoder_attention = decoder(decoder_input, decoder_hidden, encoder_outputs)
    loss += criterion(decoder_output, target_tensor)


    loss.backward()

    encoder_optimizer.step()
    decoder_optimizer.step()

    return loss.item()

In [4]:
def trainIters(encoder, decoder, n_iters, print_every=1000, plot_every=100, learning_rate=0.01):
    start = time.time()
    plot_losses = []
    print_loss_total = 0  # Reset every print_every
    plot_loss_total = 0  # Reset every plot_every

    encoder_optimizer = optim.SGD(encoder.parameters(), lr=learning_rate)
    decoder_optimizer = optim.SGD(decoder.parameters(), lr=learning_rate)
    training_pairs = [rnn_rating.tensorsFromLine(text, random.choice(lines))
                      for i in range(n_iters)]
    criterion = nn.MSELoss()

    for iter in range(1, n_iters + 1):
        training_pair = training_pairs[iter - 1]
        input_tensor = training_pair[0]
        target_tensor = training_pair[1]

        loss = train(input_tensor, target_tensor, encoder,
                     decoder, encoder_optimizer, decoder_optimizer, criterion)
        print_loss_total += loss
        plot_loss_total += loss

        if iter % print_every == 0:
            print_loss_avg = print_loss_total / print_every
            print_loss_total = 0
            print('%s (%d %d%%) %.4f' % (utils.timeSince(start, iter / n_iters),
                                         iter, iter / n_iters * 100, print_loss_avg))

        if iter % plot_every == 0:
            plot_loss_avg = plot_loss_total / plot_every
            plot_losses.append(plot_loss_avg)
            plot_loss_total = 0

    showPlot(plot_losses)

In [1]:
import matplotlib.pyplot as plt
# plt.switch_backend('agg')
import matplotlib.ticker as ticker
import numpy as np


def showPlot(points):
    plt.figure()
    fig, ax = plt.subplots()
    # this locator puts ticks at regular intervals
    loc = ticker.MultipleLocator(base=0.2)
    ax.yaxis.set_major_locator(loc)
    plt.plot(points)

In [10]:
def evaluate(encoder, decoder, sentence, max_length=MAX_LENGTH):
    with torch.no_grad():
        input_tensor = rnn_rating.tensorFromSentence(text, sentence)
        input_length = input_tensor.size()[0]
        encoder_hidden = encoder.initHidden()

        encoder_outputs = torch.zeros(max_length, encoder.hidden_size, device=device)

        for ei in range(input_length):
            encoder_output, encoder_hidden = encoder(input_tensor[ei],
                                                     encoder_hidden)
            encoder_outputs[ei] += encoder_output[0, 0]

        decoder_input = torch.tensor([[SOS_token]], device=device)  # SOS

        decoder_hidden = encoder_hidden

        
        decoder_output, decoder_hidden, decoder_attention = decoder(
            decoder_input, decoder_hidden, encoder_outputs)

        return decoder_output.data, decoder_attention.data
    

def evaluateRandomly(encoder, decoder, lines, n=10):
    for i in range(n):
        line = random.choice(lines)
        print('>', line[0])
        print('=', line[1])
        output, attention = evaluate(encoder, decoder, line[0])
        print('<', output)
        print('')

In [11]:
def Test(encoder, decoder, input_tensor, target_tensor, criterion, max_length=MAX_LENGTH):
    with torch.no_grad():
        encoder_hidden = encoder.initHidden()
        input_length = input_tensor.size(0)
    
        encoder_outputs = torch.zeros(max_length, encoder.hidden_size, device=device)

        for ei in range(input_length):
            encoder_output, encoder_hidden = encoder(
                input_tensor[ei], encoder_hidden)
            encoder_outputs[ei] = encoder_output[0, 0]

        decoder_input = torch.tensor([[SOS_token]], device=device)
        decoder_hidden = encoder_hidden

        decoder_output, decoder_hidden, decoder_attention = decoder(decoder_input, decoder_hidden, encoder_outputs)
        loss = criterion(decoder_output, target_tensor)
        return loss.item()
    
def IterTest(encoder, decoder, test_lines):
    criterion = nn.MSELoss()
    test_pairs = [rnn_rating.tensorsFromLine(text, line) for line in test_lines] 
    
    loss = 0
    
    for i in range(len(test_pairs)):
        test_pair = test_pairs[i]
        input_tensor = test_pair[0]
        target_tensor = test_pair[1]

        loss += Test(encoder, decoder, input_tensor, target_tensor, criterion)
        
    mse = loss / len(test_lines)
    return mse

In [6]:
hidden_size = 256
# encoder = rnn_rating.EncoderRNN(text.n_words, hidden_size).to(device)
# attn_decoder = rnn_rating.AttnDecoderRNN(hidden_size, text.n_words, dropout_p=0.1).to(device)

# trainIters(encoder, attn_decoder, 75000, print_every=5000)

In [26]:
showPlot(plot_losses)

NameError: name 'plot_losses' is not defined

In [9]:
torch.save(encoder.state_dict(), 'encoder_model.pth')
torch.save(attn_decoder.state_dict(), 'decoder_model.pth')

In [8]:
encoder = rnn_rating.EncoderRNN(text.n_words, hidden_size).to(device)
encoder.load_state_dict(torch.load('encoder_model.pth'))

attn_decoder = rnn_rating.AttnDecoderRNN(hidden_size, text.n_words, dropout_p=0.1).to(device)
attn_decoder.load_state_dict(torch.load('decoder_model.pth'))

In [12]:
evaluateRandomly(encoder, attn_decoder, test_lines)

> omg so awesome ! ! ! only negative is your own temptation to just go spend crazy cuz they got everything u want need
= 5.0
< tensor([[4.2075]], device='cuda:0')

> muy buenos tattos . .algun dia me tatuara kat
= 5.0
< tensor([[4.5920]], device='cuda:0')

> i highly recommend this place !
= 5.0
< tensor([[4.7039]], device='cuda:0')

> central location tasty sandwiches a variety of fruity beverages . friendly service too .
= 4.0
< tensor([[4.8277]], device='cuda:0')

> good amount of food for a decent price . the house prime rib was enough to get me stuffed !
= 4.0
< tensor([[2.4632]], device='cuda:0')

> nice food ok service not very special .
= 3.0
< tensor([[2.9204]], device='cuda:0')

> best ny style pizza joint this side of jersey ! eggplant and mozzarella ftw .
= 5.0
< tensor([[4.3483]], device='cuda:0')

> fun place to play paintball !
= 5.0
< tensor([[4.6981]], device='cuda:0')

> central location tasty sandwiches a variety of fruity beverages . friendly service too .
= 4.0
< t

In [13]:
IterTest(encoder, attn_decoder, test_lines)

1.2401601590967584

In [14]:
output_words, attentions = evaluate(
    encoder, attn_decoder, random.choice(lines)[0])
plt.matshow(attentions.cpu().numpy())

<matplotlib.image.AxesImage at 0x7fa825360400>

In [15]:
def showAttention(input_sentence, output_words, attentions):
    # Set up figure with colorbar
    fig = plt.figure()
    ax = fig.add_subplot(111)
    cax = ax.matshow(attentions.cpu().numpy(), cmap='bone')
    fig.colorbar(cax)

    # Set up axes
#     ax.set_xticklabels([''] + input_sentence.split(' ') +
#                        ['<EOS>'], rotation=90)
#     ax.set_yticklabels([''] + output_words)

    # Show label at every tick
    ax.xaxis.set_major_locator(ticker.MultipleLocator(1))
    ax.yaxis.set_major_locator(ticker.MultipleLocator(1))

    plt.show()


def evaluateAndShowAttention(input_sentence):
    output_words, attentions = evaluate(
        encoder, attn_decoder, input_sentence)
    print('input =', input_sentence)
    print('output =', output_words.item())
    showAttention(input_sentence, output_words, attentions)

In [16]:
evaluateAndShowAttention(random.choice(lines)[0])

input = by dollar general market
output = 3.0067038536071777


  % get_backend())


In [19]:
plt.get_backend()

'agg'