<a href="https://colab.research.google.com/github/110805/Spelling_Correction/blob/master/seq2seq.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
!git clone https://github.com/110805/Spelling_Correction.git
%cd Spelling_Correction/

Cloning into 'Spelling_Correction'...
remote: Enumerating objects: 26, done.[K
remote: Counting objects: 100% (26/26), done.[K
remote: Compressing objects: 100% (23/23), done.[K
remote: Total 26 (delta 11), reused 10 (delta 3), pack-reused 0[K
Unpacking objects: 100% (26/26), done.
/content/Spelling_Correction


In [2]:
from __future__ import unicode_literals, print_function, division
from io import open
import unicodedata
import string
import re
import random
import time
import math
import torch
import torch.nn as nn
from torch import optim
import torch.nn.functional as F
import matplotlib.pyplot as plt
plt.switch_backend('agg')
import matplotlib.ticker as ticker
import numpy as np
from os import system
from nltk.translate.bleu_score import SmoothingFunction, sentence_bleu
from dataloader import sample_pair
from dataloader import Lang



"""========================================================================================
The sample.py includes the following template functions:

1. Encoder, decoder
2. Training function
3. BLEU-4 score function

You have to modify them to complete the lab.
In addition, there are still other functions that you have to 
implement by yourself.

1. Your own dataloader (design it in your own way; it does not have to be a PyTorch DataLoader)
2. Output your results (BLEU-4 score, correction words)
3. Plot loss/score
4. Load/save weights
========================================================================================"""

# Run on the GPU when CUDA is available, otherwise on the CPU.
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")

# Token ids: SOS is fed to the decoder as its first input, EOS stops decoding.
SOS_token, EOS_token = 0, 1

# ---------- Hyper parameters ---------- #
# Size of the embeddings and of the LSTM state.
hidden_size = 256
# The number of words in the vocabulary.
vocab_size = 17703
# Probability that a training step feeds the target back into the decoder.
teacher_forcing_ratio = 0.7
# NOTE(review): LR is not referenced by the code visible in this file;
# trainIters takes its own `learning_rate` argument.
LR = 0.05
# Default `max_length` for train() and evaluate().
MAX_LENGTH = 10

# ---------- Example inputs of compute_bleu ---------- #
# The target word.
reference = 'variable'
# The word generated by your model.
output = 'varable'

#compute BLEU-4 score
def compute_bleu(output, reference):
    """Return the smoothed sentence-level BLEU score of ``output`` vs ``reference``.

    Both arguments are handed to NLTK's ``sentence_bleu`` unchanged, with
    ``reference`` wrapped in a one-element list of references.
    NOTE(review): the visible callers pass plain strings, so n-grams are
    presumably taken over characters -- confirm against the NLTK docs.

    Args:
        output: the hypothesis produced by the model.
        reference: the expected (target) sequence.

    Returns:
        The score computed by ``sentence_bleu`` using ``method1`` smoothing.
    """
    # A reference of length 3 gets three weights (up to tri-grams);
    # every other length uses the usual four equal weights.
    weights = (0.33, 0.33, 0.33) if len(reference) == 3 else (0.25, 0.25, 0.25, 0.25)
    smoother = SmoothingFunction().method1
    return sentence_bleu([reference], output, weights=weights, smoothing_function=smoother)

#Encoder
class EncoderRNN(nn.Module):
    """Single-layer LSTM encoder that consumes one token index per call."""

    def __init__(self, input_size, hidden_size):
        """Build the embedding table and the LSTM.

        Args:
            input_size: number of rows in the embedding table.
            hidden_size: embedding width, also used as the LSTM state width.
        """
        super(EncoderRNN, self).__init__()
        self.hidden_size = hidden_size

        # Creation order is kept (embedding first, then LSTM) so seeded
        # parameter initialisation is unchanged.
        self.embedding = nn.Embedding(input_size, hidden_size)
        self.lstm = nn.LSTM(hidden_size, hidden_size)

    def forward(self, input, hidden):
        """Advance the encoder by one step.

        Args:
            input: token index tensor; its embedding is reshaped to (1, 1, -1).
            hidden: LSTM state as accepted by ``nn.LSTM``.

        Returns:
            ``(output, hidden)`` exactly as returned by the LSTM.
        """
        step_input = self.embedding(input).view(1, 1, -1)
        return self.lstm(step_input, hidden)

    def initHidden(self):
        """Return a fresh all-zero ``(h, c)`` pair, each of shape (1, 1, hidden_size)."""
        state_shape = (1, 1, self.hidden_size)
        return tuple(torch.zeros(state_shape, device=device) for _ in range(2))

#Decoder
class DecoderRNN(nn.Module):
    """Single-layer LSTM decoder that emits one score vector per call."""

    def __init__(self, hidden_size, output_size):
        """Build the embedding table, the LSTM and the output projection.

        Args:
            hidden_size: embedding width, also used as the LSTM state width.
            output_size: number of output classes (and embedding rows).
        """
        super(DecoderRNN, self).__init__()
        self.hidden_size = hidden_size

        self.embedding = nn.Embedding(output_size, hidden_size)
        self.lstm = nn.LSTM(hidden_size, hidden_size)
        self.out = nn.Linear(hidden_size, output_size)

    def forward(self, input, hidden):
        """Advance the decoder by one step.

        Args:
            input: token index tensor; its embedding is reshaped to (1, 1, -1).
            hidden: LSTM state ``(h, c)`` as accepted by ``nn.LSTM``.

        Returns:
            ``(scores, hidden)`` where ``scores`` is the un-normalised output of
            the linear layer with shape (1, output_size) and ``hidden`` is the
            updated LSTM state.
        """
        output = self.embedding(input).view(1, 1, -1)
        output = F.relu(output)
        output, hidden = self.lstm(output, hidden)
        # output[0] drops the leading length-1 dimension before projecting.
        output = self.out(output[0])
        return output, hidden

    def initHidden(self):
        """Return a fresh all-zero ``(h, c)`` pair, each of shape (1, 1, hidden_size).

        ``nn.LSTM`` expects a tuple of hidden and cell state; a single tensor
        (what this method used to return) is rejected by ``forward``. The
        tensors are created on the same device as the module's parameters.
        """
        param_device = self.embedding.weight.device
        return (torch.zeros(1, 1, self.hidden_size, device=param_device),
                torch.zeros(1, 1, self.hidden_size, device=param_device))

def train(input_tensor, target_tensor, encoder, decoder, encoder_optimizer, decoder_optimizer, criterion, max_length=MAX_LENGTH):
    """Run one optimisation step on a single (input, target) pair.

    The encoder consumes ``input_tensor`` one element at a time; its final
    state initialises the decoder, which is then unrolled over the target.
    With probability ``teacher_forcing_ratio`` the decoder is fed the target
    token at every step; otherwise it is fed its own top-1 prediction and
    stops early once that prediction is ``EOS_token``.

    Args:
        input_tensor: source sequence, indexed along dim 0 (one token per step).
        target_tensor: target sequence, indexed along dim 0.
        encoder: module exposing ``initHidden()`` and ``(input, hidden)`` call.
        decoder: module called as ``decoder(input, hidden)``.
        encoder_optimizer: optimizer for the encoder parameters.
        decoder_optimizer: optimizer for the decoder parameters.
        criterion: loss called as ``criterion(decoder_output, target_tensor[di])``.
        max_length: unused; kept so existing callers remain valid.

    Returns:
        The summed loss divided by the full target length (also when decoding
        stopped early at EOS).

    Raises:
        ValueError: if ``target_tensor`` is empty (no loss could be computed).
    """
    input_length = input_tensor.size(0)
    target_length = target_tensor.size(0)
    if target_length == 0:
        raise ValueError('target_tensor must contain at least one token')

    encoder_optimizer.zero_grad()
    decoder_optimizer.zero_grad()

    #----------sequence to sequence part for encoder----------#
    # Only the final state is needed; per-step outputs are discarded.
    encoder_hidden = encoder.initHidden()
    for ei in range(input_length):
        _, encoder_hidden = encoder(input_tensor[ei], encoder_hidden)

    decoder_input = torch.tensor([[SOS_token]], device=device)
    decoder_hidden = encoder_hidden

    use_teacher_forcing = random.random() < teacher_forcing_ratio

    #----------sequence to sequence part for decoder----------#
    loss = 0
    for di in range(target_length):
        decoder_output, decoder_hidden = decoder(decoder_input, decoder_hidden)
        loss += criterion(decoder_output, target_tensor[di])

        if use_teacher_forcing:
            # Teacher forcing: feed the target as the next input.
            decoder_input = target_tensor[di]
        else:
            # Free running: feed back the prediction, detached from the graph.
            _, topi = decoder_output.topk(1)
            decoder_input = topi.squeeze().detach()
            if decoder_input.item() == EOS_token:
                break

    loss.backward()

    encoder_optimizer.step()
    decoder_optimizer.step()

    return loss.item() / target_length


def asMinutes(s):
    """Format a duration in seconds as ``'<minutes>m <seconds>s'``.

    Both fields are rendered with ``%d``, so fractional seconds are truncated.
    """
    whole_minutes = math.floor(s / 60)
    leftover_seconds = s - whole_minutes * 60
    return '%dm %ds' % (whole_minutes, leftover_seconds)


def timeSince(since, percent):
    """Describe elapsed and estimated remaining time as ``'<elapsed> (- <remaining>)'``.

    Args:
        since: start timestamp, as returned by ``time.time()``.
        percent: fraction of the work completed so far; must be non-zero
            because the total time is extrapolated as ``elapsed / percent``.
    """
    elapsed = time.time() - since
    # Extrapolate the total duration, then subtract what has already passed.
    remaining = elapsed / (percent) - elapsed
    return '%s (- %s)' % (asMinutes(elapsed), asMinutes(remaining))



def trainIters(encoder, decoder, n_iters, print_every=1000, plot_every=100, learning_rate=0.01):
    """Train the encoder/decoder for ``n_iters`` single-pair steps and plot the loss.

    All training pairs are sampled up front from ``'train.json'``; each step
    calls ``train`` on one pair. Running averages of the loss are printed
    every ``print_every`` steps and collected every ``plot_every`` steps; the
    collected averages are saved as a figure named ``'TrainingLoss'``.

    Args:
        encoder: encoder module to optimise.
        decoder: decoder module to optimise.
        n_iters: number of training steps (and of sampled pairs).
        print_every: steps between progress lines.
        plot_every: steps between points of the loss curve.
        learning_rate: learning rate of both SGD optimizers.
    """
    start = time.time()
    plot_losses = []
    print_loss_total = 0  # Reset every print_every
    plot_loss_total = 0  # Reset every plot_every

    encoder_optimizer = optim.SGD(encoder.parameters(), lr=learning_rate)
    decoder_optimizer = optim.SGD(decoder.parameters(), lr=learning_rate)

    lang = Lang('train.json')
    index = lang.addWord()
    training_pairs = [sample_pair('train.json', lang, index) for _ in range(n_iters)]
    print('Finish sampling')
    criterion = nn.CrossEntropyLoss()

    # `step` is 1-based so the modulo checks fire after exactly N steps
    # (and the builtin `iter` is no longer shadowed).
    for step, training_pair in enumerate(training_pairs, start=1):
        input_tensor = training_pair[0].to(device)
        target_tensor = training_pair[1].to(device)

        loss = train(input_tensor, target_tensor, encoder,
                     decoder, encoder_optimizer, decoder_optimizer, criterion)
        print_loss_total += loss
        plot_loss_total += loss

        if step % print_every == 0:
            print_loss_avg = print_loss_total / print_every
            print_loss_total = 0
            print('%s (%d %d%%) %.4f' % (timeSince(start, step / n_iters),
                                         step, step / n_iters * 100, print_loss_avg))

        if step % plot_every == 0:
            plot_loss_avg = plot_loss_total / plot_every
            plot_losses.append(plot_loss_avg)
            plot_loss_total = 0

    plt.figure(1)
    # One x position per collected average, so x and y always match in length.
    plt.plot(range(len(plot_losses)), plot_losses)
    # The axis unit follows plot_every instead of a hard-coded 100.
    plt.xlabel('Iterations*%d' % plot_every)
    plt.ylabel('CrossEntropyLoss')
    plt.savefig('TrainingLoss')

# Build both networks on the selected device, train them, then save their weights.
encoder1 = EncoderRNN(input_size=vocab_size, hidden_size=hidden_size).to(device)
decoder1 = DecoderRNN(hidden_size=hidden_size, output_size=vocab_size).to(device)
trainIters(encoder1, decoder1, n_iters=150000, print_every=5000)
for _module, _path in ((encoder1, 'encoder.pkl'), (decoder1, 'decoder.pkl')):
    torch.save(_module.state_dict(), _path)


Finish sampling
0m 32s (- 15m 53s) (5000 3%) 4.9314
1m 1s (- 14m 19s) (10000 6%) 4.8024
1m 31s (- 13m 47s) (15000 10%) 4.7251
2m 2s (- 13m 16s) (20000 13%) 4.6844
2m 32s (- 12m 44s) (25000 16%) 4.6273
3m 3s (- 12m 14s) (30000 20%) 4.5763
3m 34s (- 11m 44s) (35000 23%) 4.5352
4m 5s (- 11m 14s) (40000 26%) 4.4682
4m 35s (- 10m 43s) (45000 30%) 4.3837
5m 6s (- 10m 12s) (50000 33%) 4.2962
5m 37s (- 9m 42s) (55000 36%) 4.1730
6m 8s (- 9m 12s) (60000 40%) 4.0123
6m 39s (- 8m 41s) (65000 43%) 3.8055
7m 9s (- 8m 11s) (70000 46%) 3.5648
7m 40s (- 7m 40s) (75000 50%) 3.2579
8m 12s (- 7m 10s) (80000 53%) 2.9383
8m 43s (- 6m 40s) (85000 56%) 2.5538
9m 14s (- 6m 9s) (90000 60%) 2.2558
9m 44s (- 5m 38s) (95000 63%) 1.8709
10m 15s (- 5m 7s) (100000 66%) 1.5584
10m 46s (- 4m 37s) (105000 70%) 1.2434
11m 17s (- 4m 6s) (110000 73%) 0.9829
11m 48s (- 3m 35s) (115000 76%) 0.7820
12m 18s (- 3m 4s) (120000 80%) 0.5886
12m 49s (- 2m 33s) (125000 83%) 0.4732
13m 19s (- 2m 3s) (130000 86%) 0.3571
13m 50s (- 1m

In [5]:
def evaluate(encoder, decoder, lang, index, max_length=MAX_LENGTH):
    """Sample one pair from ``'train.json'`` and greedily decode a prediction.

    NOTE(review): the pair is drawn from the training file, so scores built
    on this function measure fit to the training data -- confirm intended.

    Args:
        encoder: trained encoder module.
        decoder: trained decoder module.
        lang: object forwarded to ``sample_pair``.
        index: mapping from word to token id; inverted here to decode ids.
        max_length: maximum number of decoder steps.

    Returns:
        ``(decoded_words, target_word, input_word)`` where ``decoded_words``
        is the list of predicted words (empty if EOS is predicted first) and
        the other two are the words for the first token of the sampled
        target and input tensors.
    """
    with torch.no_grad():
        pair = sample_pair('train.json', lang, index)
        input_tensor = pair[0].to(device)
        target_tensor = pair[1]
        index2word = {v: k for k, v in index.items()}

        # Only the final encoder state is used. No per-step output buffer is
        # kept, so inputs longer than max_length no longer raise IndexError.
        encoder_hidden = encoder.initHidden()
        for ei in range(input_tensor.size(0)):
            _, encoder_hidden = encoder(input_tensor[ei], encoder_hidden)

        decoder_input = torch.tensor([[SOS_token]], device=device)  # SOS
        decoder_hidden = encoder_hidden

        decoded_words = []
        for _ in range(max_length):
            decoder_output, decoder_hidden = decoder(
                decoder_input, decoder_hidden)
            # Greedy decoding: keep the single highest-scoring token.
            _, topi = decoder_output.topk(1)
            token = topi.item()
            if token == EOS_token:
                break
            decoded_words.append(index2word[token])

            decoder_input = topi.squeeze().detach()

        return decoded_words, index2word[target_tensor[0].item()], index2word[input_tensor[0].item()]

# Score the trained model on randomly sampled pairs and report the mean BLEU-4.
lang = Lang('train.json')
index = lang.addWord()
n_samples = 50
score = 0
for i in range(n_samples):
    output, reference, inp = evaluate(encoder1, decoder1, lang, index)
    # evaluate() returns an empty list when EOS is predicted first; fall back
    # to an empty prediction before indexing so printing cannot raise IndexError.
    pred = output[0] if output else ''
    print('input: {}'.format(inp))
    print('target: {}'.format(reference))
    print('pred: {}'.format(pred))
    print('--------------------')
    score += compute_bleu(pred, reference)

print('BLEU-4 score:{}'.format(score/n_samples))

input: alloud
target: allowed
pred: allowed
--------------------
input: seroundings
target: surroundings
pred: surroundings
--------------------
input: merder
target: murder
pred: murder
--------------------
input: fictitous
target: fictitious
pred: fictitious
--------------------
input: bulletings
target: bulletins
pred: bulletins
--------------------
input: negociation
target: negotiation
pred: negotiation
--------------------
input: exspecting
target: expecting
pred: expecting
--------------------
input: boquet
target: bouquet
pred: bouquet
--------------------
input: substraction
target: subtraction
pred: subtraction
--------------------
input: blosom
target: blossom
pred: blossom
--------------------
input: acess
target: access
pred: access
--------------------
input: berth
target: birth
pred: birth
--------------------
input: shepard
target: shepherd
pred: shepherd
--------------------
input: enlargd
target: enlarged
pred: enlarged
--------------------
input: parth
target: path
p