<a href="https://colab.research.google.com/github/Tierprot/Deep-Learning/blob/master/homework6.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [0]:
import os

import itertools
import pickle
import matplotlib.pyplot as plt

import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
import math 

In [4]:
from google.colab import drive

# Mount Google Drive under /content/gdrive so the dataset and output paths used
# by later cells resolve. The recorded output below shows that this step asks
# for an interactive authorization code.
drive.mount('/content/gdrive', force_remount=True)

Go to this URL in a browser: https://accounts.google.com/o/oauth2/auth?client_id=947318989803-6bn6qk8qdgf4n4g3pfee6491hc0brc4i.apps.googleusercontent.com&redirect_uri=urn%3aietf%3awg%3aoauth%3a2.0%3aoob&response_type=code&scope=email%20https%3a%2f%2fwww.googleapis.com%2fauth%2fdocs.test%20https%3a%2f%2fwww.googleapis.com%2fauth%2fdrive%20https%3a%2f%2fwww.googleapis.com%2fauth%2fdrive.photos.readonly%20https%3a%2f%2fwww.googleapis.com%2fauth%2fpeopleapi.readonly

Enter your authorization code:
··········
Mounted at /content/gdrive


In [0]:
import sys
# Add the project folder on the mounted Drive to the module search path.
sys.path.append('/content/gdrive/My Drive/Colab_Notebooks/rnn')
# Render matplotlib figures inline in the notebook output.
%matplotlib inline

In [6]:
# List the dataset folder on Drive; the recorded output shows the three split
# files (test.txt, train.txt, valid.txt) referenced by the data-loading cell.
!ls '/content/gdrive/My Drive/Colab_Notebooks/rnn/wikitext'

test.txt  train.txt  valid.txt


In [7]:
from torchtext.data import Field, BPTTIterator
from torchtext.datasets import WikiText2
from torchtext.vocab import Vocab
from collections import Counter

def tokenizer(text):
    """Split ``text`` into a list of its individual items.

    For a string this yields one entry per character, which makes the
    language model below operate at character level.
    """
    return list(text)

# Hyperparameters and bookkeeping shared (as module-level globals) by the cells below.
batch_size = 128        # passed to BPTTIterator.splits as batch_size
sequence_length = 30    # passed to BPTTIterator.splits as bptt_len
grad_clip = 0.1         # max norm given to clip_grad_norm_ inside train()
lr = 4.                 # step size of the manual update in train(); the epoch loop divides it by 4 when validation loss does not improve
best_val_loss = None    # best validation loss so far; stays None until the first epoch has been evaluated
log_interval = 100      # train() prints the running loss every this many batches

# Field that describes how raw text becomes token sequences; it uses the
# `tokenizer` function defined above.
text = Field(sequential=True, 
             tokenize=tokenizer)

# Build the three WikiText2 splits with `text` as their field.
# NOTE(review): the recorded output shows "downloading wikitext-2-v1.zip", so
# confirm that these absolute Drive paths are what is actually read.
train_set, val_set, test_set =  WikiText2.splits(text_field=text,
                                  train='/content/gdrive/My Drive/Colab_Notebooks/rnn/wikitext/train.txt',
                                  validation='/content/gdrive/My Drive/Colab_Notebooks/rnn/wikitext/valid.txt',
                                  test='/content/gdrive/My Drive/Colab_Notebooks/rnn/wikitext/test.txt')

# One BPTT iterator per split; each batch exposes `.text` and `.target`, which
# train() and evaluate() read. repeat=False is passed explicitly.
train_iter, val_iter, test_iter = BPTTIterator.splits(
                                    (train_set, val_set, test_set),
                                    batch_size=batch_size,
                                    bptt_len=sequence_length,
                                    repeat=False)

# The vocabulary is built from all three splits, so symbols that occur only in
# the validation/test data also get an index.
# NOTE(review): this runs after the iterators are created — presumably fine
# because numericalization happens when batches are drawn; confirm.
text.build_vocab(train_set, val_set, test_set)

downloading wikitext-2-v1.zip


wikitext-2-v1.zip: 100%|██████████| 4.48M/4.48M [00:00<00:00, 41.4MB/s]


extracting


In [0]:
class RNNModel(nn.Module):
    """Language model: embedding -> stacked LSTM/GRU -> linear decoder.

    Args:
        rnn_type: ``'LSTM'`` or ``'GRU'``.
        ntoken: vocabulary size (number of embedding rows and decoder outputs).
        ninp: embedding dimension fed into the recurrent layers.
        nhid: hidden size of each recurrent layer.
        nlayers: number of stacked recurrent layers.
        dropout: dropout probability applied to the embeddings, between
            recurrent layers, and to the recurrent output.

    Raises:
        ValueError: if ``rnn_type`` is not a supported type. Without this
            check the module would be built without ``self.rnn`` and only
            fail later, inside ``forward``.
    """

    _RNN_CLASSES = {'LSTM': nn.LSTM, 'GRU': nn.GRU}

    def __init__(self, rnn_type, ntoken, ninp, nhid, nlayers, dropout=0.5):
        super(RNNModel, self).__init__()
        if rnn_type not in self._RNN_CLASSES:
            raise ValueError(
                "rnn_type must be one of {}, got {!r}".format(
                    sorted(self._RNN_CLASSES), rnn_type))

        # Sub-modules are created in the same order as before so parameter
        # ordering (and therefore state_dict layout) is unchanged.
        self.drop = nn.Dropout(dropout)
        self.encoder = nn.Embedding(ntoken, ninp)
        self.rnn = self._RNN_CLASSES[rnn_type](ninp, nhid, nlayers, dropout=dropout)
        self.decoder = nn.Linear(nhid, ntoken)
        self.init_weights()

        self.rnn_type = rnn_type
        self.nhid = nhid
        self.nlayers = nlayers

    def init_weights(self):
        """Re-initialise embedding/decoder weights uniformly in [-0.1, 0.1] and zero the decoder bias."""
        initrange = 0.1
        # In-place initialisation must not be recorded by autograd.
        with torch.no_grad():
            self.encoder.weight.uniform_(-initrange, initrange)
            self.decoder.bias.fill_(0)
            self.decoder.weight.uniform_(-initrange, initrange)

    def forward(self, x, hidden=None):
        """Compute next-token logits.

        Args:
            x: integer token indices; the recurrent layer is built with its
                default layout, so dim 0 is the sequence and dim 1 the batch.
            hidden: optional recurrent state from a previous call; ``None``
                lets the recurrent layer start from its default state.

        Returns:
            ``(logits, hidden)`` where ``logits`` has the shape of the
            recurrent output with the last dimension replaced by ``ntoken``.
        """
        emb = self.drop(self.encoder(x))
        output, hidden = self.rnn(emb, hidden)
        output = self.drop(output)
        # nn.Linear acts on the last dimension, so no manual flatten/reshape is needed.
        return self.decoder(output), hidden

    def init_hidden(self, bsz):
        """Return an all-zero initial state for batch size ``bsz``.

        The state lives on the same device and has the same dtype as the
        model parameters. For an LSTM this is an ``(h, c)`` tuple, otherwise
        a single tensor; every tensor has shape ``(nlayers, bsz, nhid)``.
        """
        weight = next(self.parameters())
        if self.rnn_type == 'LSTM':
            return (weight.new_zeros(self.nlayers, bsz, self.nhid),
                    weight.new_zeros(self.nlayers, bsz, self.nhid))
        return weight.new_zeros(self.nlayers, bsz, self.nhid)

In [0]:
# Vocabulary size, read from the vocab attached to the training set's text field.
ntokens = len(train_set.fields['text'].vocab.itos)

# Two-layer LSTM with 128-dimensional embeddings and hidden state, dropout 0.3.
model = RNNModel(
    rnn_type='LSTM',
    ntoken=ntokens,
    ninp=128,
    nhid=128,
    nlayers=2,
    dropout=0.3,
)

# Loss used by both train() and evaluate().
criterion = nn.CrossEntropyLoss()

In [0]:
def train():
    """Run one training epoch over ``train_iter`` using a manual SGD step.

    Reads the module-level ``model``, ``criterion``, ``train_iter``,
    ``ntokens``, ``grad_clip``, ``lr``, ``log_interval`` and ``epoch`` (the
    latter is set by the enclosing epoch loop and is only used for logging).
    Every batch is fed to the model without a hidden state, so each batch
    starts from the recurrent layer's default state.

    Prints the mean loss and its exponential (perplexity) every
    ``log_interval`` batches. Returns nothing.
    """
    model.train()
    running_loss = 0.0
    batches_since_log = 0
    for batch_idx, batch in enumerate(train_iter):
        data, targets = batch.text, batch.target
        model.zero_grad()
        output, _ = model(data)
        loss = criterion(output.view(-1, ntokens), torch.flatten(targets))
        loss.backward()

        # `clip_grad_norm_` helps prevent the exploding gradient problem in RNNs / LSTMs.
        torch.nn.utils.clip_grad_norm_(model.parameters(), grad_clip)

        # Plain SGD step: p <- p - lr * grad. Done under no_grad with the
        # keyword `alpha` form; the positional-scalar overload is deprecated.
        with torch.no_grad():
            for p in model.parameters():
                if p.grad is not None:
                    p.add_(p.grad, alpha=-lr)

        running_loss += loss.item()
        batches_since_log += 1

        if batch_idx % log_interval == 0 and batch_idx > 0:
            # Divide by the number of batches actually accumulated: the first
            # window covers batches 0..log_interval, i.e. one more than log_interval.
            cur_loss = running_loss / batches_since_log
            print('| epoch {:3d} | {:5d}/{:5d} batches | lr {:02.2f} | loss {:5.2f} | ppl {:8.2f}'.format(
                epoch, batch_idx, len(train_iter), lr, cur_loss, math.exp(cur_loss)))
            running_loss = 0.0
            batches_since_log = 0

In [0]:
def evaluate(data_loader):
    """Return the average loss of the global ``model`` over ``data_loader``.

    Args:
        data_loader: iterable of batches exposing ``.text`` and ``.target``.

    Returns:
        The mean of ``criterion`` over all batches, each weighted by
        ``len(batch.text)``. The total is divided by the sum of those
        weights, so the result is on the same scale as a single
        ``criterion`` value and ``math.exp`` of it is a perplexity.
        (Dividing by the number of batches instead would inflate the result
        by roughly ``len(batch.text)``.)

    Raises:
        ValueError: if ``data_loader`` yields no data.
    """
    model.eval()
    total_loss = 0.0
    total_weight = 0
    # No gradients are needed for evaluation; skip building the autograd graph.
    with torch.no_grad():
        for batch in data_loader:
            data, targets = batch.text, batch.target
            output, _ = model(data)
            weight = len(data)
            batch_loss = criterion(output.view(-1, ntokens), torch.flatten(targets)).item()
            total_loss += weight * batch_loss
            total_weight += weight
    if total_weight == 0:
        raise ValueError("evaluate() received an empty data_loader")
    return total_loss / total_weight

In [0]:
def generate(n=50, temp=1.):
    """Sample ``n`` tokens from the global ``model`` and join them into a string.

    Generation starts from a uniformly random token index and feeds each
    sampled token back in as the next input, carrying the hidden state.

    Args:
        n: number of tokens to sample.
        temp: sampling temperature; logits are divided by it before the
            softmax, so values below 1 sharpen the distribution and values
            above 1 flatten it. Must be positive.

    Returns:
        The concatenation of the sampled vocabulary entries.

    Raises:
        ValueError: if ``temp`` is not positive.
    """
    if temp <= 0:
        raise ValueError("temp must be positive, got {!r}".format(temp))

    model.eval()
    itos = train_set.fields['text'].vocab.itos
    out = []
    # Disable autograd inside the function: the hidden state is carried across
    # steps, so without this a long generation keeps the whole graph alive
    # whenever the caller forgets to wrap the call in torch.no_grad().
    with torch.no_grad():
        x = torch.rand(1, 1).mul(ntokens).long()
        hidden = None
        for _ in range(n):
            output, hidden = model(x, hidden)
            # Softmax is the numerically stable form of exp(logits / temp);
            # the raw exponential can overflow for large logits or small temp.
            probs = F.softmax(output.view(-1) / temp, dim=-1)
            s_idx = torch.multinomial(probs, 1).item()
            x.fill_(s_idx)
            out.append(itos[s_idx])
    return ''.join(out)

In [43]:
# Sample from the model before any training, as a baseline.
with torch.no_grad():
    print('sample:\n', generate(50), '\n')

# `epoch` is a module-level name on purpose: train() reads it for its log lines.
for epoch in range(1, 6):
    train()
    val_loss = evaluate(val_iter)
    print('-' * 89)
    print('| end of epoch {:3d} | valid loss {:5.2f} | valid ppl {:8.2f}'.format(
        epoch, val_loss, math.exp(val_loss)))
    print('-' * 89)
    # Compare against None explicitly: a truthiness test would also treat a
    # best loss of exactly 0.0 as "no best yet".
    if best_val_loss is None or val_loss < best_val_loss:
        best_val_loss = val_loss
    else:
        # Anneal the learning rate if no improvement has been seen in the validation dataset.
        lr /= 4.0
    with torch.no_grad():
        print('sample:\n', generate(50), '\n')

sample:
 èu्დć殻ვ﻿PنTاbاD,火لBtM隊€6ocGต1“pTA⅔ầH჻Á攻fF£D-♯्±هśN 

| epoch   1 |   100/ 2808 batches | lr 4.00 | loss  3.60 | ppl    36.68
| epoch   1 |   200/ 2808 batches | lr 4.00 | loss  3.28 | ppl    26.67
| epoch   1 |   300/ 2808 batches | lr 4.00 | loss  3.25 | ppl    25.80
| epoch   1 |   400/ 2808 batches | lr 4.00 | loss  3.22 | ppl    25.15
| epoch   1 |   500/ 2808 batches | lr 4.00 | loss  3.22 | ppl    24.98
| epoch   1 |   600/ 2808 batches | lr 4.00 | loss  3.11 | ppl    22.43
| epoch   1 |   700/ 2808 batches | lr 4.00 | loss  3.00 | ppl    20.16
| epoch   1 |   800/ 2808 batches | lr 4.00 | loss  2.91 | ppl    18.44
| epoch   1 |   900/ 2808 batches | lr 4.00 | loss  2.83 | ppl    16.89
| epoch   1 |  1000/ 2808 batches | lr 4.00 | loss  2.73 | ppl    15.27
| epoch   1 |  1100/ 2808 batches | lr 4.00 | loss  2.61 | ppl    13.66
| epoch   1 |  1200/ 2808 batches | lr 4.00 | loss  2.55 | ppl    12.82
| epoch   1 |  1300/ 2808 batches | lr 4.00 | loss  2.50 | ppl    12.20
| 

In [0]:
# Draw 10000-token samples at three temperatures. Autograd is disabled because
# nothing is trained here and the generation loop carries the hidden state
# across all steps.
with torch.no_grad():
    t1 = generate(10000, 1.)
    t15 = generate(10000, 1.5)
    t075 = generate(10000, 0.75)

# Write each sample to the Drive project folder. The encoding is explicit
# because the samples contain non-ASCII characters (see the sample printed by
# the training cell), so the result must not depend on the platform default.
for filename, sample in (('generated075.txt', t075),
                         ('generated1.txt', t1),
                         ('generated15.txt', t15)):
    with open('/content/gdrive/My Drive/Colab_Notebooks/rnn/' + filename, 'w', encoding='utf-8') as outf:
        outf.write(sample)