In [None]:
from __future__ import unicode_literals, print_function, division
from io import open
import glob
import torch
import pylab
import torch.nn.functional as F
import torchvision.models as models
import random
import numpy as np
from torch.autograd import Variable
import torch.optim as optim
import torch.nn as nn
import matplotlib.pyplot as plt
import unicodedata
import string
import time
import math
import matplotlib.ticker as ticker
import torch.utils.data as data_utils
import shutil
import pdb


import torch.multiprocessing as mp
from multiprocessing import set_start_method
import pickle
# Seed every random number generator the notebook draws from so that runs are
# repeatable: torch initialises the network weights and the random hidden states,
# numpy drives the multinomial draw in `sample`.
torch.manual_seed(1)
np.random.seed(1)
random.seed(1)

# Request the 'spawn' start method for worker processes (presumably so CUDA can be
# used inside them -- confirm). set_start_method raises RuntimeError when a start
# method has already been fixed, e.g. when this cell is re-run; keeping the
# existing method is fine in that case.
try:
    set_start_method('spawn')
except RuntimeError:
    pass

# Compute device used throughout the notebook: the GPU when one is available,
# otherwise the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

In [None]:
## Helper functions

### File IO
def find_files(path):
    """Return the list of filesystem paths that match the glob pattern `path`."""
    matches = glob.glob(path)
    return matches

def read_lines(filename):
    """Return the entire contents of `filename` as a single UTF-8 decoded string.

    Despite the name, the text is neither split into lines nor stripped of
    newlines: callers receive the raw character stream.

    Args:
        filename: path of the text file to read.

    Returns:
        The decoded file contents as one string.
    """
    # The context manager closes the handle even if reading or decoding fails.
    with open(filename, encoding='utf-8') as handle:
        return handle.read()


def ids2string(ids):
    """Decode a CPU tensor of character ids into text via `index_to_char`."""
    chars = [index_to_char[i] for i in ids.numpy()]
    return "".join(chars)
    

### Data Preprocessing
### One-hot tensors are laid out as (seq, batch, n_letters)
def ids2tensor(ids):
    """One-hot encode a 1-D sequence of character ids.

    Args:
        ids: 1-D tensor (or array) of integer character ids, each smaller than
            the module-level `n_letters`. It may live on any device.

    Returns:
        A float32 CPU tensor of shape (len(ids), 1, n_letters), i.e.
        (seq, batch=1, n_letters), with a single 1 per time step. An empty
        `ids` yields an empty (0, 1, n_letters) tensor.
    """
    # Index on the CPU: the result is a CPU tensor, as callers expect.
    indices = torch.as_tensor(ids, dtype=torch.long).cpu()
    length = indices.shape[0]
    one_hot = torch.zeros(length, 1, n_letters, dtype=torch.float32)
    if length:
        # One vectorised write instead of a Python loop over every position.
        one_hot[torch.arange(length), 0, indices] = 1
    return one_hot

def string2tensor(string):
    """One-hot encode a text string character by character.

    Args:
        string: text whose characters are all keys of the module-level
            `char_to_index` mapping.

    Returns:
        A float32 CPU tensor of shape (len(string), 1, n_letters), i.e.
        (seq, batch=1, n_letters). An empty string yields an empty
        (0, 1, n_letters) tensor.

    Raises:
        KeyError: if a character is missing from `char_to_index`.
    """
    length = len(string)
    one_hot = torch.zeros(length, 1, n_letters, dtype=torch.float32)
    if length:
        indices = torch.tensor([char_to_index[ch] for ch in string], dtype=torch.long)
        # One vectorised write instead of a Python loop over every position.
        one_hot[torch.arange(length), 0, indices] = 1
    return one_hot

def string2ids(string):
    """Map every character of `string` to its id and return them as a LongTensor."""
    return torch.LongTensor([char_to_index[ch] for ch in string])

## return batched input tensor and target tensor
def get_batch(idx, sequence, batch_size=1):
    """Build one training batch of overlapping character windows.

    Window b starts at position idx + b of the module-level `ids` tensor. Its
    input is the one-hot encoding of `sequence` consecutive ids and its target
    is the same window shifted one position to the right.

    NOTE: the notebook defines `get_batch` a second time further down; that later
    definition is the one bound when training runs.

    Args:
        idx: position in `ids` of the first window.
        sequence: number of time steps per window.
        batch_size: maximum number of windows; fewer are returned near the end
            of the data (windows never start at or after data_size - sequence).

    Returns:
        (inputs, targets) on the module-level `device`: inputs is float32 of
        shape (sequence, batch, n_letters), targets is int64 of shape
        (batch, sequence). When no window fits, both are returned with a batch
        dimension of 0 so the caller's short-batch check can run.
    """
    stop = min(idx + batch_size, data_size - sequence)
    if stop <= idx:
        inputs = torch.zeros(sequence, 0, n_letters, dtype=torch.float32)
        targets = torch.zeros(0, sequence, dtype=torch.long)
    else:
        # Row b of `windows` is ids[idx+b : idx+b+sequence+1]: the input window
        # plus the one extra character needed for the shifted target.
        windows = ids[idx:stop + sequence].long().unfold(0, sequence + 1, 1)
        targets = windows[:, 1:].contiguous()
        steps = windows[:, :-1].t().contiguous().unsqueeze(2)  # (seq, batch, 1)
        inputs = torch.zeros(sequence, windows.shape[0], n_letters,
                             dtype=torch.float32, device=steps.device)
        inputs.scatter_(2, steps, 1.0)

    moved = []
    for tensor in (inputs, targets):
        # Pinning only applies to host tensors that are about to be copied to a GPU.
        if device.type == 'cuda' and not tensor.is_cuda:
            tensor = tensor.pin_memory()
        moved.append(tensor.to(device, non_blocking=True))
    return moved[0], moved[1]


### checkpoint 
def save_checkpoint(state, filename='checkpoint.pth.tar'):
    """Serialise `state` to `filename` with torch.save and print a confirmation."""
    torch.save(obj=state, f=filename)
    print("saved")

In [None]:
## Data Loading
TEXT_PATH = 'enwik8'

# Read the raw corpus text. Every path matched by TEXT_PATH overwrites `lines`,
# so only the last match is kept; with no match `lines` stays empty.
lines = ""
for filename in find_files(TEXT_PATH):
    lines = read_lines(filename)

# The alphabet was presumably built once with list(set(lines)) and cached in the
# `all_letters` pickle -- confirm.

# NOTE: pickle.load can execute arbitrary code; only load pickles you produced yourself.
with open("all_letters", "rb") as f:   # Unpickling
    all_letters = pickle.load(f)

with open("wiki_ids", "rb") as f:   # Unpickling
    ids = torch.LongTensor(pickle.load(f))

# Alphabet size and corpus length (in characters of `lines`).
n_letters = len(all_letters)
data_size = len(lines)

# Lookup tables between characters and their integer ids.
index_to_char = dict(enumerate(all_letters))
char_to_index = dict(zip(all_letters, range(n_letters)))

In [27]:
# NOTE(review): as written this cell fails with the NameError recorded below.
# `import dictionary` binds only the module name, so the bare name `Dictionary`
# is undefined. Presumably `dictionary.Dictionary()` (or
# `from dictionary import Dictionary`) was intended -- confirm against the
# `dictionary` module. Nothing in the visible notebook uses the result.
import dictionary


Dictionary()

NameError: name 'Dictionary' is not defined

In [None]:
## Models
### input: (seq,  batch, input_size)
### hidden: (num_layers * direction, batch, hidden_size)

class PredictionNetwork(nn.Module):
    """Three single-layer LSTMs whose outputs are combined to predict the next character.

    The first LSTM reads the raw input. Each later LSTM reads a linear projection
    of the previous LSTM's input concatenated with that LSTM's output. The outputs
    of all three LSTMs are concatenated and projected to `output_size` log-probabilities.

    Shapes follow the nn.LSTM default layout:
        input:  (seq, batch, input_size)
        hidden: (num_layers * directions, batch, hidden_size)
    """

    def __init__(self, input_size, hidden_size, output_size, batch_size):
        """Create the layers.

        Args:
            input_size: width of each input vector.
            hidden_size: width of every LSTM's hidden state.
            output_size: number of classes predicted per time step.
            batch_size: batch size used by `initHidden`; callers may reassign
                `self.batch_size` later.
        """
        super(PredictionNetwork, self).__init__()
        self.hidden_size = hidden_size
        self.batch_size = batch_size
        self.output_size = output_size

        ## Weights
        # Attribute names and creation order are kept stable: saved checkpoints
        # refer to the parameters by these names and by their position.
        self.lstm1 = nn.LSTM(input_size=input_size, hidden_size=hidden_size, num_layers=1)
        self.lstm2 = nn.LSTM(input_size=input_size, hidden_size=hidden_size, num_layers=1)
        self.lstm3 = nn.LSTM(input_size=input_size, hidden_size=hidden_size, num_layers=1)
        self.lstm2input = nn.Linear(hidden_size + input_size, input_size)
        self.lstm3input = nn.Linear(hidden_size + input_size, input_size)

        self.h2o = nn.Linear(hidden_size * 3, output_size)

    def forward(self, input, hiddens):
        """Compute per-step log-probabilities.

        Args:
            input: tensor of shape (seq, batch, input_size).
            hiddens: sequence of three (h, c) pairs, one per LSTM, as produced
                by `initHidden(layer=3)`.

        Returns:
            Tensor of shape (seq, batch, output_size) holding log-softmax scores
            over the last dimension. The LSTMs' final states are discarded.
        """
        h1, h2, h3 = hiddens

        outputs1, _ = self.lstm1(input, h1)

        # Each deeper LSTM sees the previous LSTM's input and output, concatenated
        # along the feature dimension and projected back to input_size.
        input2 = self.lstm2input(torch.cat((input, outputs1), 2))
        outputs2, _ = self.lstm2(input2, h2)

        input3 = self.lstm3input(torch.cat((input2, outputs2), 2))
        outputs3, _ = self.lstm3(input3, h3)

        logits = self.h2o(torch.cat((outputs1, outputs2, outputs3), 2))
        return F.log_softmax(logits, 2)

    def initHidden(self, layer=1, use_gpu=True):
        """Create random initial (h, c) states.

        Args:
            layer: number of (h, c) pairs to create (one per LSTM).
            use_gpu: when true, move the states to the module-level `device`
                (which is the CPU when no GPU is available).

        Returns:
            List of `layer` tuples (h, c), each tensor of shape
            (1, self.batch_size, self.hidden_size).
        """
        hidden_layers = []
        for _ in range(layer):
            # Sampled on the CPU, then moved. No memory pinning: pinning requires
            # a CUDA runtime and brings nothing to a blocking copy this small.
            h = torch.randn(1, self.batch_size, self.hidden_size)
            c = torch.randn(1, self.batch_size, self.hidden_size)
            if use_gpu:
                h = h.to(device)
                c = c.to(device)
            hidden_layers.append((h, c))
        return hidden_layers

In [None]:
# Width of every LSTM hidden state.
hidden_size = 512
# The network reads and predicts characters, so both widths equal the alphabet size.
input_size = output_size = n_letters


# Number of windows per batch and number of time steps per window.
batch_size = 50 ## batch size too big may cause CUDNN_STATUS_EXECUTION_FAILED
sequence = 50

In [None]:
from_checkpoint = True
new_model = False


### Define Model
# Build the network on the selected device (GPU when available, CPU otherwise).
model = PredictionNetwork(n_letters, hidden_size, n_letters, batch_size).to(device)

# Restore the trained weights unless a fresh model was requested. map_location
# lets a checkpoint written on a GPU machine load on a CPU-only host.
if from_checkpoint and not new_model:
    checkpoint = torch.load('checkpoint.pth.tar', map_location=device)
    model.load_state_dict(checkpoint['state_dict'])

model.batch_size = batch_size


def sample(preds, temperature=1.0):
    """Sample an index from an array of log-probabilities.

    Args:
        preds: 1-D array of log-probabilities (e.g. one row of log_softmax output).
        temperature: values below 1 sharpen the distribution, values above 1
            flatten it. 0 selects the most likely index deterministically.

    Returns:
        The sampled index as a numpy integer.
    """
    log_probs = np.asarray(preds, dtype=np.float64)
    if temperature == 0:
        # Limit of the tempered distribution: all mass on the arg-max.
        return np.argmax(log_probs)

    # Stay in log space: exponentiating first would underflow very unlikely
    # entries to 0 and make the following log() emit divide-by-zero warnings.
    scaled = log_probs / temperature
    # Subtracting the maximum keeps exp() from overflowing and leaves the
    # normalised probabilities unchanged.
    scaled = scaled - np.max(scaled)
    weights = np.exp(scaled)
    probs = weights / np.sum(weights)
    draw = np.random.multinomial(1, probs, 1)
    return np.argmax(draw)


def sample_text(test_model, ids, temperature=1.0, max_sample_length=100):
    """Generate text by repeatedly sampling the character that follows a sliding window.

    The window starts as the characters encoded by `ids`. At every step the model
    scores the window, one character is sampled from the last time step, and the
    window is shifted by one to include it. Sampling the last alphabet index
    (n_letters - 1) ends generation early.

    Args:
        test_model: network exposing `batch_size`, `initHidden` and `__call__`.
        ids: 1-D tensor of exactly `sequence` character ids used as the seed.
        temperature: passed to `sample`.
        max_sample_length: maximum number of characters to generate.

    Returns:
        The seed text followed by the generated characters, or None (after
        printing a message) when `ids` does not contain `sequence` ids.
    """
    if ids.shape[0] != sequence:
        print ("line length doesnt match sequence, break")
        return

    window_text = ids2string(ids.cpu())
    output_string = window_text
    print ("Starting with : " + window_text)

    step_input = ids2tensor(ids.cpu()).view(sequence, 1, -1).to(device)

    print_every = 50
    # Timed locally so the progress message does not depend on a global `start`.
    started = time.time()

    origin_batch_size = test_model.batch_size
    test_model.batch_size = 1
    try:
        # Sampling needs no gradients; skipping the autograd graph saves memory.
        with torch.no_grad():
            for i in range(max_sample_length):
                hiddens = test_model.initHidden(layer=3, use_gpu=True)
                outputs = test_model(step_input, hiddens)
                topi = sample(outputs[-1][0].detach().cpu().numpy(), temperature)

                if topi == n_letters - 1:
                    break

                letter = all_letters[topi]
                window_text = window_text[1:] + letter
                output_string += letter

                step_input = string2tensor(window_text).view(sequence, 1, -1).to(device)

                if i % print_every == 0 and i > 0:
                    print ("In Progress {} / {},  Takes {} seconds.".format(i, max_sample_length, time.time() - started))
    finally:
        # Restore the caller's batch size even if generation fails part-way.
        test_model.batch_size = origin_batch_size

    print ("Generating Text: \n")
    return output_string


# Seed the generator with `sequence` characters taken from the corpus.
start_idx = 500
string_ids_start_with = ids[start_idx:start_idx + sequence]

# Generate 300 characters at a low temperature and time the whole run.
start = time.time()
generated_text = sample_text(model, string_ids_start_with, temperature=0.3, max_sample_length=300)
print(generated_text)
end = time.time()
print ("\n Finished ! Takes {} seconds ".format(end - start))

In [None]:
# Move the corpus id tensor onto the compute device selected at the top of the notebook.
ids = ids.to(device)

In [None]:
## Training

batch_size = 300
model.batch_size = batch_size

###

# Keep the corpus ids and the model on the selected device; `.to(device)` also
# works on CPU-only hosts, unlike a hard-coded `.cuda()`.
ids = ids.to(device).contiguous()
model = model.to(device)


criterion = nn.NLLLoss().to(device)
max_clip_norm = 2  # gradient-norm ceiling applied in train_batch
optimizer = optim.Adam(model.parameters(), lr=0.001)


from_checkpoint = True
start_epoch = 0
start_iter = 0



if from_checkpoint:
    print("=> loading checkpoint ")
    # map_location lets a checkpoint written on a GPU machine load on a CPU-only host.
    checkpoint = torch.load('checkpoint.pth.tar', map_location=device)
    model.load_state_dict(checkpoint['state_dict'])
    optimizer.load_state_dict(checkpoint['optimizer'])
    start_epoch = checkpoint['epoch']
    start_iter = checkpoint['iter']


### Training
epochs = 3
# Reporting / bookkeeping intervals, counted in iterations.
print_every = 50
plot_every = 50
save_every = 2000
sample_every = 2000

#### sampling string
# Seed window used for the sample text printed during training.
start_idx = 0
string_ids_start_with = ids[start_idx:start_idx+sequence]

def train(model, epoch):
    """Run one training epoch over the corpus.

    Iteration i trains on the batch returned by get_batch(i * batch_size, ...).
    The epoch stops early when a batch comes back with fewer than `batch_size`
    windows. When resuming from a checkpoint, only the first epoch run after
    loading skips ahead to the saved iteration; later epochs start at 0.

    Args:
        model: the network to train.
        epoch: zero-based epoch index, used in log messages and checkpoints.

    Returns:
        List with the mean of the per-iteration losses for every `plot_every`
        iterations.
    """
    global start_iter

    ### book keeping
    all_losses = []
    total_loss = 0
    start = time.time()

    iters = int(data_size / batch_size)

    # Consume the resume offset exactly once. Without the reset every later
    # epoch would also skip its first `start_iter` iterations.
    first_iter = start_iter if from_checkpoint else 0
    start_iter = 0

    hiddens = model.initHidden(layer=3, use_gpu=True)
    for step in range(first_iter, iters):
        train_start = time.time()

        inputTensor, targetTensor = get_batch(step * batch_size, sequence, batch_size)

        # Dimension 1 of the input is the batch dimension.
        if inputTensor.shape[1] < batch_size:
            print ("{} not enough samples for batch, break".format(inputTensor.shape[1]))
            break

        # Detach the hidden states so gradients do not flow into earlier batches.
        hiddens = repackage_hidden(hiddens)

        _, loss = train_batch(model, inputTensor, targetTensor, hiddens)
        total_loss += loss

        ##### Tracking
        if step % print_every == 0 and step > 0:
            # Reports the loss of the current batch and the time since the last report.
            print ("Epoch : {}, Iteration {} / {}, Loss per {} :  {}, Takes {} Seconds".format(epoch+1, step, iters, print_every,  loss, time.time() - start))
            start = time.time()

        if step % plot_every == 0 and step > 0:
            all_losses.append(total_loss / plot_every)
            total_loss = 0

        if step % sample_every == 0 and step > 0:
            print(sample_text(model, string_ids_start_with, temperature=1, max_sample_length=100))

        if step % save_every == 0 and step > 0:
            save_checkpoint({
                'epoch': epoch,
                'iter': step,
                'all_losses': all_losses,
                'state_dict': model.state_dict(),
                'optimizer' : optimizer.state_dict(),
            })
        #####
        print ("train {}".format(time.time() - train_start))

    return all_losses


# outputs: (seq_len, batch, n_letters)
# target_tensor (batch, seq)
def train_batch(model, input_tensor, target_tensor, hiddens):
    """Run one optimisation step on a single batch.

    Uses the module-level `optimizer`, `criterion`, `sequence`, `batch_size`
    and `max_clip_norm`.

    Args:
        model: network called as model(input_tensor, hiddens); its output has
            shape (seq_len, batch, n_letters).
        input_tensor: batch input passed straight to the model.
        target_tensor: int64 tensor of shape (batch, seq) with the expected
            class at every position.
        hiddens: initial hidden states passed straight to the model.

    Returns:
        (outputs, loss) where `outputs` is the model output and `loss` is the
        summed per-step criterion value divided by `batch_size`.
    """
    optimizer.zero_grad()

    outputs = model(input_tensor, hiddens)

    # Targets arrive as (batch, seq) but the outputs are indexed by time step
    # first. They must be transposed: view() would only reinterpret the memory
    # and pair each step's outputs with targets from other positions.
    step_targets = target_tensor.t().contiguous()  # (sequence, batch)

    loss = 0
    for i in range(0, sequence):
        loss += criterion(outputs[i], step_targets[i])

    loss.backward()
    # In-place variant; the un-suffixed clip_grad_norm is deprecated.
    torch.nn.utils.clip_grad_norm_(model.parameters(), max_clip_norm)
    optimizer.step()

    # NOTE(review): with a mean-reducing criterion the summed loss is already
    # averaged over the batch, so dividing by `sequence` would give a
    # per-character value. Kept as-is so logged values stay comparable with
    # earlier runs -- confirm intent.
    return outputs, loss.item() / batch_size

# Announce the start of the training phase.
print ("Start Training") 



# cuDNN is enabled explicitly. For asynchronous kernel launches,
# set CUDA_LAUNCH_BLOCKING = 0 to do async operation
torch.backends.cudnn.enabled = True
# Without a checkpoint, training begins at the first epoch.
if from_checkpoint is False:
    start_epoch = 0
    
def get_batch(idx, sequence, batch_size=1):
    """Build one training batch of overlapping character windows.

    Window b starts at position idx + b of the module-level `ids` tensor. Its
    input is the one-hot encoding of `sequence` consecutive ids and its target
    is the same window shifted one position to the right.

    NOTE: this rebinds the `get_batch` defined earlier in the notebook; this
    definition is the one in effect when training runs.

    Args:
        idx: position in `ids` of the first window.
        sequence: number of time steps per window.
        batch_size: maximum number of windows; fewer are returned near the end
            of the data (windows never start at or after data_size - sequence).

    Returns:
        (inputs, targets), both contiguous and on the module-level `device`:
        inputs is float32 of shape (sequence, batch, n_letters), targets is
        int64 of shape (batch, sequence). When no window fits, both are returned
        with a batch dimension of 0 so the caller's short-batch check can run.
    """
    stop = min(idx + batch_size, data_size - sequence)
    if stop <= idx:
        inputs = torch.zeros(sequence, 0, n_letters, dtype=torch.float32)
        targets = torch.zeros(0, sequence, dtype=torch.long)
    else:
        # Row b of `windows` is ids[idx+b : idx+b+sequence+1]: the input window
        # plus the one extra character needed for the shifted target.
        windows = ids[idx:stop + sequence].long().unfold(0, sequence + 1, 1)
        targets = windows[:, 1:].contiguous()
        steps = windows[:, :-1].t().contiguous().unsqueeze(2)  # (seq, batch, 1)
        inputs = torch.zeros(sequence, windows.shape[0], n_letters,
                             dtype=torch.float32, device=steps.device)
        inputs.scatter_(2, steps, 1.0)

    moved = []
    for tensor in (inputs, targets):
        # Pinning only applies to host tensors that are about to be copied to a GPU.
        if device.type == 'cuda' and not tensor.is_cuda:
            tensor = tensor.pin_memory()
        moved.append(tensor.to(device, non_blocking=True).contiguous())
    return moved[0], moved[1]


def repackage_hidden(h):
    """Return `h` with every tensor detached from its autograd history.

    A tensor is returned detached; any other iterable is rebuilt, recursively,
    as a tuple of repackaged elements.
    """
    if not isinstance(h, torch.Tensor):
        return tuple(map(repackage_hidden, h))
    return h.detach()

    
    
# Run the remaining epochs, starting from `start_epoch`, and time the whole run.
start = time.time()
for epoch_iter in range(start_epoch, epochs):
    print ("Start Training Epoch ", epoch_iter+1) 
    
    # train (the list of losses returned by train() is not kept here)
    train(model, epoch_iter)
    
    # valid: no validation pass is implemented

end = time.time()
print ("Training finished ! Takes {} seconds ".format(end - start))

In [None]:
##### Character-level RNN reference: A. Graves, "Generating Sequences With Recurrent Neural Networks" -- https://arxiv.org/pdf/1308.0850.pdf

In [None]:
#all_losses = checkpoint['all_losses']
#losses = [float(l.cpu().numpy()) for l in all_losses]
#plt.plot(losses)

NameError: name 'text' is not defined