In [2]:
import torch
from torch import nn
from torch.autograd import Variable

from data_loader_multi_style import DataLoader
from model import UniSkip
from config import *
from datetime import datetime, timedelta

from tensorboardX import SummaryWriter
import os

import sys
sys.path.append('..')
import gather

[nltk_data] Downloading package punkt to /jet/nltk_data...
[nltk_data]   Package punkt is already up-to-date!


In [2]:
language = "english"
d = DataLoader(("./dataset/"+language+"/corpus.txt", gather.get_datasets(language)))
n_categories = len(d.categories)
print(n_categories)

STD: OK
STD: OK
STD: OK
STD: OK
STD: OK
STD: OK
Loading corpus at ./dataset/english/corpus.txt
Datasets used:
common
austin
dickens
shakespeare
wilde
songs
Making dictionary for these words
Using cached dictionary at ./dataset/english/corpus.txt.pkl
Making reverse dictionary
[52828, 36346, 16418, 99807, 7090, 239838]
[0.11679161314712586, 0.08035337267065641, 0.03629674991764807, 0.22065231569196622, 0.015674500969431408, 0.5302314476031721]
6


In [3]:
mod = UniSkip(n_categories)
if USE_CUDA:
    print("USING CUDA")
    mod.cuda(CUDA_DEVICE)

USING CUDA


  "PyTorch was compiled without cuDNN support. To use cuDNN, rebuild "


In [4]:
lr = 3e-4
optimizer = torch.optim.Adam(params=mod.parameters(), lr=lr)

In [5]:
writer = SummaryWriter()

In [6]:
loss_trail = []
last_best_loss = None
start_i = 0


def save_model(i, loss, name):
    save_loc = "./saved_models/skip-{}-{}".format(name, VOCAB_SIZE)
    print("saving model at {}".format(save_loc))
    
    torch.save(mod.state_dict(), save_loc)
    torch.save((i, loss), save_loc+".meta")

def debug(i, loss, prev, nex, prev_pred, next_pred):
    global loss_trail
    global last_best_loss
    global current_time

    this_loss = loss.item()
    loss_trail.append(this_loss)
    loss_trail = loss_trail[-20:]
    
    try:
        trail_loss = sum(loss_trail)/len(loss_trail)
        if (last_best_loss is None or last_best_loss > trail_loss) and len(loss_trail) > 10:
            print("Loss improved from {} to {}".format(last_best_loss, trail_loss))
            save_model(i, trail_loss, "best")
            
            last_best_loss = trail_loss
    except Exception as e:
       print("Couldn't save model because {}".format(e))
    
def get_natural_sentence(res):
    sent = [x for x in res if x != 0]
    sent = d.convert_indices_to_sentences(sent)

    from sacremoses import MosesDetokenizer
    detokenizer = MosesDetokenizer()

    return detokenizer.detokenize(sent.split(" "), return_str=True)

In [7]:
reload_last_checkpoint = False
save_loc = "./saved_models/skip-best-{}".format(VOCAB_SIZE)
tally = [0 for _ in range(n_categories)]
    
if reload_last_checkpoint and os.path.exists(save_loc):
    print("Loading last checkpoint: "+save_loc)
    mod.load_state_dict(torch.load(save_loc))
    
    start_i, last_best_loss = torch.load(save_loc+".meta")
    print("At iter "+str(start_i)+" | Loss: " + str(last_best_loss))

In [None]:
print("Starting training...")

batch_size = 64*8

n_batches_per_epoch = sum(d.n_sentences)/batch_size
print(n_batches_per_epoch)


# a million iterations
for i in range(start_i, 1000000):
    sentences, category, lengths = d.fetch_batch(32 * 8)
    cat_tensor = torch.Tensor([1 if c == category else 0 for c in range(n_categories)]).cuda(CUDA_DEVICE)

    loss, prev, nex, prev_pred, next_pred  = mod(sentences, lengths, cat_tensor, cat_tensor)
    writer.add_scalar('loss', loss, i)
    writer.add_scalar('losses/'+d.categories[category], loss, i)
    tally[category] += 1
    writer.add_scalar('losses_std/'+d.categories[category], loss, tally[category])
    
    if i % 30 == 0:
        print(i)
        str_prev = get_natural_sentence(prev)
        str_prev_pred = get_natural_sentence(prev_pred)
        str_next = get_natural_sentence(nex)
        str_next_pred = get_natural_sentence(next_pred)
        writer.add_text('Prev', str_prev + ' | ' + str_prev_pred, i)
        writer.add_text('Next', str_next + ' | ' + str_next_pred, i)
        debug(i, loss, prev, nex, prev_pred, next_pred)
    
    if i % (int(3*n_batches_per_epoch)) == 0:
        save_model(i, loss, "checkpoint-{}".format(i // (3*n_batches_per_epoch)))

    optimizer.zero_grad()
    loss.backward()
    optimizer.step()

Starting training...
883.451171875


  "PyTorch was compiled without cuDNN support. To use cuDNN, rebuild "


4860


In [15]:
print(i)
save_loc = "./saved_models/skip-last"
print("saving model at {}".format(save_loc))
trail_loss = sum(loss_trail)/len(loss_trail)
torch.save(mod.state_dict(), save_loc)
torch.save((i, trail_loss), save_loc+".meta")

126134
saving model at ./saved_models/skip-last


In [None]:
print(d.fetch_batch(1 * 8))

In [11]:
print(start_i)

4840


In [10]:
start_i = i

In [14]:
print(tally)

[14912, 10079, 4579, 27614, 2016, 66936]


In [13]:
!nvidia-smi

Wed Jan 16 10:41:37 2019       
+-----------------------------------------------------------------------------+
| NVIDIA-SMI 396.51                 Driver Version: 396.51                    |
|-------------------------------+----------------------+----------------------+
| GPU  Name        Persistence-M| Bus-Id        Disp.A | Volatile Uncorr. ECC |
| Fan  Temp  Perf  Pwr:Usage/Cap|         Memory-Usage | GPU-Util  Compute M. |
|   0  Tesla K80           Off  | 00000000:00:04.0 Off |                    0 |
| N/A   49C    P0   148W / 149W |   1324MiB / 11441MiB |     78%      Default |
+-------------------------------+----------------------+----------------------+
                                                                               
+-----------------------------------------------------------------------------+
| Processes:                                                       GPU Memory |
|  GPU       PID   Type   Process name                             Usage    