In [1]:
%matplotlib widget
import numpy as np
import math
import torch
import torch.nn as nn
from torch import optim
import torch.nn.functional as F

import random

import matplotlib.pyplot as plt

import re

from tqdm import tqdm
from tqdm import tnrange, tqdm_notebook

from joblib import Parallel, delayed
import multiprocessing
from datetime import datetime

from torch.utils.data import DataLoader, Dataset
from IPython.display import clear_output

In [2]:
# device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
device = torch.device("cuda:1")
device

device(type='cuda', index=1)

In [3]:
torch.manual_seed(1010101011)
random.seed(1010101011)

In [4]:
ftrainset = open("../dataset/train.txt", encoding='utf8')
trainset = [slowo.replace("\n", "") for slowo in ftrainset.readlines()]
ftrainset.close()

ftestset = open("../dataset/test.txt", encoding='utf8')
testset = [slowo.replace("\n", "") for slowo in ftestset.readlines()]
ftestset.close()

In [5]:
trainset = trainset[:20]
# testset = testset[:7]

In [6]:
print("Train: ", len(trainset))
print("Test: ", len(testset))

Train:  20
Test:  8931


In [7]:
chartoidx = {}

cnt = 0

longestword = 0

chartoidx["<EMPTY>"] = 0
chartoidx["<END>"] = 1
chartoidx["<START>"] = 2

for slowo in tqdm_notebook(trainset+testset):  
    for litera in slowo:
        if litera not in list(chartoidx.keys()):
            chartoidx[litera] = cnt+3
            cnt = cnt + 1
            
    if len(slowo) > longestword:
        longestword = len(slowo)
            
vocabsize = len(list(chartoidx.keys()))

HBox(children=(IntProgress(value=0, max=8951), HTML(value='')))




In [8]:
# torch.save(chartoidx, "../models/chartoidx_rockyou_limited.pt")
# torch.save(longestword, "../models/longestword_rockyou_limited.pt")
# torch.save(vocabsize, "../models/vocabsize_limited.pt")

In [9]:
# chartoidx = torch.load("../models/chartoidx_rockyou_limited.pt")
# longestword = torch.load("../models/longestword_rockyou_limited.pt")
# vocabsize = torch.load("../models/vocabsize_limited.pt")

In [10]:
longestword

15

In [11]:
chartoidx

{'<EMPTY>': 0,
 '<END>': 1,
 '<START>': 2,
 'p': 3,
 'i': 4,
 'k': 5,
 'u': 6,
 's': 7,
 '8': 8,
 '0': 9,
 'a': 10,
 'n': 11,
 'e': 12,
 'c': 13,
 'A': 14,
 '1': 15,
 '2': 16,
 '3': 17,
 '4': 18,
 '5': 19,
 '6': 20,
 '7': 21,
 '9': 22,
 'l': 23,
 't': 24,
 'g': 25,
 'h': 26,
 'd': 27,
 'm': 28,
 'r': 29,
 'M': 30,
 'I': 31,
 'S': 32,
 'K': 33,
 'J': 34,
 'o': 35,
 'P': 36,
 'j': 37,
 'z': 38,
 'B': 39,
 'U': 40,
 'b': 41,
 'f': 42,
 'w': 43,
 'W': 44,
 'y': 45,
 'R': 46,
 'E': 47,
 'x': 48,
 'v': 49,
 'q': 50,
 'N': 51,
 'L': 52,
 'T': 53,
 'Z': 54,
 'G': 55,
 'D': 56,
 'C': 57,
 'H': 58,
 'O': 59,
 '.': 60,
 '#': 61,
 'F': 62,
 '%': 63,
 'Q': 64,
 'Y': 65,
 '!': 66,
 'X': 67,
 '-': 68,
 ' ': 69,
 '_': 70,
 'V': 71,
 ',': 72}

In [12]:
class MyDataset(Dataset):
    def __init__(self, slowa, chartoidx, longestword, padding):
        self.slowa = slowa
        self.chartoidx = chartoidx
        self.longestword = longestword
        self.padding = padding
        
    def __len__(self):
        return len(self.slowa)
        
    def __getitem__(self, index):
        literyx = []
        literyy = []
        
        slowo = self.slowa[index]
        
#         print(slowo)
#         print(startfrom)
        
        for cnt in range(self.longestword):
#             print("cnt2", cnt2)
            if cnt==0:                
                literyx.append(chartoidx["<START>"])
                literyy.append(chartoidx[slowo[cnt]])
            elif cnt < (len(slowo) - 1):
                literyx.append(chartoidx[slowo[cnt]])
                literyy.append(chartoidx[slowo[cnt+1]])
            elif cnt == (len(slowo) - 1):
                literyx.append(chartoidx[slowo[cnt]])
                literyy.append(chartoidx["<END>"])
            else:
                literyx.append(chartoidx["<EMPTY>"])
                literyy.append(chartoidx["<EMPTY>"])
                
        return np.array(literyx, dtype="float32"), np.array(literyy, dtype="long")

In [13]:
DS_train = MyDataset(trainset, chartoidx, longestword, 0)
DS_test = MyDataset(testset, chartoidx, longestword, 0)

In [14]:
BS = 8000

In [15]:
DS_train.__getitem__(10000)

IndexError: list index out of range

In [16]:
DS_test.__getitem__(0)

(array([ 2.,  4.,  5., 24., 35., 29.,  4., 10.,  0.,  0.,  0.,  0.,  0.,
         0.,  0.], dtype=float32),
 array([44,  5, 24, 35, 29,  4, 10,  1,  0,  0,  0,  0,  0,  0,  0]))

In [17]:
DL_train = DataLoader(dataset=DS_train, batch_size=BS, num_workers=5, shuffle=True, pin_memory=False)
DL_test = DataLoader(dataset=DS_test, batch_size=BS, num_workers=5)

In [18]:
class CharacterLSTM(nn.Module):
    def __init__(self, vocabsize, lstmlayers):
        super(CharacterLSTM, self).__init__()
        
        
        ## WARSTWY
        self.embd = nn.Embedding(vocabsize, 2*vocabsize)
        self.LSTM1 = nn.GRU(2*vocabsize, vocabsize, lstmlayers, batch_first=True, bidirectional=True, dropout=0.5)
        self.linear_ins = nn.Linear(2*vocabsize, vocabsize)
        #self.linear_outs = nn.Linear(2*vocabsize, vocabsize)
        
        ## TRICKY LAYERS
#         self.norm = nn.LayerNorm(2*vocabsize)
#         self.norm2 = nn.LayerNorm(vocabsize)
        self.drop = nn.Dropout(p=0.5)
        
        ## OUTS
        self.softmax = nn.LogSoftmax(dim=2)
        
        
        self.init_weights()
        
    def init_weights(self):
        initrange = 0.1
        self.embd.weight.data.uniform_(-initrange, initrange)
        self.linear_ins.bias.data.zero_()
        self.linear_ins.weight.data.uniform_(-initrange, initrange)
#         self.linear_outs.bias.data.zero_()
#         self.linear_outs.weight.data.uniform_(-initrange, initrange)
        
    def forward(self, x, hidden, NLL=True):
#         print(x.shape)
        
        # WEJSCIE
        y0 = self.embd(x)
        y0 = self.drop(y0)
        
        # DROP
#         if drop>0:
#             y0 = self.EmbeddingDrop(y0, p=drop)
        
        # LSTM
        y, h1_ = self.LSTM1(y0, hidden)
        
        # TRICKY
        # y = self.norm(y)
        
        # y = self.drop(y)
        
        # LINEAR OUT 1
        #y = F.relu(self.linear_ins(y))
        y = self.linear_ins(y)
        
#         # TRICKY
#         y = self.norm(y)
    
#         # TRICKY 2
#         y = self.drop(y)
        
        
#         # LINEAR OUT 2
#         y = F.relu(self.linear_outs(y))
      
#         y = self.norm2(y)
        
        if NLL:
            y = self.softmax(y)
        
        # zwrot
        return y, h1_
    
#     def EmbeddingDrop(self, dataset, p=0.1):
#         w1, w2, w3 = dataset.shape
#         bern = torch.bernoulli((1-p) * torch.ones((w1, w2, 1))).to("cuda:1")
#         return dataset * bern

In [19]:
class Discriminator(nn.Module):
    def __init__(self, vocabsize, longestword):
        super(Discriminator, self).__init__()
        
        ## WARSTWY
        self.embd = nn.Embedding(vocabsize, 2*vocabsize)
        self.ins1 = nn.Linear(2*vocabsize*longestword, 2*vocabsize*longestword)
        self.hid1 = nn.Linear(2*vocabsize*longestword, vocabsize)
        self.out1 = nn.Linear(vocabsize, 1)
        
        self.norm1 = nn.BatchNorm1d(2*vocabsize*longestword)
        self.norm2 = nn.BatchNorm1d(vocabsize)
        self.drop = nn.Dropout(p=0.15)
        
    def forward(self, x):
        y = self.embd(x).view(len(x), -1)
        
        # LAYER 1
        y = self.ins1(y)
        y = F.relu(y)
        y = self.norm1(y)
        y = self.drop(y)
        
        # LAYER 2
        y = self.hid1(y)
        y = F.relu(y)
        y = self.norm2(y)
        y = self.drop(y)
        
        # LAYER OUT
        y = self.out1(y)
        y = torch.sigmoid(y)
        
        return y

In [20]:
lstms = 4

In [21]:
# chlstm = torch.load("../models/chlstm_pre_0.11745256930589676.pt").to(device)

In [22]:
chlstm = CharacterLSTM(vocabsize, lstms).to(device)

In [23]:
criterion = nn.BCELoss()
criterionPretraining = nn.NLLLoss()

In [24]:
optimizerLSTM = optim.RMSprop(chlstm.parameters(), lr=1e-3)

In [25]:
def pokrycie(zwrot, testset, device):
    d1, d2 = zwrot.shape
    
    cos = nn.CosineSimilarity(dim=1)
    
    wynik = torch.Tensor([]).to(device)
    
    indxs = [cnt for cnt in range(d1)]
    indx = [indxs[-1]]+indxs[:-1]
    
    for ii in range(d1):
        pod = cos(zwrot, testset)
        zwrot = torch.index_select(zwrot, 0, torch.Tensor(indx).long().to(device))
        wynik = torch.cat((wynik, pod), 0)
    return torch.sum(wynik.eq(1),0)

In [26]:
def accuracy(model, testsetloader, device, lstms, vocabsize):
    t_batch = tqdm_notebook(testsetloader, leave=False)
    hiddens1 = torch.randn(2*lstms, 1, vocabsize).to(device)
    
    suma = 0
    
    for batch in t_batch:
        d1, d2 = batch[0].shape

        xreals = batch[0].long().to(device)
        xfakes = torch.Tensor([]).long().to(device)
        #y = batch[1].long().cuda()
        
        ## GENEROWANIE FAKE PROBEK
        with torch.no_grad():
            chlstm.eval()

            xfake = torch.Tensor([[chartoidx["<START>"]]]).long().to(device).repeat(d1, 1)
            hidd = hiddens1.repeat(1, d1, 1)

            for cnt in range(d2-1):
                y, hidd = chlstm(xfake, hidd, drop=0)

                pred = torch.argmax(F.softmax(y.transpose(1,2), 1), 1)
                proponowanyznak = torch.index_select(pred, 1, torch.Tensor([cnt]).to(device).long())

                xfake = torch.cat((xfake, proponowanyznak.reshape(d1, 1)), 1)
            xfakes = xfake
            
            suma = suma + pokrycie(xfakes.float(), xreals.float(), device).item()
            t_batch.set_description("Acc: {:.8f}".format(suma/d1))
    t_batch.close()
    return suma/len(testsetloader)

In [51]:
epochs = 100

t_epochs = tnrange(epochs)

for epoch in t_epochs:
    t_batch = tqdm_notebook(DL_train, leave=False)
    for batch in t_batch:
        d1, d2 = batch[0].shape
        xreals = batch[0].long().to(device)
        y = batch[1].long().to(device)
        
        hiddens1 = torch.zeros(2*lstms, 1, vocabsize).repeat([1, d1, 1]).to(device)
        
        chlstm.train()
        optimizerLSTM.zero_grad()
        
        y_, hiddens1 = chlstm(xreals, hiddens1, NLL=True)
        loss = criterionPretraining(y_.transpose(1,2), y)
        
        loss.backward()
        optimizerLSTM.step()
        
        losss = loss.item()
        
        t_batch.set_description("Loss: {:.8f}".format(losss))
    t_batch.close()
    t_epochs.set_description("Epoch {}/{}".format(epoch+1, epochs))
    
#     acc = accuracy(chlstm, DL_test, device, lstms, vocabsize)
    
    print("Epoch {}/{}, Loss {:.8f}".format(epoch+1, epochs, losss))

HBox(children=(IntProgress(value=0), HTML(value='')))

HBox(children=(IntProgress(value=0, max=1), HTML(value='')))

Epoch 1/100, Loss 0.01366905


HBox(children=(IntProgress(value=0, max=1), HTML(value='')))

Epoch 2/100, Loss 0.01107941


HBox(children=(IntProgress(value=0, max=1), HTML(value='')))

Epoch 3/100, Loss 0.01080367


HBox(children=(IntProgress(value=0, max=1), HTML(value='')))

Epoch 4/100, Loss 0.01237437


HBox(children=(IntProgress(value=0, max=1), HTML(value='')))

Epoch 5/100, Loss 0.01264341


HBox(children=(IntProgress(value=0, max=1), HTML(value='')))

Epoch 6/100, Loss 0.01106915


HBox(children=(IntProgress(value=0, max=1), HTML(value='')))

Epoch 7/100, Loss 0.01248239


HBox(children=(IntProgress(value=0, max=1), HTML(value='')))

Epoch 8/100, Loss 0.01047179


HBox(children=(IntProgress(value=0, max=1), HTML(value='')))

Epoch 9/100, Loss 0.00995418


HBox(children=(IntProgress(value=0, max=1), HTML(value='')))

Epoch 10/100, Loss 0.01090101


HBox(children=(IntProgress(value=0, max=1), HTML(value='')))

Epoch 11/100, Loss 0.01025261


HBox(children=(IntProgress(value=0, max=1), HTML(value='')))

Epoch 12/100, Loss 0.01071720


HBox(children=(IntProgress(value=0, max=1), HTML(value='')))

Epoch 13/100, Loss 0.00972715


HBox(children=(IntProgress(value=0, max=1), HTML(value='')))

Epoch 14/100, Loss 0.00954721


HBox(children=(IntProgress(value=0, max=1), HTML(value='')))

Epoch 15/100, Loss 0.01146690


HBox(children=(IntProgress(value=0, max=1), HTML(value='')))

Epoch 16/100, Loss 0.01159270


HBox(children=(IntProgress(value=0, max=1), HTML(value='')))

Epoch 17/100, Loss 0.00950947


HBox(children=(IntProgress(value=0, max=1), HTML(value='')))

Epoch 18/100, Loss 0.01061754


HBox(children=(IntProgress(value=0, max=1), HTML(value='')))

Epoch 19/100, Loss 0.01031801


HBox(children=(IntProgress(value=0, max=1), HTML(value='')))

Epoch 20/100, Loss 0.00870126


HBox(children=(IntProgress(value=0, max=1), HTML(value='')))

Epoch 21/100, Loss 0.01294957


HBox(children=(IntProgress(value=0, max=1), HTML(value='')))

Epoch 22/100, Loss 0.01071547


HBox(children=(IntProgress(value=0, max=1), HTML(value='')))

Epoch 23/100, Loss 0.01012232


HBox(children=(IntProgress(value=0, max=1), HTML(value='')))

Epoch 24/100, Loss 0.01116183


HBox(children=(IntProgress(value=0, max=1), HTML(value='')))

Epoch 25/100, Loss 0.00951007


HBox(children=(IntProgress(value=0, max=1), HTML(value='')))

Epoch 26/100, Loss 0.00887688


HBox(children=(IntProgress(value=0, max=1), HTML(value='')))

Epoch 27/100, Loss 0.01205785


HBox(children=(IntProgress(value=0, max=1), HTML(value='')))

Epoch 28/100, Loss 0.00992721


HBox(children=(IntProgress(value=0, max=1), HTML(value='')))

Epoch 29/100, Loss 0.00961909


HBox(children=(IntProgress(value=0, max=1), HTML(value='')))

Epoch 30/100, Loss 0.01100184


HBox(children=(IntProgress(value=0, max=1), HTML(value='')))

Epoch 31/100, Loss 0.01108101


HBox(children=(IntProgress(value=0, max=1), HTML(value='')))

Epoch 32/100, Loss 0.01011979


HBox(children=(IntProgress(value=0, max=1), HTML(value='')))

Epoch 33/100, Loss 0.00834349


HBox(children=(IntProgress(value=0, max=1), HTML(value='')))

Epoch 34/100, Loss 0.00902742


HBox(children=(IntProgress(value=0, max=1), HTML(value='')))

Epoch 35/100, Loss 0.01056393


HBox(children=(IntProgress(value=0, max=1), HTML(value='')))

Epoch 36/100, Loss 0.00842490


HBox(children=(IntProgress(value=0, max=1), HTML(value='')))

Epoch 37/100, Loss 0.00975519


HBox(children=(IntProgress(value=0, max=1), HTML(value='')))

Epoch 38/100, Loss 0.00912552


HBox(children=(IntProgress(value=0, max=1), HTML(value='')))

Epoch 39/100, Loss 0.01010761


HBox(children=(IntProgress(value=0, max=1), HTML(value='')))

Epoch 40/100, Loss 0.01106513


HBox(children=(IntProgress(value=0, max=1), HTML(value='')))

Epoch 41/100, Loss 0.00775087


HBox(children=(IntProgress(value=0, max=1), HTML(value='')))

Epoch 42/100, Loss 0.01023630


HBox(children=(IntProgress(value=0, max=1), HTML(value='')))

Epoch 43/100, Loss 0.01063220


HBox(children=(IntProgress(value=0, max=1), HTML(value='')))

Epoch 44/100, Loss 0.01033221


HBox(children=(IntProgress(value=0, max=1), HTML(value='')))

Epoch 45/100, Loss 0.00781248


HBox(children=(IntProgress(value=0, max=1), HTML(value='')))

Epoch 46/100, Loss 0.01009021


HBox(children=(IntProgress(value=0, max=1), HTML(value='')))

Epoch 47/100, Loss 0.00900101


HBox(children=(IntProgress(value=0, max=1), HTML(value='')))

Epoch 48/100, Loss 0.00883838


HBox(children=(IntProgress(value=0, max=1), HTML(value='')))

Epoch 49/100, Loss 0.00836145


HBox(children=(IntProgress(value=0, max=1), HTML(value='')))

Epoch 50/100, Loss 0.00767337


HBox(children=(IntProgress(value=0, max=1), HTML(value='')))

Epoch 51/100, Loss 0.01005285


HBox(children=(IntProgress(value=0, max=1), HTML(value='')))

Epoch 52/100, Loss 0.00856914


HBox(children=(IntProgress(value=0, max=1), HTML(value='')))

Epoch 53/100, Loss 0.00726018


HBox(children=(IntProgress(value=0, max=1), HTML(value='')))

Epoch 54/100, Loss 0.00652700


HBox(children=(IntProgress(value=0, max=1), HTML(value='')))

Epoch 55/100, Loss 0.01003623


HBox(children=(IntProgress(value=0, max=1), HTML(value='')))

Epoch 56/100, Loss 0.00810259


HBox(children=(IntProgress(value=0, max=1), HTML(value='')))

Epoch 57/100, Loss 0.00769188


HBox(children=(IntProgress(value=0, max=1), HTML(value='')))

Epoch 58/100, Loss 0.00675785


HBox(children=(IntProgress(value=0, max=1), HTML(value='')))

Epoch 59/100, Loss 0.00651086


HBox(children=(IntProgress(value=0, max=1), HTML(value='')))

Epoch 60/100, Loss 0.00778236


HBox(children=(IntProgress(value=0, max=1), HTML(value='')))

Epoch 61/100, Loss 0.00595343


HBox(children=(IntProgress(value=0, max=1), HTML(value='')))

Epoch 62/100, Loss 0.00798571


HBox(children=(IntProgress(value=0, max=1), HTML(value='')))

Epoch 63/100, Loss 0.00701998


HBox(children=(IntProgress(value=0, max=1), HTML(value='')))

Epoch 64/100, Loss 0.00818020


HBox(children=(IntProgress(value=0, max=1), HTML(value='')))

Epoch 65/100, Loss 0.00734606


HBox(children=(IntProgress(value=0, max=1), HTML(value='')))

Epoch 66/100, Loss 0.00876183


HBox(children=(IntProgress(value=0, max=1), HTML(value='')))

Epoch 67/100, Loss 0.00719722


HBox(children=(IntProgress(value=0, max=1), HTML(value='')))

Epoch 68/100, Loss 0.00896280


HBox(children=(IntProgress(value=0, max=1), HTML(value='')))

Epoch 69/100, Loss 0.00928547


HBox(children=(IntProgress(value=0, max=1), HTML(value='')))

Epoch 70/100, Loss 0.00906813


HBox(children=(IntProgress(value=0, max=1), HTML(value='')))

Epoch 71/100, Loss 0.00681231


HBox(children=(IntProgress(value=0, max=1), HTML(value='')))

Epoch 72/100, Loss 0.01039200


HBox(children=(IntProgress(value=0, max=1), HTML(value='')))

Epoch 73/100, Loss 0.00770870


HBox(children=(IntProgress(value=0, max=1), HTML(value='')))

Epoch 74/100, Loss 0.00659366


HBox(children=(IntProgress(value=0, max=1), HTML(value='')))

Epoch 75/100, Loss 0.00755195


HBox(children=(IntProgress(value=0, max=1), HTML(value='')))

Epoch 76/100, Loss 0.00810745


HBox(children=(IntProgress(value=0, max=1), HTML(value='')))

Epoch 77/100, Loss 0.00730723


HBox(children=(IntProgress(value=0, max=1), HTML(value='')))

Epoch 78/100, Loss 0.00784883


HBox(children=(IntProgress(value=0, max=1), HTML(value='')))

Epoch 79/100, Loss 0.00693989


HBox(children=(IntProgress(value=0, max=1), HTML(value='')))

Epoch 80/100, Loss 0.00765531


HBox(children=(IntProgress(value=0, max=1), HTML(value='')))

Epoch 81/100, Loss 0.00735785


HBox(children=(IntProgress(value=0, max=1), HTML(value='')))

Epoch 82/100, Loss 0.00862361


HBox(children=(IntProgress(value=0, max=1), HTML(value='')))

Epoch 83/100, Loss 0.00590041


HBox(children=(IntProgress(value=0, max=1), HTML(value='')))

Epoch 84/100, Loss 0.00677819


HBox(children=(IntProgress(value=0, max=1), HTML(value='')))

Epoch 85/100, Loss 0.00570068


HBox(children=(IntProgress(value=0, max=1), HTML(value='')))

Epoch 86/100, Loss 0.00724660


HBox(children=(IntProgress(value=0, max=1), HTML(value='')))

Epoch 87/100, Loss 0.00952733


HBox(children=(IntProgress(value=0, max=1), HTML(value='')))

Epoch 88/100, Loss 0.00597200


HBox(children=(IntProgress(value=0, max=1), HTML(value='')))

Epoch 89/100, Loss 0.00604670


HBox(children=(IntProgress(value=0, max=1), HTML(value='')))

Epoch 90/100, Loss 0.00593244


HBox(children=(IntProgress(value=0, max=1), HTML(value='')))

Epoch 91/100, Loss 0.00526217


HBox(children=(IntProgress(value=0, max=1), HTML(value='')))

Epoch 92/100, Loss 0.00470340


HBox(children=(IntProgress(value=0, max=1), HTML(value='')))

Epoch 93/100, Loss 0.00749340


HBox(children=(IntProgress(value=0, max=1), HTML(value='')))

Epoch 94/100, Loss 0.00553222


HBox(children=(IntProgress(value=0, max=1), HTML(value='')))

Epoch 95/100, Loss 0.00724714


HBox(children=(IntProgress(value=0, max=1), HTML(value='')))

Epoch 96/100, Loss 0.00541102


HBox(children=(IntProgress(value=0, max=1), HTML(value='')))

Epoch 97/100, Loss 0.00699758


HBox(children=(IntProgress(value=0, max=1), HTML(value='')))

Epoch 98/100, Loss 0.00618582


HBox(children=(IntProgress(value=0, max=1), HTML(value='')))

Epoch 99/100, Loss 0.00533148


HBox(children=(IntProgress(value=0, max=1), HTML(value='')))

Epoch 100/100, Loss 0.00599356



In [29]:
torch.save(chlstm, "chlstm_model_pretrained_second_approach.pt")

  "type " + obj.__name__ + ". It won't be checked "


RuntimeError: cuda runtime error (4) : unspecified launch failure at C:\w\1\s\tmp_conda_3.7_044431\conda\conda-bld\pytorch_1556686009173\work\torch/csrc/generic/serialization.cpp:23

In [34]:
classifier = Discriminator(vocabsize, longestword).cuda()
optimizerClassifier = optim.Adam(classifier.parameters(), lr=1e-3)

In [45]:
epochs=4

lossGs = []
lossDs = []

lossGs2 = []
lossDs2 = []

t_epochs = tnrange(epochs)

for epoch in t_epochs:
    hiddens1 = torch.zeros(2*lstms, 1, vocabsize).to(device) #todo cuda
    
    t_batch = tqdm_notebook(DL_train, leave=False)
    for batch in t_batch:
        d1, d2 = batch[0].shape

        xreals = batch[0].long().to(device)
        xfakes = torch.Tensor([]).long().to(device)
        #y = batch[1].long().cuda()
        
        ## GENEROWANIE FAKE PROBEK
        with torch.no_grad():
            chlstm.eval()
            
            xfake = torch.Tensor([[chartoidx["<START>"], random.randint(0, len(chartoidx)-1)]]).long().to(device).repeat(d1, 1)
            hidd = hiddens1.repeat(1, d1, 1)

            for cnt in range(d2-2):
                y, hidd = chlstm(xfake, hidd, drop=0.1)

                pred = torch.argmax(F.softmax(y.transpose(1,2), 1), 1)
                proponowanyznak = torch.index_select(pred, 1, torch.Tensor([cnt]).long().to(device))

                xfake = torch.cat((xfake, proponowanyznak.reshape(d1, 1)), 1)
            xfakes = xfake
        
        ## DANE DLA KLASYFIKATORA
        
        ## NAUKA KLASYFIKATORA
        classifier.train()
        optimizerClassifier.zero_grad()
        
        RealOuts = torch.ones((d1, 1)).float().to(device)
        TensorOut = torch.cat((RealOuts, torch.zeros((d1, 1)).float().to(device)), 0)
        TensorIns = torch.cat((xreals, xfakes))
        
        y0 = classifier(TensorIns)

        lossC = criterion(y0, TensorOut)
        
        lossC.backward()
        optimizerClassifier.step()
        
        
        ## NAUKA GENERATORA - LSTM
        chlstm.train()
        optimizerLSTM.zero_grad()
        
        y1 = classifier(xfakes)
        lossG = criterion(y1, RealOuts)
        
        lossG.backward()
        optimizerLSTM.step()
        
        losssG = lossG.item()
        losssC = lossC.item()
        
        lossGs.append(losssG)
        lossDs.append(losssC)
        
        
        
        
        t_batch.set_description("Loss C: {:.6f}, Loss G: {:.6f}".format(losssC, losssG))
    t_batch.close()
    t_epochs.set_description("Epoch {}/{}".format(epoch+1, epochs))
    
    
    acc = accuracy(chlstm, DL_test, device, lstms, vocabsize)
    
    print("Epoch {}/{}, Loss Classifier: {:.8f}, Loss LSTM: {:.8f}, Accuracy {:.3f}".format(epoch+1, epochs, lossC.item(), lossG.item(), acc))

HBox(children=(IntProgress(value=0, max=4), HTML(value='')))

HBox(children=(IntProgress(value=0, max=1), HTML(value='')))

TypeError: forward() got an unexpected keyword argument 'drop'

## TODOO
Przy generowaniu fake próbki zrobić, że po otrzymaniu < END > wszystko dalej odcina i zamienia na empty

In [56]:
torch.save(chlstm, "chlstm_model.pt")
torch.save(classifier, "classifier_model.pt")

  "type " + obj.__name__ + ". It won't be checked "
  "type " + obj.__name__ + ". It won't be checked "


In [105]:
slowostart = "a"
hiddens = torch.zeros(2*lstms, 1, vocabsize).to(device)

with torch.no_grad():
    lastchar = 0
    cnt = 0
    
    chlstm.eval()
    znaki = [[chartoidx["<START>"]]] + [[chartoidx[item]] for item in slowostart]
    bazastart = len(znaki)
    
    chartable = list(chartoidx.keys())

    while lastchar != chartoidx["<END>"]:
        x = torch.Tensor([znaki[cnt]]).long().to(device)

        y, hiddens = chlstm(x, hiddens, NLL=True)
        prediction = torch.argmax(y.transpose(1,2), 1).view(-1)

        if cnt >= bazastart - 1:
            znaki.append([prediction.item()])
        
        cnt = cnt + 1
        
        if cnt > 10:
            break
    
    strtranslated = ""
    for item in znaki:
        strtranslated = strtranslated + chartable[item[0]]
        
    print(strtranslated)
    
    
#     while
    
#     slw = [chartoidx[ss] for ss in start]
    
#     xx = torch.Tensor([slw]).long().to(device)
    
#     y, hiddens1 = chlstm(xx, hiddens1, NLL=True)
    
#     pred = torch.argmax(y.transpose(1,2), 1)
    
#     dd = ""
#     for itm in pred.view(-1).cpu().detach().numpy():
#         dd = dd + str(list(chartoidx.keys())[itm])

#     print(dd)

<START>awmnn111111


In [21]:
for epoch in range(20):
    for batch in dataloader:
        ## TODO TO CUDA
        x = batch[0].long()
        y = batch[1].long()
        
        hidd = (hiddens1.repeat([1, BS, 1]), hiddens2.repeat([1, BS, 1])) #todo cuda both
        
        chlstm.train()
        optimizer.zero_grad()
        
        for pos in range(longestword):
            xx = torch.index_select(x, 1, torch.Tensor([pos]).long()) #todo cuda
            yy = torch.index_select(y, 1, torch.Tensor([pos]).long()) #todo cuda
        
#             print(xx)
#             print(yy)
        
            y_, hidd = chlstm(xx, hidd)
#             print(y_.shape, y_.transpose(1,2).shape, yy.shape)
            loss = criterion(y_.transpose(1,2), yy)
            
        loss.backward()
        optimizer.step()
        
    if epoch%2 == 0:
        print(str(epoch) + ": " + str(loss.item()))

0: 0.00012402894208207726
2: 0.005619823466986418
4: 3.2186435419134796e-06
6: 1.847741259553004e-06
8: 5.960462772236497e-07


KeyboardInterrupt: 

In [33]:
with torch.no_grad():
    chlstm.eval()
    
    start = ["<START>", "n", "i", "e", "r", "o"]
    
    hidd = (hiddens1, hiddens2) #todo cuda
    
    for letter in start:
        xx = torch.Tensor([[chartoidx[letter]]]).long() #todo cuda
        
        y, hidd = chlstm(xx, hidd)
        
        pred = torch.argmax(F.log_softmax(y.transpose(1,2), 1), 1)
        
    for cnt in range(longestword - len(start)):
        
        

tensor([[1]])
tensor([[1]])
tensor([[1]])
tensor([[1]])
tensor([[1]])
tensor([[1]])


## TODOs
Sprawdzić kombinacje w której podajemy normalnie jak wcześniej całą sekwencję, ale bez BatchNormów.

## Ostatni UPDATE przed wyjściem z pociągu

Podejście słabe, bo loss jest liczony z ostatniego elementu tylko ( "< END >") i predykcje będą na niego wskazywać. Skorzystać z TODOs'a

In [20]:
for epoch in range(2):
    for batch in dataloader:
        # PRZYGOTOWANIE DANYCH
        x = batch[0]
        y = batch[1]
        
        # wrzucenie na GPU
        x = x.long().cuda()
        y = y.long().cuda()

        for cnt in range(len(x)):
            print(x[cnt].shape)
            print(y[cnt].shape)
            
            
            
            # TRENING
            chlstm.train()
            optimizer.zero_grad()
        
            # PRZYGOTOWANIE HIDDENS
            if x[cnt].item() == chartoidx["<START>"]:
                hiddens = (hiddens1, hiddens2)
        
            wynik, hiddens = chlstm(x[cnt].view(1, 1), hiddens)
            
            loss = criterion(wynik.transpose(1,2).unsqueeze(1), y[cnt])

            loss.backward()
            optimizer.step()

    if epoch%50==0:
        print(str(epoch) + ": " + str(loss.item()))
        
#     if epoch % 50 == 0:
#         torch.save(chlstm, "model_chlstm_"+str(epoch)+".pt")

torch.Size([1])
torch.Size([1])
torch.Size([1, 1])


RuntimeError: 1only batches of spatial targets supported (non-empty 3D tensors) but got targets of size: : [1]

In [32]:
start = "a"

In [45]:
pslowa = [start, "b", "m", "i", "c", "o"]

h1 = (torch.index_select(hiddens[0], 1, torch.Tensor([0]).long().cuda()), torch.index_select(hiddens[1], 1, torch.Tensor([0]).long().cuda()))

with torch.no_grad():
    preds = torch.zeros((12)).long()
    
    chlstm.eval()
    
    for cnt in range(longestword-1):
        preds[cnt] = chartoidx[pslowa[cnt]]
        
        preds = preds.cuda()
        
        pred, h1 = chlstm(preds.reshape(1, -1), h1)
        
        out = torch.argmax(F.softmax(pred, dim=2), 2).reshape(-1).cpu().detach().numpy()
        
        pslowa.append(list(chartoidx.keys())[out[cnt]])
        
pslowa

['a',
 'b',
 'm',
 'i',
 'c',
 'o',
 'm',
 '<EMPTY>',
 '<EMPTY>',
 '<EMPTY>',
 '<EMPTY>',
 '<EMPTY>',
 '<EMPTY>',
 '<EMPTY>',
 '<EMPTY>',
 '<EMPTY>',
 '<EMPTY>']