In [1]:
import torch
import torch.nn as nn

import spacy
import numpy as np
import pandas as pd

import random
import math
import time

from BERT_WordEmbeddingsPipeline import embeddingsPipeline
from sklearn.model_selection import train_test_split

import warnings
warnings.filterwarnings("ignore") 

SEED = 25

random.seed(SEED)
np.random.seed(SEED)
torch.manual_seed(SEED)
torch.cuda.manual_seed(SEED)
torch.backends.cudnn.deterministic = True

In [2]:
data = pd.read_csv('hpl.csv')
data = data[:100]

embedding_length = 768
max_words = 100

zero_embedding = [0 for i in range(embedding_length)]

emb = []
for i in data['tokenized_sents']:
    e = embeddingsPipeline(i)
    while(len(e) < max_words):
        e.append(zero_embedding)
    e = e[:max_words]
    emb.append(e)
data['embeddings'] = emb

In [3]:
INPUT_SIZE = embedding_length
HIDDEN_SIZE = 512
NUM_LAYERS = 2
OUTPUT_SIZE = embedding_length
BATCH_SIZE = 32

In [4]:
emb = torch.Tensor(data['embeddings'])
dataSet = torch.utils.data.TensorDataset(emb)
dataLoader = torch.utils.data.DataLoader(dataSet, batch_size=BATCH_SIZE, drop_last=True)

In [5]:
from Encoder import Encoder
from Decoder import Decoder
from Generator import Generator
from Discriminator import Discriminator

In [6]:
enc = Encoder(INPUT_SIZE, HIDDEN_SIZE, NUM_LAYERS)
dec = Decoder(HIDDEN_SIZE, OUTPUT_SIZE, NUM_LAYERS)
gen = Generator(enc, dec, max_words, HIDDEN_SIZE, embedding_length)
print(gen)

dis = Discriminator(INPUT_SIZE, NUM_LAYERS, max_words)
print(dis)

Generator(
  (encoder): Encoder(
    (dropout): Dropout(p=0.2, inplace=False)
    (lstm): LSTM(768, 512, num_layers=2, batch_first=True, bidirectional=True)
    (fc1): Linear(in_features=1024, out_features=512, bias=True)
    (lrelu): LeakyReLU(negative_slope=0.01)
  )
  (decoder): Decoder(
    (lstm): LSTM(512, 768, num_layers=2, batch_first=True, bidirectional=True)
    (fc1): Linear(in_features=1536, out_features=768, bias=True)
  )
)
Discriminator(
  (lstm): LSTM(768, 1, num_layers=2, batch_first=True)
  (fc1): Linear(in_features=100, out_features=1, bias=True)
  (sigmoid): Sigmoid()
)


In [7]:
def train(dataLoader, gen, dis, num_epochs, max_words, batch_size, embedding_length):
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    gen.to(device)
    dis.to(device)
    
    dis.train()
    gen.train()
    
    criterion = nn.BCELoss()
    d_optimizer = torch.optim.Adam(dis.parameters(), lr=0.0002)
    g_optimizer = torch.optim.Adadelta(gen.parameters(), lr=0.0004)
    
    for epoch in range(num_epochs):
        start_time = time.time()

        real_labels = torch.ones(1, batch_size).to(device)
        fake_labels = torch.zeros(1, batch_size).to(device)
        
        #---------------------------------------------------#
        # Training Discriminator
        #---------------------------------------------------#
        total_discriminator_loss = 0
        iterator = iter(dataLoader)
        
        for i in range(len(dataLoader)):
            batch_data = next(iterator) 
            # batch_data is a one-element list containing a single batch in the form of Tensor.
            batch = batch_data[0]
            
            batch = batch.to(device)
            
            dis_output = torch.zeros(batch_size)
            for i, embedding in enumerate(batch):
                dis_output[i] = dis(embedding.view(1, max_words, embedding_length))
            
            dis_output = dis_output.to(device)
            d_loss_real = criterion(dis_output, real_labels)
            
            fake_batch = gen(batch, 0.5)
            fake_batch = fake_batch.to(device)
            
            dis_output = torch.zeros(batch_size)
            for i, embedding in enumerate(fake_batch):
                dis_output[i] = dis(embedding.view(1, max_words, embedding_length))
                
            dis_output = dis_output.to(device)
            d_loss_fake = criterion(dis_output, fake_labels)
            
            d_loss = d_loss_real + d_loss_fake
            total_discriminator_loss += d_loss.item()
            
            d_optimizer.zero_grad()
            d_loss.backward()
            d_optimizer.step()
    
        #---------------------------------------------------#
        # Training Generator
        #---------------------------------------------------#
        total_generator_loss = 0
        iterator = iter(dataLoader)
        
        for i in range(len(dataLoader)):
            batch_data = next(iterator) 
            # batch_data is a one-element list containing a single batch in the form of Tensor.
            batch = batch_data[0]
            batch = batch.to(device)
            
            generated_batch = gen(batch, 0.5)
            generated_batch = generated_batch.to(device)
            
            dis_output = torch.zeros(batch_size)
            for i, embedding in enumerate(generated_batch):
                dis_output[i] = dis(embedding.view(1, max_words, embedding_length))
            
            dis_output = dis_output.to(device)
            g_loss = criterion(dis_output, real_labels)
            total_generator_loss += g_loss.item()
            
            g_optimizer.zero_grad()
            g_loss.backward()
            g_optimizer.step()
            
        finish_time = time.time()
        avg_d_loss = total_discriminator_loss/len(dataLoader)
        avg_g_loss = total_generator_loss/len(dataLoader)
        time_taken = finish_time-start_time
        
        print("Epoch[{:02}/{}]: average_d_Loss: {:.4f}, average_g_Loss: {:.4f}, Time for epoch: {:.4f}" 
             .format(epoch+1, num_epochs, avg_d_loss, avg_g_loss, time_taken))

In [8]:
train(dataLoader, gen, dis, 10, max_words, BATCH_SIZE, embedding_length)

Epoch[01/10]: average_d_Loss: 1.3871, average_g_Loss: 0.6833, Time for epoch: 85.4287
Epoch[02/10]: average_d_Loss: 1.3866, average_g_Loss: 0.6932, Time for epoch: 89.9586
Epoch[03/10]: average_d_Loss: 1.3867, average_g_Loss: 0.7018, Time for epoch: 95.5795
Epoch[04/10]: average_d_Loss: 1.3845, average_g_Loss: 0.7064, Time for epoch: 103.9183
Epoch[05/10]: average_d_Loss: 1.3855, average_g_Loss: 0.7093, Time for epoch: 93.7973
Epoch[06/10]: average_d_Loss: 1.3852, average_g_Loss: 0.7104, Time for epoch: 98.3984
Epoch[07/10]: average_d_Loss: 1.3851, average_g_Loss: 0.7097, Time for epoch: 102.0737
Epoch[08/10]: average_d_Loss: 1.3843, average_g_Loss: 0.7079, Time for epoch: 103.8758
Epoch[09/10]: average_d_Loss: 1.3819, average_g_Loss: 0.7070, Time for epoch: 107.3690
Epoch[10/10]: average_d_Loss: 1.3823, average_g_Loss: 0.7069, Time for epoch: 103.0192
