In [1]:
import torch
import torch.nn as nn

import spacy
import numpy as np
import pandas as pd

import random
import math
import time

from BERT_WordEmbeddingsPipeline import embeddingsPipeline
from sklearn.model_selection import train_test_split

import warnings
# Suppress every warning for the rest of the session.
warnings.filterwarnings(action="ignore")

# Single seed shared by all random number generators seeded below.
SEED = 25

# Ask cuDNN to use deterministic algorithms.
torch.backends.cudnn.deterministic = True

# Seed Python's `random`, NumPy, and PyTorch's CPU and CUDA generators,
# in that order.
for _seed_rng in (random.seed, np.random.seed, torch.manual_seed, torch.cuda.manual_seed):
    _seed_rng(SEED)
del _seed_rng

In [2]:
# Load the corpus and keep only the first 100 rows.  The explicit .copy()
# gives `data` its own storage, so adding the 'embeddings' column below does
# not write into a slice of the full frame (the pattern pandas reports as
# SettingWithCopyWarning).
data = pd.read_csv('hpl.csv')
data = data.iloc[:100].copy()

# Length of one embedding vector, and the fixed number of vectors kept per row.
# (max_words is unrelated to the 100-row cap above; the values just coincide.)
embedding_length = 768
max_words = 100

# All-zero padding vector.  The same list object is reused for every padded
# position, which is safe because it is never mutated.
zero_embedding = [0] * embedding_length

emb = []
for entry in data['tokenized_sents']:
    # Truncate first (slicing also yields a fresh list, so the pipeline's own
    # return value is never modified), then pad up to exactly max_words.
    e = embeddingsPipeline(entry)[:max_words]
    e.extend([zero_embedding] * (max_words - len(e)))
    emb.append(e)
data['embeddings'] = emb

In [3]:
# Encoder input width and decoder output width both equal the embedding length.
INPUT_SIZE = OUTPUT_SIZE = embedding_length
# Hidden width and layer count handed to the Encoder/Decoder constructors.
HIDDEN_SIZE, NUM_LAYERS = 512, 2
# Samples per batch for the DataLoader; also passed to the Discriminator.
BATCH_SIZE = 32

In [4]:
# Convert the 'embeddings' column into a single float tensor
# (presumably rows x max_words x embedding_length -- TODO confirm).
emb = torch.Tensor(data['embeddings'])

# Wrap the tensor so each dataset item is a one-element tuple.
dataSet = torch.utils.data.TensorDataset(emb)

# drop_last=True discards a trailing short batch; train() builds its label
# tensors with a fixed batch_size, so every batch it sees must be full.
dataLoader = torch.utils.data.DataLoader(
    dataset=dataSet,
    batch_size=BATCH_SIZE,
    drop_last=True,
)

In [5]:
from Encoder import Encoder
from Decoder import Decoder
from Generator import Generator
from Discriminator import Discriminator

In [33]:
# Build the models in a fixed order (encoder, decoder, generator,
# discriminator): construction order determines which seeded random values
# each module's initial weights receive.
enc = Encoder(INPUT_SIZE, HIDDEN_SIZE, NUM_LAYERS)
dec = Decoder(HIDDEN_SIZE, OUTPUT_SIZE, NUM_LAYERS)
gen = Generator(enc, dec)

# NOTE(review): the printed Discriminator shows fc1 with in_features=3200
# (= max_words * BATCH_SIZE) and out_features=32 (= BATCH_SIZE), which suggests
# it flattens across the batch dimension -- confirm in Discriminator.py.
dis = Discriminator(INPUT_SIZE, NUM_LAYERS, max_words, BATCH_SIZE)

# Show both architectures, generator first.
print(gen, dis, sep="\n")

Generator(
  (encoder): Encoder(
    (dropout): Dropout(p=0.2, inplace=False)
    (lstm): LSTM(768, 512, num_layers=2, batch_first=True, bidirectional=True)
    (lrelu): LeakyReLU(negative_slope=0.01)
  )
  (decoder): Decoder(
    (lstm): LSTM(1024, 768, num_layers=2, batch_first=True, bidirectional=True)
    (fc1): Linear(in_features=1536, out_features=768, bias=True)
  )
)
Discriminator(
  (lstm): LSTM(768, 1, num_layers=2, batch_first=True)
  (fc1): Linear(in_features=3200, out_features=32, bias=True)
  (sigmoid): Sigmoid()
)


In [34]:
def train(dataLoader, gen, dis, num_epochs, max_words, batch_size, embedding_length):
    """Adversarially train ``gen`` against ``dis`` for ``num_epochs`` epochs.

    Every epoch makes two full passes over ``dataLoader``.  The first pass
    updates the discriminator on each batch (real batch scored against
    all-ones labels, generated batch against all-zeros labels).  The second
    pass updates the generator on each batch so that the discriminator's
    score for its output is pushed towards the all-ones labels.  One summary
    line with the mean losses and the elapsed time is printed per epoch.

    Both models are moved to CUDA when it is available (otherwise CPU) and
    switched to training mode.

    Args:
        dataLoader: Sized iterable of batches.  Element 0 of each batch is the
            tensor fed to both ``dis`` and ``gen``.
        gen: Generator module; ``gen(batch)`` is passed straight to ``dis``.
        dis: Discriminator module.  Its output is compared with ``BCELoss``
            against label tensors of shape ``(1, batch_size)``.
        num_epochs: Number of epochs to run.
        max_words: Unused; kept so existing callers keep working.
        batch_size: Number of samples per batch, used to size the labels.
        embedding_length: Unused; kept so existing callers keep working.

    Raises:
        ValueError: If ``dataLoader`` yields no batches (previously this
            surfaced as a ``ZeroDivisionError`` when averaging the losses).
    """
    num_batches = len(dataLoader)
    if num_batches == 0:
        raise ValueError(
            "dataLoader yields no batches; with drop_last=True the dataset "
            "must contain at least batch_size samples"
        )

    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    gen.to(device)
    dis.to(device)

    dis.train()
    gen.train()

    criterion = nn.BCELoss()
    d_optimizer = torch.optim.Adam(dis.parameters(), lr=0.0002)
    # NOTE(review): Adadelta's documented default lr is 1.0; 0.0004 scales its
    # adaptive step down heavily -- confirm this tiny generator step is intended.
    g_optimizer = torch.optim.Adadelta(gen.parameters(), lr=0.0004)

    # The targets never change, so build them once, directly on the device.
    real_labels = torch.ones(1, batch_size, device=device)
    fake_labels = torch.zeros(1, batch_size, device=device)

    for epoch in range(num_epochs):
        start_time = time.time()

        #---------------------------------------------------#
        # Training Discriminator
        #---------------------------------------------------#
        total_discriminator_loss = 0.0

        for batch_data in dataLoader:
            # batch_data is a sequence whose first element is the batch tensor.
            batch = batch_data[0].to(device)

            d_loss_real = criterion(dis(batch), real_labels)

            # Only the discriminator is updated in this pass, so generate the
            # fake batch without recording a graph: backward() then stops at
            # the discriminator instead of also differentiating the generator.
            with torch.no_grad():
                fake_batch = gen(batch)
            d_loss_fake = criterion(dis(fake_batch), fake_labels)

            d_loss = d_loss_real + d_loss_fake
            total_discriminator_loss += d_loss.item()

            d_optimizer.zero_grad()
            d_loss.backward()
            d_optimizer.step()

        #---------------------------------------------------#
        # Training Generator
        #---------------------------------------------------#
        total_generator_loss = 0.0

        for batch_data in dataLoader:
            batch = batch_data[0].to(device)

            # The generator is rewarded when the discriminator scores its
            # output as real, hence the all-ones target.
            generated_batch = gen(batch)
            g_loss = criterion(dis(generated_batch), real_labels)
            total_generator_loss += g_loss.item()

            g_optimizer.zero_grad()
            g_loss.backward()
            g_optimizer.step()

        finish_time = time.time()
        avg_d_loss = total_discriminator_loss / num_batches
        avg_g_loss = total_generator_loss / num_batches
        time_taken = finish_time - start_time

        print("Epoch[{:02}/{}]: average_d_Loss: {:.4f}, average_g_Loss: {:.4f}, Time for epoch: {:.4f}" 
             .format(epoch+1, num_epochs, avg_d_loss, avg_g_loss, time_taken))

In [35]:
# Run adversarial training for 10 epochs on the prepared loader.
train(
    dataLoader,
    gen,
    dis,
    num_epochs=10,
    max_words=max_words,
    batch_size=BATCH_SIZE,
    embedding_length=embedding_length,
)

Epoch[01/10]: average_d_Loss: 1.3791, average_g_Loss: 0.7068, Time for epoch: 2.4984
Epoch[02/10]: average_d_Loss: 1.3583, average_g_Loss: 0.7206, Time for epoch: 1.9787
Epoch[03/10]: average_d_Loss: 1.3382, average_g_Loss: 0.7347, Time for epoch: 1.9398
Epoch[04/10]: average_d_Loss: 1.3184, average_g_Loss: 0.7490, Time for epoch: 1.9667
Epoch[05/10]: average_d_Loss: 1.2991, average_g_Loss: 0.7632, Time for epoch: 1.9688
Epoch[06/10]: average_d_Loss: 1.2805, average_g_Loss: 0.7770, Time for epoch: 1.9618
Epoch[07/10]: average_d_Loss: 1.2628, average_g_Loss: 0.7896, Time for epoch: 1.9648
Epoch[08/10]: average_d_Loss: 1.2463, average_g_Loss: 0.8007, Time for epoch: 1.9787
Epoch[09/10]: average_d_Loss: 1.2316, average_g_Loss: 0.8100, Time for epoch: 1.9747
Epoch[10/10]: average_d_Loss: 1.2185, average_g_Loss: 0.8169, Time for epoch: 1.9718
