In [1]:
import torch
import torch.nn as nn
import torch.optim as optim

import spacy
import numpy as np
import pandas as pd

import random
import math
import time

SEED = 25

random.seed(SEED)
np.random.seed(SEED)
torch.manual_seed(SEED)
torch.cuda.manual_seed(SEED)
torch.backends.cudnn.deterministic = True

In [2]:
from BERT_WordEmbeddingsPipeline import embeddingsPipeline
embedding_length = 768
zero_embedding = [0 for i in range(embedding_length)]

In [3]:
data = pd.read_csv('hpl.csv')
data = data[:100]
print(data.head())
print(data.shape)

                                     tokenized_sents
0  It never once occurred to me that the fumbling...
1  Finding nothing else not even gold the Superin...
2  Herbert West needed fresh bodies because his l...
3  The farm like grounds extended back very deepl...
4  His facial aspect too was remarkable for its m...
(100, 1)


In [4]:
max_words = 100

emb = []
for i in data['tokenized_sents']:
    e = embeddingsPipeline(i)
    while(len(e) < max_words):
        e.append(zero_embedding)
    e = e[:max_words]
    emb.append(e)
    
data['embeddings'] = emb

In [5]:
from sklearn.model_selection import train_test_split
train_data, remaining_data = train_test_split(data, test_size=0.2, random_state=SEED)
test_data, valid_data = train_test_split(remaining_data, test_size=0.5, random_state=SEED)

print(train_data.shape, test_data.shape, valid_data.shape)

(80, 2) (10, 2) (10, 2)


In [6]:
class Encoder(nn.Module):
    def __init__(self, input_size, hidden_size, num_layers):
        super(Encoder, self).__init__()
        self.input_size = input_size
        self.hidden_size = hidden_size
        self.num_layers = num_layers
        
        self.dropout = nn.Dropout(p=0.2)
        self.lstm = nn.LSTM(input_size, hidden_size, num_layers, batch_first=True)
        self.relu = nn.ReLU()
        
        # initializing weights
        nn.init.xavier_uniform(self.lstm.weight_ih_l0, gain=np.sqrt(2))
        nn.init.xavier_uniform(self.lstm.weight_hh_l0, gain=np.sqrt(2))
         
    def forward(self, input):
        input = self.dropout(input)
        encoded_input, hidden = self.lstm(input)
        encoded_input = self.relu(encoded_input)
        return encoded_input
    
class Decoder(nn.Module):
    def __init__(self, hidden_size, output_size, num_layers):
        super(Decoder, self).__init__()
        self.hidden_size = hidden_size
        self.output_size = output_size
        self.num_layers = num_layers
        
        self.lstm = nn.LSTM(hidden_size, output_size, num_layers, batch_first=True)
        
        # initializing weights
        nn.init.xavier_uniform(self.lstm.weight_ih_l0, gain=np.sqrt(2))
        nn.init.xavier_uniform(self.lstm.weight_hh_l0, gain=np.sqrt(2))
       
    def forward(self, encoded_input):
        decoded_output, hidden = self.lstm(encoded_input)
        return decoded_output 

In [7]:
class Generator(nn.Module):
    def __init__(self, encoder, decoder):
        super(Generator, self).__init__()
        self.encoder = encoder
        self.decoder = decoder
        
    def forward(self, input):
        encoded_input = self.encoder(input)
        decoded_output = self.decoder(encoded_input)
        return decoded_output

In [8]:
class Discriminator(nn.Module):
    def __init__(self, input_size, num_layers, max_words):
        super(Discriminator, self).__init__()
        self.lstm = nn.LSTM(input_size, 1, num_layers, batch_first=True)
        self.fc1 = nn.Linear(max_words, 1)
        self.sigmoid = nn.Sigmoid()
        
    def forward(self, input):
        output, hidden = self.lstm(input)
        output = torch.flatten(output)
        output = self.fc1(output)
        output = self.sigmoid(output)
        return output

In [9]:
INPUT_SIZE = embedding_length
HIDDEN_SIZE = 512
NUM_LAYERS = 2
OUTPUT_SIZE = embedding_length

enc = Encoder(INPUT_SIZE, HIDDEN_SIZE, NUM_LAYERS)
dec = Decoder(HIDDEN_SIZE, OUTPUT_SIZE, NUM_LAYERS)
gen = Generator(enc, dec)

dis = Discriminator(INPUT_SIZE, 1, max_words)

  del sys.path[0]
  


In [10]:
def trainGAN(data, gen, dis, epochs, max_words, embedding_length):
    gen.train()
    dis.train()
    
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    gen.to(device)
    dis.to(device)
    
    criterion = nn.BCELoss()
    d_optimizer = torch.optim.Adam(dis.parameters(), lr=0.0002)
    g_optimizer = torch.optim.Adam(gen.parameters(), lr=0.0002)
    
    for epoch in range(epochs):
        start_time = time.time()
        
        real_label = torch.ones(1).to(device)
        fake_label = torch.zeros(1).to(device)
        
    # Training Discriminator
        total_discriminator_loss = 0
        for text in data:            
            text = torch.Tensor(text)
            text = text.to(device)
            text = text.view(max_words, 1, embedding_length)
            dis_out = dis(text)
            d_loss_real = criterion(dis_out, real_label)
            
            fake_text = gen(text)
            fake_text = fake_text.view(max_words, 1, embedding_length)
            dis_out = dis(fake_text)
            d_loss_fake = criterion(dis_out, fake_label)
            
            d_loss = d_loss_real + d_loss_fake
            total_discriminator_loss += d_loss.item()
            
            d_optimizer.zero_grad()
            g_optimizer.zero_grad()
            
            d_loss.backward()
            d_optimizer.step()
        
    # Training Discriminator
        total_generator_loss = 0
        for text in data:
            text = torch.Tensor(text)
            text = text.to(device)
            text = text.view(max_words, 1, embedding_length)
            
            generated_text = gen(text)
            dis_out = dis(generated_text)
            
            g_loss = criterion(dis_out, real_label)
            total_generator_loss += g_loss.item()
            
            d_optimizer.zero_grad()
            g_optimizer.zero_grad()
            
            g_loss.backward()
            g_optimizer.step()
        
        finish_time = time.time()
        
        avg_d_loss = total_discriminator_loss/len(data)
        avg_g_loss = total_generator_loss/len(data)
        time_taken = finish_time-start_time
        
        print("Epoch[{:02}/{}]: average_d_Loss: {:.4f}, average_g_Loss: {:.4f}, Time for epoch: {:.4f}" 
             .format(epoch+1, epochs, avg_d_loss, avg_g_loss, time_taken))

In [11]:
trainGAN(data['embeddings'], gen, dis, 25, max_words, embedding_length)

Epoch[01/25]: average_d_Loss: 1.3922, average_g_Loss: 0.6519, Time for epoch: 3.3999
Epoch[02/25]: average_d_Loss: 1.1759, average_g_Loss: 0.7216, Time for epoch: 3.1256
Epoch[03/25]: average_d_Loss: 1.3395, average_g_Loss: 0.5841, Time for epoch: 3.1246
Epoch[04/25]: average_d_Loss: 1.3160, average_g_Loss: 0.7826, Time for epoch: 3.1276
Epoch[05/25]: average_d_Loss: 1.0386, average_g_Loss: 1.0207, Time for epoch: 3.1376
Epoch[06/25]: average_d_Loss: 0.8382, average_g_Loss: 1.0028, Time for epoch: 3.1675
Epoch[07/25]: average_d_Loss: 1.0375, average_g_Loss: 0.7140, Time for epoch: 3.1745
Epoch[08/25]: average_d_Loss: 0.9628, average_g_Loss: 0.7318, Time for epoch: 3.1296
Epoch[09/25]: average_d_Loss: 0.8924, average_g_Loss: 0.7491, Time for epoch: 3.1486
Epoch[10/25]: average_d_Loss: 0.8298, average_g_Loss: 0.7661, Time for epoch: 3.3321
Epoch[11/25]: average_d_Loss: 0.7756, average_g_Loss: 0.7827, Time for epoch: 3.4787
Epoch[12/25]: average_d_Loss: 0.7311, average_g_Loss: 0.7990, Tim