In [4]:
import torch
import torch.nn as nn
from torch.utils.data import DataLoader, Dataset
import numpy as np

In [3]:
class BasqueDataset(Dataset):
    """Line-oriented text dataset: one sample per line of a text file.

    Every line of the file is read eagerly at construction time and stripped
    of leading/trailing whitespace. Blank lines are kept as empty-string
    samples.

    Args:
        file_path: Path of the text file to load.
        transform: Optional callable applied to each sample in ``__getitem__``.
        encoding: Text encoding used to decode the file. Defaults to UTF-8 so
            the decoded text does not depend on the platform's locale encoding.
    """

    def __init__(self, file_path, transform=None, encoding="utf-8"):
        with open(file_path, "r", encoding=encoding) as f:
            # Iterate the handle directly; no need to materialise readlines().
            self.text = [line.strip() for line in f]
        self.transform = transform

    def __len__(self):
        """Return the number of lines (samples) loaded from the file."""
        return len(self.text)

    def __getitem__(self, idx):
        """Return the sample at ``idx``, passed through ``transform`` if one was given."""
        sample = self.text[idx]
        if self.transform:
            sample = self.transform(sample)
        return sample

In [None]:
# NOTE(review): the corpus path is still an empty placeholder; point it at the
# text file before running this cell.
dataset = BasqueDataset(file_path="")

# Shuffled mini-batches of 32 samples.
data_loader = DataLoader(dataset=dataset, batch_size=32, shuffle=True)

In [None]:
class Generator(nn.Module):
    """Stacked-LSTM generator that emits a probability distribution per sequence.

    Args:
        input_size: Number of features per time step of the input.
        hidden_size: Width of each LSTM layer's hidden state.
        output_size: Size of the output distribution (the vocab size).
        num_layers: Number of stacked LSTM layers.
    """

    def __init__(self, input_size, hidden_size, output_size, num_layers):
        super().__init__()

        # LSTM stack with the requested number of layers; inputs are batch-first.
        self.lstm = nn.LSTM(input_size,
                            hidden_size,
                            num_layers=num_layers,
                            batch_first=True)

        # Linear layer mapping the hidden-state output to the output size (vocab size).
        self.linear = nn.Linear(hidden_size,
                                output_size)

    def forward(self, x, prev_states=None):
        """Run the LSTM over ``x`` and turn the last time step into probabilities.

        Args:
            x: Input of shape (batch_size, sequence_length, input_size).
            prev_states: Optional ``(h_0, c_0)`` tuple, each of shape
                (num_layers, batch_size, hidden_size). When ``None`` the LSTM
                starts from zero states.

        Returns:
            A tuple ``(out, states)``: ``out`` has shape (batch_size, output_size)
            with each row summing to 1, and ``states`` is the LSTM's final
            ``(h_n, c_n)`` pair.
        """
        lstm_out, states = self.lstm(x, prev_states)

        # lstm_out has shape (batch_size, sequence_length, hidden_size); only the
        # output of the last time step is passed on to the linear layer.
        last_time_step_out = lstm_out[:, -1, :]
        out = self.linear(last_time_step_out)
        out = nn.functional.softmax(out, dim=1)

        return out, states


input_size = 1000  # size of the vocabulary
hidden_size = 256  # size of the hidden layers
output_size = input_size  # output size (typically the vocab size)
num_layers = 4  # The number of LSTM layers

generator = Generator(input_size,
                      hidden_size,
                      output_size,
                      num_layers)

# Zero-initialised (h_0, c_0) pair for the generator LSTM. The batch dimension
# is read from the DataLoader so this cell does not depend on a `batch_size`
# variable that is only assigned later in the notebook. Note that the final
# batch of an epoch can be smaller than this.
initial_states = (
    torch.zeros(num_layers, data_loader.batch_size, hidden_size),
    torch.zeros(num_layers, data_loader.batch_size, hidden_size),
)

In [None]:
class Discriminator(nn.Module):
    """Stacked-LSTM binary classifier producing one probability per sequence.

    Args:
        input_size: Number of features per time step of the input.
        hidden_size: Width of each LSTM layer's hidden state.
        num_layers: Number of stacked LSTM layers.
    """

    def __init__(self, input_size, hidden_size, num_layers):
        super().__init__()

        # Attribute name kept as `LSTM` so existing state-dict keys stay valid.
        self.LSTM = nn.LSTM(input_size,
                            hidden_size,
                            num_layers=num_layers,
                            batch_first=True)

        self.linear = nn.Linear(hidden_size, 1)
        self.sigmoid = nn.Sigmoid()

    def forward(self, x, hidden=None):
        """Score each sequence in ``x`` with a probability in (0, 1).

        Args:
            x: Input of shape (batch_size, sequence_length, input_size).
            hidden: Optional ``(h_0, c_0)`` tuple, each of shape
                (num_layers, batch_size, hidden_size). When ``None`` the LSTM
                starts from zero states.

        Returns:
            Tensor of shape (batch_size, 1) holding the sigmoid output.
        """
        lstm_out, _ = self.LSTM(x, hidden)
        # Classify from the output of the last time step only.
        lstm_out = lstm_out[:, -1, :]
        out = self.linear(lstm_out)
        out = self.sigmoid(out)
        return out

input_size = 1000  # size of the vocabulary
hidden_size = 256  # size of the hidden layers
num_layers = 4  # The number of LSTM layers

discriminator = Discriminator(input_size, hidden_size, num_layers)

# Zero-initialised (h_0, c_0) pair for the discriminator LSTM. The batch
# dimension is read from the DataLoader so this cell does not depend on a
# `batch_size` variable that is only assigned later in the notebook.
discriminator_state = (
    torch.zeros(num_layers, data_loader.batch_size, hidden_size),
    torch.zeros(num_layers, data_loader.batch_size, hidden_size),
)

In [None]:
# Adversarial objective: binary cross-entropy between predictions and labels.
criterion = nn.BCELoss()

# One Adam optimizer per network, both using the same learning rate.
optimizer_gen = torch.optim.Adam(params=generator.parameters(), lr=1e-3)
optimizer_dis = torch.optim.Adam(params=discriminator.parameters(), lr=1e-3)

In [None]:
# Training parameters
num_epochs = 100
batch_size = 32  # nominal batch size; per-batch tensors below follow the actual batch

# Label smoothing parameters for discriminator training
real_label = 0.9
fake_label = 0.1

# Create every tensor on the device the models already live on, so labels,
# noise and data can never end up on a different device than the weights.
device = next(generator.parameters()).device

for epoch in range(num_epochs):
    for i, batch in enumerate(data_loader):
        # A batch may arrive as a bare tensor or wrapped in a 1-element
        # list/tuple; accept both.
        real_data = batch[0] if isinstance(batch, (list, tuple)) else batch
        if not torch.is_tensor(real_data):
            raise TypeError(
                "data_loader must yield float tensors of shape "
                "(batch, seq_len, input_size); got %s. Give the dataset a "
                "transform that encodes each line as a tensor."
                % type(real_data).__name__
            )
        real_data = real_data.to(device=device, dtype=torch.float)
        current_batch = real_data.size(0)  # the final batch can be smaller

        # real and fake labels for use in the loss, sized to this batch
        label_real = torch.full((current_batch,), real_label,
                                dtype=torch.float, device=device)
        label_fake = torch.full((current_batch,), fake_label,
                                dtype=torch.float, device=device)

        # ---- Discriminator step ----
        discriminator.zero_grad()

        # Passing None as the state lets the LSTM start from zeros.
        output_real = discriminator(real_data, None).view(-1)
        error_real = criterion(output_real, label_real)
        error_real.backward()

        # Noise is a one-step sequence: (batch, seq_len=1, input_size). A 2-D
        # tensor would be interpreted by the LSTM as a single unbatched sequence.
        noise = torch.randn(current_batch, 1, input_size, device=device)
        fake_probs, _ = generator(noise, None)
        # The generator returns (batch, output_size); add a length-1 time axis
        # so the discriminator's batch-first LSTM receives a 3-D input.
        fake_data = fake_probs.unsqueeze(1)

        # detach() keeps this backward pass from reaching the generator.
        output_fake = discriminator(fake_data.detach(), None).view(-1)
        error_fake = criterion(output_fake, label_fake)
        error_fake.backward()

        # Update discriminator
        optimizer_dis.step()

        # ---- Generator step ----
        generator.zero_grad()

        # We want the fake data to be classified as real
        output = discriminator(fake_data, None).view(-1)
        error_g = criterion(output, label_real)  # Use real labels for the generator loss
        error_g.backward()

        # Update generator
        optimizer_gen.step()

        if i % 50 == 0:
            print('[%d/%d][%d/%d]\tLoss_D: %.4f\tLoss_G: %.4f\tD(x): %.4f\tD(G(z)): %.4f'
                  % (epoch, num_epochs, i, len(data_loader),
                     error_real.item() + error_fake.item(), error_g.item(),
                     output_real.mean().item(), output_fake.mean().item()))
