In [1]:
!wget https://raw.githubusercontent.com/ccc-frankfurt/Practical_ML_SS21/master/week06/sonnets.txt

--2024-05-12 19:56:30--  https://raw.githubusercontent.com/ccc-frankfurt/Practical_ML_SS21/master/week06/sonnets.txt
Resolving raw.githubusercontent.com (raw.githubusercontent.com)... 185.199.111.133, 185.199.109.133, 185.199.110.133, ...
Connecting to raw.githubusercontent.com (raw.githubusercontent.com)|185.199.111.133|:443... connected.
HTTP request sent, awaiting response... 200 OK
Length: 94081 (92K) [text/plain]
Saving to: ‘sonnets.txt’


2024-05-12 19:56:30 (8.90 MB/s) - ‘sonnets.txt’ saved [94081/94081]



In [2]:
# Open shakespeare text file and read the data
with open('sonnets.txt', 'r') as f:
    text = f.read()

# print an excerpt of the text
print(text[:200])

From fairest creatures we desire increase,
That thereby beauty's rose might never die,
But as the riper should by time decease,
His tender heir might bear his memory:
But thou contracted to thine own 


In [4]:
import numpy as np

# Create character dictionaries
chars = tuple(set(text))
int2char = dict(enumerate(chars))
char2int = {ch: ii for ii, ch in int2char.items()}

# Encode the text
encoded = np.array([char2int[ch] for ch in text])

# Define method to make mini-batches for training
def get_batches(arr, batch_size, seq_length):
    batch_size_total = batch_size * seq_length
    n_batches = len(arr) // batch_size_total
    arr = arr[:n_batches * batch_size_total]
    arr = arr.reshape((batch_size, -1))
    for n in range(0, arr.shape[1], seq_length):
        x = arr[:, n:n+seq_length]
        y = np.zeros_like(x)
        try:
            y[:, :-1], y[:, -1] = x[:, 1:], arr[:, n+seq_length]
        except IndexError:
            y[:, :-1], y[:, -1] = x[:, 1:], arr[:, 0]
        yield x, y

# One-hot encode function
def one_hot_encode(arr, n_labels):
    one_hot = np.zeros((np.multiply(*arr.shape), n_labels), dtype=np.float32)
    one_hot[np.arange(one_hot.shape[0]), arr.flatten()] = 1.
    one_hot = one_hot.reshape((*arr.shape, n_labels))
    return one_hot

In [6]:
!pip install torch
import torch.nn as nn

Successfully installed nvidia-cublas-cu12-12.1.3.1 nvidia-cuda-cupti-cu12-12.1.105 nvidia-cuda-nvrtc-cu12-12.1.105 nvidia-cuda-runtime-cu12-12.1.105 nvidia-cudnn-cu12-8.9.2.26 nvidia-cufft-cu12-11.0.2.54 nvidia-curand-cu12-10.3.2.106 nvidia-cusolver-cu12-11.4.5.107 nvidia-cusparse-cu12-12.1.0.106 nvidia-nccl-cu12-2.19.3 nvidia-nvjitlink-cu12-12.4.127 nvidia-nvtx-cu12-12.1.105


In [9]:
import numpy as np
import torch.nn.functional as F
from torch.nn import Parameter
import os
import urllib.request
# For handling data and file operations
import os
import urllib.request

# For visualization
import matplotlib.pyplot as plt

# For monitoring training progress
from tqdm import tqdm

# For saving and loading models



In [23]:

class Generator(nn.Module):
    def __init__(self, input_size, hidden_size, output_size):
        super(Generator, self).__init__()
        self.hidden_size = hidden_size
        self.n_chars = output_size
        self.lstm = nn.LSTM(input_size, hidden_size, batch_first=True)
        self.fc = nn.Linear(hidden_size, output_size)

    def forward(self, x, hidden):
        output, hidden = self.lstm(x, hidden)
        output = self.fc(output)
        return output, hidden

    def init_hidden(self, batch_size):
        return (torch.zeros(1, batch_size, self.hidden_size),
                torch.zeros(1, batch_size, self.hidden_size))


In [11]:
class Discriminator(nn.Module):
    def __init__(self, input_size, hidden_size):
        super(Discriminator, self).__init__()
        self.hidden_size = hidden_size
        self.lstm = nn.LSTM(input_size, hidden_size, batch_first=True)
        self.fc = nn.Linear(hidden_size, 1)

    def forward(self, x, hidden):
        output, hidden = self.lstm(x, hidden)
        output = self.fc(output)
        return torch.sigmoid(output), hidden

    def init_hidden(self, batch_size):
        return (torch.zeros(1, batch_size, self.hidden_size),
                torch.zeros(1, batch_size, self.hidden_size))


In [32]:
def train_gan(generator, discriminator, data, device, generator_optimizer, discriminator_optimizer, criterion, epochs=10, batch_size=128, seq_length=100, clip=0.01):
    generator.train()
    discriminator.train()

    for epoch in range(epochs):
        for x, y in get_batches(data, batch_size, seq_length):
            # Generate fake data using the generator
            fake_inputs = torch.randn(batch_size, seq_length, input_size).to(device)
            fake_outputs, _ = generator(fake_inputs, generator.init_hidden(batch_size))

            # Train the discriminator
            discriminator_optimizer.zero_grad()

            real_inputs = torch.from_numpy(one_hot_encode(x, generator.n_chars)).to(device)
            real_outputs, _ = discriminator(real_inputs, discriminator.init_hidden(batch_size))
            real_loss = criterion(real_outputs, torch.ones_like(real_outputs))

            fake_loss = criterion(fake_outputs, torch.zeros_like(fake_outputs))

            discriminator_loss = real_loss + fake_loss
            discriminator_loss.backward()
            discriminator_optimizer.step()

            # Train the generator
            generator_optimizer.zero_grad()

            fake_inputs = torch.randn(batch_size, seq_length, input_size).to(device)
            fake_outputs, _ = generator(fake_inputs, generator.init_hidden(batch_size))
            discriminator_outputs, _ = discriminator(fake_outputs, discriminator.init_hidden(batch_size))
            generator_loss = criterion(discriminator_outputs, torch.ones_like(discriminator_outputs))

            generator_loss.backward()
            generator_optimizer.step()

        print("Epoch: {}/{}:".format(epoch + 1, epochs),
              "Generator Loss: {:.4f}, Discriminator Loss: {:.4f}".format(generator_loss.item(), discriminator_loss.item()))


In [15]:
!pip install torch
import torch

In [16]:
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

In [25]:
# Check variable definitions
print(generator, discriminator, encoded, device, generator_optimizer, discriminator_optimizer, criterion, epochs, batch_size, seq_length)

# Check imports
print(Generator, Discriminator, nn.BCELoss)


Generator(
  (lstm): LSTM(61, 512, batch_first=True)
  (fc): Linear(in_features=512, out_features=61, bias=True)
) Discriminator(
  (lstm): LSTM(61, 512, batch_first=True)
  (fc): Linear(in_features=512, out_features=1, bias=True)
) [ 9 47 32 ... 42 52 35] cpu Adam (
Parameter Group 0
    amsgrad: False
    betas: (0.9, 0.999)
    capturable: False
    differentiable: False
    eps: 1e-08
    foreach: None
    fused: None
    lr: 0.001
    maximize: False
    weight_decay: 0
) Adam (
Parameter Group 0
    amsgrad: False
    betas: (0.9, 0.999)
    capturable: False
    differentiable: False
    eps: 1e-08
    foreach: None
    fused: None
    lr: 0.001
    maximize: False
    weight_decay: 0
) BCELoss() 10 128 100
<class '__main__.Generator'> <class '__main__.Discriminator'> <class 'torch.nn.modules.loss.BCELoss'>


In [27]:
input_size = len(chars)
hidden_size = 512

generator = Generator(input_size, hidden_size, input_size).to(device)
discriminator = Discriminator(input_size, hidden_size).to(device)

generator_optimizer = torch.optim.Adam(generator.parameters(), lr=0.001)
discriminator_optimizer = torch.optim.Adam(discriminator.parameters(), lr=0.001)

criterion = nn.BCELoss()


In [28]:
# Define epochs and batch size
#epochs = 10  # You can adjust this value as needed
#batch_size = 128  # You can adjust this value as needed
#seq_length = 100  # This should be the same sequence length used during data preprocessing

train_gan(generator, discriminator, encoded, device, generator_optimizer, discriminator_optimizer, criterion, epochs=10, batch_size=128, seq_length=100)



RuntimeError: all elements of input should be between 0 and 1