<a href="https://colab.research.google.com/github/Eupham/-/blob/master/GAN%20and%20stuff.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
!pip install datasets

Collecting datasets
  Downloading datasets-2.13.1-py3-none-any.whl (486 kB)
[?25l     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/486.2 kB[0m [31m?[0m eta [36m-:--:--[0m[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m486.2/486.2 kB[0m [31m18.3 MB/s[0m eta [36m0:00:00[0m
Collecting dill<0.3.7,>=0.3.0 (from datasets)
  Downloading dill-0.3.6-py3-none-any.whl (110 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m110.5/110.5 kB[0m [31m15.0 MB/s[0m eta [36m0:00:00[0m
Collecting xxhash (from datasets)
  Downloading xxhash-3.2.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (212 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m212.5/212.5 kB[0m [31m23.2 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting multiprocess (from datasets)
  Downloading multiprocess-0.70.14-py310-none-any.whl (134 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m134.3/134.3 kB[0m [31m17.4 MB/s[0m eta [36m0:

In [71]:
import os
import torch
import torch.nn as nn
import torch.optim as optim
import torchvision.transforms as transforms
from torchvision.datasets import MNIST
from torch.utils.data import DataLoader
from torchvision.utils import save_image

# Device configuration
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# Hyper-parameters
latent_size = 64
hidden_size = 256
image_size = 784
num_epochs = 20
batch_size = 100
sample_dir = 'samples'

# Create a directory if not exists
if not os.path.exists(sample_dir):
    os.makedirs(sample_dir)

# Image processing
transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize(mean=(0.5,), std=(0.5,))
])

# MNIST dataset
dataset = MNIST(root='data', train=True, transform=transform, download=True)

# Data loader
data_loader = DataLoader(dataset=dataset, batch_size=batch_size, shuffle=True)

# Discriminator network
D = nn.Sequential(
    nn.Linear(image_size, hidden_size),
    nn.LeakyReLU(0.2),
    nn.Linear(hidden_size, hidden_size),
    nn.LeakyReLU(0.2),
    nn.Linear(hidden_size, 1),
    nn.Sigmoid())

# Generator network
G = nn.Sequential(
    nn.Linear(latent_size, hidden_size),
    nn.ReLU(),
    nn.Linear(hidden_size, hidden_size),
    nn.ReLU(),
    nn.Linear(hidden_size, image_size),
    nn.Tanh())

# Device setting
D = D.to(device)
G = G.to(device)

# Binary cross entropy loss and optimizer
criterion = nn.BCELoss()
d_optimizer = optim.Adam(D.parameters(), lr=0.0002)
g_optimizer = optim.Adam(G.parameters(), lr=0.0002)

# Start training
total_step = len(data_loader)
for epoch in range(num_epochs):
    for i, (images, _) in enumerate(data_loader):
        batch_size = images.size(0)
        images = images.view(batch_size, -1).to(device)

        # Create the labels which are later used as input for the BCE loss
        real_labels = torch.ones(batch_size, 1).to(device)
        fake_labels = torch.zeros(batch_size, 1).to(device)

        # ============================================
        # Train the discriminator
        # ============================================
        # Compute BCE loss using real images
        outputs = D(images)
        d_loss_real = criterion(outputs, real_labels)
        real_score = outputs

        # Compute BCE loss using fake images
        z = torch.randn(batch_size, latent_size).to(device)
        fake_images = G(z)
        outputs = D(fake_images)
        d_loss_fake = criterion(outputs, fake_labels)
        fake_score = outputs

        # Backprop and optimize
        d_loss = d_loss_real + d_loss_fake
        d_optimizer.zero_grad()
        g_optimizer.zero_grad()
        d_loss.backward()
        d_optimizer.step()

        # ============================================
        # Train the generator
        # ============================================
        # Compute loss with fake images
        z = torch.randn(batch_size, latent_size).to(device)
        fake_images = G(z)
        outputs = D(fake_images)

        # We train G to maximize log(D(G(z))) instead of minimizing log(1-D(G(z)))
        g_loss = criterion(outputs, real_labels)

        # Backprop and optimize
        d_optimizer.zero_grad()
        g_optimizer.zero_grad()
        g_loss.backward()
        g_optimizer.step()

        if (i+1) % 200 == 0:
            print('Epoch [{}/{}], Step [{}/{}], d_loss: {:.4f}, g_loss: {:.4f}, D(x): {:.2f}, D(G(z)): {:.2f}'
                  .format(epoch, num_epochs, i+1, total_step, d_loss.item(), g_loss.item(), real_score.mean().item(), fake_score.mean().item()))

    # Save real images
    if (epoch+1) == 1:
        images = images.reshape(images.size(0), 1, 28, 28)
        save_image(images, os.path.join(sample_dir, 'real_images.png'))

    # Save sampled images
    fake_images = fake_images.reshape(fake_images.size(0), 1, 28, 28)
    save_image(fake_images, os.path.join(sample_dir, 'fake_images-{}.png'.format(epoch+1)))

# Save the model checkpoints
torch.save(G.state_dict(), 'G.ckpt')
torch.save(D.state_dict(), 'D.ckpt')


Epoch [0/20], Step [200/600], d_loss: 0.0487, g_loss: 3.8431, D(x): 1.00, D(G(z)): 0.04
Epoch [0/20], Step [400/600], d_loss: 0.1466, g_loss: 5.3210, D(x): 0.96, D(G(z)): 0.07
Epoch [0/20], Step [600/600], d_loss: 0.0658, g_loss: 5.1363, D(x): 0.97, D(G(z)): 0.04
Epoch [1/20], Step [200/600], d_loss: 0.0502, g_loss: 5.5272, D(x): 1.00, D(G(z)): 0.05
Epoch [1/20], Step [400/600], d_loss: 0.3101, g_loss: 3.5668, D(x): 0.91, D(G(z)): 0.16
Epoch [1/20], Step [600/600], d_loss: 0.2545, g_loss: 4.6973, D(x): 0.94, D(G(z)): 0.07
Epoch [2/20], Step [200/600], d_loss: 0.2415, g_loss: 4.2968, D(x): 0.93, D(G(z)): 0.10
Epoch [2/20], Step [400/600], d_loss: 0.3634, g_loss: 2.8692, D(x): 0.93, D(G(z)): 0.18
Epoch [2/20], Step [600/600], d_loss: 0.3124, g_loss: 3.6512, D(x): 0.90, D(G(z)): 0.14
Epoch [3/20], Step [200/600], d_loss: 0.5003, g_loss: 4.1663, D(x): 0.86, D(G(z)): 0.18
Epoch [3/20], Step [400/600], d_loss: 0.9019, g_loss: 3.6755, D(x): 0.91, D(G(z)): 0.31
Epoch [3/20], Step [600/600], d_

In [None]:
import matplotlib.pyplot as plt

# Load the trained generator model
generator = G.to(device)
generator.load_state_dict(torch.load('G.ckpt'))
generator.eval()

# Generate a batch of fake images
num_samples = 25  # Number of images to generate
z = torch.randn(num_samples, latent_size).to(device)
fake_images = generator(z).cpu().detach()

# Reshape and denormalize the generated images
fake_images = fake_images.reshape(num_samples, 28, 28)
fake_images = (fake_images + 1) / 2  # Denormalize the images

# Display the generated images
fig, axs = plt.subplots(5, 5, figsize=(10, 10))
axs = axs.flatten()

for i in range(num_samples):
    axs[i].imshow(fake_images[i], cmap='gray')
    axs[i].axis('off')

plt.tight_layout()
plt.show()


In [70]:
import torch
import torch.nn as nn
import torch.optim as optim
import string
import random

# Device configuration
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# Hyperparameters
input_size = len(string.printable)  # Number of unique characters
hidden_size = 256
num_layers = 2
sequence_length = 100
num_epochs = 2000
batch_size = 1
learning_rate = 0.002

# Text dataset
text = """
Lorem ipsum dolor sit amet, consectetur adipiscing elit, sed do eiusmod tempor incididunt ut labore et dolore magna aliqua.
Ut enim ad minim veniam, quis nostrud exercitation ullamco laboris nisi ut aliquip ex ea commodo consequat.
Duis aute irure dolor in reprehenderit in voluptate velit esse cillum dolore eu fugiat nulla pariatur.
Excepteur sint occaecat cupidatat non proident, sunt in culpa qui officia deserunt mollit anim id est laborum.
"""

# Preprocess the text
all_characters = string.printable
n_characters = len(all_characters)

# Create a mapping of characters to indices and vice versa
char_to_index = {char: index for index, char in enumerate(all_characters)}
index_to_char = {index: char for index, char in enumerate(all_characters)}

# Convert the text to a sequence of indices
data = [char_to_index[char] for char in text]

# RNN model
class CharRNN(nn.Module):
    def __init__(self, input_size, hidden_size, num_layers):
        super(CharRNN, self).__init__()
        self.hidden_size = hidden_size
        self.num_layers = num_layers
        self.embedding = nn.Embedding(input_size, hidden_size)
        self.rnn = nn.GRU(hidden_size, hidden_size, num_layers, batch_first=True)
        self.fc = nn.Linear(hidden_size, input_size)

    def forward(self, x, hidden):
        x = self.embedding(x)
        out, hidden = self.rnn(x, hidden)
        out = out.reshape(-1, self.hidden_size)
        out = self.fc(out)
        return out, hidden


# Instantiate the model
model = CharRNN(input_size, hidden_size, num_layers).to(device)

# Loss and optimizer
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=learning_rate)

# Training
for epoch in range(num_epochs):
    # Randomly sample a sequence from the dataset
    start_index = random.randint(0, len(data) - sequence_length - 1)
    end_index = start_index + sequence_length + 1
    sequence = data[start_index:end_index]

    # Convert the sequence to a tensor
    input_tensor = torch.tensor(sequence[:-1], dtype=torch.long).unsqueeze(0).to(device)
    target_tensor = torch.tensor(sequence[1:], dtype=torch.long).unsqueeze(0).to(device)

    # Forward pass
    hidden = None
    optimizer.zero_grad()
    output, hidden = model(input_tensor, hidden)

    # Compute the loss
    loss = criterion(output.view(-1, input_size), target_tensor.view(-1))
    loss.backward()
    optimizer.step()

    if (epoch+1) % 100 == 0:
        print('Epoch [{}/{}], Loss: {:.4f}'.format(epoch+1, num_epochs, loss.item()))

# Generate text
with torch.no_grad():
    # Choose a random character as the starting point
    start_char = random.choice(all_characters)
    input_char = start_char

    # Convert the starting character to an index
    input_tensor = torch.tensor([char_to_index[input_char]], dtype=torch.long).unsqueeze(0).to(device)

    # Initialize the hidden state
    hidden = None

    # Generate text
    generated_text = input_char
    for _ in range(500):
        output, hidden = model(input_tensor, hidden)
        _, top_char = output.data.topk(1)
        predicted_char = index_to_char[top_char.item()]

        # Append the predicted character to the generated text
        generated_text += predicted_char

        # Set the predicted character as the input for the next iteration
        input_tensor = torch.tensor([char_to_index[predicted_char]], dtype=torch.long).unsqueeze(0).to(device)

print("Generated Text:")
print(generated_text)


Epoch [100/2000], Loss: 0.7989
Epoch [200/2000], Loss: 0.1308
Epoch [300/2000], Loss: 0.0427
Epoch [400/2000], Loss: 0.0241
Epoch [500/2000], Loss: 0.0301
Epoch [600/2000], Loss: 0.1241
Epoch [700/2000], Loss: 0.0769
Epoch [800/2000], Loss: 0.0385
Epoch [900/2000], Loss: 0.0730
Epoch [1000/2000], Loss: 0.0325
Epoch [1100/2000], Loss: 0.0190
Epoch [1200/2000], Loss: 0.0523
Epoch [1300/2000], Loss: 0.0553
Epoch [1400/2000], Loss: 0.0889
Epoch [1500/2000], Loss: 0.0425
Epoch [1600/2000], Loss: 0.0862
Epoch [1700/2000], Loss: 0.0550
Epoch [1800/2000], Loss: 0.0422
Epoch [1900/2000], Loss: 0.1108
Epoch [2000/2000], Loss: 0.0327
Generated Text:
Yaliqua. 
Ut enim ad minim veniam, quis nostrud exercitation ullamco laboris nisi ut aliquip ex ea commodo consequat. 
Duis aute irure dolor in reprehenderit in voluptate velit esse cillum dolore eu fugiat nulla pariatur. 
Excepteur sint occaecat cupidatat non proident, sunt in culpa qui officia deserunt mollit anim id est laborum.














Ut
et

In [44]:
# Model hyperparameters
latent_dim = 100  # Dimensionality of the generator's input noise
learning_rate = 0.0002
betas = (0.5, 0.999)
num_epochs = 1000
batch_size = 64

# Preprocessing: Tokenize phrases and compute TF-IDF

# Load the dataset using the `load_dataset` function
dataset = load_dataset('imdb')

# Extract the text from the dataset
phrases = dataset['train']['text'][:1000]  # Example: using the first 100 samples

# Tokenize phrases
tokenized_phrases = [phrase.split() for phrase in phrases]

# Compute TF-IDF
vectorizer = TfidfVectorizer()
tfidf_matrix = vectorizer.fit_transform(phrases).toarray()

# Convert TF-IDF matrix to PyTorch tensor
tfidf_tensor = torch.tensor(tfidf_matrix, dtype=torch.float32)

# Initialize the generator and discriminator
generator = Generator(latent_dim, tfidf_tensor.shape[1])
discriminator = Discriminator(tfidf_tensor.shape[1])

# Weight initialization
def weights_init(m):
    if isinstance(m, nn.Linear):
        torch.nn.init.normal_(m.weight.data, 0.0, 0.02)

# Apply weight initialization to both generator and discriminator
generator.apply(weights_init)
discriminator.apply(weights_init)

# Define new loss function
adversarial_loss = nn.BCELoss()
auxiliary_loss = nn.MSELoss()

# Define optimizers
gen_optimizer = optim.Adam(generator.parameters(), lr=learning_rate, betas=betas)
disc_optimizer = optim.Adam(discriminator.parameters(), lr=learning_rate, betas=betas)




  0%|          | 0/3 [00:00<?, ?it/s]

In [48]:
import datetime
import torch
import torch.optim as optim
import torch.optim.lr_scheduler as lr_scheduler
import random

# Define the save interval
save_interval = 10  # Save models and completion time every 10 epochs

# Get the number of samples
num_samples = tfidf_tensor.shape[0]

# Define the warm-up steps and learning rate schedule
warmup_steps = 10
scheduler_step_size = 30
scheduler_gamma = 0.1

# Create the learning rate schedulers
scheduler_gen = lr_scheduler.StepLR(gen_optimizer, step_size=scheduler_step_size, gamma=scheduler_gamma)
scheduler_disc = lr_scheduler.StepLR(disc_optimizer, step_size=scheduler_step_size, gamma=scheduler_gamma)

# Training loop
for epoch in range(num_epochs):
    # Shuffle the indices
    indices = list(range(num_samples))
    random.shuffle(indices)

    # Update the learning rate for warm-up steps
    if epoch < warmup_steps:
        warmup_factor = epoch / warmup_steps
        for param_group in gen_optimizer.param_groups:
            param_group['lr'] = learning_rate * warmup_factor
        for param_group in disc_optimizer.param_groups:
            param_group['lr'] = learning_rate * warmup_factor

    for i in range(0, num_samples, batch_size):
        # Get the batch indices
        batch_indices = indices[i:i + batch_size]

        # Generate fake samples
        z = torch.randn(len(batch_indices), latent_dim)
        fake_samples = generator(z)

        # Real samples
        real_samples = tfidf_tensor[batch_indices]
        real_batch_size = real_samples.shape[0]

        # Pad sequences to the same length
        fake_samples_padded = pad_sequence([torch.Tensor(sample) for sample in fake_samples], batch_first=True)
        real_samples_padded = pad_sequence([torch.Tensor(sample) for sample in real_samples], batch_first=True)

        # Ensure both real and fake samples have the same batch size
        fake_samples_padded = fake_samples_padded[:real_batch_size]

        # Discriminator loss
        disc_optimizer.zero_grad()
        real_labels = torch.ones(real_batch_size, 1)
        fake_labels = torch.zeros(real_batch_size, 1)
        disc_real_output = discriminator(real_samples_padded)
        disc_fake_output = discriminator(fake_samples_padded.detach())
        disc_real_loss = criterion(disc_real_output, real_labels[:real_batch_size])
        disc_fake_loss = criterion(disc_fake_output, fake_labels)
        disc_loss = disc_real_loss + disc_fake_loss
        disc_loss.backward()
        disc_optimizer.step()

        # Generator loss
        gen_optimizer.zero_grad()
        gen_fake_output = discriminator(fake_samples_padded)
        gen_loss = criterion(gen_fake_output, real_labels[:real_batch_size])
        gen_loss.backward()
        gen_optimizer.step()

    # Update the learning rate using the scheduler
    scheduler_gen.step()
    scheduler_disc.step()

    # Print epoch and loss
    print(f"Epoch [{epoch+1}/{num_epochs}], Generator Loss: {gen_loss.item()}, Discriminator Loss: {disc_loss.item()}")

    # Store models and completion time
    if (epoch + 1) % save_interval == 0:
        completion_time = datetime.datetime.now()
        checkpoint = {
            'epoch': epoch + 1,
            'generator_state_dict': generator.state_dict(),
            'discriminator_state_dict': discriminator.state_dict(),
            'completion_time': completion_time
        }
        torch.save(checkpoint, f'models_checkpoint_epoch_{epoch + 1}.pth')


Epoch [1/1000], Generator Loss: 0.7304068207740784, Discriminator Loss: 1.3999083042144775
Epoch [2/1000], Generator Loss: 0.7239536046981812, Discriminator Loss: 1.3935409784317017
Epoch [3/1000], Generator Loss: 0.7226549386978149, Discriminator Loss: 1.3918981552124023
Epoch [4/1000], Generator Loss: 0.7213510870933533, Discriminator Loss: 1.3915683031082153
Epoch [5/1000], Generator Loss: 0.7193614840507507, Discriminator Loss: 1.3893394470214844
Epoch [6/1000], Generator Loss: 0.7173088788986206, Discriminator Loss: 1.3891737461090088
Epoch [7/1000], Generator Loss: 0.7149339318275452, Discriminator Loss: 1.3879200220108032
Epoch [8/1000], Generator Loss: 0.7125612497329712, Discriminator Loss: 1.387558937072754
Epoch [9/1000], Generator Loss: 0.7102335691452026, Discriminator Loss: 1.3875731229782104
Epoch [10/1000], Generator Loss: 0.7068346738815308, Discriminator Loss: 1.3885982036590576
Epoch [11/1000], Generator Loss: 0.7050193548202515, Discriminator Loss: 1.387583136558532

KeyboardInterrupt: ignored

In [38]:
import torch

# Load the generator model from the saved checkpoint
checkpoint_path = 'models_checkpoint_epoch_100.pth'
checkpoint = torch.load(checkpoint_path)
generator.load_state_dict(checkpoint['generator_state_dict'])
generator.eval()

# Generating new phrases
num_generated_phrases = 5
noise = torch.randn(num_generated_phrases, latent_dim)
generated_phrases = generator(noise)

# Convert the generated phrases to TF-IDF matrix
generated_phrases_padded = pad_sequence([torch.Tensor(sample) for sample in generated_phrases], batch_first=True)
generated_phrases_padded = generated_phrases_padded.detach().numpy()

# Inverse transform the generated TF-IDF matrix to obtain the phrases
generated_tfidf = vectorizer.inverse_transform(generated_phrases_padded)

# Convert the phrases to text representation
generated_text = []
for phrase in generated_tfidf:
    text = ' '.join(phrase)
    generated_text.append(text)

for i, phrase in enumerate(generated_text):
    print(f"Generated Phrase {i+1}: {phrase}")


