# Generovanie aut

In [30]:
import torch
import torchvision
import torchvision.transforms as transforms

# Set device to use CUDA if available, otherwise use CPU
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
print(device)

path = 'C:/Users/tibor/PycharmProjects/nsiete3/data/dataset'

cuda


In [31]:
import os
from prettytable import PrettyTable

def printTable():
    def count_files(path):
        num_files = 0
        for root, dirs, files in os.walk(path):
            for file in files:
                num_files += 1
        return num_files

    directories = [dir for dir in os.listdir(path)
                   if os.path.isdir(os.path.join(path, dir))]

    table = PrettyTable()
    table.field_names = ["folder", "img count"]

    for dir in directories:
        dir_path = os.path.join(path, dir)
        file_count = count_files(dir_path)
        table.add_row([dir, file_count])

    table.sortby = "img count"

    print(table)

In [36]:
printTable()

+--------------+-----------+
|    folder    | img count |
+--------------+-----------+
|    truck     |    396    |
| personal_car |    8792   |
+--------------+-----------+


In [37]:
import torch
import torchvision.transforms as transforms
import torchvision.datasets as datasets

data_dir = path
transform = transforms.Compose([
    transforms.Resize((64, 64)),
    transforms.ToTensor()
])
dataset = datasets.ImageFolder(root=path, transform=transform)

In [39]:
data_loader = torch.utils.data.DataLoader(dataset, batch_size=16, shuffle=False, num_workers=4)

In [40]:
mean = 0.
std = 0.
total_images = 0
for images, _ in data_loader:
    batch_samples = images.size(0)
    images = images.view(batch_samples, images.size(1), -1)
    mean += images.mean(2).sum(0)
    std += images.std(2).sum(0)
    total_images += batch_samples

mean /= total_images
std /= total_images


In [41]:
print("Mean: ", mean)
print("Std: ", std)

Mean:  tensor([0.2946, 0.3051, 0.3091])
Std:  tensor([0.2032, 0.1997, 0.1998])


# Rerun from here

In [26]:
import torch
import torchvision.transforms as transforms
import torchvision

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
print(device)

path = 'C:/Users/tibor/PycharmProjects/nsiete3/data/dataset'


transform = transforms.Compose([
    transforms.Resize((64, 64)),
    transforms.ToTensor(),
    # transforms.Normalize(mean=[0.2946, 0.3051, 0.3091], std=[0.2032, 0.1997, 0.1998])
])

cuda


In [27]:
dataset = torchvision.datasets.ImageFolder(root=path, transform=transform)

# Split the dataset into training and testing sets
train_size = int(0.99 * len(dataset))
test_size = len(dataset) - train_size
train_dataset, test_dataset = torch.utils.data.random_split(dataset, [train_size, test_size])


In [28]:
batch_size = 16
trainloader = torch.utils.data.DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
testloader = torch.utils.data.DataLoader(test_dataset, batch_size=batch_size, shuffle=False)

In [29]:
import torch
import torch.nn as nn
import torch.nn.functional as F

class Encoder(nn.Module):
    def __init__(self, latent_size):
        super(Encoder, self).__init__()
        self.conv1 = nn.Conv2d(3, 32, kernel_size=4, stride=2, padding=1)
        self.conv2 = nn.Conv2d(32, 64, kernel_size=4, stride=2, padding=1)
        self.conv3 = nn.Conv2d(64, 128, kernel_size=4, stride=2, padding=1)
        self.conv4 = nn.Conv2d(128, 256, kernel_size=4, stride=2, padding=1)
        self.fc1 = nn.Linear(256 * 4 * 4, 512)
        self.fc21 = nn.Linear(512, latent_size)
        self.fc22 = nn.Linear(512, latent_size)

    def forward(self, x):
        x = F.relu(self.conv1(x))
        x = F.relu(self.conv2(x))
        x = F.relu(self.conv3(x))
        x = F.relu(self.conv4(x))
        x = x.view(-1, 256 * 4 * 4)
        x = F.relu(self.fc1(x))
        mu = self.fc21(x)
        logvar = self.fc22(x)
        std = torch.exp(0.5*logvar)
        eps = torch.randn_like(std)
        z = mu + eps*std
        return z, mu, logvar

class Decoder(nn.Module):
    def __init__(self, latent_size):
        super(Decoder, self).__init__()

        self.fc1 = nn.Linear(latent_size, 1024)
        self.fc2 = nn.Linear(1024, 4 * 4 * 128)
        self.conv1 = nn.ConvTranspose2d(128, 64, kernel_size=4, stride=2, padding=1)
        self.conv2 = nn.ConvTranspose2d(64, 32, kernel_size=4, stride=2, padding=1)
        self.conv3 = nn.ConvTranspose2d(32, 16, kernel_size=4, stride=2, padding=1)
        self.conv4 = nn.ConvTranspose2d(16, 3, kernel_size=4, stride=2, padding=1)

    def forward(self, z):
        x = F.relu(self.fc1(z))
        x = F.relu(self.fc2(x))
        x = x.view(-1, 128, 4, 4)
        x = F.relu(self.conv1(x))
        x = F.relu(self.conv2(x))
        x = F.relu(self.conv3(x))
        x = torch.sigmoid(self.conv4(x))
        return x

class VAE(nn.Module):
    def __init__(self, latent_size):
        super(VAE, self).__init__()
        self.latent_size = latent_size
        self.encoder = Encoder(latent_size)
        self.decoder = Decoder(latent_size)

    def forward(self, x):
        z, mu, logvar = self.encoder(x)
        recon_x = self.decoder(z)
        return recon_x, mu, logvar

    def sample(self, num_samples):
        with torch.no_grad():
            z = torch.randn(num_samples, self.latent_size).to(device)
            samples = self.decoder(z).cpu()
            return samples

# Training

In [33]:
import torch
import torch.nn as nn
import torchvision
from torchvision import transforms
from torch.utils.data import DataLoader
from tqdm import tqdm
import wandb

usewandb = True

# Define hyperparameters
batch_size = 16
learning_rate = 0.001
num_epochs = 10
latent_size = 256

# Create the VAE model and optimizer
vae = VAE(latent_size).to(device)
optimizer = torch.optim.Adam(vae.parameters(), lr=learning_rate)

# Define the loss function
def loss_function(recon_x, x, mu, logvar):
    BCE = F.binary_cross_entropy(recon_x, x, reduction='sum')
    KLD = -0.5 * torch.sum(1 + logvar - mu.pow(2) - logvar.exp())
    return BCE + KLD

# Initialize WandB
if usewandb:
    wandb.init(project="car-generation")

    # Log hyperparameters
    config = wandb.config
    config.batch_size = batch_size
    config.learning_rate = learning_rate
    config.num_epochs = num_epochs
    config.latent_size = latent_size

# Train the VAE
vae.train()
for epoch in range(num_epochs):
    for i, (images, _) in enumerate(tqdm(trainloader, desc=f"Epoch {epoch+1}/{num_epochs}")):
        images = images.to(device)
        optimizer.zero_grad()
        recon_images, mu, logvar = vae(images)
        loss = loss_function(recon_images, images, mu, logvar)
        loss.backward()
        optimizer.step()

        if usewandb:
            # Log loss to WandB
            wandb.log({"vae_loss": loss})

    # Generate new car images
    vae.eval()
    with torch.no_grad():
        num_samples = 10
        samples = vae.sample(num_samples)
        torchvision.utils.save_image(samples, f"generated_cars{epoch}.png")
        if usewandb:
            # Log generated images to WandB
            wandb.log({"generated_images": [wandb.Image(sample) for sample in samples]})


VBox(children=(Label(value='0.071 MB of 0.071 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
vae_loss,█▆▇▇▇▆▆▆▄▆▅▅▃▅▅▃▃▅▃▃▄▃▁▅▅▅▃▄▄▄▄▄▅▅▅▅▅▅▄▃

0,1
vae_loss,118408.72656


VBox(children=(Label(value='Waiting for wandb.init()...\r'), FloatProgress(value=0.01693333333338766, max=1.0)…

Epoch 1/10: 100%|██████████| 529/529 [00:51<00:00, 10.29it/s]
Epoch 2/10: 100%|██████████| 529/529 [00:50<00:00, 10.43it/s]
Epoch 3/10: 100%|██████████| 529/529 [00:52<00:00, 10.08it/s]
Epoch 4/10: 100%|██████████| 529/529 [00:50<00:00, 10.55it/s]
Epoch 5/10: 100%|██████████| 529/529 [00:49<00:00, 10.63it/s]
Epoch 6/10: 100%|██████████| 529/529 [00:49<00:00, 10.60it/s]
Epoch 7/10: 100%|██████████| 529/529 [00:51<00:00, 10.24it/s]
Epoch 8/10: 100%|██████████| 529/529 [00:49<00:00, 10.58it/s]
Epoch 9/10: 100%|██████████| 529/529 [00:50<00:00, 10.40it/s]
Epoch 10/10: 100%|██████████| 529/529 [00:51<00:00, 10.35it/s]


# Next training

In [36]:
torch.save(vae, f"vae_model_epoch_{10}.pth")
torch.save(optimizer.state_dict(), f"optimizer_epoch_{10}.pth")

In [44]:
import datetime


import torch
import torch.nn as nn
from torchvision import transforms
from torch.utils.data import DataLoader
from tqdm import tqdm

# Define hyperparameters
batch_size = 16
learning_rate = 0.001
num_epochs = 100
latent_size = 256

# Load the existing VAE model checkpoint
vae = torch.load("models2/vae_model_epoch_20.pth")

# Load the optimizer state from the previous training run
optimizer = torch.optim.Adam(vae.parameters(), lr=learning_rate)
optimizer.load_state_dict(torch.load("models2/optimizer_epoch_20.pth"))

# Define the loss function
def loss_function(recon_x, x, mu, logvar):
    BCE = nn.functional.binary_cross_entropy(recon_x, x, reduction='sum')
    KLD = -0.5 * torch.sum(1 + logvar - mu.pow(2) - logvar.exp())
    return BCE + KLD

# Train the VAE
vae.train()
for epoch in range(num_epochs):
    for i, (images, _) in enumerate(tqdm(trainloader, desc=f"Epoch {epoch+1}/{num_epochs}")):
        images = images.to(device)
        optimizer.zero_grad()
        recon_images, mu, logvar = vae(images)
        loss = loss_function(recon_images, images, mu, logvar)
        loss.backward()
        optimizer.step()

    # Save the model checkpoint and optimizer state after each epoch
    torch.save(vae, f"vae_model_epoch_{epoch+1}.pth")
    torch.save(optimizer.state_dict(), f"optimizer_epoch_{epoch+1}.pth")



    # Generate new car images
    vae.eval()
    with torch.no_grad():
        num_samples = 16
        samples = vae.sample(num_samples)
        now = datetime.datetime.now()
        timestamp = now.strftime("%Y-%m-%d_%H-%M-%S")
        torchvision.utils.save_image(samples, f"generated_cars{timestamp}.png")
        if usewandb:
            # Log generated images to WandB
            wandb.log({"generated_images": [wandb.Image(sample) for sample in samples]})

Epoch 1/100: 100%|██████████| 529/529 [00:46<00:00, 11.26it/s]
Epoch 2/100: 100%|██████████| 529/529 [00:45<00:00, 11.52it/s]
Epoch 3/100: 100%|██████████| 529/529 [00:45<00:00, 11.59it/s]
Epoch 4/100: 100%|██████████| 529/529 [00:45<00:00, 11.55it/s]
Epoch 5/100: 100%|██████████| 529/529 [00:44<00:00, 11.79it/s]
Epoch 6/100: 100%|██████████| 529/529 [00:46<00:00, 11.46it/s]
Epoch 7/100: 100%|██████████| 529/529 [00:45<00:00, 11.74it/s]
Epoch 8/100: 100%|██████████| 529/529 [00:44<00:00, 11.77it/s]
Epoch 9/100: 100%|██████████| 529/529 [00:45<00:00, 11.55it/s]
Epoch 10/100: 100%|██████████| 529/529 [00:46<00:00, 11.42it/s]
Epoch 11/100: 100%|██████████| 529/529 [00:44<00:00, 11.84it/s]
Epoch 12/100: 100%|██████████| 529/529 [00:44<00:00, 11.79it/s]
Epoch 13/100: 100%|██████████| 529/529 [00:45<00:00, 11.57it/s]
Epoch 14/100: 100%|██████████| 529/529 [00:45<00:00, 11.56it/s]
Epoch 15/100: 100%|██████████| 529/529 [00:45<00:00, 11.60it/s]
Epoch 16/100: 100%|██████████| 529/529 [00:46<00:

KeyboardInterrupt: 

In [None]:
# # Generate a random latent vector
# z = torch.randn(1, 256)
#
# # Generate a new image from the latent vector
# with torch.no_grad():
#     generated_img = model.decode(z)
#
# # Display the generated image
# import matplotlib.pyplot as plt
# plt.imshow(generated_img[0].permute(1, 2, 0))
# plt.show()

In [25]:

# Generate new car images
vae.eval()
with torch.no_grad():
    num_samples = 10
    samples = vae.sample(num_samples)
    torchvision.utils.save_image(samples, f"generated_cars.png{}")
    if usewandb:
        # Log generated images to WandB
        wandb.log({"generated_images": [wandb.Image(sample) for sample in samples]})