In [36]:
# Import necessary libraries
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, Dataset
import glob
from PIL import Image

import os
os.environ["CUDA_LAUNCH_BLOCKING"] = "1"


In [18]:
# Define the ConvNormLReLU layer
class ConvNormLReLU(nn.Sequential):
    def __init__(self, in_ch, out_ch, kernel_size=3, stride=1, padding=1, pad_mode="reflect", groups=1, bias=False):
        pad_layer = {
            "zero": nn.ZeroPad2d,
            "same": nn.ReplicationPad2d,
            "reflect": nn.ReflectionPad2d,
        }
        if pad_mode not in pad_layer:
            raise NotImplementedError

        super(ConvNormLReLU, self).__init__(
            pad_layer[pad_mode](padding),
            nn.Conv2d(in_ch, out_ch, kernel_size=kernel_size, stride=stride, padding=0, groups=groups, bias=bias),
            nn.GroupNorm(num_groups=1, num_channels=out_ch, affine=True),
            nn.LeakyReLU(0.2, inplace=True)
        )

# Define the Generator model
class Generator(nn.Module):
    def __init__(self):
        super(Generator, self).__init__()
        self.block_a = nn.Sequential(
            ConvNormLReLU(3, 32, kernel_size=7, padding=3),
            ConvNormLReLU(32, 64, stride=2, padding=(0, 1, 0, 1)),
            ConvNormLReLU(64, 64)
        )

        self.block_b = nn.Sequential(
            ConvNormLReLU(64, 128, stride=2, padding=(0, 1, 0, 1)),
            ConvNormLReLU(128, 128)
        )

        self.block_c = nn.Sequential(
            ConvNormLReLU(128, 128),
            ConvNormLReLU(128, 256),
            ConvNormLReLU(256, 256),
            ConvNormLReLU(256, 128)
        )

        self.block_d = nn.Sequential(
            ConvNormLReLU(128, 128),
            ConvNormLReLU(128, 128)
        )

        self.block_e = nn.Sequential(
            ConvNormLReLU(128, 64),
            ConvNormLReLU(64, 64),
            ConvNormLReLU(64, 32, kernel_size=7, padding=3)
        )

        self.out_layer = nn.Sequential(
            nn.Conv2d(32, 3, kernel_size=1, stride=1, padding=0, bias=False),
            nn.Tanh()
        )

    def forward(self, input, align_corners=True):
        out = self.block_a(input)
        half_size = out.size()[-2:]
        out = self.block_b(out)
        out = self.block_c(out)

        out = nn.functional.interpolate(out, half_size, mode="bilinear", align_corners=align_corners)
        out = self.block_d(out)

        out = nn.functional.interpolate(out, input.size()[-2:], mode="bilinear", align_corners=align_corners)
        out = self.block_e(out)
        out = self.out_layer(out)
        return out

# Define the Discriminator model
class Discriminator(nn.Module):
    def __init__(self):
        super(Discriminator, self).__init__()
        self.layers = nn.Sequential(
            ConvNormLReLU(3, 64, stride=2),
            ConvNormLReLU(64, 128, stride=2),
            ConvNormLReLU(128, 256, stride=2),
            ConvNormLReLU(256, 512, stride=2),
            nn.Conv2d(512, 1, kernel_size=4, stride=1, padding=0)
        )

    def forward(self, x):
        return self.layers(x).squeeze()

In [19]:
# Define a dataset class for the Ghibli dataset
class GhibliDataset(Dataset):
    def __init__(self, dataset_path, transform=None):
        self.image_paths = glob.glob(f"{dataset_path}/*.jpg")
        self.transform = transform

    def __len__(self):
        return len(self.image_paths)

    def __getitem__(self, idx):
        image_path = self.image_paths[idx]
        image = Image.open(image_path).convert("RGB")

        if self.transform:
            image = self.transform(image)

        return image, image

In [20]:
# Load the generator and discriminator models
generator = Generator()
discriminator = Discriminator()

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
generator = generator.to(device)
discriminator = discriminator.to(device)

dataset_path = 'dataset/ghibli'
from torchvision import transforms
transform = transforms.Compose([
    transforms.Resize((256, 256)),
    transforms.ToTensor(),
])
dataset = GhibliDataset(dataset_path, transform=transform)
dataloader = DataLoader(dataset, batch_size=4, shuffle=True)

# Define loss functions and optimizers
gen_criterion = nn.MSELoss()
adv_criterion = nn.BCEWithLogitsLoss()
optimizer_gen = optim.Adam(generator.parameters(), lr=0.0002, betas=(0.5, 0.999))
optimizer_disc = optim.Adam(discriminator.parameters(), lr=0.0002, betas=(0.5, 0.999))

In [25]:
# Training loop
num_epochs = 5
for epoch in range(num_epochs):
    for inputs, targets in dataloader:
        inputs, targets = inputs.to(device), targets.to(device)

        # Train Discriminator
        optimizer_disc.zero_grad()
        real_labels = torch.ones(inputs.size(), device=device)
        fake_labels = torch.zeros(inputs.size(), device=device)

        real_outputs = discriminator(targets)
        fake_images = generator(inputs).detach()
        fake_outputs = discriminator(fake_images)

        real_loss = adv_criterion(real_outputs, real_labels)
        fake_loss = adv_criterion(fake_outputs, fake_labels)

        disc_loss = real_loss + fake_loss
        disc_loss.backward()
        optimizer_disc.step()

        # Train Generator
        optimizer_gen.zero_grad()
        fake_images = generator(inputs)
        fake_outputs = discriminator(fake_images)

        gen_loss = gen_criterion(fake_images, targets) + adv_criterion(fake_outputs, real_labels)
        gen_loss.backward()
        optimizer_gen.step()

    print(f"Epoch [{epoch+1}/{num_epochs}] - Generator Loss: {gen_loss.item():.4f}, Discriminator Loss: {disc_loss.item():.4f}")

# Save models
torch.save(generator.state_dict(), "fine_tuned_generator.pt")
torch.save(discriminator.state_dict(), "fine_tuned_discriminator.pt")


ValueError: Target size (torch.Size([4, 3, 256, 256])) must be the same as input size (torch.Size([4, 169]))

In [38]:
# Fine-tuning PyTorch Model (AnimeGANv2)

import os
os.environ["CUDA_LAUNCH_BLOCKING"] = "1"


import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, Dataset
from model import Generator  # Ensure you have these in your working directory
import glob
from PIL import Image, UnidentifiedImageError
from torchvision import transforms

class ConvNormLReLU(nn.Sequential):
    def __init__(self, in_ch, out_ch, kernel_size=3, stride=1, padding=1, pad_mode="reflect", groups=1, bias=False):
        pad_layer = {
            "zero": nn.ZeroPad2d,
            "same": nn.ReplicationPad2d,
            "reflect": nn.ReflectionPad2d,
        }
        if pad_mode not in pad_layer:
            raise NotImplementedError

        super(ConvNormLReLU, self).__init__(
            pad_layer[pad_mode](padding),
            nn.Conv2d(in_ch, out_ch, kernel_size=kernel_size, stride=stride, padding=0, groups=groups, bias=bias),
            nn.GroupNorm(num_groups=1, num_channels=out_ch, affine=True),
            nn.LeakyReLU(0.2, inplace=True)
        )

class Discriminator(nn.Module):
    def __init__(self):
        super(Discriminator, self).__init__()
        self.layers = nn.Sequential(
            ConvNormLReLU(3, 64, stride=2),
            ConvNormLReLU(64, 128, stride=2),
            ConvNormLReLU(128, 256, stride=2),
            ConvNormLReLU(256, 512, stride=2),
            nn.Conv2d(512, 1, kernel_size=4, stride=1, padding=0)
        )

    def forward(self, x):
        return self.layers(x).squeeze()

# Dataset class for Ghibli dataset
class GhibliDataset(Dataset):
    def __init__(self, dataset_path, transform=None):
        self.image_paths = glob.glob(f"{dataset_path}/*.*")  # Include multiple image formats
        self.transform = transform

    def __len__(self):
        return len(self.image_paths)

    def __getitem__(self, idx):
        image_path = self.image_paths[idx]
        try:
            image = Image.open(image_path).convert("RGB")
        except UnidentifiedImageError:
            print(f"Warning: Unable to read image {image_path}. Skipping.")
            return None, None

        if self.transform:
            image = self.transform(image)

        return image, image

# Load the generator and discriminator models
generator = Generator()
discriminator = Discriminator()

# Load pretrained weights
gen_model_path = 'weights/paprika.pt'
generator.load_state_dict(torch.load(gen_model_path))
generator.train()
discriminator.train()

# Move models to GPU if available
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
generator = generator.to(device)
discriminator = discriminator.to(device)

# Dataset and DataLoader with normalization
transform = transforms.Compose([
    transforms.Resize((256, 256)),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.5, 0.5, 0.5], std=[0.5, 0.5, 0.5]),  # Normalize to [-1, 1]
])

dataset_path = 'dataset/ghibli'
dataset = GhibliDataset(dataset_path, transform=transform)
dataloader = DataLoader(dataset, batch_size=4, shuffle=True, drop_last=True)

# Define loss functions and optimizers
gen_criterion = nn.MSELoss().to(device)
adv_criterion = nn.BCELoss().to(device)

optimizer_gen = optim.Adam(generator.parameters(), lr=0.0002, betas=(0.5, 0.999))
optimizer_disc = optim.Adam(discriminator.parameters(), lr=0.0002, betas=(0.5, 0.999))

# Training loop
num_epochs = 5
for epoch in range(num_epochs):
    gen_total_loss = 0.0
    disc_total_loss = 0.0

    for inputs, targets in dataloader:
        if inputs is None or targets is None:
            continue

        inputs, targets = inputs.to(device), targets.to(device)

        # Train Discriminator
        optimizer_disc.zero_grad()

        real_outputs = discriminator(targets)
        real_labels = torch.ones_like(real_outputs, device=device)  
        real_loss = adv_criterion(real_outputs, real_labels)

        fake_images = generator(inputs).detach()
        fake_outputs = discriminator(fake_images)
        fake_labels = torch.zeros_like(fake_outputs, device=device)  
        fake_loss = adv_criterion(fake_outputs, fake_labels)

        disc_loss = real_loss + fake_loss
        disc_loss.backward()
        optimizer_disc.step()

        # Train Generator
        optimizer_gen.zero_grad()

        fake_images = generator(inputs)
        fake_outputs = discriminator(fake_images)
        gen_labels = torch.ones_like(fake_outputs, device=device) 
        gen_loss = gen_criterion(fake_images, targets) + adv_criterion(fake_outputs, gen_labels)

        gen_loss.backward()
        optimizer_gen.step()

        gen_total_loss += gen_loss.item()
        disc_total_loss += disc_loss.item()

    print(f"Epoch [{epoch + 1}/{num_epochs}], Generator Loss: {gen_total_loss / len(dataloader):.4f}, Discriminator Loss: {disc_total_loss / len(dataloader):.4f}")

# Save fine-tuned models
torch.save(generator.state_dict(), 'fine_tuned_generator.pt')
torch.save(discriminator.state_dict(), 'fine_tuned_discriminator.pt')
print("Fine-tuned models saved successfully.")


  generator.load_state_dict(torch.load(gen_model_path))


RuntimeError: CUDA error: device-side assert triggered
CUDA kernel errors might be asynchronously reported at some other API call, so the stacktrace below might be incorrect.
For debugging consider passing CUDA_LAUNCH_BLOCKING=1
Compile with `TORCH_USE_CUDA_DSA` to enable device-side assertions.


In [35]:
!CUDA_LAUNCH_BLOCKING=1 python training.ipynb

'CUDA_LAUNCH_BLOCKING' is not recognized as an internal or external command,
operable program or batch file.
