# Clone the Repo

In [None]:
# Clone the project repository; git creates the AnimeGAN-Pytorch directory in the current folder.
!git clone https://github.com/ShilpaShivarudraiah/AnimeGAN-Pytorch.git

Cloning into 'AnimeGAN-Pytorch'...
remote: Enumerating objects: 52, done.[K
remote: Counting objects: 100% (52/52), done.[K
remote: Compressing objects: 100% (49/49), done.[K
remote: Total 52 (delta 2), reused 52 (delta 2), pack-reused 0[K
Unpacking objects: 100% (52/52), 40.67 MiB | 8.73 MiB/s, done.


# Dataset Preprocessing for Cartoonset10k (Old)

In [None]:
# Mount Google Drive at /content/drive so the dataset archives under MyDrive can be read.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [None]:
# Copy the Cartoon Set archive from Drive into the working directory and unpack it.
# The following cells read /content/cartoonset10k, so the archive must be extracted
# before they run.
# NOTE(review): assumes the working directory is /content and that the archive
# unpacks into a top-level cartoonset10k/ folder — confirm.
!cp /content/drive/MyDrive/cartoonset10k.tgz ./
!tar -xzf ./cartoonset10k.tgz -C /content

In [None]:
#@title Remove all the csv files in the dataset
# Delete every .csv file from the dataset folder so that only the other files remain.
import os

path = '/content/cartoonset10k'
for file in os.listdir(path):
    # Match on the file extension rather than on a substring, so that a name which
    # merely contains ".csv" (e.g. "x.csv.png") is never deleted by mistake.
    if file.lower().endswith('.csv'):
        os.remove(os.path.join(path, file))

In [None]:
#@title Check if there any csv file left
# Print the name of every .csv file still present; no output means none are left.
import os

path = '/content/cartoonset10k'
for file in os.listdir(path):
    # Test the extension (not a substring) so only real .csv files are reported.
    if file.lower().endswith('.csv'):
        print(file)

## Transform the cartoon dataset into its edge-smoothed version

In [None]:
# Work from the project directory so that the relative dataset/ paths used below resolve there.
# NOTE(review): the clone cell creates /content/AnimeGAN-Pytorch, not
# /content/pytorch-animeGAN — confirm which directory this "(Old)" section expects.
%cd /content/pytorch-animeGAN

/content/pytorch-animeGAN


In [None]:
#Resize the images in cartoonset10k to 256x256
import cv2
import os

# Set the directory path
directory = '/content/cartoonset10k'

# Loop through all files in the directory
for filename in os.listdir(directory):
    # Only .jpg/.png files are processed; extend the tuple to match other formats
    if not filename.endswith(('.jpg', '.png')):
        continue
    image_path = os.path.join(directory, filename)

    # cv2.imread returns None (it does not raise) when a file cannot be decoded;
    # skip such files instead of letting cv2.resize fail on None.
    img = cv2.imread(image_path)
    if img is None:
        print('Skipping unreadable image: {}'.format(image_path))
        continue

    # Resize the image to 256x256 (the aspect ratio is not preserved)
    img_resized = cv2.resize(img, (256, 256))

    # Overwrite the original file; cv2.imwrite signals failure through its return value
    if not cv2.imwrite(image_path, img_resized):
        print('Failed to write resized image: {}'.format(image_path))


In [None]:
#Move the folder inside the dataset folder inside pytorch-animeGAN
# Imports are local to this cell so it does not depend on an earlier cell having run.
import os
import shutil

# Create the dataset folder (and any missing parent folders) if it is not there yet.
os.makedirs('/content/pytorch-animeGAN/dataset', exist_ok=True)

# The destination is an existing directory, so the folder ends up at
# dataset/cartoonset10k. The existence check lets the cell be re-run after the
# move has already happened.
if os.path.exists('/content/cartoonset10k'):
    shutil.move('/content/cartoonset10k', '/content/pytorch-animeGAN/dataset')

In [None]:
# Function to make the edges smooth

from glob import glob
import numpy as np
import cv2, os
from tqdm import tqdm

def make_edge_smooth(dataset_name, img_size):
    """Write an edge-smoothed copy of every image in ``dataset/<dataset_name>``.

    Each image is resized to ``img_size`` x ``img_size``. Edges are detected with
    Canny on the grayscale version and widened by dilation; only the pixels inside
    that edge region are replaced by a Gaussian-weighted average of their
    neighbourhood. Results are written under the same file names to
    ``dataset/<dataset_name>_smooth`` (both paths are relative to the current
    working directory). Files that OpenCV cannot decode are skipped with a message.

    Args:
        dataset_name: Name of the folder inside ``dataset/`` that holds the images.
        img_size: Side length, in pixels, of the square output images.

    Raises:
        IOError: If a processed image cannot be written to disk.
    """
    file_list = glob('dataset/{}/*.*'.format(dataset_name))
    save_dir = 'dataset/{}_smooth'.format(dataset_name)
    os.makedirs(save_dir, exist_ok=True)

    kernel_size = 5
    pad = kernel_size // 2  # border needed so every pixel has a full window
    kernel = np.ones((kernel_size, kernel_size), np.uint8)
    # 2-D Gaussian weights built as the outer product of the 1-D kernel with itself
    gauss = cv2.getGaussianKernel(kernel_size, 0)
    gauss = gauss * gauss.transpose(1, 0)

    for f in tqdm(file_list):
        file_name = os.path.basename(f)

        bgr_img = cv2.imread(f)
        gray_img = cv2.imread(f, 0)
        # cv2.imread returns None instead of raising when a file is not a readable image
        if bgr_img is None or gray_img is None:
            tqdm.write('Skipping unreadable file: {}'.format(f))
            continue

        bgr_img = cv2.resize(bgr_img, (img_size, img_size))
        pad_img = np.pad(bgr_img, ((pad, pad), (pad, pad), (0, 0)), mode='reflect')
        gray_img = cv2.resize(gray_img, (img_size, img_size))

        edges = cv2.Canny(gray_img, 100, 200)
        dilation = cv2.dilate(edges, kernel)

        # Gaussian-weighted sum for all pixels at once: add up the kernel_size**2
        # shifted views of the padded image, each scaled by its kernel weight.
        # This is the same weighted sum as evaluating each pixel's window
        # individually, without a Python-level loop over pixels.
        blurred = np.zeros(bgr_img.shape, dtype=np.float64)
        for dy in range(kernel_size):
            for dx in range(kernel_size):
                blurred += gauss[dy, dx] * pad_img[dy:dy + img_size, dx:dx + img_size]

        # Only pixels in the dilated edge region take the blurred value; the
        # assignment casts the float sums to the image's integer dtype.
        gauss_img = np.copy(bgr_img)
        edge_mask = dilation != 0
        gauss_img[edge_mask] = blurred[edge_mask]

        # Explicit check instead of `assert`, which is removed under `python -O`
        # and would then skip the write entirely.
        out_path = os.path.join(save_dir, file_name)
        if not cv2.imwrite(out_path, gauss_img):
            raise IOError('Failed to write {}'.format(out_path))

In [None]:
# Smooth the edges of the images in dataset/cartoonset10k at 256x256;
# the results are written to dataset/cartoonset10k_smooth.
make_edge_smooth('cartoonset10k',256)

100%|██████████| 10000/10000 [41:13<00:00,  4.04it/s]


In [None]:
# Zip the dataset folder (including the smoothed images) and store the archive on Google Drive.
!zip -qr '/content/drive/MyDrive/Preprocessed_Cartoonset_10k.zip' '/content/pytorch-animeGAN/dataset'

# Getting the Training Photos (CelebA Dataset)

In [None]:
# Extract the CelebA archive from Drive into the current working directory.
# NOTE(review): the next cell reads /content/img_align_celeba, so this presumably
# has to run from /content — confirm.
!unzip -q '/content/drive/MyDrive/celebA_Kaggle.zip'

In [None]:
#@title Split the dataset

import os
import shutil

# Path to the folder containing the images
folder_path = "/content/img_align_celeba"

# Number of images (in sorted file-name order) that go into the train split
n_train = 60000

# Create the test and train folders; exist_ok keeps the cell re-runnable
test_dir = os.path.join('/content/', "test")
train_dir = os.path.join('/content/', "train")
os.makedirs(test_dir, exist_ok=True)
os.makedirs(train_dir, exist_ok=True)

# Get the list of image files
files = os.listdir(folder_path)
image_files = [f for f in files if f.endswith((".jpg", ".png"))]

# Sort the image files alphabetically so the split is the same on every run
image_files.sort()

# Copy the first n_train images to the train folder and the remaining ones to the
# test folder. Slicing (rather than indexing up to a fixed count) also works when
# the folder holds fewer than n_train images: the test split is then simply empty.
for split_dir, split_files in ((train_dir, image_files[:n_train]),
                               (test_dir, image_files[n_train:])):
    for image_name in split_files:
        shutil.copy(os.path.join(folder_path, image_name),
                    os.path.join(split_dir, image_name))


In [None]:
#@title Resizing the dataset
from PIL import Image
import os

# Locations of the two splits
train_path = "/content/train"
test_path = "/content/test"

# Target (width, height) applied to every image
new_size = (256, 256)

# Apply the same in-place resize to the train split first, then to the test split
for split_path in (train_path, test_path):
    for filename in os.listdir(split_path):
        # Anything that is not a .jpg/.png file is left untouched
        if not filename.endswith((".jpg", ".png")):
            continue
        filepath = os.path.join(split_path, filename)
        with Image.open(filepath) as source_img:
            # The resized copy overwrites the file it was read from
            resized_img = source_img.resize(new_size)
            resized_img.save(filepath)



In [None]:
# Create the folder that will hold both splits; -p keeps the cell re-runnable
# when the folder already exists.
!mkdir -p ./celebA_split

In [None]:
# Move the test split into the celebA_split folder.
!mv /content/test ./celebA_split

In [None]:
# Move the train split into the celebA_split folder.
!mv /content/train ./celebA_split

In [None]:
# Zip the celebA_split folder (both splits) into a single archive, quietly and recursively.
!zip -qr './celebA_split.zip' ./celebA_split

In [None]:
# Copy the archive to Google Drive so it can be reused in later sessions.
!cp ./celebA_split.zip /content/drive/MyDrive

# Extracting the Train and Test Split from CelebA Dataset 


In [None]:
# Extract the saved train/test split from Drive into the current working directory.
# NOTE(review): the following cells read /content/celebA_split, so this presumably
# has to run from /content — confirm.
!unzip -q /content/drive/MyDrive/celebA_split.zip

In [None]:
# Move the test split into the project's dataset folder.
# NOTE(review): if /content/AnimeGAN-Pytorch/dataset does not exist yet, mv renames
# the test folder to "dataset" instead of moving it inside — confirm the folder exists.
!mv /content/celebA_split/test /content/AnimeGAN-Pytorch/dataset

In [None]:
# Move the train split into the project's dataset folder.
!mv /content/celebA_split/train /content/AnimeGAN-Pytorch/dataset

In [None]:
# Rename dataset/train to dataset/train_photo, the folder name used by the trimming cell below.
!mv /content/AnimeGAN-Pytorch/dataset/train /content/AnimeGAN-Pytorch/dataset/train_photo

# Trimming the dataset to reduce training time: 2000 images in cartoonset10k and 6000 images in train_photo


In [None]:
#Deprecated
# Randomly delete files from the smooth folder until only 2000 remain.

import os
import random
path = '/content/AnimeGAN-Pytorch/dataset/cartoonset10k/smooth'
# Sorted listing plus a fixed seed: the same files are selected on every run.
files = sorted(os.listdir(path))
# A plain condition replaces exit(), which in a notebook shuts the kernel down
# instead of just skipping the rest of this cell.
n_extra = len(files) - 2000
if n_extra > 0:
    for name in random.Random(42).sample(files, n_extra):
        os.remove(os.path.join(path, name))



In [None]:
#Deprecated
# Print how many files are left in `path`. Both `path` and `os` come from the
# trimming cell above, so that cell has to be run first.
print(len(os.listdir(path)))

In [None]:
#Deprecated
# Delete every file in the style folder whose name has no counterpart in the
# smooth folder, so both folders hold the same set of file names.
import os
path = '/content/AnimeGAN-Pytorch/dataset/cartoonset10k/smooth'
files_to_keep = os.listdir(path)
all_files_path = '/content/AnimeGAN-Pytorch/dataset/cartoonset10k/style'
all_files = os.listdir(all_files_path)
# A set gives constant-time membership tests; scanning the list for every file
# would make this loop quadratic in the number of files.
keep_names = set(files_to_keep)
for file in all_files:
    if file not in keep_names:
        os.remove(os.path.join(all_files_path, file))



In [None]:
#Deprecated
# Print how many files are left in the style folder; `all_files_path` is defined
# in the cell above, which has to be run first.
print(len(os.listdir(all_files_path)))

In [None]:
#Deprecated
# Print each style file whose name also appears in `files_to_keep`
# (the list built by the earlier clean-up cell).
all_files_path = '/content/AnimeGAN-Pytorch/dataset/cartoonset10k/style'
all_files = os.listdir(all_files_path)
for kept_name in (name for name in all_files if name in files_to_keep):
    print(kept_name)

In [None]:
# Randomly delete photos from train_photo until only 6000 remain.
import os
import random
path = '/content/AnimeGAN-Pytorch/dataset/train_photo'
# Sorted listing plus a fixed seed: the same files are selected on every run.
files = sorted(os.listdir(path))
# A plain condition replaces exit(), which in a notebook shuts the kernel down
# instead of just skipping the rest of this cell.
n_extra = len(files) - 6000
if n_extra > 0:
    for name in random.Random(42).sample(files, n_extra):
        os.remove(os.path.join(path, name))



In [None]:
# Zip the trimmed dataset folder and store the archive on Google Drive.
!zip -qr '/content/drive/MyDrive/dataset_trimmed.zip' /content/AnimeGAN-Pytorch/dataset

# Unzipping the Trimmed Dataset, Deleting Cartoonset10k, and Combining It with the AnimeGAN Dataset


In [None]:
# Change into the project directory; the explicit %cd magic does not depend on
# IPython's automagic setting the way a bare `cd` does.
%cd /content/AnimeGAN-Pytorch

/content/pytorch-animeGAN


In [None]:
# Extract the trimmed dataset archive into the current working directory.
# NOTE(review): the cells below expect the extracted files under
# ./content/pytorch-animeGAN/dataset — confirm this matches how the archive was created.
!unzip -q '/content/drive/MyDrive/dataset_trimmed.zip'

In [None]:
# Extract the AnimeGAN dataset archive into the current working directory.
# NOTE(review): a later cell moves dataset/Shinkai, so the archive presumably
# unpacks into ./dataset — confirm.
!unzip -q '/content/drive/MyDrive/AnimeGAN_Dataset.zip'

In [None]:
# Delete the cartoonset10k folder from the extracted trimmed dataset.
!rm -rf '/content/AnimeGAN-Pytorch/content/pytorch-animeGAN/dataset/cartoonset10k'

In [None]:
# Move the Shinkai folder from the project's dataset folder into the extracted dataset folder.
!mv '/content/AnimeGAN-Pytorch/dataset/Shinkai' '/content/AnimeGAN-Pytorch/content/pytorch-animeGAN/dataset'

In [None]:
# Delete everything that is still in the project's dataset folder (Shinkai was moved out above).
!rm -rf /content/AnimeGAN-Pytorch/dataset/*

In [None]:
# Move the contents of the extracted dataset folder into the project's dataset folder.
!mv /content/AnimeGAN-Pytorch/content/pytorch-animeGAN/dataset/* /content/AnimeGAN-Pytorch/dataset

In [None]:
# Zip the combined dataset folder and store the archive on Google Drive.
!zip -qr '/content/drive/MyDrive/shinkai_celebA.zip' /content/AnimeGAN-Pytorch/dataset/