In [1]:
import requests
import zipfile
import io
import scipy.io
import tempfile
import splitfolders 
import shutil
import random

import os
import numpy as np
import matplotlib.pyplot as plt
import cv2

# Loading dataset

If you only downloaded the notebook without the dataset, run the following two blocks:

In [3]:
# Download and unpack the dataset only when ./images is absent, so re-running
# the notebook does not fetch the archive again.
if not os.path.isdir('./images'):
    url = "https://github.com/HenriqueDSousa/pix2pix/raw/main/dataset/images_dataset.zip"

    # Fetch first and create the directory only after the download succeeded:
    # creating ./images up front would leave an empty directory behind on a
    # failed request, and the guard above would then skip the download on
    # every later run. The timeout keeps the cell from hanging forever on a
    # stalled connection.
    response = requests.get(url, timeout=60)
    response.raise_for_status()

    os.makedirs("./images", exist_ok=True)

    # Unzip the in-memory archive into ./images
    with zipfile.ZipFile(io.BytesIO(response.content)) as zip_ref:
        zip_ref.extractall("./images")


### Extracting files

Select all images and split them into train and test sets. The output is written to the *images/train_images* and *images/test_images* directories.

In [5]:
def split_images(source_dir, train_dir, test_dir, train_ratio=0.8, seed=None):
    """Randomly copy the images of ``source_dir`` into a train and a test folder.

    Only regular files whose name ends in ``.png``, ``.jpg`` or ``.jpeg``
    (case-insensitive) are considered. Files are copied, not moved, so
    ``source_dir`` is left untouched.

    Args:
        source_dir: Directory containing the images to split.
        train_dir: Destination directory for the training images (created if missing).
        test_dir: Destination directory for the testing images (created if missing).
        train_ratio: Fraction of the images copied to ``train_dir``; the
            remainder goes to ``test_dir``. Must be within ``[0, 1]``.
        seed: Optional seed for the shuffle. When given, the same seed always
            produces the same split. When ``None``, the module-level
            ``random`` generator is used, so ``random.seed(...)`` still applies.

    Raises:
        ValueError: If ``train_ratio`` is outside ``[0, 1]``.
        FileNotFoundError: If ``source_dir`` does not exist.
    """
    if not 0.0 <= train_ratio <= 1.0:
        raise ValueError(f"train_ratio must be between 0 and 1, got {train_ratio!r}")

    image_extensions = ('.png', '.jpg', '.jpeg')

    # Read the source before creating any output directory: a wrong source_dir
    # must not leave empty train/test folders behind. The listing is sorted
    # because os.listdir returns entries in arbitrary order, which would make
    # even a seeded shuffle irreproducible.
    image_files = sorted(
        f for f in os.listdir(source_dir)
        if f.lower().endswith(image_extensions)
        and os.path.isfile(os.path.join(source_dir, f))
    )

    os.makedirs(train_dir, exist_ok=True)
    os.makedirs(test_dir, exist_ok=True)

    # A private generator keeps a seeded split from disturbing the global RNG state.
    shuffler = random if seed is None else random.Random(seed)
    shuffler.shuffle(image_files)

    split_index = int(train_ratio * len(image_files))

    # Copy the training files, then the testing files
    for destination, files in ((train_dir, image_files[:split_index]),
                               (test_dir, image_files[split_index:])):
        for file in files:
            shutil.copy2(os.path.join(source_dir, file), os.path.join(destination, file))


In [6]:
# Folder holding the extracted images, and the destinations of the split.
source_dir = 'images/images_dataset'
train_dir = 'images/train_images'
test_dir = 'images/test_images'

# Split only when either output folder is missing, so re-running the cell
# keeps an existing split. The guard reuses the variables above so the check
# and the split can never point at different paths.
if not (os.path.isdir(train_dir) and os.path.isdir(test_dir)):
    # split_images shuffles with the module-level random generator; seeding it
    # here makes the train/test assignment repeatable across fresh runs.
    random.seed(42)
    split_images(source_dir, train_dir, test_dir, train_ratio=0.8)

## Images