# Creating a Dataset from a Kaggle Dataset

In [5]:
from PIL import Image
import os
from sklearn.model_selection import train_test_split
import shutil

### Creating low resolution images from Face Dataset

In [17]:
# Generate a down-scaled copy of every supported image in input_dir and write
# it to output_dir under the same filename.
input_dir = "/Users/kavian/Desktop/data/high_resolution_images"
output_dir = "/Users/kavian/Desktop/data/low_resolution_images"
scale_factor = 0.5  # Adjust this to set the desired low-resolution scale factor
image_extensions = (".jpg", ".png", ".gif")

# exist_ok=True replaces the separate "exists?" check before creating the directory
os.makedirs(output_dir, exist_ok=True)

for filename in os.listdir(input_dir):
    # Compare case-insensitively so names such as "photo.JPG" are not silently skipped
    if not filename.lower().endswith(image_extensions):
        continue
    # The context manager closes the source file once the image has been processed,
    # so handles do not accumulate while iterating over a large directory
    with Image.open(os.path.join(input_dir, filename)) as img:
        # Clamp to 1 pixel so truncation of a very small image never yields a zero dimension
        new_size = (
            max(1, int(img.width * scale_factor)),
            max(1, int(img.height * scale_factor)),
        )
        low_res_img = img.resize(new_size, Image.LANCZOS)
        low_res_img.save(os.path.join(output_dir, filename))


### Split Dataset into Train, Validation and Test Sets

In [12]:
def split_dataset(source_dir, train_dir, test_dir, validation_dir, split_ratio=(0.8, 0.1, 0.1)):
    """Copy the files in ``source_dir`` into train, test and validation directories.

    The split is performed by two chained ``train_test_split`` calls with
    ``random_state=42``: the first separates the training files from the rest,
    the second divides the rest into test and validation files.

    Args:
        source_dir: Directory whose files are split.
        train_dir: Destination directory for the training files.
        test_dir: Destination directory for the test files.
        validation_dir: Destination directory for the validation files.
        split_ratio: ``(train, test, validation)`` fractions. Only the test and
            validation entries are read; the training share is the remainder.
    """
    # os.listdir returns names in arbitrary order. Sorting makes the seeded split
    # depend only on the set of filenames, so repeated runs -- and runs on another
    # directory holding the same filenames -- produce the same partition.
    # Sub-directories are skipped because shutil.copy cannot copy them.
    file_list = sorted(
        name for name in os.listdir(source_dir)
        if os.path.isfile(os.path.join(source_dir, name))
    )

    holdout_fraction = split_ratio[1] + split_ratio[2]
    train_files, holdout_files = train_test_split(
        file_list, test_size=holdout_fraction, random_state=42
    )
    # Within the held-out files, the validation share is relative to the hold-out size
    test_files, validation_files = train_test_split(
        holdout_files, test_size=split_ratio[2] / holdout_fraction, random_state=42
    )

    # Create the directories if they don't exist
    destinations = (
        (train_dir, train_files),
        (test_dir, test_files),
        (validation_dir, validation_files),
    )
    for target_dir, _ in destinations:
        os.makedirs(target_dir, exist_ok=True)

    # Copy images to their respective directories
    for target_dir, filenames in destinations:
        for filename in filenames:
            shutil.copy(os.path.join(source_dir, filename), os.path.join(target_dir, filename))

# Partition the low-resolution images using the function's default split ratio
source_dir = "/Users/kavian/Desktop/data/low_resolution_images"
train_dir = "/Users/kavian/Desktop/data/train/low_resolution"
test_dir = "/Users/kavian/Desktop/data/test/low_resolution"
validation_dir = "/Users/kavian/Desktop/data/validation/low_resolution"
split_dataset(
    source_dir=source_dir,
    train_dir=train_dir,
    test_dir=test_dir,
    validation_dir=validation_dir,
)

# Partition the high-resolution images the same way, as a separate call
source_dir = "/Users/kavian/Desktop/data/high_resolution_images"
train_dir = "/Users/kavian/Desktop/data/train/high_resolution"
test_dir = "/Users/kavian/Desktop/data/test/high_resolution"
validation_dir = "/Users/kavian/Desktop/data/validation/high_resolution"
split_dataset(
    source_dir=source_dir,
    train_dir=train_dir,
    test_dir=test_dir,
    validation_dir=validation_dir,
)


### Split Low- and High-Resolution Images into Paired Train, Validation and Test Sets

In [18]:
from sklearn.model_selection import train_test_split
import random
import os
import shutil

def create_subdirectories(root_dir, subdirs):
    """Ensure that each name in ``subdirs`` exists as a directory under ``root_dir``.

    Missing parent directories are created as needed, and directories that
    already exist are left untouched.
    """
    for name in subdirs:
        target_path = os.path.join(root_dir, name)
        os.makedirs(target_path, exist_ok=True)

def split_dataset(low_resolution_source_dir, high_resolution_source_dir, train_dir, test_dir, validation_dir, split_ratio=(0.8, 0.1, 0.1), random_seed=42):
    """Split paired low/high-resolution images into train, test and validation sets.

    The partition is computed on the low-resolution filenames only. A
    high-resolution file is placed in the split whose low-resolution list
    contains the same filename; high-resolution files without a same-named
    low-resolution file are not copied. Each destination directory receives a
    ``low_resolution`` and a ``high_resolution`` sub-directory.

    Args:
        low_resolution_source_dir: Directory holding the low-resolution images.
        high_resolution_source_dir: Directory holding the high-resolution images.
        train_dir: Root directory of the training split.
        test_dir: Root directory of the test split.
        validation_dir: Root directory of the validation split.
        split_ratio: ``(train, test, validation)`` fractions. Only the test and
            validation entries are read; the training share is the remainder.
        random_seed: Seed used for the shuffles and for both
            ``train_test_split`` calls.
    """
    # Sort the directory listings so the seeded shuffle starts from a consistent order
    low_resolution_file_list = sorted(os.listdir(low_resolution_source_dir))
    high_resolution_file_list = sorted(os.listdir(high_resolution_source_dir))

    # Shuffle both lists with a fixed random seed. The high-resolution order only
    # determines the order in which those files are copied below.
    random.seed(random_seed)
    random.shuffle(low_resolution_file_list)
    random.seed(random_seed)
    random.shuffle(high_resolution_file_list)

    # Split the low-resolution filenames into training, test, and validation sets
    holdout_fraction = split_ratio[1] + split_ratio[2]
    train_files_low_res, holdout_files_low_res = train_test_split(
        low_resolution_file_list, test_size=holdout_fraction, random_state=random_seed
    )
    # Within the held-out files, the validation share is relative to the hold-out size
    test_files_low_res, validation_files_low_res = train_test_split(
        holdout_files_low_res, test_size=split_ratio[2] / holdout_fraction, random_state=random_seed
    )

    splits = (
        (train_dir, train_files_low_res),
        (test_dir, test_files_low_res),
        (validation_dir, validation_files_low_res),
    )

    # Create the directories for train, test, and validation
    for split_dir, _ in splits:
        create_subdirectories(split_dir, ["high_resolution", "low_resolution"])

    # Copy low-resolution images to their respective directories
    for split_dir, low_res_files in splits:
        for filename in low_res_files:
            shutil.copy(
                os.path.join(low_resolution_source_dir, filename),
                os.path.join(split_dir, "low_resolution", filename),
            )

    # Copy each high-resolution image to the split of its same-named low-resolution file.
    # A set gives constant-time membership tests instead of scanning a list per file.
    for split_dir, low_res_files in splits:
        low_res_names = set(low_res_files)
        for filename in high_resolution_file_list:
            if filename in low_res_names:
                shutil.copy(
                    os.path.join(high_resolution_source_dir, filename),
                    os.path.join(split_dir, "high_resolution", filename),
                )

# Seed passed to split_dataset so the split can be reproduced
random_seed = 42

# Source directories of the image pairs
low_resolution_source_dir = "/Users/kavian/Desktop/data/low_resolution_images"
high_resolution_source_dir = "/Users/kavian/Desktop/data/high_resolution_images"

# Root directories of the three splits
train_dir = "/Users/kavian/Desktop/data/train"
test_dir = "/Users/kavian/Desktop/data/test"
validation_dir = "/Users/kavian/Desktop/data/validation"

split_dataset(
    low_resolution_source_dir=low_resolution_source_dir,
    high_resolution_source_dir=high_resolution_source_dir,
    train_dir=train_dir,
    test_dir=test_dir,
    validation_dir=validation_dir,
    random_seed=random_seed,
)
