# Script for Splitting and Copying Images

This script performs the following tasks:

1. **Set up parameters**:
   - Defines the path to the dataset and other parameters.

2. **Prepare the dataset**:
   - Lists all files in the specified image directory.
   - Randomly shuffles the list of files.
   - Splits the files into training and test sets, placing a fraction `p` of the files in the test set and the rest in the training set.

3. **Copy images to the appropriate directories**:
   - Copies images into training and test directories according to the split.

In [None]:
import os
import shutil
import torch
from lib.random import set_random_seed
from lib.notebook import get_tqdm

In [None]:
# Create the output folders for both splits up front; each split gets a single
# sub-folder named "0". Folders that already exist are left untouched.
for out_dir in ("data/celeba/train/0", "data/celeba/test/0"):
    os.makedirs(out_dir, exist_ok=True)

In [None]:
# Progress-bar wrapper used around the copy loops further down.
# NOTE(review): get_tqdm is a project helper (lib.notebook); presumably it picks
# the notebook or console flavour of tqdm - confirm there.
tqdm = get_tqdm()

In [None]:
# Fix the random seed before shuffling.
# NOTE(review): set_random_seed is a project helper (lib.random); presumably it
# seeds torch's RNG so that torch.randperm below is repeatable - confirm there.
set_random_seed(7509)

seed: 7509


7509

In [None]:
# Path template for everything under data/celeba; "{}" is filled in with a
# sub-folder name (the image folder, "train" or "test").
path = "data/celeba/{}"
# Sub-folder of data/celeba that holds the source images to be split.
img_folder = "img_align_celeba"
# Fraction of the images assigned to the test split; the rest goes to training.
p = 0.1

In [None]:
# Prerequisite: download img_align_celeba.zip and unzip it into data/celeba/
# (available from, e.g., https://cseweb.ucsd.edu/~weijian/static/datasets/celeba/).

In [None]:
# Collect the names of all regular files in the image folder.
#
# Directory listings come back in arbitrary, filesystem-dependent order, so the
# names are sorted: without a stable order the seeded permutation would select
# different files on different machines and the split would not be reproducible.
# os.scandir is used so the file-type check can reuse the directory entry's
# cached information instead of building the path and calling isfile per name.
with os.scandir(path.format(img_folder)) as entries:
    files = sorted(entry.name for entry in entries if entry.is_file())

KeyboardInterrupt: 

In [None]:
# Random permutation of the indices 0 .. len(files) - 1; slicing it below
# yields the shuffled training/test split.
perm = torch.randperm(len(files))

In [None]:
# The first `p` fraction of the shuffled indices forms the test split,
# everything after that cut forms the training split.
n_test = int(len(files) * p)
test, training = perm[:n_test], perm[n_test:]

In [None]:
# Display the sizes of the two splits as a quick sanity check.
len(training), len(test)

(182340, 20259)

In [None]:
# Copy every image selected for the training split into the "train/0" folder,
# keeping the original file name.
src_dir = path.format(img_folder)
dst_dir = path.format("train")
for idx in tqdm(training):
    name = files[idx]
    shutil.copy(f"{src_dir}/{name}", f"{dst_dir}/0/{name}")

  0%|          | 0/182340 [00:00<?, ?it/s]

In [None]:
# Copy every image selected for the test split into the "test/0" folder,
# keeping the original file name.
src_dir = path.format(img_folder)
dst_dir = path.format("test")
for idx in tqdm(test):
    name = files[idx]
    shutil.copy(f"{src_dir}/{name}", f"{dst_dir}/0/{name}")

  0%|          | 0/20259 [00:00<?, ?it/s]