### Imports

In [47]:
import numpy as np
import matplotlib.pyplot as plt
import glob
import cv2
import os
import seaborn as sns
import pandas as pd
from skimage.filters import sobel
import shutil
import random
import warnings

### Paths
After downloading the files from the link, change the paths below to match where you saved them,
but leave the trailing `\painting-to-artist\dataset` and `\painting-to-artist\workspace` parts as they are.

In [81]:
# Source folder: sample_images() treats each entry inside it as one artist's folder of images.
dataset_path = os.path.abspath(r'D:\Program Files (x86)\painting-to-artist\dataset')

# Destination folders that receive the sampled training and testing images.
training_path = os.path.abspath(r'D:\Program Files (x86)\painting-to-artist\workspace\train')
testing_path = os.path.abspath(r'D:\Program Files (x86)\painting-to-artist\workspace\test')

In [82]:
def sample_images(data_path, train_path, test_path, n, ratio=0.8, *, seed=None):
    """
    Copy a random train/test sample of each artist's images into new folders.

    Every sub-directory of ``data_path`` is treated as one artist. For each
    artist, up to ``n`` image files are picked at random and copied into
    ``train_path/<artist>`` and ``test_path/<artist>``; the two selections
    never overlap within a single call. Source files are copied, not moved.

    Parameters
    ----------
    data_path : str
        Directory containing one sub-directory per artist.
    train_path : str
        Root directory for the training copies (created if missing).
    test_path : str
        Root directory for the testing copies (created if missing).
    n : int
        Target number of images per artist (train + test). When an artist has
        fewer than ``n`` images, the counts shrink to what is available.
    ratio : float, default 0.8
        Fraction of the sampled images that goes to training; must lie
        strictly between 0 and 1.
    seed : int or None, keyword-only, default None
        When given, the shuffle uses a private ``random.Random(seed)`` so the
        split is reproducible. When None, the global ``random`` state is used.

    Raises
    ------
    ValueError
        If ``ratio`` is not strictly between 0 and 1, or if ``n`` is negative.

    Notes
    -----
    Existing files in the destination folders are not removed, so calling
    this repeatedly without emptying them first can leave the same image in
    both the training and the testing folder.
    """
    if ratio >= 1 or ratio <= 0:
        raise ValueError("The ratio should be between 0 and 1 (non-inclusive)")

    # A negative n would turn into negative slice bounds below and silently
    # select the wrong images, so reject it up front.
    if n < 0:
        raise ValueError("n should be a non-negative number of images")

    if ratio < 0.5:
        warnings.warn("The ratio is less than 0.5, not advised for good training")

    # Private generator only when a seed is requested; otherwise keep using
    # the module-level generator so an earlier random.seed() is still honoured.
    rng = random.Random(seed) if seed is not None else random

    # os.listdir order is arbitrary; sorting makes a seeded run reproducible.
    for artist in sorted(os.listdir(data_path)):
        artist_path = os.path.join(data_path, artist)

        # Skip stray files (README, CSV, .DS_Store, ...) next to the artist folders.
        if not os.path.isdir(artist_path):
            continue

        # Only regular files can be copied with shutil.copy; ignore nested folders.
        images = sorted(
            name
            for name in os.listdir(artist_path)
            if os.path.isfile(os.path.join(artist_path, name))
        )
        rng.shuffle(images)

        # Cap both counts by what the artist actually has available.
        n_train = min(int(n * ratio), int(len(images) * ratio))
        n_test = min(n - n_train, len(images) - n_train)

        # Consecutive, non-overlapping slices of the shuffled list.
        splits = (
            (os.path.join(train_path, artist), images[:n_train]),
            (os.path.join(test_path, artist), images[n_train:n_train + n_test]),
        )

        for target_dir, selected in splits:
            # The artist folder is created even when no image ends up in it.
            os.makedirs(target_dir, exist_ok=True)
            for image in selected:
                shutil.copy(
                    os.path.join(artist_path, image),
                    os.path.join(target_dir, image),
                )

In [85]:
def clear_files(directory):
    """
    Delete every file found under ``directory``, at any depth.

    Files directly inside ``directory`` are removed as well as those in its
    subdirectories. The directories themselves are left in place.
    """
    for parent, _subdirs, filenames in os.walk(directory):
        doomed = (os.path.join(parent, name) for name in filenames)
        for path in doomed:
            os.remove(path)

### Sampling the Images to Training and Testing

In [84]:
# Sample up to 40 images per artist, sending 80% of them to the training folder.
sample_images(dataset_path, training_path, testing_path, n=40, ratio=0.8)

In [86]:
# Delete every file under the training and testing folders; the folders themselves are kept.
# NOTE(review): presumably run before re-sampling so old and new splits do not mix — confirm intended cell order.
clear_files(training_path)
clear_files(testing_path)