In [None]:
import pandas as pd
import os
from shutil import copyfile, move
from sklearn.preprocessing import KBinsDiscretizer

In [None]:
# Source images: one sub-folder per MLS number lives beneath this folder.
images_root_folder = '/realtor_images/'

# Every derived dataset folder is created beneath the current working
# directory. All paths keep a trailing '/' so that names can be appended
# by plain string concatenation.
_working_directory = os.getcwd()
classified_root_folder = _working_directory + '/classified/'
train_root_folder = _working_directory + '/train/'
val_root_folder = _working_directory + '/val/'
test_root_folder = _working_directory + '/test/'

In [None]:
def place_images_to_class_folder(class_identifier, mls_number,
                                 images_root=None, classified_root=None):
    """Copy every image file of one listing into the folder of its class.

    Parameters
    ----------
    class_identifier : int or str
        Name of the class sub-folder (created if missing) to copy into.
    mls_number : int or str
        Name of the listing's sub-folder beneath the images root.
    images_root : str, optional
        Folder containing one sub-folder per listing. Defaults to the
        module-level ``images_root_folder``.
    classified_root : str, optional
        Folder beneath which the class sub-folders are created. Defaults to
        the module-level ``classified_root_folder``.

    Notes
    -----
    The class folder is created even when the listing has no image folder,
    so empty class folders are possible. Files keep their original names;
    NOTE(review): if two listings of the same class contain identically
    named files, the later copy overwrites the earlier one - confirm that
    image file names are unique across listings.
    """
    # Resolve the defaults at call time so later changes to the module-level
    # configuration are honoured.
    if images_root is None:
        images_root = images_root_folder
    if classified_root is None:
        classified_root = classified_root_folder

    # os.path.join works whether or not the roots end with a separator.
    mls_folder = os.path.join(images_root, str(mls_number))
    classified_folder = os.path.join(classified_root, str(class_identifier))
    os.makedirs(classified_folder, exist_ok=True)

    if not os.path.isdir(mls_folder):
        return

    for file_name in os.listdir(mls_folder):
        source_path = os.path.join(mls_folder, file_name)
        # copyfile() only handles regular files; a nested directory inside
        # the listing folder would make it raise, so skip such entries.
        if not os.path.isfile(source_path):
            continue
        copyfile(source_path, os.path.join(classified_folder, file_name))

In [None]:
# Load the listings; the 'total_price' and 'mls' columns are used below.
realtor_properties_df = pd.read_csv('realtor_properties.csv')

# Bucket the listings into quantile-based price classes (100 bins requested).
properties_total_price = realtor_properties_df[['total_price']]
kBinsDisc = KBinsDiscretizer(n_bins=100, encode='ordinal', strategy='quantile')

# With encode='ordinal' the transformer yields one column of bin indices.
# Assign that column positionally instead of wrapping it in a DataFrame,
# which would be aligned on the index and only matches for a default index.
# NOTE(review): assumes the default ndarray output of scikit-learn - confirm
# no global `set_output(transform="pandas")` is active.
price_bins = kBinsDisc.fit_transform(properties_total_price)
realtor_properties_df['class'] = price_bins[:, 0].astype(int)

# Iterate over the two needed columns directly. iterrows() builds one Series
# per row and does not preserve dtypes, so integer class / MLS values could be
# upcast to float and produce folder names such as '5.0' or '123.0'.
for class_identifier, mls_number in zip(realtor_properties_df['class'],
                                        realtor_properties_df['mls']):
    place_images_to_class_folder(class_identifier, mls_number)

# Create testing folder with all the classes in it: one image per class is
# moved out of the classified folder so it cannot end up in train/val.
for class_folder in sorted(os.listdir(classified_root_folder)):
    class_folder_path = os.path.join(classified_root_folder, class_folder)
    if not os.path.isdir(class_folder_path):
        continue

    # os.listdir() returns entries in arbitrary order; sort so the held-out
    # image is the same on every run.
    class_image_names = sorted(os.listdir(class_folder_path))
    if not class_image_names:
        # An empty class folder has nothing to hold out; indexing [0] here
        # would raise IndexError.
        continue

    first_image_file_name = class_image_names[0]
    first_image_file_path = os.path.join(class_folder_path, first_image_file_name)

    destination_file_folder = os.path.join(test_root_folder, class_folder)
    os.makedirs(destination_file_folder, exist_ok=True)

    move(first_image_file_path,
         os.path.join(destination_file_folder, first_image_file_name))

# Create training and validation dataset: within every class folder the first
# 80% of the images (in sorted name order) go to train, the rest to val.
train_percent = 80

for class_folder in sorted(os.listdir(classified_root_folder)):
    class_folder_path = os.path.join(classified_root_folder, class_folder)
    if not os.path.isdir(class_folder_path):
        continue

    # List once and sort: os.listdir() order is arbitrary, and sorting keeps
    # the split reproducible between runs.
    files_in_folder = sorted(os.listdir(class_folder_path))
    total_images = len(files_in_folder)

    # Number of images that belong to the training set. Multiplying before
    # dividing keeps whole results exact (e.g. 35 * 80 / 100 == 28.0) and,
    # unlike dividing by total_images, is safe for an empty folder.
    train_images_count = total_images * train_percent / 100

    for index, image_file_name in enumerate(files_in_folder):
        if index < train_images_count:
            destination_file_folder = os.path.join(train_root_folder, class_folder)
        else:
            destination_file_folder = os.path.join(val_root_folder, class_folder)

        os.makedirs(destination_file_folder, exist_ok=True)
        move(os.path.join(class_folder_path, image_file_name),
             os.path.join(destination_file_folder, image_file_name))