In [None]:
!pip install patool
import patoolib
patoolib.extract_archive("train-resized.zip")

In [None]:
import shutil
import pandas as pd
import os

def move_files(source_folder, files, destination_folder):
    """Move the named entries from one folder into another.

    Args:
        source_folder: Directory that currently holds the entries.
        files: Iterable of entry names (relative to ``source_folder``).
        destination_folder: Directory the entries are moved into; each entry
            keeps its name.
    """
    for entry in files:
        shutil.move(
            os.path.join(source_folder, entry),
            os.path.join(destination_folder, entry),
        )

def create_validation_set(train_folder, validation_folder, split_ratio=0.2,
                          train_csv="train-labels.csv",
                          validation_csv="validation-labels.csv"):
    """Split off a validation set from the training images and their labels.

    A fraction ``split_ratio`` of the entries in ``train_folder`` is moved to
    ``validation_folder``. The label rows belonging to exactly those entries
    are written to ``validation_csv`` and removed from ``train_csv``, which is
    rewritten in place (sorted by ``image_name``).

    The operation is destructive and not idempotent: every call moves a
    further share of the remaining training files and overwrites
    ``validation_csv``.

    Args:
        train_folder: Directory holding the training images.
        validation_folder: Directory receiving the validation images; created
            if it does not exist.
        split_ratio: Fraction of the entries in ``train_folder`` to move.
        train_csv: Path of the label CSV; must contain an ``image_name``
            column.
        validation_csv: Path the validation label CSV is written to.
    """
    os.makedirs(validation_folder, exist_ok=True)

    # os.listdir returns entries in arbitrary order. Sort by file stem so the
    # selection is deterministic and follows the same ordering as the labels.
    files = sorted(
        os.listdir(train_folder),
        key=lambda name: (os.path.splitext(name)[0], name),
    )

    # Number of entries that go to the validation set.
    num_validation_files = int(len(files) * split_ratio)
    validation_files = files[:num_validation_files]

    # Pick the label rows by membership instead of by position, so the labels
    # written to the validation CSV always describe the files actually moved.
    # Both the full file name and the stem are accepted, since image_name may
    # be stored with or without the extension (assumed: without -- TODO confirm).
    validation_keys = set(validation_files)
    validation_keys.update(os.path.splitext(name)[0] for name in validation_files)

    df = pd.read_csv(train_csv).sort_values(["image_name"])
    in_validation = df["image_name"].astype(str).isin(validation_keys)

    # Write the validation labels, then shrink the training labels accordingly.
    df[in_validation].to_csv(validation_csv, index=False)
    df[~in_validation].to_csv(train_csv, index=False)

    move_files(train_folder, validation_files, validation_folder)

# RUN ONLY ONCE: the split moves files and rewrites the label CSVs, so running
# it again would take a further share out of "train" and overwrite
# "validation-labels.csv". The existence check makes re-running this cell safe.
if os.path.exists("validation-labels.csv"):
    print("validation-labels.csv already exists; skipping validation split.")
else:
    create_validation_set("train", "validation")
print("train:", len(os.listdir("train")), "| val: ", len(os.listdir("validation")))

In [None]:
# Revert validation set creation (empties validation folder and puts the files
# back to training folder and modifies the csv accordingly)
# NOTE: this is an undo step. It deletes "validation-labels.csv", which the
# later create_class_folders("validation", "validation-labels.csv") call reads,
# so skip this cell when running the notebook from top to bottom.

# Move every entry currently in "validation" back into "train".
files = os.listdir("validation")
move_files("validation", files, "train")
print("train:", len(os.listdir("train")), "| val: ", len(os.listdir("validation")))

# Rebuild the training labels: validation rows first, then the remaining
# training rows, written back over the training CSV.
validation_csv = "validation-labels.csv"
train_csv = "train-labels.csv"
validation_df = pd.read_csv(validation_csv)
train_df = pd.read_csv(train_csv)
restored_train_df = pd.concat([validation_df, train_df], ignore_index=True)
restored_train_df.to_csv(train_csv, index=False)
# The validation labels now live in the training CSV again; drop the separate file.
os.remove(validation_csv)
print(len(restored_train_df))

In [None]:
import os
import shutil
import pandas as pd

def create_class_folders(image_folder, csv_file, image_extension='.jpg'):
    """Sort the images of ``image_folder`` into ``class0``/``class1`` subfolders.

    The CSV's header row is discarded and its two columns are read as
    ``Image`` (image name without extension) and ``Class`` (0 or 1). Each
    listed image is moved from ``image_folder`` into the subfolder matching
    its class.

    The function is safe to re-run: existing class folders are reused, and
    images that are already in their class folder are left alone. Rows with a
    class other than 0 or 1, and rows whose image cannot be found, are skipped
    with a printed message.

    Args:
        image_folder: Directory holding the images; the class subfolders are
            created inside it.
        csv_file: Path of the label CSV (one header row, two columns).
        image_extension: Suffix appended to each image name to obtain the
            file name.
    """
    # Read the CSV file into a DataFrame, replacing its header with fixed names
    df = pd.read_csv(csv_file, header=0, names=['Image', 'Class'])

    # One subfolder per class label inside the image directory
    class_folders = {
        0: os.path.join(image_folder, 'class0'),
        1: os.path.join(image_folder, 'class1'),
    }
    for folder in class_folders.values():
        # exist_ok keeps a second run from failing on the existing folders
        os.makedirs(folder, exist_ok=True)

    # Move images to their respective class folders
    for image_stem, label in zip(df['Image'], df['Class']):
        image_name = f"{image_stem}{image_extension}"

        destination_folder = class_folders.get(label)
        if destination_folder is None:
            print(f"Skipping invalid class label for image {image_name}")
            continue

        image_path = os.path.join(image_folder, image_name)
        destination_path = os.path.join(destination_folder, image_name)

        if not os.path.exists(image_path):
            # Already sorted on a previous run -> nothing to do; otherwise the
            # label has no matching file, so report it instead of aborting
            # with the folder half organized.
            if not os.path.exists(destination_path):
                print(f"Skipping missing image {image_name}")
            continue

        # Move the image to the destination folder
        shutil.move(image_path, destination_path)

# Organize both splits into class subfolders, training split first.
for split_folder, split_labels_csv in (
    ("train", "train-labels.csv"),
    ("validation", "validation-labels.csv"),
):
    create_class_folders(split_folder, split_labels_csv)
print("Images have been organized into class folders.")