In [1]:
# Import packages
import os
import re
import shutil

import pandas as pd

## CREATES NEW FOLDERS FOR EACH LABEL AND NUMBERED SUB-FOLDERS ##

In [8]:
# Parent directories that will each receive one numbered sub-folder per labeled source folder
pass_directory = "Photos Upload\\pass"
fail_directory = "Photos Upload\\fail"

# Labeled images come from source folders 0 - 135 and 328 - 438.
# Folder names are zero-padded to three digits ("000", "010", "135", "328", ...).
numbered_folder_names = [
    f"{number:03d}"
    for folder_range in (range(0, 136), range(328, 439))
    for number in folder_range
]

# Create the same numbered sub-folders under both the pass and the fail directory.
# exist_ok=True makes this cell safe to re-run when the folders are already there.
for label_directory in (pass_directory, fail_directory):
    # Ensure the parent directory exists
    os.makedirs(label_directory, exist_ok=True)
    for folder_name in numbered_folder_names:
        os.makedirs(os.path.join(label_directory, folder_name), exist_ok=True)

## DEFINE COPY IMAGES FUNCTION

In [9]:
def copy_images_to_label_folders(dataframe, source_directory, target_directory):
    """Copy every image listed in ``dataframe`` into a folder named after its label.

    For each row, the file ``source_directory/<ext>`` is copied to
    ``target_directory/<CATEGORY 1>/<ext>``. The ``ext`` value may contain a
    sub-folder (e.g. ``328\\644-0.jpg``); the destination folder is created on
    demand, so the function does not depend on the sub-folders having been
    created beforehand.

    Parameters
    ----------
    dataframe : pandas.DataFrame
        Must provide an ``'ext'`` column (image path relative to
        ``source_directory``) and a ``'CATEGORY 1'`` column (the label).
    source_directory : str
        Directory the images are copied from. The source files are left in place.
    target_directory : str
        Directory under which one folder per label is created.

    Raises
    ------
    FileNotFoundError
        Propagated from ``shutil.copy`` when a listed image is missing in
        ``source_directory``.
    """
    # One folder per distinct label, created up front
    for label in dataframe['CATEGORY 1'].unique():
        os.makedirs(os.path.join(target_directory, str(label)), exist_ok=True)

    # Remember which destination folders were already ensured to avoid repeated makedirs calls
    ensured_folders = set()

    for image_filename, label in zip(dataframe['ext'], dataframe['CATEGORY 1']):
        source_path = os.path.join(source_directory, image_filename)
        target_path = os.path.join(target_directory, str(label), image_filename)

        # The image path can include a sub-folder; make sure it exists under the label folder
        target_folder = os.path.dirname(target_path)
        if target_folder not in ensured_folders:
            os.makedirs(target_folder, exist_ok=True)
            ensured_folders.add(target_folder)

        shutil.copy(source_path, target_path)

## LOAD AND APPLY ANY FILTERING TO THE METADATA ## 

In [18]:
# Load the image metadata table from metadata.csv. No filtering happens in this cell.
# NOTE(review): presumably the CSV already covers only the folders of interest - confirm.
meta_of_int = pd.read_csv('metadata.csv')

In [19]:
# Each mask below is computed on the loaded metadata; a row is kept only if it passes all of them.

# THIS FILTERS OUT IMAGES THAT WE COULD NOT ASSIGN A LABEL TO
# A missing label shows up as the string 'nan' after the cast
has_label = meta_of_int['CATEGORY 1'].astype(str) != 'nan'

# THIS FILTERS OUT ALLIGATOR FREIGHT. ALLIGATOR FREIGHT IMAGES WON'T REACH THIS MODEL IN PRODUCTION.
not_alligator = meta_of_int['ALLIGATOR'].astype(str) != 'True'

# THIS FILTERS OUT DARK AND BLURRY IMAGES. DARK AND BLURRY IMAGES WON'T REACH THIS MODEL IN PRODUCTION.
# Only rows explicitly marked 'False' are kept, so a missing quality flag also drops the row
good_quality = meta_of_int['POOR QUALITY'].astype(str) == 'False'

# THIS FILTERS OUT IMAGES WHERE THE CORRECT LABEL WAS AMBIGUOUS (anything with notes).
# Currently disabled; to enable it, uncomment the mask and add it to the combination below.
#no_notes = meta_of_int['NOTES'].astype(str) == 'nan'

metadata_filtered = meta_of_int[has_label & not_alligator & good_quality]

## GRAB ONLY CLOSEOUT OR NON-CLOSEOUT IMAGES

In [20]:
# Take only closeout photos ["Full" column is True].
# .copy() makes this an independent frame, so the label assignment below writes to a real
# DataFrame instead of a slice of the previous one (avoids SettingWithCopyWarning).
metadata_filtered = metadata_filtered[metadata_filtered['Full'].astype(str) == 'True'].copy()

# Take only non-closeout photos
#metadata_filtered = metadata_filtered[metadata_filtered['Full'].astype(str) == 'False'].copy()

# MAKE BINARY LABELS
# Every label in this list is collapsed into 'fail'; 'Pass' is normalized to 'pass'.
# Matching is exact (case-sensitive), so only the spellings listed here are replaced.
fail_labels = ['airbag', 'Airbag', 'return to level', 'Return to Level',
               'restack', 'Restack', 'strap']
metadata_filtered['CATEGORY 1'] = (
    metadata_filtered['CATEGORY 1']
    .replace(fail_labels, 'fail')
    .replace(['Pass'], 'pass')
)

# Drop photos labeled as duplicates (both spellings used in the labels).
# na=False treats a value that is not a string as "not a duplicate" instead of producing NaN,
# which would make the ~ negation fail.
metadata_filtered = metadata_filtered[~metadata_filtered['CATEGORY 1'].str.contains("DUPLICATE", na=False)]
meta_of_int = metadata_filtered[~metadata_filtered['CATEGORY 1'].str.contains("Duplicate", na=False)]
meta_of_int

Unnamed: 0,id,manifestid,documentid,Full,uploaddatetimejsonmetadata,Folder,File,LABEL,SIGN OUT,ALLIGATOR,...,is_far,is_blurry,is_dark,is_angle,is_glare,is_outside,is_zoom,is_duplicate,ext,labels
0,180426.0,43417541.0,83328644.0,True,4/7/2023 7:25,328,644-0.jpg,Vogt,4/29,False,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,328\644-0.jpg,pass
4,180422.0,43416961.0,83328780.0,True,4/7/2023 7:24,328,780-0.jpg,Vogt,4/29,False,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,328\780-0.jpg,pass
8,180427.0,43417881.0,83328784.0,True,4/7/2023 7:25,328,784-0.jpg,Vogt,4/29,False,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,328\784-0.jpg,pass
9,180428.0,43418021.0,83328786.0,True,4/7/2023 7:26,328,786-0.jpg,Vogt,4/29,False,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,328\786-0.jpg,pass
18,180466.0,43417831.0,83328852.0,True,4/7/2023 8:21,328,852-0.jpg,Vogt,4/29,False,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,328\852-0.jpg,return to level
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
6406,,,,True,,134,371-0.jpg,Vogt,9/19,False,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,134\371-0.jpg,
6409,,,,True,,134,751-0.jpg,Vogt,9/19,False,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,134\751-0.jpg,
6410,,,,True,,135,212-0.jpg,Furry,9/19,False,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,135\212-0.jpg,
6411,,,,True,,135,213-0.jpg,Furry,9/19,False,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,135\213-0.jpg,


## COPY IMAGES DEFINED BY METADATA FROM OLD FOLDERS TO NEW ONES

In [22]:
# Directory holding the exported images; change this path if the export lives elsewhere
source_directory = "Photos Upload\\export_20230424_123416"

# Directory under which one folder per label is filled with the copied images
target_directory = "Photos Upload\\"

# Copy every image that survived the metadata filtering into the folder of its label
copy_images_to_label_folders(
    dataframe=meta_of_int,
    source_directory=source_directory,
    target_directory=target_directory,
)

## MOVES THE NEWLY COPIED IMAGES FROM NUMBERED SUB-FOLDERS TO LABEL_ALL FOLDERS.

In [23]:
# Numbered sub-folders that hold the copied images: 000 - 135 and 328 - 438,
# visited in ascending order
source_folder_names = [
    f"{number:03d}"
    for folder_range in (range(0, 136), range(328, 439))
    for number in folder_range
]


def move_folder_contents(folder_path, all_directory, file_name_counts):
    """Move everything inside ``folder_path`` into ``all_directory`` without clobbering files.

    When a file name is already taken in ``all_directory``, the file is stored as
    ``<name>_<k><extension>``, where ``k`` is a running counter kept per original
    file name in ``file_name_counts``. The counter is advanced until the candidate
    name is free, so an entry left behind by an earlier run is never overwritten.

    Parameters
    ----------
    folder_path : str
        Folder whose entries are moved. A folder that does not exist is skipped.
    all_directory : str
        Existing directory that receives the entries.
    file_name_counts : dict
        Maps an original file name to the last suffix number used for it.
        Updated in place so the numbering continues across several calls.
    """
    if not os.path.exists(folder_path):
        return

    for filename in os.listdir(folder_path):
        source_path = os.path.join(folder_path, filename)
        target_path = os.path.join(all_directory, filename)

        # Name already taken: try "<name>_1", "<name>_2", ... until one is free
        file_name, file_extension = os.path.splitext(filename)
        while os.path.exists(target_path):
            file_name_counts[filename] = file_name_counts.get(filename, 0) + 1
            new_filename = f"{file_name}_{file_name_counts[filename]}{file_extension}"
            target_path = os.path.join(all_directory, new_filename)

        # Move the file to the target directory
        shutil.move(source_path, target_path)


# Specify the source directory containing the numbered folders of pass images
pass_directory = "Photos Upload\\pass"

# Specify the target directory where all pass images are collected
pass_all_directory = "Photos Upload\\pass_all"

# Create the target directory if it doesn't exist
os.makedirs(pass_all_directory, exist_ok=True)

# A dictionary to keep track of file names and their counts
pass_file_name_counts = {}

for folder_name in source_folder_names:
    move_folder_contents(os.path.join(pass_directory, folder_name),
                         pass_all_directory, pass_file_name_counts)

# Specify the source directory containing the numbered folders of fail images
fail_directory = "Photos Upload\\fail"

# Specify the target directory where all fail images are collected
fail_all_directory = "Photos Upload\\fail_all"

# Create the target directory if it doesn't exist
os.makedirs(fail_all_directory, exist_ok=True)

# A dictionary to keep track of file names and their counts
fail_file_name_counts = {}

for folder_name in source_folder_names:
    move_folder_contents(os.path.join(fail_directory, folder_name),
                         fail_all_directory, fail_file_name_counts)

## CREATES TESTING DATA

In [24]:
# Images whose leading file number is at least this value go to the test set
FIRST_TEST_FILE_NUMBER = 800

# Names eligible for the split: "<number>-...-0.jpg". The optional "_<k>" part covers files
# that were renamed on a name collision (e.g. "852-0_1.jpg"); a plain endswith("-0.jpg")
# check would leave those behind in the *_all folder.
TEST_IMAGE_NAME = re.compile(r"(\d+)-(?:.*-)?0(?:_\d+)?\.jpg")


def move_test_images(all_directory, test_directory, file_name_counts,
                     first_test_number=FIRST_TEST_FILE_NUMBER):
    """Move the images numbered ``first_test_number`` or higher into ``test_directory``.

    Only file names matching ``TEST_IMAGE_NAME`` are considered; anything else
    (including names without a numeric prefix) stays where it is. If a name is
    already taken in ``test_directory``, the file is stored as
    ``<name>_<k><extension>`` using the first free ``k``.

    Parameters
    ----------
    all_directory : str
        Directory that is scanned for images.
    test_directory : str
        Existing directory that receives the selected images.
    file_name_counts : dict
        Maps an original file name to the last suffix number used for it.
        Updated in place.
    first_test_number : int, optional
        Smallest leading file number that belongs to the test set.
    """
    for filename in os.listdir(all_directory):
        name_match = TEST_IMAGE_NAME.fullmatch(filename)
        if name_match is None or int(name_match.group(1)) < first_test_number:
            continue

        source_path = os.path.join(all_directory, filename)
        target_path = os.path.join(test_directory, filename)

        # Name already taken: try "<name>_1", "<name>_2", ... until one is free
        file_name, file_extension = os.path.splitext(filename)
        while os.path.exists(target_path):
            file_name_counts[filename] = file_name_counts.get(filename, 0) + 1
            new_filename = f"{file_name}_{file_name_counts[filename]}{file_extension}"
            target_path = os.path.join(test_directory, new_filename)

        # Move the file to the target directory
        shutil.move(source_path, target_path)


# Specify the target directory where the selected pass images are moved
pass_test_directory = "Photos Upload\\pass_test"

# Create the new target directory if it doesn't exist
os.makedirs(pass_test_directory, exist_ok=True)

# A dictionary to keep track of file names and their counts
pass_file_name_counts = {}

move_test_images(pass_all_directory, pass_test_directory, pass_file_name_counts)

# Specify the target directory where the selected fail images are moved
fail_test_directory = "Photos Upload\\fail_test"

# Create the new target directory if it doesn't exist
os.makedirs(fail_test_directory, exist_ok=True)

# A dictionary to keep track of file names and their counts
fail_file_name_counts = {}

move_test_images(fail_all_directory, fail_test_directory, fail_file_name_counts)

## DELETE THE ORIGINAL EXPORT AND THE INTERMEDIATE PASS/FAIL FOLDERS NOW THAT THE SELECTED IMAGES HAVE BEEN COPIED AND MOVED INTO THE *_ALL AND *_TEST FOLDERS

In [25]:
# Original export; its images were copied into the label folders earlier in the notebook
folder_path = "Photos Upload\\export_20230424_123416"

# Best-effort removal: a failure is reported, not raised, so the notebook keeps running
try:
    shutil.rmtree(folder_path)
except Exception as e:
    print(f"An error occurred while deleting the folder '{folder_path}': {e}")
else:
    print(f"Folder '{folder_path}' deleted successfully.")

Folder 'Photos Upload\export_20230424_123416' deleted successfully.


In [26]:
# Intermediate numbered pass folders; their images were moved out earlier in the notebook
folder_path = "Photos Upload\\pass"

# Best-effort removal: a failure is reported, not raised, so the notebook keeps running
try:
    shutil.rmtree(folder_path)
except Exception as e:
    print(f"An error occurred while deleting the folder '{folder_path}': {e}")
else:
    print(f"Folder '{folder_path}' deleted successfully.")

Folder 'Photos Upload\pass' deleted successfully.


In [27]:
# Intermediate numbered fail folders; their images were moved out earlier in the notebook
folder_path = "Photos Upload\\fail"

# Best-effort removal: a failure is reported, not raised, so the notebook keeps running
try:
    shutil.rmtree(folder_path)
except Exception as e:
    print(f"An error occurred while deleting the folder '{folder_path}': {e}")
else:
    print(f"Folder '{folder_path}' deleted successfully.")

Folder 'Photos Upload\fail' deleted successfully.
