In [1]:
import requests
import json

# URL pointing directly to the raw JSON data
url = 'https://huggingface.co/datasets/BoKelvin/SLAKE/raw/main/train.json'

# Sending a GET request to the URL
response = requests.get(url)

# Check if the request was successful
if response.status_code == 200:
    try:
        # Load the JSON data
        data = response.json()
        
        # Specify the filename
        filename = 'train.json'
        
        # Write JSON data to a file
        with open(filename, 'w') as file:
            json.dump(data, file, indent=4)
        
        print(f"Data downloaded and saved successfully to {filename}!")
        
    except ValueError as e:
        print("Failed to decode JSON: ", e)
else:
    print("Failed to download data. Status code:", response.status_code)


Data downloaded and saved successfully to train.json!


In [2]:
import json

def find_keys(obj, keys_set):
    """ Recursively find all keys in a JSON object and add them to a set. """
    if isinstance(obj, dict):
        for key, value in obj.items():
            keys_set.add(key)
            find_keys(value, keys_set)
    elif isinstance(obj, list):
        for item in obj:
            find_keys(item, keys_set)

def main():
    filename = 'train.json'  # Path to your JSON file

    # Load the JSON data from the file
    try:
        with open(filename, 'r') as file:
            data = json.load(file)
        
        # Set to store unique keys
        unique_keys = set()
        
        # Find all unique keys in the JSON data
        find_keys(data, unique_keys)
        
        # Print all unique keys
        print("Unique keys in the JSON file:")
        for key in unique_keys:
            print(key)

    except FileNotFoundError:
        print(f"Error: The file {filename} does not exist.")
    except json.JSONDecodeError:
        print("Error: Failed to decode JSON from the file.")
    except Exception as e:
        print(f"An error occurred: {e}")

if __name__ == "__main__":
    main()



Unique keys in the JSON file:
qid
img_name
content_type
img_id
location
triple
answer
q_lang
base_type
modality
answer_type
question


In [3]:
import os
import requests

# Define the URL and the download path
url = 'https://huggingface.co/datasets/BoKelvin/SLAKE/raw/main/imgs.zip'
download_path = '/Users/jeffreysherer/Downloads/BBandMaskProcessing/imgs.zip'

# Create directory if it does not exist
os.makedirs(os.path.dirname(download_path), exist_ok=True)

try:
    # Download the file
    response = requests.get(url, stream=True, timeout=10)
    response.raise_for_status()

    # Write to file
    with open(download_path, 'wb') as file:
        for chunk in response.iter_content(chunk_size=8192):
            file.write(chunk)

    print(f"File downloaded successfully: {download_path}")

except requests.exceptions.RequestException as e:
    print(f"Download failed: {e}")
except FileNotFoundError as e:
    print(f"File not found error: {e}")
except Exception as e:
    print(f"An error occurred: {e}")


File downloaded successfully: /Users/jeffreysherer/Downloads/BBandMaskProcessing/imgs.zip


In [4]:
import zipfile

zip_path = '/teamspace/studios/this_studio/imgs.zip'

# Check if the file is a valid zip file
is_zip = zipfile.is_zipfile(zip_path)
if is_zip:
    print(f"{zip_path} is a valid zip file.")
else:
    print(f"{zip_path} is not a valid zip file or is corrupted.")


/teamspace/studios/this_studio/imgs.zip is not a valid zip file or is corrupted.


In [7]:
import os
print("Current Working Directory:", os.getcwd())
print("Files in Current Directory:", os.listdir('.'))
os.makedirs('data', exist_ok=True)
print("Created 'data' directory")
subfolder = 'data'
print(f"Contents of '{subfolder}':", os.listdir(subfolder))
print("Absolute Path to 'data' Directory:", os.path.abspath('data'))


Current Working Directory: /Users/jeffreysherer/Dissertation
Files in Current Directory: ['README.md', 'train.json', '.git', 'BBandMaskProcessing.ipynb']
Created 'data' directory
Contents of 'data': []
Absolute Path to 'data' Directory: /Users/jeffreysherer/Dissertation/data


In [8]:
import os
import shutil

# Define source and destination paths
source_dir = '/Users/jeffreysherer/Downloads/imgs-1'
destination_dir = '/Users/jeffreysherer/Dissertation/data/imgs-1'

# Check if destination directory exists, if not create it
os.makedirs(destination_dir, exist_ok=True)

try:
    # Copy entire directory tree to the new location
    shutil.copytree(source_dir, destination_dir, dirs_exist_ok=True)
    print(f"All files copied successfully to {destination_dir}")
except shutil.Error as e:
    print(f"Error occurred during copying: {e}")
except Exception as e:
    print(f"An unexpected error occurred: {e}")


All files copied successfully to /Users/jeffreysherer/Dissertation/data/imgs-1


In [9]:
import os
import shutil

# Base directory containing source folders
source_base_dir = 'data/imgs-1'

# Target base directory to store subfolders with JSON files
target_base_dir = '/Users/jeffreysherer/Dissertation/processed_jsons'

# Ensure the target directory exists
if not os.path.exists(target_base_dir):
    os.makedirs(target_base_dir)

# Check if the source directory exists
if not os.path.exists(source_base_dir):
    print(f"Error: Source directory {source_base_dir} does not exist.")
else:
    # Loop through each directory in the source base directory
    for dirname in sorted(os.listdir(source_base_dir)):
        if dirname.startswith('xmlab'):  # Check if the directory name starts with 'xmlab'
            source_dir_path = os.path.join(source_base_dir, dirname)
            json_file_path = os.path.join(source_dir_path, 'detection.json')

            if os.path.isfile(json_file_path):  # Check if the JSON file exists
                # Create a new subdirectory for this JSON file in the target directory
                new_subdir_name = f"{dirname}detection"
                target_subdir_path = os.path.join(target_base_dir, new_subdir_name)
                if not os.path.exists(target_subdir_path):
                    os.makedirs(target_subdir_path)

                # Define the target file path
                target_file_path = os.path.join(target_subdir_path, 'detection.json')

                # Copy the JSON file to the new location
                shutil.copy2(json_file_path, target_file_path)
                print(f"File {json_file_path} copied to {target_file_path}")
            else:
                print(f"No detection.json found in {source_dir_path}")
        else:
            print(f"Skipped {dirname} as it does not match the 'xmlab*' pattern")


Skipped .DS_Store as it does not match the 'xmlab*' pattern
File data/imgs-1/xmlab0/detection.json copied to /Users/jeffreysherer/Dissertation/processed_jsons/xmlab0detection/detection.json
File data/imgs-1/xmlab1/detection.json copied to /Users/jeffreysherer/Dissertation/processed_jsons/xmlab1detection/detection.json
File data/imgs-1/xmlab10/detection.json copied to /Users/jeffreysherer/Dissertation/processed_jsons/xmlab10detection/detection.json
File data/imgs-1/xmlab100/detection.json copied to /Users/jeffreysherer/Dissertation/processed_jsons/xmlab100detection/detection.json
File data/imgs-1/xmlab101/detection.json copied to /Users/jeffreysherer/Dissertation/processed_jsons/xmlab101detection/detection.json
File data/imgs-1/xmlab102/detection.json copied to /Users/jeffreysherer/Dissertation/processed_jsons/xmlab102detection/detection.json
File data/imgs-1/xmlab103/detection.json copied to /Users/jeffreysherer/Dissertation/processed_jsons/xmlab103detection/detection.json
File data/im

In [11]:
import os
import json
import shutil

def has_numerical_values(data):
    """Recursively search for any numerical value in JSON data."""
    if isinstance(data, dict):
        return any(has_numerical_values(v) for v in data.values())
    elif isinstance(data, list):
        return any(has_numerical_values(item) for item in data)
    elif isinstance(data, (int, float)):
        return True
    return False

# Adjust these paths to directories where you have write permissions
base_dir = '/Users/jeffreysherer/Dissertation/processed_jsons'
numerical_base_dir = '/Users/jeffreysherer/Dissertation/numerical_detections'

# Ensure the target directory exists
os.makedirs(numerical_base_dir, exist_ok=True)

# Loop through each subdirectory in the base directory
for subdir in os.listdir(base_dir):
    subdir_path = os.path.join(base_dir, subdir)
    json_file_path = os.path.join(subdir_path, 'detection.json')

    if os.path.isfile(json_file_path):
        # Read the JSON file
        with open(json_file_path, 'r') as file:
            try:
                data = json.load(file)
            except json.JSONDecodeError:
                print(f"Failed to decode JSON in file {json_file_path}")
                continue

        # Check if the JSON data contains any numerical values
        if has_numerical_values(data):
            # Prepare a similar directory structure in the numerical detections directory
            numerical_subdir_path = os.path.join(numerical_base_dir, subdir)
            os.makedirs(numerical_subdir_path, exist_ok=True)

            # Define the target file path in the new directory
            target_file_path = os.path.join(numerical_subdir_path, 'detection.json')

            # Copy the JSON file to the new location
            shutil.copy2(json_file_path, target_file_path)
            print(f"File {json_file_path} copied to {target_file_path}")
        else:
            print(f"No numerical values found in {json_file_path}")
    else:
        print(f"No detection.json file found in {subdir_path}")


File /Users/jeffreysherer/Dissertation/processed_jsons/xmlab554detection/detection.json copied to /Users/jeffreysherer/Dissertation/numerical_detections/xmlab554detection/detection.json
File /Users/jeffreysherer/Dissertation/processed_jsons/xmlab555detection/detection.json copied to /Users/jeffreysherer/Dissertation/numerical_detections/xmlab555detection/detection.json
No numerical values found in /Users/jeffreysherer/Dissertation/processed_jsons/xmlab266detection/detection.json
File /Users/jeffreysherer/Dissertation/processed_jsons/xmlab267detection/detection.json copied to /Users/jeffreysherer/Dissertation/numerical_detections/xmlab267detection/detection.json
File /Users/jeffreysherer/Dissertation/processed_jsons/xmlab28detection/detection.json copied to /Users/jeffreysherer/Dissertation/numerical_detections/xmlab28detection/detection.json
File /Users/jeffreysherer/Dissertation/processed_jsons/xmlab108detection/detection.json copied to /Users/jeffreysherer/Dissertation/numerical_dete

In [13]:
import os

def count_directories(path):
    """Count directories in a given path."""
    # Ensure the directory exists before listing
    if not os.path.exists(path):
        os.makedirs(path)  # Create the directory if it does not exist
        return 0  # Return zero since the directory was just created
    return sum(1 for entry in os.listdir(path) if os.path.isdir(os.path.join(path, entry)))

# Relative paths for the directories
processed_json_dir = 'processed_jsons'
numerical_detections_dir = 'numerical_detections'

# Count directories in both paths
processed_count = count_directories(processed_json_dir)
numerical_count = count_directories(numerical_detections_dir)

# Print the counts and the difference
print(f"Total directories in processed_jsons: {processed_count}")
print(f"Total directories in numerical_detections: {numerical_count}")
print(f"Number of directories removed (no numerical data): {processed_count - numerical_count}")


Total directories in processed_jsons: 642
Total directories in numerical_detections: 580
Number of directories removed (no numerical data): 62


In [15]:
import os
import shutil

# Base directory relative to the current working directory of your script
base_dir = os.getcwd()  # Adjust this to 'os.getcwd()' to make it dynamically refer to the current working directory

# Define source, bounding box, and mask directories using relative paths
source_dir = os.path.join(base_dir, 'numerical_detections')
bounding_box_dir = os.path.join(base_dir, 'bounding_box')
mask_dir = os.path.join(base_dir, 'mask')

# Ensure the bounding box and mask directories exist
os.makedirs(bounding_box_dir, exist_ok=True)
os.makedirs(mask_dir, exist_ok=True)

# Define bounding box indices based on your provided list
bounding_box_indices = set(range(121, 166)) | set(range(156, 178)) | set(range(295, 356)) | set(range(388, 390))

# Process each subdirectory in the source directory
if os.path.exists(source_dir):
    for subdir in os.listdir(source_dir):
        # Extract numeric part assuming format 'xmlab<number>' possibly followed by additional text
        subdir_index_str = ''.join(filter(str.isdigit, subdir[len('xmlab'):]))  # Get numeric part
        if subdir_index_str:
            subdir_index = int(subdir_index_str)  # Convert to integer
            subdir_path = os.path.join(source_dir, subdir)

            if subdir_index in bounding_box_indices:
                # Move to bounding box directory
                target_subdir_path = os.path.join(bounding_box_dir, subdir)
            else:
                # Move to mask directory
                target_subdir_path = os.path.join(mask_dir, subdir)

            # Move the folder
            shutil.move(subdir_path, target_subdir_path)
            print(f"Moved {subdir} to {target_subdir_path}")
else:
    print(f"Source directory {source_dir} does not exist.")

print("Files have been organized into bounding box and mask folders.")


Moved xmlab554detection to /Users/jeffreysherer/Dissertation/mask/xmlab554detection
Moved xmlab555detection to /Users/jeffreysherer/Dissertation/mask/xmlab555detection
Moved xmlab267detection to /Users/jeffreysherer/Dissertation/mask/xmlab267detection
Moved xmlab28detection to /Users/jeffreysherer/Dissertation/mask/xmlab28detection
Moved xmlab108detection to /Users/jeffreysherer/Dissertation/mask/xmlab108detection
Moved xmlab29detection to /Users/jeffreysherer/Dissertation/mask/xmlab29detection
Moved xmlab109detection to /Users/jeffreysherer/Dissertation/mask/xmlab109detection
Moved xmlab487detection to /Users/jeffreysherer/Dissertation/mask/xmlab487detection
Moved xmlab486detection to /Users/jeffreysherer/Dissertation/mask/xmlab486detection
Moved xmlab141detection to /Users/jeffreysherer/Dissertation/bounding_box/xmlab141detection
Moved xmlab327detection to /Users/jeffreysherer/Dissertation/bounding_box/xmlab327detection
Moved xmlab61detection to /Users/jeffreysherer/Dissertation/mask

In [17]:
import os

def count_folders(directory):
    """Count the number of directories within a specified directory."""
    # Check if the directory exists, create if it doesn't
    if not os.path.exists(directory):
        os.makedirs(directory)
        return 0  # No folders initially if just created
    return sum(1 for item in os.listdir(directory) if os.path.isdir(os.path.join(directory, item)))

# Base directory relative to the current script
base_dir = 'this_studio'  # This is relative to where your script is run

# Define the paths to the bounding box and mask directories
bounding_box_dir = os.path.join(base_dir, 'bounding_box')
mask_dir = os.path.join(base_dir, 'mask')

# Ensure directories exist before counting
os.makedirs(bounding_box_dir, exist_ok=True)
os.makedirs(mask_dir, exist_ok=True)

# Count the directories in each
bounding_box_count = count_folders(bounding_box_dir)
mask_count = count_folders(mask_dir)

# Print the counts
print(f"Number of folders in bounding box directory: {bounding_box_count}")
print(f"Number of folders in mask directory: {mask_count}")


Number of folders in bounding box directory: 0
Number of folders in mask directory: 0


In [19]:
import os

# Base directory relative to the current script's location
base_dir = 'this_studio'  # Adjust this as needed

# Full path to the bounding box directory
bounding_box_dir = os.path.join(base_dir, 'bounding_box')

# Ensure the directory exists
if not os.path.exists(bounding_box_dir):
    os.makedirs(bounding_box_dir)
    print("Created the directory as it did not exist.")
    folders = []  # No folders to list if we just created the directory
else:
    # List all folders and sort them numerically based on the embedded number
    folders = sorted(os.listdir(bounding_box_dir), key=lambda x: int(''.join(filter(str.isdigit, x))))

# Print all folders
for folder in folders:
    print(folder)

# Optionally, count them
print(f"Total folders found: {len(folders)}")


Total folders found: 0


In [24]:
import os
import shutil

# Base directory relative to the current script's location
base_dir = 'this_studio'  # Adjust this as needed or use os.getcwd() to dynamically refer to the current working directory

# Define the source and destination directories using relative paths
source_dir = os.path.join(base_dir, 'processed_jsons')
destination_dir = os.path.join(base_dir, 'bounding_box')

# Ensure both directories exist to avoid errors
os.makedirs(source_dir, exist_ok=True)
os.makedirs(destination_dir, exist_ok=True)

# List of missing folders to move
missing_folders = ['xmlab178detection', 'xmlab356detection', 'xmlab390detection']

# Loop through the missing folders and move them if found
for folder in missing_folders:
    source_folder_path = os.path.join(source_dir, folder)
    destination_folder_path = os.path.join(destination_dir, folder)

    # Check if the folder exists in the source directory
    if os.path.exists(source_folder_path):
        # Move the folder to the bounding box directory
        shutil.move(source_folder_path, destination_folder_path)
        print(f"Moved {folder} to the bounding box directory.")
    else:
        print(f"Folder {folder} not found in the processed JSON directory.")

Folder xmlab178detection not found in the processed JSON directory.
Folder xmlab356detection not found in the processed JSON directory.
Folder xmlab390detection not found in the processed JSON directory.


In [25]:
import os
import shutil

# Base directory relative to the current script's location
base_dir = 'this_studio'  # You can adjust this based on your directory structure or use os.getcwd() for the current working directory

# Define the source and destination directories using relative paths
source_dir = os.path.join(base_dir, 'processed_jsons')
destination_dir = os.path.join(base_dir, 'bounding_box')

# Ensure both directories exist to avoid FileNotFoundError
os.makedirs(source_dir, exist_ok=True)
os.makedirs(destination_dir, exist_ok=True)

# Generate a list of missing folders based on your specifications
missing_folders = [f'xmlab{num}detection' for num in [120] + list(range(179, 200)) + list(range(391, 394))]

# Process each folder, moving it if found in the source directory
for folder_name in missing_folders:
    source_folder_path = os.path.join(source_dir, folder_name)
    destination_folder_path = os.path.join(destination_dir, folder_name)

    # Check if the folder exists in the source directory
    if os.path.exists(source_folder_path):
        # Move the folder to the bounding box directory
        shutil.move(source_folder_path, destination_folder_path)
        print(f"Moved {folder_name} to the bounding box directory.")
    else:
        print(f"Folder {folder_name} not found in the processed JSON directory.")


Folder xmlab120detection not found in the processed JSON directory.
Folder xmlab179detection not found in the processed JSON directory.
Folder xmlab180detection not found in the processed JSON directory.
Folder xmlab181detection not found in the processed JSON directory.
Folder xmlab182detection not found in the processed JSON directory.
Folder xmlab183detection not found in the processed JSON directory.
Folder xmlab184detection not found in the processed JSON directory.
Folder xmlab185detection not found in the processed JSON directory.
Folder xmlab186detection not found in the processed JSON directory.
Folder xmlab187detection not found in the processed JSON directory.
Folder xmlab188detection not found in the processed JSON directory.
Folder xmlab189detection not found in the processed JSON directory.
Folder xmlab190detection not found in the processed JSON directory.
Folder xmlab191detection not found in the processed JSON directory.
Folder xmlab192detection not found in the proces

In [26]:
import os

# Base directory relative to the current script's location
base_dir = 'this_studio'  # Adjust this base directory as per your project's structure

# Define the path to the bounding box directory using a relative path
bounding_box_dir = os.path.join(base_dir, 'bounding_box')

# Ensure the directory exists to avoid FileNotFoundError when checking for folders
os.makedirs(bounding_box_dir, exist_ok=True)

# Define the ranges to check
ranges_to_check = list(range(295, 394)) + list(range(120, 200))  # Combines both ranges into one list

# Function to generate folder names based on the specified range
def generate_folder_name(num):
    return f"xmlab{num}detection"

# Check each folder in the defined ranges
missing_folders = []
for num in ranges_to_check:
    folder_name = generate_folder_name(num)
    folder_path = os.path.join(bounding_box_dir, folder_name)
    # Check if the folder exists
    if not os.path.exists(folder_path):
        missing_folders.append(folder_name)

# Report the findings
if missing_folders:
    print("The following folders are missing:")
    for folder in missing_folders:
        print(folder)
else:
    print("All folders from xmlab295 to xmlab393 and xmlab120 to xmlab199 are present in the bounding box directory.")


The following folders are missing:
xmlab295detection
xmlab296detection
xmlab297detection
xmlab298detection
xmlab299detection
xmlab300detection
xmlab301detection
xmlab302detection
xmlab303detection
xmlab304detection
xmlab305detection
xmlab306detection
xmlab307detection
xmlab308detection
xmlab309detection
xmlab310detection
xmlab311detection
xmlab312detection
xmlab313detection
xmlab314detection
xmlab315detection
xmlab316detection
xmlab317detection
xmlab318detection
xmlab319detection
xmlab320detection
xmlab321detection
xmlab322detection
xmlab323detection
xmlab324detection
xmlab325detection
xmlab326detection
xmlab327detection
xmlab328detection
xmlab329detection
xmlab330detection
xmlab331detection
xmlab332detection
xmlab333detection
xmlab334detection
xmlab335detection
xmlab336detection
xmlab337detection
xmlab338detection
xmlab339detection
xmlab340detection
xmlab341detection
xmlab342detection
xmlab343detection
xmlab344detection
xmlab345detection
xmlab346detection
xmlab347detection
xmlab348det

In [27]:
import os

# Base directory relative to the current script's location
base_dir = 'this_studio'  # Adjust this base directory as per your project's structure

# Define the path to the bounding box directory using a relative path
bounding_box_dir = os.path.join(base_dir, 'bounding_box')

# Ensure the directory exists to avoid FileNotFoundError when listing folders
os.makedirs(bounding_box_dir, exist_ok=True)

# List all folders and sort them numerically based on the embedded number
# Adding a check to ensure the directory isn't empty to avoid errors
folders = []
if os.path.exists(bounding_box_dir) and os.listdir(bounding_box_dir):
    folders = sorted(os.listdir(bounding_box_dir), key=lambda x: int(''.join(filter(str.isdigit, x))))

# Print all folders
for folder in folders:
    print(folder)

# Optionally, count them
print(f"Total folders found: {len(folders)}")


Total folders found: 0


In [28]:
import os
import shutil

# Base directory relative to the current script's location
base_dir = 'this_studio'  # Adjust this base directory as per your project's structure

# Define the source and destination directories using relative paths
source_dir = os.path.join(base_dir, 'processed_jsons')
destination_dir = os.path.join(base_dir, 'bounding_box')

# Ensure both directories exist to avoid FileNotFoundError when attempting to move folders
os.makedirs(source_dir, exist_ok=True)
os.makedirs(destination_dir, exist_ok=True)

# Generate a list of folders to move, from xmlab357detection to xmlab387detection
missing_folders = [f'xmlab{num}detection' for num in range(357, 388)]

# Process each folder, moving it if found in the source directory
for folder_name in missing_folders:
    source_folder_path = os.path.join(source_dir, folder_name)
    destination_folder_path = os.path.join(destination_dir, folder_name)

    # Check if the folder exists in the source directory
    if os.path.exists(source_folder_path):
        # Move the folder to the bounding box directory
        shutil.move(source_folder_path, destination_folder_path)
        print(f"Moved {folder_name} to the bounding box directory.")
    else:
        print(f"Folder {folder_name} not found in the processed JSON directory.")


Folder xmlab357detection not found in the processed JSON directory.
Folder xmlab358detection not found in the processed JSON directory.
Folder xmlab359detection not found in the processed JSON directory.
Folder xmlab360detection not found in the processed JSON directory.
Folder xmlab361detection not found in the processed JSON directory.
Folder xmlab362detection not found in the processed JSON directory.
Folder xmlab363detection not found in the processed JSON directory.
Folder xmlab364detection not found in the processed JSON directory.
Folder xmlab365detection not found in the processed JSON directory.
Folder xmlab366detection not found in the processed JSON directory.
Folder xmlab367detection not found in the processed JSON directory.
Folder xmlab368detection not found in the processed JSON directory.
Folder xmlab369detection not found in the processed JSON directory.
Folder xmlab370detection not found in the processed JSON directory.
Folder xmlab371detection not found in the proces

In [29]:
import os
import shutil

# Base directory relative to the current script's location
base_dir = 'this_studio'  # Adjust this base directory as per your project's structure

# Define the directories using relative paths
bounding_box_dir = os.path.join(base_dir, 'bounding_box')
mask_dir = os.path.join(base_dir, 'mask')

# Ensure both directories exist to avoid FileNotFoundError when listing folders
os.makedirs(bounding_box_dir, exist_ok=True)
os.makedirs(mask_dir, exist_ok=True)

# Get lists of folders in both directories
bounding_box_folders = set(os.listdir(bounding_box_dir))
mask_folders = set(os.listdir(mask_dir))

# Find intersections (folders that should not be in the mask directory)
intersecting_folders = bounding_box_folders.intersection(mask_folders)

# Remove intersecting folders from the mask directory
for folder in intersecting_folders:
    folder_path = os.path.join(mask_dir, folder)
    # Remove the folder entirely
    shutil.rmtree(folder_path)
    print(f"Removed {folder} from the mask directory as it was found in the bounding box directory.")

# Optionally, report remaining valid folders
remaining_mask_folders = set(os.listdir(mask_dir))
print("Remaining folders in the mask directory are confirmed to be exclusive from the bounding box directory.")
for folder in remaining_mask_folders:
    print(folder)


Remaining folders in the mask directory are confirmed to be exclusive from the bounding box directory.


In [None]:
import os
import zipfile

def zip_directory(folder_path, output_path):
    """Zips the contents of a folder into a zip file at the specified output path."""
    with zipfile.ZipFile(output_path, 'w', zipfile.ZIP_DEFLATED) as zipf:
        for root, dirs, files in os.walk(folder_path):
            for file in files:
                # Create a proper archive path to keep the directory structure
                archive_path = os.path.relpath(os.path.join(root, file), os.path.join(folder_path, '..'))
                zipf.write(os.path.join(root, file), archive_path)
        print(f"Created zip archive: {output_path}")

# Base directory relative to the current script's location
base_dir = 'this_studio'  # Adjust this as needed

# Define the directories to zip using relative paths
directories_to_zip = {
    'mask': os.path.join(base_dir, 'mask'),
    'numerical_detections': os.path.join(base_dir, 'numerical_detections'),
    'bounding_box': os.path.join(base_dir, 'bounding_box')
}

# Loop through the directories and zip each one
for key, path in directories_to_zip.items():
    # Ensure the directory exists to avoid errors during zipping
    if not os.path.exists(path):
        os.makedirs(path)
        print(f"Created directory {path} as it did not exist.")
    output_zip_path = os.path.join(base_dir, f"{key}.zip")
    zip_directory(path, output_zip_path)


In [None]:
import os
import shutil

def copy_files(src_dir, dest_dir):
    """Copy all relevant files from src_dir to dest_dir."""
    relevant_files = ['source.jpg', 'mask.png', 'detection.json', 'question.json']
    for file_name in relevant_files:
        src_file = os.path.join(src_dir, file_name)
        dest_file = os.path.join(dest_dir, file_name)
        # Check if the source file exists before copying
        if os.path.exists(src_file):
            shutil.copy2(src_file, dest_file)
            print(f"Copied {src_file} to {dest_file}")
        else:
            print(f"File {src_file} does not exist, skipping.")

def consolidate_files(imgs_base_path, mask_base_path, bounding_box_base_path):
    """Consolidate files from imgs directory to respective mask or bounding_box directories."""
    # Loop through all subdirectories in imgs base path
    if os.path.exists(imgs_base_path):
        for subdir in os.listdir(imgs_base_path):
            src_dir = os.path.join(imgs_base_path, subdir)
            dest_dir = None
            # Check if the subdirectory corresponds to a mask or bounding box instance
            if subdir.endswith("detection"):
                subdir_num = int(subdir.replace("xmlab", "").replace("detection", ""))
                if subdir_num >= 120:  # Assuming all instances with 120 or above go to bounding_box
                    dest_dir = os.path.join(bounding_box_base_path, subdir)
                else:  # Otherwise, they go to mask
                    dest_dir = os.path.join(mask_base_path, subdir)
            
            if dest_dir and not os.path.exists(dest_dir):
                os.makedirs(dest_dir)

            # Copy relevant files
            if dest_dir:
                copy_files(src_dir, dest_dir)
    else:
        print(f"The directory {imgs_base_path} does not exist.")

# Define base paths using a relative approach
base_dir = 'this_studio'  # This should be adjusted according to your actual directory structure

imgs_base_path = os.path.join(base_dir, 'imgs')
mask_base_path = os.path.join(base_dir, 'mask')
bounding_box_base_path = os.path.join(base_dir, 'bounding_box')

# Ensure base directories exist
os.makedirs(imgs_base_path, exist_ok=True)
os.makedirs(mask_base_path, exist_ok=True)
os.makedirs(bounding_box_base_path, exist_ok=True)

# Consolidate files
consolidate_files(imgs_base_path, mask_base_path, bounding_box_base_path)