In [None]:
import os
import shutil
from PIL import Image


In [11]:
def count_files(directory):
    """Count the regular files directly inside ``directory``.

    Sub-directories are neither counted nor descended into.

    Parameters:
    directory (str): Path of the directory to inspect.

    Returns:
    int: Number of entries that are files. 0 is returned (after printing a
    notice) when ``directory`` is not an existing directory.
    """
    # isdir rather than exists: a path that points at a regular file would
    # pass an existence check and then make the directory listing raise.
    if not os.path.isdir(directory):
        print("Directory does not exist.")
        return 0

    # scandir yields entries lazily and is closed by the context manager.
    with os.scandir(directory) as entries:
        return sum(1 for entry in entries if entry.is_file())

In [16]:
import os
import shutil

def _count_regular_files(directory):
    """Return how many entries directly inside ``directory`` are regular files."""
    with os.scandir(directory) as entries:
        return sum(1 for entry in entries if entry.is_file())


def move_zero_byte_files(source_dir, dest_dir, log_file):
    """Move every zero-byte file from ``source_dir`` into ``dest_dir``.

    The name of each moved file is written to ``log_file`` (one per line; the
    log is truncated on every call). A short summary is printed at the end.

    Parameters:
    source_dir (str): Directory scanned (non-recursively) for empty files.
    dest_dir (str): Directory the empty files are moved to; created if missing.
    log_file (str): Path of the text file that receives the moved file names.

    Returns:
    None. If ``source_dir`` is not a directory a message is printed and
    nothing else happens.
    """
    # isdir rather than exists so a path to a regular file is rejected here
    # instead of failing later inside os.listdir.
    if not os.path.isdir(source_dir):
        print(f"The source directory {source_dir} does not exist.")
        return

    os.makedirs(dest_dir, exist_ok=True)

    zero_byte_files_count = 0
    log_identity = os.path.normcase(os.path.abspath(log_file))

    with open(log_file, 'w') as log:
        for filename in os.listdir(source_dir):
            file_path = os.path.join(source_dir, filename)

            # The log was just created/truncated, so it is empty right now.
            # If it lives inside source_dir it must not be moved as data.
            if os.path.normcase(os.path.abspath(file_path)) == log_identity:
                continue

            try:
                if os.path.isfile(file_path) and os.path.getsize(file_path) == 0:
                    shutil.move(file_path, os.path.join(dest_dir, filename))
                    log.write(filename + '\n')
                    zero_byte_files_count += 1
            except Exception as e:
                # Best effort: report the problem and keep going with the rest.
                print(f"Error processing file {filename}: {e}")

    # Count regular files only; a bare len(os.listdir(...)) would also count
    # sub-directories and contradict the "Number of files" wording.
    print(f"Number of zero-byte files moved: {zero_byte_files_count}")
    print(f"Number of files in source directory after moving: {_count_regular_files(source_dir)}")
    print(f"Number of files in destination directory: {_count_regular_files(dest_dir)}")



In [24]:
# Quarantine empty (0-byte) files from the image folder.
# NOTE: source_directory / destination_directory are notebook-level globals
# that later cells reassign, so the values depend on which cell ran last.
source_directory = 'Pre_Processing/label_img_training_data/images'  # directory scanned for zero-byte files
destination_directory = 'Pre_Processing/zero_byte_images/'  # zero-byte files are moved here
log_file_path = 'Pre_Processing/zero_byte_images_logs.txt'  # receives one moved file name per line

# Prints the moved / remaining / destination counts; returns nothing.
move_zero_byte_files(source_directory, destination_directory, log_file_path)

Number of zero-byte files moved: 0
Number of files in source directory after moving: 33455
Number of files in destination directory: 0


In [18]:
# Move invalid image files

def move_invalid_images(source_dir, dest_dir):
    """Move image files that fail verification out of ``source_dir``.

    Every regular file in ``source_dir`` whose name ends (case-insensitively)
    in .png, .jpg, .jpeg, .gif or .bmp is opened with ``Image.open`` and
    checked with ``verify()``. Files for which that raises ``IOError`` or
    ``SyntaxError`` are reported and moved to ``dest_dir``.

    Parameters:
    source_dir (str): Directory scanned (non-recursively) for images.
    dest_dir (str): Directory invalid images are moved to; created if missing.

    Returns:
    None. If ``source_dir`` is not a directory a message is printed and
    nothing else happens.
    """
    # Same guard as the other helpers in this notebook: report, don't raise.
    if not os.path.isdir(source_dir):
        print(f"The source directory {source_dir} does not exist.")
        return

    os.makedirs(dest_dir, exist_ok=True)

    image_suffixes = ('.png', '.jpg', '.jpeg', '.gif', '.bmp')

    for filename in os.listdir(source_dir):
        if not filename.lower().endswith(image_suffixes):
            continue

        file_path = os.path.join(source_dir, filename)

        # A directory with an image-like name cannot be opened as an image;
        # the resulting OSError would get it moved as an "invalid image".
        if not os.path.isfile(file_path):
            continue

        try:
            with Image.open(file_path) as img:
                img.verify()  # raises when the file content is not a valid image
        except (IOError, SyntaxError):
            print(f"Invalid image found: {filename}")
            # The with-block has exited by now, so the handle is closed
            # before the file is moved.
            shutil.move(file_path, os.path.join(dest_dir, filename))

    print("Invalid images have been moved.")

In [25]:
# Move images that fail verification out of the training image folder.
# NOTE: these names are notebook-level globals shared with other cells.
source_directory = 'Pre_Processing/label_img_training_data/images'  # directory scanned for invalid images
destination_directory = 'Pre_Processing/unrecongnized_images'  # invalid images are moved here (spelling is part of the runtime path; left as-is)


move_invalid_images(source_directory, destination_directory)
# file_count = count_files('Audio_Work/wav_data')
# Both lines print the same wording: the first is the source count, the second the destination count.
print(f'There are {count_files(source_directory)} files in the directory.')
print(f'There are {count_files(destination_directory)} files in the directory.')



Invalid images have been moved.
There are 33455 files in the directory.
There are 0 files in the directory.


In [26]:
def checkpoint_create_move_images_with_annotations(source_dir, dest_dir):
    """Move all .jpg/.xml files up to the last annotated one into ``dest_dir``.

    The "checkpoint" is the alphabetically last ``.xml`` file in
    ``source_dir``. Every ``.jpg`` or ``.xml`` entry whose base name compares
    (as a plain string) less than or equal to the checkpoint's base name is
    moved. Counts are printed before and after the move.

    Parameters:
    source_dir (str): Directory holding the images and XML annotations.
    dest_dir (str): Directory the files are moved to; created if missing.
    """
    if not os.path.exists(dest_dir):
        os.makedirs(dest_dir)

    # Gather the annotation files in sorted order.
    xml_files = [name for name in os.listdir(source_dir) if name.endswith('.xml')]
    xml_files.sort()

    if len(xml_files) == 0:
        print("No XML annotations found.")
        return

    print(f"Number of annotations in source: {len(xml_files)}")

    # The last annotation marks how far labelling has progressed.
    last_xml = xml_files[-1]
    checkpoint_stem, _ = os.path.splitext(last_xml)

    movable_suffixes = ('.jpg', '.xml')  # Add other image formats if needed
    for entry in os.listdir(source_dir):
        stem, suffix = os.path.splitext(entry)
        if stem > checkpoint_stem or suffix not in movable_suffixes:
            continue
        shutil.move(os.path.join(source_dir, entry), os.path.join(dest_dir, entry))

    # Report what the destination holds now.
    moved_names = os.listdir(dest_dir)
    images_count = sum(1 for name in moved_names if name.endswith('.jpg'))
    annotations_count = sum(1 for name in moved_names if name.endswith('.xml'))

    print(f"Images and annotations up to {last_xml} have been moved.")
    print(f"Number of images in destination: {images_count}")
    print(f"Number of annotations in destination: {annotations_count}")

# Example usage
# checkpoint_create_move_images_with_annotations('path/to/source_dir', 'path/to/dest_dir')


In [27]:
# Move every .jpg/.xml up to the last annotated file into its own folder.
# NOTE: these names are notebook-level globals shared with other cells.
source_directory = 'Pre_Processing/label_img_training_data/images'  # directory holding images and XML annotations
destination_directory = 'Pre_Processing/annotated_label_img_training_data'  # annotated files are moved here

# Prints "No XML annotations found." and moves nothing when the source has no .xml files.
checkpoint_create_move_images_with_annotations(source_directory, destination_directory)

No XML annotations found.


In [29]:
import os
import shutil

def move_img_xml_pairs(input_dir, output_dir):
    """
    Move image and XML file pairs from input_dir to output_dir.

    A pair is a ``<name>.jpg`` together with a ``<name>.xml`` in the same
    directory. Images without a matching XML file are reported and left in
    place; XML files without a matching image are left in place as well.
    A summary of the source and destination counts is printed at the end.

    Parameters:
    input_dir (str): Directory containing the image (.jpg) and XML files.
    output_dir (str): Directory to move the file pairs to; created if missing.

    Returns:
    None. If ``input_dir`` is not a directory a message is printed and
    nothing else happens.
    """
    if not os.path.isdir(input_dir):
        print(f"The source directory {input_dir} does not exist.")
        return

    os.makedirs(output_dir, exist_ok=True)

    # Snapshot the source once, before anything is moved, so the "source"
    # counts describe what was really there. Counting XML files only when a
    # matching image is found would silently hide orphan annotations.
    source_entries = os.listdir(input_dir)
    source_images_count = sum(1 for name in source_entries if name.endswith('.jpg'))
    source_xml_count = sum(1 for name in source_entries if name.endswith('.xml'))

    for file in source_entries:
        if not file.endswith('.jpg'):
            continue

        xml_name = os.path.splitext(file)[0] + '.xml'
        img_path = os.path.join(input_dir, file)
        xml_path = os.path.join(input_dir, xml_name)

        # Only move the image when its annotation file is present.
        if os.path.isfile(xml_path):
            shutil.move(img_path, os.path.join(output_dir, file))
            shutil.move(xml_path, os.path.join(output_dir, xml_name))
        else:
            print(f"XML file for {file} not found.")

    # Count what the destination holds after the move.
    dest_entries = os.listdir(output_dir)
    dest_images_count = sum(1 for name in dest_entries if name.endswith('.jpg'))
    dest_xml_count = sum(1 for name in dest_entries if name.endswith('.xml'))

    print(f"Number of images in source: {source_images_count}")
    print(f"Number of XML files in source: {source_xml_count}")
    print(f"Number of images in destination: {dest_images_count}")
    print(f"Number of XML files in destination: {dest_xml_count}")

# Example usage
# move_img_xml_pairs('path/to/input_dir', 'path/to/output_dir')


In [30]:
# Keep only complete image + annotation pairs.
# The source here is the folder the checkpoint cell moved its files into.
source_directory = 'Pre_Processing/annotated_label_img_training_data'  # directory holding .jpg and .xml files
destination_directory = 'Pre_Processing/pairs_annotated_label_img_training_data'  # complete pairs are moved here
# Prints one line per image that has no matching .xml, followed by the counts.
move_img_xml_pairs(source_directory,destination_directory)

XML file for 2023-10-08_16-24-01.jpg not found.
XML file for 2023-10-08_16-25-01.jpg not found.
XML file for 2023-10-08_16-26-01.jpg not found.
XML file for 2023-10-08_16-27-01.jpg not found.
XML file for 2023-10-08_16-28-01.jpg not found.
XML file for 2023-10-08_16-29-01.jpg not found.
XML file for 2023-10-08_16-30-01.jpg not found.
XML file for 2023-10-08_16-31-01.jpg not found.
XML file for 2023-10-08_16-32-01.jpg not found.
XML file for 2023-10-08_16-33-01.jpg not found.
XML file for 2023-10-08_16-34-01.jpg not found.
XML file for 2023-10-08_16-35-01.jpg not found.
XML file for 2023-10-08_16-36-01.jpg not found.
XML file for 2023-10-08_16-37-01.jpg not found.
XML file for 2023-10-08_16-38-01.jpg not found.
XML file for 2023-10-08_16-39-01.jpg not found.
XML file for 2023-10-08_16-41-01.jpg not found.
XML file for 2023-10-08_16-42-01.jpg not found.
XML file for 2023-10-08_16-43-01.jpg not found.
XML file for 2023-10-08_16-44-01.jpg not found.
XML file for 2023-10-08_16-45-01.jpg not

In [38]:
import os
import shutil
from datetime import datetime

def organize_files_by_date(source_directory, destination_directory):
    """Sort files into per-day sub-directories based on their name prefix.

    The part of each file name before the first underscore is taken as the
    date and must parse as ``%Y-%m-%d``; files that do not are left where
    they are. Matching files are moved to
    ``destination_directory/<date>/`` and one line is printed per date.

    Parameters:
    source_directory (str): Directory whose files are organized (non-recursive).
    destination_directory (str): Root under which the date folders are created.
    """
    if not os.path.isdir(source_directory):
        print(f"The source directory {source_directory} does not exist.")
        return

    if not os.path.exists(destination_directory):
        os.makedirs(destination_directory)

    # Pass 1: group file names by their date prefix.
    date_files_map = {}
    for entry in os.listdir(source_directory):
        if not os.path.isfile(os.path.join(source_directory, entry)):
            continue

        date_prefix = entry.split('_')[0]
        try:
            datetime.strptime(date_prefix, "%Y-%m-%d")  # validation only
        except (IndexError, ValueError):
            continue  # name does not start with a date; leave the file alone

        date_files_map.setdefault(date_prefix, []).append(entry)

    # Pass 2: create one folder per date and move that date's files into it.
    for date, names in date_files_map.items():
        sub_directory = os.path.join(destination_directory, date)
        if not os.path.exists(sub_directory):
            os.makedirs(sub_directory)

        for name in names:
            shutil.move(os.path.join(source_directory, name), sub_directory)

        print(f"Moved files with date {date} to sub-directory: {sub_directory}.")

# Example usage
# organize_files_by_date('/path/to/source_directory', '/path/to/destination_directory')


In [42]:
# Split the annotated files into one sub-folder per capture date.
# Files whose name does not start with a YYYY-MM-DD prefix before the first underscore are left in place.
source_directory = 'Pre_Processing/annotated_label_img_training_data'  # directory whose files are organized
destination_directory = 'Pre_Processing/organized_label_img_training_data'  # root for the per-date sub-folders
# Prints one "Moved files with date ..." line per date found.
organize_files_by_date(source_directory,destination_directory)

Moved files with date 2023-10-08 to sub-directory: Pre_Processing/organized_label_img_training_data/2023-10-08.
Moved files with date 2023-10-09 to sub-directory: Pre_Processing/organized_label_img_training_data/2023-10-09.
Moved files with date 2023-10-10 to sub-directory: Pre_Processing/organized_label_img_training_data/2023-10-10.
Moved files with date 2023-10-11 to sub-directory: Pre_Processing/organized_label_img_training_data/2023-10-11.
Moved files with date 2023-10-12 to sub-directory: Pre_Processing/organized_label_img_training_data/2023-10-12.
