In [9]:
import os
import re
import shutil
from datetime import datetime, timedelta

In [5]:
# Find the unique dates that appear in the file names of a directory

def extract_unique_dates(directory):
    """Collect the distinct valid ``YYYY-MM-DD`` dates found in entry names.

    Only the first date-shaped substring of each name is considered. Names
    without such a substring, or whose substring is not a real calendar date
    (e.g. month 13), contribute nothing.

    Args:
        directory: Path of the directory whose entries are listed
            (non-recursively) with ``os.listdir``.

    Returns:
        set of date strings exactly as they appear in the names.
    """
    iso_date = re.compile(r"\d{4}-\d{2}-\d{2}")
    found = set()

    for entry in os.listdir(directory):
        hit = iso_date.search(entry)
        if hit is None:
            continue

        candidate = hit.group()
        try:
            # Parsing is only a validity check; the original text is what gets stored.
            datetime.strptime(candidate, "%Y-%m-%d")
        except ValueError:
            continue
        found.add(candidate)

    return found




In [8]:
# Show which dates occur in the annotated training set and in the new image set.
for directory in (
    "Pre_Processing/pairs_annotated_label_img_training_data",
    "new_training_data/images",
):
    print(extract_unique_dates(directory))


{'2023-10-09', '2023-10-11', '2023-10-12', '2023-10-10'}
{'2023-10-12', '2023-10-20', '2023-10-24', '2023-10-31', '2023-11-02', '2023-10-15', '2023-10-16', '2023-10-21', '2023-10-25', '2023-10-29', '2023-10-27', '2023-10-09', '2023-10-18', '2023-10-11', '2023-11-03', '2023-10-13', '2023-11-04', '2023-10-10', '2023-10-17', '2023-10-22', '2023-11-01', '2023-10-19', '2023-10-26', '2023-10-30', '2023-10-28', '2023-10-23', '2023-10-08', '2023-10-14'}


In [11]:
import os
import shutil
from datetime import datetime, timedelta

def create_weekly_directories_and_copy_files(src_directory, output_directory, week_start_weekday=0):
    """Copy dated files from ``src_directory`` into one folder per week.

    Every regular file whose name contains a valid ``YYYY-MM-DD`` date is
    copied with ``shutil.copy2`` into ``output_directory/<week start date>/``.
    Entries without a valid date, and entries that are not regular files
    (e.g. sub-directories), are ignored. The source files are left in place.

    Args:
        src_directory: Directory whose entries are scanned (non-recursively).
        output_directory: Directory under which the weekly folders are created.
        week_start_weekday: Day on which a week begins, in
            ``datetime.weekday()`` numbering (0 = Monday ... 6 = Sunday).
            Defaults to 0, i.e. Monday-based weeks.

    Returns:
        dict mapping each week's start date (``"YYYY-MM-DD"``) to the list of
        file names copied into that week's folder.

    Raises:
        ValueError: If ``week_start_weekday`` is not in the range 0..6.
    """
    if week_start_weekday not in range(7):
        raise ValueError(
            f"week_start_weekday must be between 0 (Monday) and 6 (Sunday), got {week_start_weekday!r}"
        )

    # Regular expression to match the date format in file names
    date_pattern = re.compile(r"\d{4}-\d{2}-\d{2}")

    # Week start date -> names of the files that fall into that week
    weekly_files = {}

    for filename in os.listdir(src_directory):
        match = date_pattern.search(filename)
        if not match:
            continue

        try:
            date = datetime.strptime(match.group(), "%Y-%m-%d")
        except ValueError:
            # Looks like a date but is not a real one (e.g. month 13)
            continue

        # os.listdir also yields sub-directories; copy2 can only copy files,
        # so anything that is not a regular file is skipped instead of crashing.
        if not os.path.isfile(os.path.join(src_directory, filename)):
            continue

        # Number of days elapsed since the most recent configured week start
        days_into_week = (date.weekday() - week_start_weekday) % 7
        start_of_week = date - timedelta(days=days_into_week)
        weekly_files.setdefault(start_of_week.strftime("%Y-%m-%d"), []).append(filename)

    # Create one directory per week and copy the files (with metadata) into it
    for week_start, files in weekly_files.items():
        week_dir = os.path.join(output_directory, week_start)
        os.makedirs(week_dir, exist_ok=True)

        for file in files:
            shutil.copy2(os.path.join(src_directory, file), os.path.join(week_dir, file))

    return weekly_files


In [12]:
# Copy the dated files from the image folder into per-week sub-folders of the output folder.
# NOTE(review): "spllit" is spelled the same way in the later move_jpegs_without_xml call;
# keep both in sync if the folder is ever renamed.
src_directory = "new_training_data/images"
output_directory = "Pre_Processing/weekly_data_spllit"
# Bare call as the last expression: its return value (week start -> file names) is the cell result.
create_weekly_directories_and_copy_files(src_directory, output_directory)

In [1]:
import os
import shutil

# Move images without corresponding XML files to a new directory

def move_jpegs_without_xml(source_directory, destination_directory):
    """Move JPEG files that lack a same-named ``.xml`` file out of a folder.

    A file counts as a JPEG when its name ends in ``.jpg`` or ``.jpeg``
    (case-insensitive). It is moved when ``<stem>.xml`` is not a file in
    ``source_directory``. Progress is reported with ``print``.

    Args:
        source_directory: Folder to scan (non-recursively). If it does not
            exist, a message is printed and nothing else happens.
        destination_directory: Folder receiving the moved files; created
            when missing.

    Returns:
        None.
    """
    # Guard: nothing to do without a source folder.
    if not os.path.isdir(source_directory):
        print(f"The source directory {source_directory} does not exist.")
        return

    # Make sure there is somewhere to move the files to.
    if not os.path.isdir(destination_directory):
        os.makedirs(destination_directory)
        print(f"Created destination directory: {destination_directory}")

    jpeg_suffixes = (".jpeg", ".jpg")
    for entry in os.listdir(source_directory):
        if not entry.lower().endswith(jpeg_suffixes):
            continue

        stem, _extension = os.path.splitext(entry)
        annotation_path = os.path.join(source_directory, stem + ".xml")
        if os.path.isfile(annotation_path):
            # Annotated image: leave it where it is.
            continue

        shutil.move(os.path.join(source_directory, entry), destination_directory)
        print(f"Moved: {entry}")

    print("Operation completed.")


In [None]:
# Move JPEGs that have no matching .xml file out of one weekly folder into a separate folder.
# NOTE(review): 2023-10-08 is a Sunday, while create_weekly_directories_and_copy_files names
# weekly folders after the Monday of each week by default - confirm this source folder exists.
move_jpegs_without_xml('Pre_Processing/weekly_data_spllit/2023-10-08', 'Pre_Processing/weekly_data_spllit_not_annotated')
