In [1]:
import os
import shutil
from datetime import datetime, timedelta

In [2]:
def count_files(directory):
    """Count the regular files located directly inside ``directory``.

    Only the top level is inspected: sub-directories are neither descended
    into nor counted themselves.

    Args:
        directory: Path of the directory to inspect.

    Returns:
        The number of entries in ``directory`` for which ``os.path.isfile``
        is true. Returns 0 (after printing a notice) when ``directory`` is
        not an existing directory.
    """
    # isdir (rather than exists) also rejects a path that names a regular
    # file; os.listdir would raise NotADirectoryError on such a path.
    if not os.path.isdir(directory):
        print("Directory does not exist.")
        return 0

    return sum(
        1
        for item in os.listdir(directory)
        if os.path.isfile(os.path.join(directory, item))
    )

In [3]:
# Report how many files sit in the WAV directory and in the M4A directory.
# count_files counts every regular file regardless of extension, so the
# "wav" / "m4a" wording in the messages reflects the expected directory
# contents rather than a per-extension filter.
file_count = count_files('Audio_Work/wav_data')
print(f'There are {file_count} wav files in the directory.')
# file_count is rebound here; the WAV total above is no longer available.
file_count = count_files('new_training_data/audio')
print(f'There are {file_count} m4a files in the directory.')

There are 26651 wav files in the directory.
There are 26651 m4a files in the directory.


In [6]:
def copy_files_within_hours(file_name, file_directory, new_directory, hours=1):
    """Copy the files time-stamped within ``hours`` of a reference file name.

    File names are expected to look like
    ``<prefix>_YYYY-MM-DD_HH-MM-SS.<ext>``; the timestamp is taken from the
    second and third ``_``-separated fields. Every regular file directly
    inside ``file_directory`` whose timestamp lies in the inclusive window
    ``[reference - hours, reference + hours]`` is copied (with metadata, via
    ``shutil.copy2``) into ``new_directory/<file_name without extension>``.
    The reference file itself does not have to exist; only its name is used.

    A one-line summary, or a diagnostic when the inputs are unusable, is
    printed. Nothing is returned.

    Args:
        file_name: Reference file name that supplies the centre timestamp
            and the name of the destination sub-directory.
        file_directory: Directory that is scanned for candidate files.
        new_directory: Parent directory for the destination sub-directory;
            created when missing.
        hours: Half-width of the time window in hours. Defaults to 1, the
            previously hard-coded value. Passed to ``timedelta``, so
            fractional values are accepted.
    """

    def _parse_timestamp(name):
        # Raises IndexError when the name has fewer than three '_' fields
        # and ValueError when the fields do not form a valid timestamp.
        fields = name.split('_')
        stamp = fields[1] + '_' + fields[2].split('.')[0]
        return datetime.strptime(stamp, "%Y-%m-%d_%H-%M-%S")

    # Ensure file_directory is a valid directory
    if not os.path.isdir(file_directory):
        print(f"The directory {file_directory} does not exist.")
        return

    # Step 1: Extract the timestamp from the reference file name
    try:
        file_timestamp = _parse_timestamp(file_name)
    except (IndexError, ValueError):
        print(f"Timestamp format is incorrect in the file name: {file_name}")
        return

    # Inclusive time window around the reference timestamp
    window = timedelta(hours=hours)
    prev_hour = file_timestamp - window
    next_hour = file_timestamp + window

    # Step 2: Select the files whose timestamp falls inside the window
    selected_files = []
    for f in os.listdir(file_directory):
        if not os.path.isfile(os.path.join(file_directory, f)):
            continue
        try:
            timestamp = _parse_timestamp(f)
        except (IndexError, ValueError):
            continue  # Skip files that do not match the naming format
        if prev_hour <= timestamp <= next_hour:
            selected_files.append(f)

    # Step 3: Create the destination sub-directory and copy the files.
    # makedirs creates new_directory as well when it is missing, and
    # exist_ok avoids the separate check-then-create step.
    sub_directory_name = os.path.splitext(file_name)[0]  # Remove file extension
    sub_directory = os.path.join(new_directory, sub_directory_name)
    os.makedirs(sub_directory, exist_ok=True)

    for f in selected_files:
        shutil.copy2(os.path.join(file_directory, f), sub_directory)
    num_copied_files = len(selected_files)

    print(f"Copied {num_copied_files} files from {prev_hour.strftime('%Y-%m-%d_%H')} to {next_hour.strftime('%Y-%m-%d_%H')} to sub-directory: {sub_directory}.")

# copy_files_within_hours('output_2023-10-09_06-33-01.wav', 'Audio_Work/wav_data', 'Audio_Work/birth_inference_wav')


In [9]:
# Reference recordings; only the timestamp embedded in each name is used to
# centre the copy window.
file_name_1="output_2023-10-09_06-33-01.wav" # birth call instance 1
file_name_2="output_2023-10-08_16-23-35.wav" # no birth call
file_name_3="output_2023-10-17_16-01-01.wav" # birth call instance 2 and doesn't exist
file_name_4="output_2023-11-04_06-31-01.wav" # birth call instance 3

# Copy every recording within one hour either side of reference 3 into
# Audio_Work/birth_inference_wav/<reference name without extension>.
copy_files_within_hours(file_name_3, 'Audio_Work/wav_data', 'Audio_Work/birth_inference_wav')



Copied 0 files from 2023-10-17_15 to 2023-10-17_17 to sub-directory: Audio_Work/birth_inference_wav/output_2023-10-17_16-01-01.


In [13]:
import os
import shutil
from datetime import datetime, timedelta

def copy_files_within_specified_range(file_name, file_directory, new_directory, minutes=7):
    """Copy the files time-stamped within ``minutes`` of a reference file name.

    File names are expected to look like
    ``<prefix>_YYYY-MM-DD_HH-MM-SS.<ext>``; the timestamp is taken from the
    second and third ``_``-separated fields. Every regular file directly
    inside ``file_directory`` whose timestamp lies in the inclusive window
    ``[reference - minutes, reference + minutes]`` is copied (with metadata,
    via ``shutil.copy2``) into
    ``new_directory/<file_name without extension>``.

    When the reference file itself is not present in ``file_directory`` a
    notice is printed and the window start is moved one further minute back.

    A one-line summary, or a diagnostic when the inputs are unusable, is
    printed. Nothing is returned.

    Args:
        file_name: Reference file name that supplies the centre timestamp
            and the name of the destination sub-directory.
        file_directory: Directory that is scanned for candidate files.
        new_directory: Parent directory for the destination sub-directory;
            created when missing.
        minutes: Half-width of the time window in minutes. Defaults to 7,
            the previously hard-coded value. Passed to ``timedelta``, so
            fractional values are accepted.
    """

    def _parse_timestamp(name):
        # Raises IndexError when the name has fewer than three '_' fields
        # and ValueError when the fields do not form a valid timestamp.
        fields = name.split('_')
        stamp = fields[1] + '_' + fields[2].split('.')[0]
        return datetime.strptime(stamp, "%Y-%m-%d_%H-%M-%S")

    # Ensure file_directory is a valid directory
    if not os.path.isdir(file_directory):
        print(f"The directory {file_directory} does not exist.")
        return

    # Step 1: Extract the timestamp from the reference file name
    try:
        file_timestamp = _parse_timestamp(file_name)
    except (IndexError, ValueError):
        print(f"Timestamp format is incorrect in the file name: {file_name}")
        return

    # Inclusive time window around the reference timestamp
    window = timedelta(minutes=minutes)
    prev_time = file_timestamp - window
    next_time = file_timestamp + window

    # Widen the window by one minute at the start when the reference file is
    # missing. NOTE(review): presumably this keeps the number of copied files
    # the same as when the reference exists, assuming one recording per
    # minute -- confirm.
    if not os.path.isfile(os.path.join(file_directory, file_name)):
        print(f"The specific file {file_name} does not exist.")
        prev_time -= timedelta(minutes=1)

    # Step 2: Select the files whose timestamp falls inside the window
    selected_files = []
    for f in os.listdir(file_directory):
        if not os.path.isfile(os.path.join(file_directory, f)):
            continue
        try:
            timestamp = _parse_timestamp(f)
        except (IndexError, ValueError):
            continue  # Skip files that do not match the naming format
        if prev_time <= timestamp <= next_time:
            selected_files.append(f)

    # Step 3: Create the destination sub-directory and copy the files.
    # makedirs creates new_directory as well when it is missing, and
    # exist_ok avoids the separate check-then-create step.
    sub_directory_name = os.path.splitext(file_name)[0]  # Remove file extension
    sub_directory = os.path.join(new_directory, sub_directory_name)
    os.makedirs(sub_directory, exist_ok=True)

    for f in selected_files:
        shutil.copy2(os.path.join(file_directory, f), sub_directory)
    num_copied_files = len(selected_files)

    print(f"Copied {num_copied_files} files from {prev_time.strftime('%Y-%m-%d_%H-%M-%S')} to {next_time.strftime('%Y-%m-%d_%H-%M-%S')} to sub-directory: {sub_directory}.")



In [14]:
# Reference recordings; only the timestamp embedded in each name is used to
# centre the copy window.
# NOTE(review): the recorded output of this cell reports file_name_1 and
# file_name_4 as missing from Audio_Work/wav_data, which the per-line notes
# below do not mention -- confirm which references actually exist.
file_name_1="output_2023-10-09_06-33-01.wav" # birth call instance 1
file_name_2="output_2023-10-08_16-23-35.wav" # no birth call
file_name_3="output_2023-10-17_16-01-01.wav" # birth call instance 2 and doesn't exist
file_name_4="output_2023-11-04_06-31-01.wav" # birth call instance 3

# Copy the recordings within seven minutes either side of references 1 and 4
# (one extra minute earlier when the reference file itself is missing) into
# Audio_Work/birth_inference_minutes_wav/<reference name without extension>.
copy_files_within_specified_range(file_name_1, 'Audio_Work/wav_data', 'Audio_Work/birth_inference_minutes_wav')
copy_files_within_specified_range(file_name_4, 'Audio_Work/wav_data', 'Audio_Work/birth_inference_minutes_wav')



The specific file output_2023-10-09_06-33-01.wav does not exist.
Copied 15 files from 2023-10-09_06-25-01 to 2023-10-09_06-40-01 to sub-directory: Audio_Work/birth_inference_minutes_wav/output_2023-10-09_06-33-01.
The specific file output_2023-11-04_06-31-01.wav does not exist.
Copied 15 files from 2023-11-04_06-23-01 to 2023-11-04_06-38-01 to sub-directory: Audio_Work/birth_inference_minutes_wav/output_2023-11-04_06-31-01.
