In [1]:
import os
import pandas as pd
import random
from shutil import copy2

# Locations used by this cell: the source images, the destination folder,
# and the CSV dictionary that maps each source file to its copied name.
input_folder = "./Data/merged_fits"
output_folder = "./Data/fits2_ext"
csv_file = "./Data/fits2_ext/dictionary_0.csv"

# Make sure the destination directory is present before anything is written to it
os.makedirs(output_folder, exist_ok=True)

# Start from an empty table with the expected columns on a first run;
# otherwise continue from the dictionary written by a previous run.
if not os.path.exists(csv_file):
    dictionary_df = pd.DataFrame(columns=["input", "output", "label"])
else:
    dictionary_df = pd.read_csv(csv_file)

# Count the no-streak images (label 0) already recorded in the dictionary
current_no_streak_count = int((dictionary_df["label"] == 0).sum())

# Target number of no-streak images (hard-coded; edit this value to change the target)
desired_no_streak_count = 120

# Determine how many more no-streak images are needed (never negative)
additional_no_streak_needed = max(0, desired_no_streak_count - current_no_streak_count)

if additional_no_streak_needed == 0:
    print("The output folder already contains the requested number of no-streak images.")
else:
    print(f"Additional no-streak images needed: {additional_no_streak_needed}")

    # Source files already recorded in the dictionary must not be picked again
    existing_files = set(dictionary_df["input"].tolist())
    # Output names already recorded in the dictionary must never be reused
    existing_outputs = set(dictionary_df["output"].astype(str))
    next_id = dictionary_df.shape[0] + 1  # First candidate ID; taken names are skipped below

    # Step 2: Find all unused `.fit` files in the input folder.
    # os.listdir returns entries in arbitrary order, so sort to give the
    # sampling step a stable population.
    input_files = sorted(
        f for f in os.listdir(input_folder) if f.endswith(".fit") and f not in existing_files
    )

    # Step 3: Randomly select the needed number of files from the available files
    files_to_copy = random.sample(input_files, min(len(input_files), additional_no_streak_needed))
    if len(files_to_copy) < additional_no_streak_needed:
        print(
            f"Warning: only {len(files_to_copy)} unused .fit files are available in "
            f"{input_folder}; {additional_no_streak_needed} were requested."
        )

    # Step 4: Copy selected files, update the dictionary, and print details
    new_entries = []
    try:
        for file in files_to_copy:
            input_path = os.path.join(input_folder, file)

            # Advance past any name that is already recorded in the dictionary
            # or already present on disk: copy2 would silently replace an
            # existing file, destroying a previously labelled image.
            output_file_name = f"tic{next_id}.fit"
            output_path = os.path.join(output_folder, output_file_name)
            while output_file_name in existing_outputs or os.path.exists(output_path):
                next_id += 1
                output_file_name = f"tic{next_id}.fit"
                output_path = os.path.join(output_folder, output_file_name)

            # Copy the file
            copy2(input_path, output_path)

            # Record the new entry only after the copy has succeeded
            new_entries.append({"input": file, "output": output_file_name, "label": 0})
            existing_outputs.add(output_file_name)

            print(f"Copied: {file} -> {output_file_name}")

            next_id += 1
    finally:
        # Persist every completed copy even if a later copy raised, so the
        # dictionary never falls behind the contents of the output folder.
        if new_entries:
            dictionary_df = pd.concat([dictionary_df, pd.DataFrame(new_entries)], ignore_index=True)
            dictionary_df.to_csv(csv_file, index=False)

    print(f"\nCopied {len(new_entries)} files to {output_folder} and updated {csv_file}.")


Additional no-streak images needed: 96
Copied: A012_ PSN_J12355230plus2755559_R_600s_ZWO ASI1600MM-Cool_2024-02-27 21-06-37.fit -> tic48.fit
Copied: ExoClk_KPS-1b_low_R_165s_ZWO ASI1600MM-Cool_2024-02-02 02-59-25.fit -> tic49.fit
Copied: A012_ PSN_J12355230plus2755559_R_600s_ZWO ASI1600MM-Cool_2024-02-28 00-45-47.fit -> tic50.fit
Copied: ExoClk_Qatar-10b_low_R_127s_ZWO ASI1600MM-Cool_2024-02-01 23-21-31.fit -> tic51.fit
Copied: A012_ PSN_J12355230plus2755559_R_600s_ZWO ASI1600MM-Cool_2024-02-28 02-56-10.fit -> tic52.fit
Copied: A012_ PSN_J12355230plus2755559_R_600s_ZWO ASI1600MM-Cool_2024-02-28 04-09-09.fit -> tic53.fit
Copied: CD-31_3271_ingress_1_R_20s_ZWO ASI1600MM-Cool_2024-02-13 23-07-33.fit -> tic54.fit
Copied: A012_ PSN_J12355230plus2755559_R_600s_ZWO ASI1600MM-Cool_2024-02-21 02-40-11.fit -> tic55.fit
Copied: CD-31_3271_ingress_1_R_20s_ZWO ASI1600MM-Cool_2024-02-13 22-05-38.fit -> tic56.fit
Copied: CD-31_3271_ingress_1_R_20s_ZWO ASI1600MM-Cool_2024-02-13 23-06-07.fit -> tic57.f