In [2]:
import os
import pandas as pd
import random
from shutil import copy2

# Paths and configurations
csv_file = "./Data/fits_ext/dictionary_0.csv"
output_folder = "./Data/fits_ext"
input_folder = "./Data/merged_fits"
x_files_to_copy = 2  # Number of files to randomly copy


def next_output_id(used_names, row_count=0, prefix="tic", suffix=".fit"):
    """Return the first numeric ID that cannot collide with an existing output.

    Args:
        used_names: Iterable of output names already taken (CSV entries and/or
            files on disk). Non-string values (e.g. NaN from an empty CSV cell)
            and names that are not of the form ``<prefix><digits><suffix>``
            are ignored.
        row_count: Number of rows already in the dictionary. Used as a floor so
            that the result is never lower than the historical ``rows + 1``.
        prefix: Text preceding the numeric ID in an output name.
        suffix: Text following the numeric ID in an output name.

    Returns:
        One more than the largest of ``row_count`` and every ID found in
        ``used_names``.
    """
    highest = row_count
    for name in used_names:
        if not isinstance(name, str):
            continue
        if name.startswith(prefix) and name.endswith(suffix):
            digits = name[len(prefix):len(name) - len(suffix)]
            if digits.isdecimal():
                highest = max(highest, int(digits))
    return highest + 1


# Create the output folder if it doesn't exist
os.makedirs(output_folder, exist_ok=True)

# Load the existing CSV or create a new DataFrame
if os.path.exists(csv_file):
    dictionary_df = pd.read_csv(csv_file)
else:
    dictionary_df = pd.DataFrame(columns=["input", "output", "label"])

# Get the list of existing files and determine the next ID for naming.
# The ID is derived from the names already used (in the CSV and on disk), not
# just the row count, so a gap in the CSV or a stray file can never cause an
# existing ticN.fit to be overwritten by copy2.
existing_files = set(dictionary_df["input"].tolist())
used_output_names = dictionary_df["output"].tolist() + os.listdir(output_folder)
next_id = next_output_id(used_output_names, dictionary_df.shape[0])

# Step 2: Find all `.fit` files in the input folder that are not yet recorded.
# Directories that happen to end in ".fit" are skipped because copy2 cannot
# copy them.
if os.path.isdir(input_folder):
    input_files = [
        f
        for f in os.listdir(input_folder)
        if f.endswith(".fit")
        and f not in existing_files
        and os.path.isfile(os.path.join(input_folder, f))
    ]
else:
    print(f"Input folder not found: {input_folder}")
    input_files = []

# Step 3: Randomly select X files from the available files
files_to_copy = random.sample(input_files, min(len(input_files), x_files_to_copy))

# Step 4: Copy selected files, update the dictionary, and print details
new_entries = []
try:
    for file in files_to_copy:
        # Define input and output paths
        input_path = os.path.join(input_folder, file)
        output_file_name = f"tic{next_id}.fit"  # Generate the new output file name
        output_path = os.path.join(output_folder, output_file_name)

        # Copy the file
        copy2(input_path, output_path)

        # Append the new entry
        new_entries.append({"input": file, "output": output_file_name, "label": 0})

        # Print details
        print(f"Copied: {file} -> {output_file_name}")

        # Increment the next ID
        next_id += 1
finally:
    # Append new entries to the DataFrame and save it back to the CSV.
    # Done in `finally` so files copied before a mid-run failure are still
    # recorded and will not be picked again on the next run.
    if new_entries:
        dictionary_df = pd.concat([dictionary_df, pd.DataFrame(new_entries)], ignore_index=True)
        dictionary_df.to_csv(csv_file, index=False)

print(f"\nCopied {len(new_entries)} files to {output_folder} and updated {csv_file}.")


Copied: MPC_12PslashPons-Brooks_B_180s_ZWO ASI1600MM-Cool_2024-02-22 19-12-26.fit -> tic48.fit
Copied: CD-31_3271_ingress_1_R_20s_ZWO ASI1600MM-Cool_2024-02-13 22-52-26.fit -> tic49.fit

Copied 2 files to ./Data/fits_ext and updated ./Data/fits_ext/dictionary_0.csv.
