In [1]:
import os
import shutil
import random
from concurrent.futures import ThreadPoolExecutor

def move_file(file, source_dir, dest_dir):
    shutil.move(os.path.join(source_dir, file), os.path.join(dest_dir, file))

def separate_fits_spectra(source_dir, validation_dir, training_dir, validation_count=400):
    """
    Separates FITS spectra into validation and training sets.

    Parameters:
    source_dir (str): Path to the source directory containing FITS spectra.
    validation_dir (str): Path to the directory where validation set will be stored.
    training_dir (str): Path to the directory where training set will be stored.
    validation_count (int): Number of files to be used for the validation set. Default is 400.
    """
    # Create the destination directories if they don't exist
    os.makedirs(validation_dir, exist_ok=True)
    os.makedirs(training_dir, exist_ok=True)

    # Get a list of all fits files in the source directory
    fits_files = [f for f in os.listdir(source_dir) if f.endswith('.fits')]

    # Shuffle the list of fits files to ensure randomness
    random.shuffle(fits_files)

    # Separate files for the validation set
    validation_files = fits_files[:validation_count]

    # The remaining files will be used for the training set
    training_files = fits_files[validation_count:]

    # Use ThreadPoolExecutor to move files concurrently
    with ThreadPoolExecutor() as executor:
        # Move the validation files to the validation directory
        executor.map(move_file, validation_files, [source_dir]*len(validation_files), [validation_dir]*len(validation_files))
        
        # Move the training files to the training directory
        executor.map(move_file, training_files, [source_dir]*len(training_files), [training_dir]*len(training_files))

    print("Files have been successfully separated into validation and training sets.")



In [2]:
import os
import shutil
import random
from concurrent.futures import ThreadPoolExecutor
from tqdm import tqdm

def move_file(file, source_dir, dest_dir):
    shutil.move(os.path.join(source_dir, file), os.path.join(dest_dir, file))

def separate_fits_spectra(source_dir, validation_dir, training_dir, validation_count=400):
    """
    Separates FITS spectra into validation and training sets.

    Parameters:
    source_dir (str): Path to the source directory containing FITS spectra.
    validation_dir (str): Path to the directory where validation set will be stored.
    training_dir (str): Path to the directory where training set will be stored.
    validation_count (int): Number of files to be used for the validation set. Default is 400.
    """
    # Create the destination directories if they don't exist
    os.makedirs(validation_dir, exist_ok=True)
    os.makedirs(training_dir, exist_ok=True)

    # Get a list of all fits files in the source directory
    fits_files = [f for f in os.listdir(source_dir) if f.endswith('.fits')]

    # Shuffle the list of fits files to ensure randomness
    random.shuffle(fits_files)

    # Separate files for the validation set
    validation_files = fits_files[:validation_count]

    # The remaining files will be used for the training set
    training_files = fits_files[validation_count:]

    # Use ThreadPoolExecutor to move files concurrently with tqdm progress bar
    with ThreadPoolExecutor() as executor:
        # Move the validation files to the validation directory
        list(tqdm(executor.map(move_file, validation_files, [source_dir]*len(validation_files), [validation_dir]*len(validation_files)), total=len(validation_files), desc="Moving validation files"))
        
        # Move the training files to the training directory
        list(tqdm(executor.map(move_file, training_files, [source_dir]*len(training_files), [training_dir]*len(training_files)), total=len(training_files), desc="Moving training files"))

    print("Files have been successfully separated into validation and training sets.")

# Example usage:
# separate_fits_spectra('path_to_your_fits_spectra_folder', 'path_to_validation_set_folder', 'path_to_training_set_folder')


In [11]:
import os
import shutil
import random
from concurrent.futures import ThreadPoolExecutor
from tqdm import tqdm

def move_file(file, source_dir, dest_dir):
    try:
        shutil.move(os.path.join(source_dir, file), os.path.join(dest_dir, file))
        print(f"Moved file: {file}")
    except Exception as e:
        print(f"Error moving file {file}: {e}")

def separate_fits_spectra(source_dir, validation_dir, training_dir, validation_count=400):
    """
    Separates FITS spectra into validation and training sets.

    Parameters:
    source_dir (str): Path to the source directory containing FITS spectra.
    validation_dir (str): Path to the directory where validation set will be stored.
    training_dir (str): Path to the directory where training set will be stored.
    validation_count (int): Number of files to be used for the validation set. Default is 400.
    """
    # Create the destination directories if they don't exist
    os.makedirs(validation_dir, exist_ok=True)
    os.makedirs(training_dir, exist_ok=True)

    # Get a list of all fits files in the source directory
    fits_files = [f for f in os.listdir(source_dir) ]#if f.endswith('.fits')]
    print(f"Found {len(fits_files)} FITS files in the source directory.")

    # Shuffle the list of fits files to ensure randomness
    random.shuffle(fits_files)

    # Separate files for the validation set
    validation_files = fits_files[:validation_count]
    print(f"Selected {len(validation_files)} files for the validation set.")

    # The remaining files will be used for the training set
    training_files = fits_files[validation_count:]
    print(f"Selected {len(training_files)} files for the training set.")

    # Use ThreadPoolExecutor to move files concurrently with tqdm progress bar
    with ThreadPoolExecutor() as executor:
        # Move the validation files to the validation directory
        list(tqdm(executor.map(move_file, validation_files, [source_dir]*len(validation_files), [validation_dir]*len(validation_files)), total=len(validation_files), desc="Moving validation files"))
        
        # Move the training files to the training directory
        list(tqdm(executor.map(move_file, training_files, [source_dir]*len(training_files), [training_dir]*len(training_files)), total=len(training_files), desc="Moving training files"))

    print("Files have been successfully separated into validation and training sets.")

# Example usage:
# separate_fits_spectra('path_to_your_fits_spectra_folder', 'path_to_validation_set_folder', 'path_to_training_set_folder')


In [4]:
# Example usage:
separate_fits_spectra('\\wsl.localhost\Ubuntu\home\jcwind\Star Classifier\Star-Classifier\gal_spectra', '\\wsl.localhost\Ubuntu\home\jcwind\Star Classifier\Star-Classifier\gal_spectravalidation_set', '\\wsl.localhost\Ubuntu\home\jcwind\Star Classifier\Star-Classifier\gal_spectra/training_set')

SyntaxError: (unicode error) 'unicodeescape' codec can't decode bytes in position 15-17: truncated \UXXXXXXXX escape (2470976859.py, line 2)

In [19]:
separate_fits_spectra('/home/jcwind/Star Classifier/Star-Classifier/gal_spectra', 
                      '/home/jcwind/Star Classifier/Star-Classifier/validation_set/gal_spectra', 
                      '/home/jcwind/Star Classifier/Star-Classifier/training_set/gal_spectra')
separate_fits_spectra('/home/jcwind/Star Classifier/Star-Classifier/star_spectra', 
                        '/home/jcwind/Star Classifier/Star-Classifier/validation_set/star_spectra', 
                        '/home/jcwind/Star Classifier/Star-Classifier/training_set/star_spectra')
separate_fits_spectra('/home/jcwind/Star Classifier/Star-Classifier/agn_spectra',
                        '/home/jcwind/Star Classifier/Star-Classifier/validation_set/agn_spectra',
                        '/home/jcwind/Star Classifier/Star-Classifier/training_set/agn_spectra')
separate_fits_spectra('/home/jcwind/Star Classifier/Star-Classifier/bin_spectra',
                        '/home/jcwind/Star Classifier/Star-Classifier/validation_set/bin_spectra',
                        '/home/jcwind/Star Classifier/Star-Classifier/training_set/bin_spectra')



Found 0 FITS files in the source directory.
Selected 0 files for the validation set.
Selected 0 files for the training set.


Moving validation files: 0it [00:00, ?it/s]
Moving training files: 0it [00:00, ?it/s]


Files have been successfully separated into validation and training sets.
Found 0 FITS files in the source directory.
Selected 0 files for the validation set.
Selected 0 files for the training set.


Moving validation files: 0it [00:00, ?it/s]
Moving training files: 0it [00:00, ?it/s]


Files have been successfully separated into validation and training sets.
Found 0 FITS files in the source directory.
Selected 0 files for the validation set.
Selected 0 files for the training set.


Moving validation files: 0it [00:00, ?it/s]
Moving training files: 0it [00:00, ?it/s]


Files have been successfully separated into validation and training sets.
Found 0 FITS files in the source directory.
Selected 0 files for the validation set.
Selected 0 files for the training set.


Moving validation files: 0it [00:00, ?it/s]
Moving training files: 0it [00:00, ?it/s]

Files have been successfully separated into validation and training sets.



