# Data Prepper
### Casper Hindriks | S4553128


### Tasks

* Read multiple types of data structures.
* Correctly detect and use the column names in each dataset.
* Produce **two output files** for each star:

  * **`{star_name}_flares.csv`**

    * A CSV file containing all extracted flare information (include the specific parameters you want to store).
  * **`{star_name}_times.txt`**

    * A text file listing all observation timestamps.
    * Used to calculate the **total observing time** and to **identify gaps** in the observations (i.e., a complete chronological list of all data points).


In [2]:
# --- Standard Library ---

import math

# --- Third-Party Libraries ---
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from matplotlib import rcParams, font_manager as fm
from astropy.io import fits
from scipy.stats import stats, expon, erlang
from scipy.optimize import curve_fit
from tqdm import tqdm  # progress bar
from sklearn.cluster import DBSCAN
from scipy.stats import kstest
import matplotlib.patches as mpatches
import os


# --- Global Settings ---
np.set_printoptions(suppress=True, precision=6)

# --- Load custom font --- 
custom_font = fm.FontProperties(
    fname="/usr/share/fonts/dejavu-serif-fonts/DejaVuSerifCondensed.ttf"
)

# --- Apply global Matplotlib settings --- 
plt.rcParams.update({
    "font.family": custom_font.get_name(),
    "mathtext.fontset": "stix",
    "font.size": 12,
    "figure.dpi": 150
})


In [19]:
def flare_file_detector(star_name):
    
    # Get all file names in the directory
    directory_path = f"../Data & Results/{star_name}/data"
    file_names = [f for f in os.listdir(directory_path) if os.path.isfile(os.path.join(directory_path, f))]
    
    # Create full paths
    file_paths = [os.path.join(directory_path, f) for f in file_names]
    
    number_of_files = len(file_names)
    
    # Print detections
    for i in range(number_of_files):
        print(f"Detected file named: {file_names[i]} in directory: {file_paths[i]}")
    
    return number_of_files, file_names, file_paths

In [21]:
number_of_files, file_names, file_paths = flare_file_detector('hip67522')

Detected file named: cheops_all_timeseries.csv in directory: ../Data & Results/hip67522/data/cheops_all_timeseries.csv
Detected file named: tess_hip67522_11.fits in directory: ../Data & Results/hip67522/data/tess_hip67522_11.fits
Detected file named: tess_hip67522_38.fits in directory: ../Data & Results/hip67522/data/tess_hip67522_38.fits
Detected file named: hip67522_flares.csv in directory: ../Data & Results/hip67522/data/hip67522_flares.csv
Detected file named: tess_hip67522_64.fits in directory: ../Data & Results/hip67522/data/tess_hip67522_64.fits


In [24]:
print(file_names)

['cheops_all_timeseries.csv', 'tess_hip67522_11.fits', 'tess_hip67522_38.fits', 'hip67522_flares.csv', 'tess_hip67522_64.fits']
