# Visualizing Light Curves #

## Import Statements

In [2]:
import os
import glob
import pandas as pd
import matplotlib.pyplot as plt
from astropy.io import fits
import random


## Functions to load light curves ##

In [3]:
# Path to the directory containing the FITS files
data_dir = '/pool001/rarcodia/eROSITA_public/data/eRASS1_lc_rebinned'

# Function to load a single FITS file and return as a Pandas DataFrame
def load_light_curve(file_path, band = 1):
    with fits.open(file_path) as hdul:
        data = hdul[1].data  # Assuming light curve data is in the second HDU
        try:
            return pd.DataFrame({
                'TIME': data['TIME'],
                'TIMEDEL': data['TIMEDEL'],
                'RATE': data['RATE'][:, band],  # Use the second energy range
            })
        except KeyError:
            print(f"Skipping file {file_path}: some key not found")
            return None

In [8]:
def check_lightcurve_permissions(data_dir = '/pool001/rarcodia/eROSITA_public/data/eRASS1_lc_rebinned'):
    """
    Check permissions for all light curve files in the given directory and save inaccessible ones to a file
    in the same directory as this script.
    
    Args:
        data_dir (str): Path to the directory containing light curve files
        
    Returns:
        List[str]: List of files that could not be accessed
    """
    inaccessible_files = []
    
    # Check if the data directory exists and is accessible
    if not os.path.exists(data_dir):
        raise FileNotFoundError(f"Directory {data_dir} does not exist")
    
    if not os.access(data_dir, os.R_OK):
        raise PermissionError(f"No read permission for directory {data_dir}")
    
    # Get all FITS files in the directory
    fits_files = glob.glob(os.path.join(data_dir, "*.fits"))
    
    # Check each file for read permission
    for file_path in fits_files:
        if not os.access(file_path, os.R_OK):
            inaccessible_files.append(file_path)
    
    # Save inaccessible files to a text file in script directory
    output_file = os.path.join(os.getcwd(), "inaccessible_lightcurves.txt")
    with open(output_file, "w") as f:
        for file_path in inaccessible_files:
            f.write(f"{file_path}\n")
    
    print(f"Saved inaccessible files list to: {output_file}")
    return inaccessible_files

def read_inaccessible_lightcurves():
    """
    Read the list of inaccessible light curves from the text file in the notebook directory.
    
    Returns:
        list: List of file paths that were inaccessible
    """
    file_path = os.path.join(os.getcwd(), "inaccessible_lightcurves.txt")
    
    try:
        with open(file_path, 'r') as f:
            # Read all lines and remove any trailing whitespace
            inaccessible_files = [line.strip() for line in f.readlines()]
        return inaccessible_files
    except FileNotFoundError:
        print(f"No inaccessible light curves file found at {file_path}")
        return []

a = check_lightcurve_permissions()
for i in a:
    print(i)

Saved inaccessible files list to: /orcd/home/002/pdong/Astro UROP/inaccessible_lightcurves.txt
/pool001/rarcodia/eROSITA_public/data/eRASS1_lc_rebinned/em01_039108_020_LightCurve_00089_c010_rebinned.fits
/pool001/rarcodia/eROSITA_public/data/eRASS1_lc_rebinned/em01_041171_020_LightCurve_00067_c010_rebinned.fits
/pool001/rarcodia/eROSITA_public/data/eRASS1_lc_rebinned/em01_045132_020_LightCurve_00051_c010_rebinned.fits
/pool001/rarcodia/eROSITA_public/data/eRASS1_lc_rebinned/em01_072105_020_LightCurve_00016_c010_rebinned.fits
/pool001/rarcodia/eROSITA_public/data/eRASS1_lc_rebinned/em01_062144_020_LightCurve_00087_c010_rebinned.fits
/pool001/rarcodia/eROSITA_public/data/eRASS1_lc_rebinned/em01_039108_020_LightCurve_00177_c010_rebinned.fits
/pool001/rarcodia/eROSITA_public/data/eRASS1_lc_rebinned/em01_056174_020_LightCurve_00052_c010_rebinned.fits
/pool001/rarcodia/eROSITA_public/data/eRASS1_lc_rebinned/em01_049108_020_LightCurve_00103_c010_rebinned.fits
/pool001/rarcodia/eROSITA_public/

In [9]:
# print(read_inaccessible_lightcurves())

['/pool001/rarcodia/eROSITA_public/data/eRASS1_lc_rebinned/em01_039108_020_LightCurve_00089_c010_rebinned.fits', '/pool001/rarcodia/eROSITA_public/data/eRASS1_lc_rebinned/em01_041171_020_LightCurve_00067_c010_rebinned.fits', '/pool001/rarcodia/eROSITA_public/data/eRASS1_lc_rebinned/em01_045132_020_LightCurve_00051_c010_rebinned.fits', '/pool001/rarcodia/eROSITA_public/data/eRASS1_lc_rebinned/em01_072105_020_LightCurve_00016_c010_rebinned.fits', '/pool001/rarcodia/eROSITA_public/data/eRASS1_lc_rebinned/em01_062144_020_LightCurve_00087_c010_rebinned.fits', '/pool001/rarcodia/eROSITA_public/data/eRASS1_lc_rebinned/em01_039108_020_LightCurve_00177_c010_rebinned.fits', '/pool001/rarcodia/eROSITA_public/data/eRASS1_lc_rebinned/em01_056174_020_LightCurve_00052_c010_rebinned.fits', '/pool001/rarcodia/eROSITA_public/data/eRASS1_lc_rebinned/em01_049108_020_LightCurve_00103_c010_rebinned.fits', '/pool001/rarcodia/eROSITA_public/data/eRASS1_lc_rebinned/em01_053132_020_LightCurve_00119_c010_rebinne

In [None]:
def load_n_light_curves(n, data_dir = '/pool001/rarcodia/eROSITA_public/data/eRASS1_lc_rebinned', all = False):
    """
    Loads a specified amount of light curves to analyze.
    
    Parameters:
        n (int): Number of light curves to load.
        data_dir (str): The filepath where the data is located
        all (bool): True if we want to load all data
        band (int): 0 or 1 depending on which band I want to load
        
    Returns:
        light_curves_1 (list): A list of n light curves in 0.2-0.6 keV,
        light_curves_2 (list): A list of n light curves in 0.6-2.3keV
        light_curves_3 (list): A list of n light curves in 2.3-5.0keV
    """
    
    
    if all:
       # Get the list of all FITS files
        fits_files = glob.glob(os.path.join(data_dir, "*.fits"))
    else:
        # Get the list of FITS files and randomly select n files
        fits_files = random.sample(glob.glob(os.path.join(data_dir, "*.fits")), n)
    # Load all light curves into a list of DataFrames
    light_curves_1 = [df for df in (load_light_curve(file, band = 0) for file in fits_files) if df is not None]
    light_curves_2 = [df for df in (load_light_curve(file, band = 1) for file in fits_files) if df is not None]
    light_curves_3 = [df for df in (load_light_curve(file, band = 2) for file in fits_files) if df is not None]
    return light_curves_1, light_curves_2, light_curves_3

# lightcurves_1, lightcurves_2 = load_n_lightcurves(400)
# print('light curves loaded')


## Plotting Functions ##

In [4]:
# Plot all light curves in a grid not adjusting whitspace
def plot_light_curves_noadj(light_curves, rows=10, cols=10):
    fig, axes = plt.subplots(rows, cols, figsize=(20, 20), sharex=True, sharey=True)
    axes = axes.flatten()  # Flatten the grid of axes for easy indexing

    for i, df in enumerate(light_curves):
        if i >= rows * cols:  # Stop if we exceed the grid size
            break
        ax = axes[i]
        ax.errorbar(
            df['TIME'],
            df['RATE'],
            fmt='o', markersize=2, label="Light Curve"
        )
        ax.set_title(f"LC {i+1}", fontsize=8)
        ax.tick_params(axis='both', which='major', labelsize=6)

    # Hide unused subplots
    for j in range(i+1, rows * cols):
        axes[j].axis('off')

    plt.tight_layout()
    plt.show()

In [5]:
def plot_light_curves(light_curves, rows=10, cols=10, margin=0.1):
    """
    Plots light curves in a grid, dynamically adjusting the axis limits to reduce whitespace.
    
    Parameters:
        light_curves (list): List of Pandas DataFrames containing light curve data.
        rows (int): Number of rows in the grid.
        cols (int): Number of columns in the grid.
        margin (float): Fractional margin added around data for better visualization.
    """
    fig, axes = plt.subplots(rows, cols, figsize=(20, 20), sharex=False, sharey=False)
    axes = axes.flatten()  # Flatten the grid of axes for easy indexing

    for i, df in enumerate(light_curves):
        if i >= rows * cols:  # Stop if we exceed the grid size
            break

        ax = axes[i]
        
        # Plot the light curve -> s is the size of marker
        ax.scatter(df['TIME'], df['RATE'], s=2) 

        # Dynamically adjust axis limits
        x_min, x_max = df['TIME'].min(), df['TIME'].max()
        y_min, y_max = df['RATE'].min(), df['RATE'].max()

        # Add a margin to the limits
        x_margin = (x_max - x_min) * margin
        y_margin = (y_max - y_min) * margin

        ax.set_xlim(x_min - x_margin, x_max + x_margin)
        ax.set_ylim(y_min - y_margin, y_max + y_margin)

        # Set title and tick parameters
        ax.set_title(f"LC {i+1}", fontsize=8)
        ax.tick_params(axis='both', which='major', labelsize=6)

    # Hide unused subplots
    for j in range(i + 1, rows * cols):
        axes[j].axis('off')

    plt.tight_layout()
    plt.show()
    
# Call the plotting function
# plot_light_curves(light_curves)

## Call the functions to generate plots of light curves ##

In [6]:
# Generate 400 light curves
light_curves_1, light_curves_2, light_curves_3 = load_n_light_curves(400, all = True)

print('light curves generated')


PermissionError: [Errno 13] Permission denied: '/pool001/rarcodia/eROSITA_public/data/eRASS1_lc_rebinned/em01_039108_020_LightCurve_00089_c010_rebinned.fits'

In [None]:
# Call the plotting function
plot_light_curves(light_curves_1[0:100], rows = 10, cols = 10)
plot_light_curves(light_curves_2[0:100], rows=10, cols = 10)
# plot_light_curves(light_curves[100:200], rows = 10, cols = 10)
# plot_light_curves(light_curves[200:300], rows = 10, cols = 10)
# plot_light_curves(light_curves[300:400], rows = 10, cols = 10)



## Look at the errors as well ##

In [None]:
# Function to load a single FITS file and return as a Pandas DataFrame
def load_light_curve_error(file_path):
    with fits.open(file_path) as hdul:
        data = hdul[1].data  # Assuming light curve data is in the second HDU
        try:
            # print(data["RATE_ERRM"])
            # print(data["RATE_ERRP"])
            return pd.DataFrame({
                'TIME': data['TIME'],
                'TIMEDEL': data['TIMEDEL'],
                'RATE': data['RATE'][:, 1],  # Use the second energy range
                "RATE_ERRM": data["RATE_ERRM"][:, 1],
                "RATE_ERRP": data["RATE_ERRP"][:, 1]
            })
        except KeyError:
            print(f"Skipping file {file_path}: some key not found")
            return None

def load_n_light_curves_error(n, data_dir = '/pool001/rarcodia/eROSITA_public/data/eRASS1_lc_rebinned', all = False):
    """
    Loads a specified amount of light curves to analyze.
    
    Parameters:
        n (int): Number of light curves to load.
        data_dir (str): The filepath where the data is located
        
    Returns:
        light_curves (list): 
    """
    
    
    if all:
       # Get the list of all FITS files
        fits_files = glob.glob(os.path.join(data_dir, "*.fits"))
    else:
        # Get the list of FITS files and randomly select n files
        fits_files = random.sample(glob.glob(os.path.join(data_dir, "*.fits")), n)
    
    # Load all light curves into a list of DataFrames
    light_curves = [df for df in (load_light_curve_error(file) for file in fits_files) if df is not None]
    
    return light_curves

def plot_light_curves_error(light_curves, rows=10, cols=10, margin=0.1):
    """
    Plots light curves in a grid, dynamically adjusting the axis limits to reduce whitespace.
    
    Parameters:
        light_curves (list): List of Pandas DataFrames containing light curve data.
        rows (int): Number of rows in the grid.
        cols (int): Number of columns in the grid.
        margin (float): Fractional margin added around data for better visualization.
    """
    fig, axes = plt.subplots(rows, cols, figsize=(20, 20), sharex=False, sharey=False)
    axes = axes.flatten()  # Flatten the grid of axes for easy indexing

    for i, df in enumerate(light_curves):
        if i >= rows * cols:  # Stop if we exceed the grid size
            break

        ax = axes[i]
        
        # Plot the light curve -> s is the size of marker
        ax.errorbar(
            df['TIME'], 
            df['RATE'], 
            yerr=[df['RATE_ERRM'], df['RATE_ERRP']], #check
            fmt='o', markersize=2, label="Light Curve"
        )

        # Dynamically adjust axis limits
        x_min, x_max = df['TIME'].min(), df['TIME'].max()
        y_min, y_max = df['RATE'].min(), df['RATE'].max()

        # Add a margin to the limits
        x_margin = (x_max - x_min) * margin
        y_margin = (y_max - y_min) * margin

        ax.set_xlim(x_min - x_margin, x_max + x_margin)
        ax.set_ylim(y_min - y_margin, y_max + y_margin)

        # Set title and tick parameters
        ax.set_title(f"LC {i+1}", fontsize=8)
        ax.tick_params(axis='both', which='major', labelsize=6)

    # Hide unused subplots
    for j in range(i + 1, rows * cols):
        axes[j].axis('off')

    plt.tight_layout()
    plt.show()
    
# Call the plotting function

light_curves_error = load_n_light_curves_error(100)
plot_light_curves_error(light_curves_error, rows = 10, cols = 10)


## Messing Around with the Data ##

In [None]:
from astropy.table import Table
from astropy.io import fits
import numpy as np

def read_fits_table(filename):
    table = Table.read(filename)
    # Convert to native byte order before DataFrame conversion
    for col in table.columns:
        if table[col].dtype.byteorder not in ('=', '|'):
            table[col] = table[col].byteswap().newbyteorder()
    return table

# Read FITS file into an Astropy Table
data_dir = '/pool001/rarcodia/eROSITA_public/data/eRASS1_lc_rebinned'
# print(glob.glob(os.path.join(data_dir, "*.fits")))
# print(glob.glob(os.path.join(data_dir, "*.fits")))


# Get a list of FITS files
fits_file1 = glob.glob(os.path.join(data_dir, "*.fits"))[0]



# print(glob.glob(os.path.join(data_dir, "*.fits"))[:10])
table = read_fits_table(fits_file1)
# print(table)

def flatten_fits_table(table):
    flattened_data = {}
    
    for col_name in table.colnames:
        col_data = table[col_name]
        if len(col_data.shape) > 1:
            # For each dimension in the column, create a new column
            for i in range(col_data.shape[1]):
                new_col_name = f"{col_name}_{i}"
                flattened_data[new_col_name] = col_data[:, i]
        else:
            flattened_data[col_name] = col_data
            
    return pd.DataFrame(flattened_data)



# Convert to pandas DataFrame
df = flatten_fits_table(table)

print(df)