In [16]:
from astropy.io import fits 
from astropy import constants
from astropy.timeseries import BoxLeastSquares 
import glob
import matplotlib.pyplot as plt 
import numpy as np
import pandas as pd
import scipy.signal
from scipy.signal import savgol_filter
from scipy.signal import lombscargle
from scipy.optimize import minimize
from lightkurve import search_lightcurve

In [17]:
def fetch_and_normalize_kepler_data(
    target,
    max_window_length=701,
    polyorder=3,
    upper_sigma=2,
    lower_sigma=8,
):
    """Download, detrend and sigma-clip every Kepler light curve of ``target``.

    Each downloaded light curve is normalized by lightkurve, stripped of NaN
    flux values, and divided by a Savitzky-Golay trend so that slow
    variations are removed. All segments are then concatenated and
    asymmetric outliers are dropped.

    Parameters
    ----------
    target : str
        Target identifier passed to ``search_lightcurve`` (e.g. "Kepler-10").
    max_window_length : int, optional
        Upper bound for the Savitzky-Golay window. The window actually used
        is the largest odd number not exceeding both this value and the
        number of samples in the segment.
    polyorder : int, optional
        Polynomial order of the Savitzky-Golay filter.
    upper_sigma, lower_sigma : float, optional
        Rows are kept when
        ``mean - lower_sigma * std <= flux <= mean + upper_sigma * std``,
        with mean/std computed over the concatenated detrended flux.

    Returns
    -------
    pandas.DataFrame
        Columns ``time``, ``flux`` and ``error``. The frame is empty (same
        columns) when no light curve could be downloaded.
    """
    search_result = search_lightcurve(target, mission="Kepler")
    lc_collection = search_result.download_all()
    if lc_collection is None:
        # lightkurve hands back None instead of an empty collection when the
        # search matched nothing; treat it like "zero light curves".
        lc_collection = []

    # Collect per-segment arrays and concatenate once at the end; calling
    # np.append inside the loop would re-copy everything on each iteration.
    time_chunks, flux_chunks, error_chunks = [], [], []

    for lc in lc_collection:
        lc_data = lc.normalize().remove_nans()

        tmptime = lc_data.time.value
        tmpflux = lc_data.flux.value
        tmperror = lc_data.flux_err.value

        # Largest odd window that fits both the cap and the segment length.
        window_length = min(max_window_length, len(tmpflux))
        if window_length % 2 == 0:
            window_length -= 1

        if window_length > polyorder:
            trend = savgol_filter(
                tmpflux, window_length=window_length, polyorder=polyorder
            )
        else:
            # savgol_filter requires polyorder < window_length. The segment is
            # too short to fit a trend, so keep the (already normalized) flux.
            trend = np.ones_like(tmpflux)

        time_chunks.append(tmptime)
        flux_chunks.append(tmpflux / trend)
        error_chunks.append(tmperror / trend)

    # The leading empty float64 array keeps the result float64 and makes the
    # concatenation valid even when no segment was collected.
    empty = np.array([])
    time = np.concatenate([empty, *time_chunks])
    flux = np.concatenate([empty, *flux_chunks])
    error = np.concatenate([empty, *error_chunks])

    df = pd.DataFrame(
        {
            "time": time,
            "flux": flux,
            "error": error,
        }
    )
    if df.empty:
        # Nothing to clip; also avoids mean/std warnings on empty arrays.
        return df

    # Asymmetric clip: by default the upper bound is much tighter than the
    # lower one, so points far below the mean are retained.
    mean_flux = np.mean(flux)
    std_flux = np.std(flux)
    keep = (df["flux"] <= mean_flux + upper_sigma * std_flux) & (
        df["flux"] >= mean_flux - lower_sigma * std_flux
    )
    return df[keep]


In [18]:
# Build the detrended light-curve table for the chosen target.
# fetch_and_normalize_kepler_data searches for and downloads the data through
# lightkurve's search_lightcurve; the result `df` is a DataFrame with the
# columns "time", "flux" and "error".
target = "Kepler-10" 
df = fetch_and_normalize_kepler_data(target)

  lc_data = lc.PDCSAP_FLUX.remove_nans()  # Remove NaNs


ValueError: If mode is 'interp', window_length must be less than or equal to the size of x.

In [None]:
def normalise_kelpler_data(
    pattern="Data/Objectlc/kplr*.fits",
    max_window_length=701,
    polyorder=3,
    upper_sigma=2,
    lower_sigma=8,
):
    """Load local Kepler FITS light curves, detrend them and clip outliers.

    For every file matching ``pattern`` the ``TIME``, ``PDCSAP_FLUX`` and
    ``PDCSAP_FLUX_ERR`` columns of HDU 1 are read, samples with NaN flux are
    dropped, and flux/error are divided by a Savitzky-Golay trend of the
    flux. The per-file results are concatenated and asymmetric outliers are
    removed.

    (The spelling of the function name is kept as-is so existing callers
    continue to work.)

    Parameters
    ----------
    pattern : str, optional
        Glob pattern of the FITS files to read.
    max_window_length : int, optional
        Upper bound for the Savitzky-Golay window. The window actually used
        is the largest odd number not exceeding both this value and the
        number of valid samples in the file.
    polyorder : int, optional
        Polynomial order of the Savitzky-Golay filter.
    upper_sigma, lower_sigma : float, optional
        Rows are kept when
        ``mean - lower_sigma * std <= flux <= mean + upper_sigma * std``,
        with mean/std computed over the concatenated normalised flux.

    Returns
    -------
    pandas.DataFrame
        Columns ``time``, ``flux`` and ``error``; empty (same columns) when
        no file matched or no valid sample was found.
    """
    time_chunks, flux_chunks, error_chunks = [], [], []

    # glob returns files in arbitrary, OS-dependent order; sorting makes the
    # concatenated result deterministic.
    for lcfile in sorted(glob.glob(pattern)):
        with fits.open(lcfile) as tmp:
            tmptime = tmp[1].data['TIME']
            tmpflux = tmp[1].data['PDCSAP_FLUX']
            tmperror = tmp[1].data['PDCSAP_FLUX_ERR']

            # Drop samples whose flux is NaN, keeping time/error aligned.
            valid = ~np.isnan(tmpflux)
            tmptime = tmptime[valid]
            tmpflux = tmpflux[valid]
            tmperror = tmperror[valid]

        if len(tmpflux) == 0:
            continue

        # Largest odd window that fits both the cap and the segment length;
        # a fixed 701-sample window fails on files with fewer valid samples.
        window_length = min(max_window_length, len(tmpflux))
        if window_length % 2 == 0:
            window_length -= 1

        if window_length > polyorder:
            trend = savgol_filter(
                tmpflux, window_length=window_length, polyorder=polyorder
            )
        else:
            # Too few samples to fit a trend (savgol_filter needs
            # polyorder < window_length). Normalise by the median instead so
            # the segment ends up on the same ~1 scale as the others.
            trend = np.full_like(tmpflux, np.median(tmpflux))

        time_chunks.append(tmptime)
        flux_chunks.append(tmpflux / trend)
        error_chunks.append(tmperror / trend)

    # The leading empty float64 array keeps the result float64 and makes the
    # concatenation valid even when nothing was collected.
    empty = np.array([])
    time = np.concatenate([empty, *time_chunks])
    flux = np.concatenate([empty, *flux_chunks])
    error = np.concatenate([empty, *error_chunks])

    df = pd.DataFrame(
        {
            "time": time,
            "flux": flux,
            "error": error
        }
    )
    if df.empty:
        # Nothing to clip; also avoids mean/std warnings on empty arrays.
        return df

    # Asymmetric clip: by default the upper bound is much tighter than the
    # lower one, so points far below the mean are retained.
    mean_flux = np.mean(flux)
    std_flux = np.std(flux)
    keep = (df["flux"] <= mean_flux + upper_sigma * std_flux) & (
        df["flux"] >= mean_flux - lower_sigma * std_flux
    )
    return df[keep]