In [112]:
import astropy
from astropy.io import fits 
from astropy.table import Table
from astropy.time import Time
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd

In [113]:
#Goal: define 2 functions to apply to TESS data
#     1. dataClean
#      -remove nan values 
#      -remove flagged values 
#      -correct time values

#     2. flareFinder
#       -compare avg flux value of light curve to flux data
#       -if value_flux > avg flux by x(sigma), pull into new array (until equal to or less than x(sigma)) (new array for each flare?)
#       -then continue through the rest of the data
#       -find max flux in each array 
#       -pull associated timestamp from cleaned data 
#       -print timestamp, flux value for each flare discovered

In [114]:
def dataClean(filename): #Can I use the file path as an input?
    """ This function will remove nan values from TESS 
        20 second lightcurve data and correct time values"""
    
 #Create an array of time and flux data with nans removed    
    with fits.open(filename, mode="readonly") as hdulist:
        raw_time = hdulist[1].data['TIME']
        raw_flux = hdulist[1].data['PDCSAP_FLUX']
        raw_err = hdulist[1].data['PDCSAP_FLUX_ERR']
    
    data = np.vstack((raw_time, raw_flux, raw_err))
    nonan_data = data[:, ~np.isnan(data).any(axis=0)]
 #   error = np.where(np.isfinite(raw_err))
#Apply time correction and create array of corrected and cleaned data
    times = nonan_data[0]
    flux = nonan_data[1]
    error = nonan_data[2]
    t_corr = []
    for i in times:
        r = i + 2457000
        t_corr.append(r)
    time = Time(t_corr, format = 'jd', scale = 'utc')
    time.format = 'iso'
    time = np.array(time)
    flux = np.array(flux)
    err = np.array(error)
#Return cleaned data
    return[time,flux,err]

In [118]:
def flareFinder(cleaned_data, sigma_multiplier):
    """ This function will take cleaned data and a multiple to be applied to sigma. 
        The function will run through the data, creating arrays of flux values 
        when the flux is > avg flux by the specified multiple of sigma. Using the max
        flux value from each array, the corresponding time value will be pulled from
        the cleaned data set. Timestamp and max flux will be printed """
        
#find avg flux and flux error from tess data, set as variables, using random number for avg flux
    fluxes = cleaned_data[1]
    times = cleaned_data[0]
    F_err = cleaned_data[2] 
    ix = np.where(fluxes > (4460 + (sigma_multiplier*F_err)))
    flareFlux = fluxes[ix]
    flareTime = times[ix]
    flareTable = pd.DataFrame(flareFlux,flareTime)
    
    return[flareTable]

In [119]:
TESSDATA = '/Users/katborski/Documents/GitHub/AFPSC/TESS/tess2021232031932-s0042-0000000250081915-0213-a_fast-lc.fits'
cleaned_data = dataClean(TESSDATA)

In [123]:
table = flareFinder(cleaned_data,3)

In [124]:
table

[                                   0
 2021-08-22 06:25:59.109  4558.009766
 2021-09-03 20:20:28.846  4542.239746
 2021-09-03 20:20:48.846  4597.621582
 2021-09-03 20:21:08.846  4685.850098
 2021-09-03 20:21:28.847  4653.795898
 2021-09-03 20:21:48.847  4659.321777
 2021-09-03 20:22:08.848  4707.171387
 2021-09-03 20:22:28.848  4613.520020
 2021-09-03 20:22:48.848  4610.151855
 2021-09-03 20:23:08.849  4566.428223
 2021-09-03 20:23:28.849  4581.015625
 2021-09-03 20:23:48.850  4537.677246
 2021-09-10 13:41:34.779  4546.991699]