In [3]:
import numpy as np
import lightkurve as lk
import matplotlib.pyplot as plt
from scipy import stats
from scipy.signal import find_peaks
from scipy.signal import savgol_filter
import os

from flare_timestamps import get_timestamps
from flare_timestamps import get_rotation_period

#these are used for the second half of the dataset (superflares_2.txt) because the dataset had to be split and processed in two parts

# from flare_timestamps2 import get_timestamps_2
# from flare_timestamps2 import get_rotation_period_2

The code for creating one lightcurve is now applied to create all lightcurves from the superflares.txt file. This is done by looping over all target names.

In [4]:
# Create one flare-centred lightcurve plot for every superflare of every
# target listed in superflares.txt, then push the plots to the git repository.
input_file = 'superflares.txt'
target_names = np.unique(np.loadtxt(input_file, dtype='str', usecols=(0)))

# Tick positions relative to the flare time (days) for the plotted window:
# 40 days before the flare up to 20 days after it.
tick_offsets = (-40, -30, -20, -10, 0, 10, 20)

for target_name in target_names:

    # One sub-folder per target inside 'plots'. The same path is used for
    # savefig below, so the figure is always written to a folder that exists.
    target_dir = os.path.join('plots', 'KIC' + target_name)
    os.makedirs(target_dir, exist_ok=True)

    all_timestamps = get_timestamps(target_name, input_file)

    # Download and stitch the long-cadence Kepler lightcurve once. A target
    # that cannot be downloaded is skipped instead of terminating the kernel,
    # so the remaining targets are still processed.
    try:
        search_result = lk.search_lightcurve(f'KIC {target_name}', mission='Kepler', cadence='long')
        lc_collection = search_result.download_all()
        stitched = lc_collection.stitch()
    except Exception as err:
        print('Target not found:', target_name, err)
        continue

    lc = np.array((stitched.flux) - 1)
    t = np.array(stitched.time.value)

    # Shift the catalogue times by +2400000 and -2454833 days and round to
    # 2 decimals (presumably JD-2400000 -> Kepler BKJD; confirm against the
    # catalogue definition).
    timestamps = [
        np.round((float(element) + float(2400000)) - 2454833, 2)
        for element in all_timestamps
    ]

    for element in timestamps:

        # plot the lightcurve for an interval 40 days before and 20 days after the flare
        fig, ax = plt.subplots(figsize=(10, 10))
        ax.plot(t, lc, 'black', lw=0.3)
        ax.set_xlim(element - 40, element + 20)
        ax.set_xticks([element + offset for offset in tick_offsets])
        ax.set_xticklabels([str(offset) for offset in tick_offsets])
        ax.grid()
        ax.set_title('Kepler Lightcurve for KIC: ' + target_name)
        ax.set_xlabel('Day from peak')
        ax.set_ylabel('Flux (ΔF/F)')
        fig.savefig(os.path.join(target_dir, target_name + 'flare' + str(element) + '.png'), dpi=300)
        # close explicitly: many figures are created in this loop
        plt.close(fig)

    print(target_name, 'Done')

# push the directory to the github repository
# NOTE: 'git add .' stages everything below the current working directory,
# not only the plots.
os.system('git add .')
os.system('git commit -m "automated commit"')
os.system('git push')


FileNotFoundError: superflares.txt not found.

The second measure for the fluctuation before the flare is the peak-to-peak value. It can be calculated by finding all the maxima and minima in the lightcurve of the flux. The problem is that the overall lightcurve contains small fluctuations that should not be counted as extrema. The lightcurve is therefore smoothed with the Savitzky–Golay filter, which reduces the noise in the data so that the find_peaks function only registers the extrema caused by the rotating starspots. The peak-to-peak values are then calculated by subtracting the minima from the maxima of the smoothed lightcurve. Finally, the mean of all peak-to-peak values is calculated. Only the interval of 40 days before the flare is used for this.

At the same time, the height of the flare (i.e. the flare intensity) is calculated. This is done by taking the flux value at the time of the flare and subtracting the flux value one data point before the flare.

In [None]:
# For every catalogued superflare, measure
#   * hight     - flux at the flare minus the flux one sample earlier, and
#   * peak_peak - mean (maximum - minimum) of the smoothed lightcurve in the
#                 40 days before the flare.
# One value per flare is appended to each list; np.nan marks a flare that
# could not be measured, so both lists always have the same length.
hight = []
peak_peak = []

input_file = 'superflares.txt'

SAVGOL_WINDOW = 101   # smoothing window in samples (must be odd)
SAVGOL_ORDER = 3      # polynomial order of the Savitzky-Golay filter
WINDOW_DAYS = 40      # length of the pre-flare interval that is analysed

# The first column is read once per table row, so a star with several flares
# shows up several times. Each star is processed once (first-appearance order
# is kept) because get_timestamps is asked for all flares of that star.
# NOTE(review): the output only lines up row by row with superflares.txt if
# the file is grouped by star and get_timestamps returns the flares in file
# order - confirm before stacking the results against the table.
target_names = list(dict.fromkeys(np.loadtxt(input_file, dtype='str', usecols=0, ndmin=1)))

for target_name in target_names:
    # Same call convention as the plotting cell: the helper gets the file name.
    all_timestamps = get_timestamps(target_name, input_file)

    # Shift the catalogue times by +2400000 and -2454833 days and round to
    # 2 decimals (presumably JD-2400000 -> Kepler BKJD; confirm).
    timestamps = [
        np.round((float(element) + float(2400000)) - 2454833, 2)
        for element in all_timestamps
    ]

    # search for the lightcurve and download it; if that fails, record NaN for
    # every flare of this target and go on with the next one
    try:
        search_result = lk.search_lightcurve(f'KIC {target_name}', mission='Kepler', cadence='long')
        lc_collection = search_result.download_all()
        stitched = lc_collection.stitch()
    except Exception as err:
        print('Lightcurve not found:', target_name, err)
        hight.extend([np.nan] * len(timestamps))
        peak_peak.extend([np.nan] * len(timestamps))
        continue

    lc = np.array((stitched.flux) - 1)
    t = np.array(stitched.time.value)

    # Drop non-finite samples: a single NaN would otherwise contaminate every
    # smoothing window that contains it.
    finite = np.isfinite(t) & np.isfinite(lc)
    t = t[finite]
    lc = lc[finite]

    if len(t) < 2:
        print('Lightcurve too short:', target_name)
        hight.extend([np.nan] * len(timestamps))
        peak_peak.extend([np.nan] * len(timestamps))
        continue

    # Typical spacing of two samples in days. The median is used so that data
    # gaps do not inflate the value.
    cadence = np.median(np.diff(t))

    # find the rotation period of the target
    # assumes the helper returns a single value in days - TODO confirm
    rotation_period = float(get_rotation_period(target_name, input_file))

    # Smoothing and extrema search depend only on the lightcurve, so they are
    # done once per target rather than once per flare.
    yhat = lc
    peaks = np.array([], dtype=int)
    minima = np.array([], dtype=int)
    if len(lc) >= SAVGOL_WINDOW and np.isfinite(rotation_period) and cadence > 0:
        yhat = savgol_filter(lc, SAVGOL_WINDOW, SAVGOL_ORDER)
        # Neighbouring extrema must be at least one rotation period apart;
        # find_peaks requires the distance (in samples) to be >= 1.
        min_separation = max(1.0, rotation_period / cadence)
        peaks, _ = find_peaks(yhat, height=0, distance=min_separation)
        minima, _ = find_peaks(-yhat, height=0, distance=min_separation)

    for element in timestamps:

        # Flare height: use the sample closest to the catalogued time. The
        # flare counts as found only if that sample lies within one cadence
        # and has a predecessor.
        index = int(np.argmin(np.abs(t - element)))
        if index > 0 and abs(t[index] - element) <= cadence:
            hight.append(float(lc[index] - lc[index - 1]))
        else:
            hight.append(np.nan)

        # only take the extrema in the 40 days before the flare; the selection
        # is made on the sample times, not on the sample indices
        window_start = element - WINDOW_DAYS
        peaks_before = peaks[(t[peaks] >= window_start) & (t[peaks] < element)]
        minima_before = minima[(t[minima] >= window_start) & (t[minima] < element)]

        # match the number of peaks with the number of minima
        n_pairs = min(len(peaks_before), len(minima_before))
        if n_pairs == 0:
            peak_peak.append(np.nan)
            continue

        differnce = np.mean(yhat[peaks_before[:n_pairs]] - yhat[minima_before[:n_pairs]])
        peak_peak.append(float(differnce))

    print(target_name, 'Done')

# save the created arrays as txt files (kept disabled, as in the original cell)
# np.savetxt('peak_peak.txt', peak_peak, fmt='%s')
# np.savetxt('hight.txt', hight, fmt='%s')



The superflares are then separated according to their rotation period, which is read from the column with index 6 of the superflares.txt file. The rotation periods are matched to the peak-to-peak values as well as to the bolometric energy (used later for different plots). Finally, one array is created for each of the four rotation-period intervals (up to 5 days, 5–10 days, 10–20 days and 20–40 days).

In [3]:
# Build one table with a row per superflare from both halves of the dataset
# and split it into four groups by the value in the second column.
CATALOGUE_COLUMNS = (0, 6, 13)

superflares_1 = np.loadtxt('superflares.txt', usecols=CATALOGUE_COLUMNS)
superflares_2 = np.loadtxt('superflares_2.txt', usecols=CATALOGUE_COLUMNS)
superflares = np.concatenate((superflares_1, superflares_2))

peak_peak_1 = np.loadtxt('peak_peak.txt')
peak_peak_2 = np.loadtxt('peak_peak_2.txt')
peak_peak = np.concatenate((peak_peak_1, peak_peak_2))

# Cut both arrays to the length of the shorter one so they can be stacked.
# NOTE(review): this only trims the tail; it does not check that row i of
# peak_peak belongs to row i of superflares.
n_rows = min(len(superflares), len(peak_peak))
peak_peak = peak_peak[:n_rows]
superflares = superflares[:n_rows]

# Columns of the stacked table: the three catalogue columns, then peak_peak.
# NOTE: the name `all` shadows the Python builtin for the rest of the session.
all = np.column_stack((superflares, peak_peak))

# Second column of the table; the markdown above describes it as the
# rotation period.
period = all[:, 1]

# period <= 5
all5 = all[period <= 5]

# 5 < period <= 10
all10 = all[(period > 5) & (period <= 10)]

# 10 < period <= 20
all20 = all[(period > 10) & (period <= 20)]

# 20 < period <= 40
all40 = all[(period > 20) & (period <= 40)]

The statistical significance of the data can now be calculated. This is done in the same way as before, by calculating the correlation coefficients and the p-values.

In [4]:
def _without_nan_rows(table):
    """Return the rows of `table` that do not contain any NaN."""
    row_has_nan = np.isnan(table).any(axis=1)
    return table[~row_has_nan]


# For each rotation-period group: drop incomplete rows, regress the fourth
# column on the third column and print the correlation coefficient.
all5 = _without_nan_rows(all5)
slope, intercept, r_value, p_value, std_err = stats.linregress(all5[:, 2], all5[:, 3])
print('5 days:', r_value)

all10 = _without_nan_rows(all10)
slope10, intercept10, r_value10, p_value10, std_err10 = stats.linregress(all10[:, 2], all10[:, 3])
print('10 days:', r_value10)

all20 = _without_nan_rows(all20)
slope20, intercept20, r_value20, p_value20, std_err20 = stats.linregress(all20[:, 2], all20[:, 3])
print('20 days:', r_value20)

all40 = _without_nan_rows(all40)
slope40, intercept40, r_value40, p_value40, std_err40 = stats.linregress(all40[:, 2], all40[:, 3])
print('40 days:', r_value40)


5 days: 0.08025358052288556
10 days: 0.097141978161468
20 days: -0.062394453960607436
40 days: -0.07018489105126458


In [5]:
# Write each cleaned rotation-period group to its own text file.
for out_name, group in (
    ('all5.txt', all5),
    ('all10.txt', all10),
    ('all20.txt', all20),
    ('all40.txt', all40),
):
    np.savetxt(out_name, group, fmt='%s')

In [None]:
# Significance of the correlation in each rotation-period group.
slope, intercept, r_value, p_value, std_err = stats.linregress(all5[:,2],all5[:,3])
slope10, intercept10, r_value10, p_value10, std_err10 = stats.linregress(all10[:,2],all10[:,3])
slope20, intercept20, r_value20, p_value20, std_err20 = stats.linregress(all20[:,2],all20[:,3])
slope40, intercept40, r_value40, p_value40, std_err40 = stats.linregress(all40[:,2],all40[:,3])

# degrees of freedom of the correlation test: number of points minus 2
dof = len(all5) - 2
dof10 = len(all10) - 2
dof20 = len(all20) - 2
dof40 = len(all40) - 2

# with the r_value calculate the standard error of r
r_value_std = np.sqrt((1 - r_value**2) / dof)
r_value_std10 = np.sqrt((1 - r_value10**2) / dof10)
r_value_std20 = np.sqrt((1 - r_value20**2) / dof20)
r_value_std40 = np.sqrt((1 - r_value40**2) / dof40)

# Test statistic r / SE(r). The variable names are kept for compatibility,
# but under the null hypothesis of no correlation this quantity follows a
# Student's t distribution with n - 2 degrees of freedom, not a standard
# normal distribution.
z_score = r_value / r_value_std
z_score10 = r_value10 / r_value_std10
z_score20 = r_value20 / r_value_std20
z_score40 = r_value40 / r_value_std40

# Two-sided p-value from the t distribution. The survival function is used
# instead of 1 - cdf because it keeps its precision for small p-values.
p_value = 2 * stats.t.sf(np.abs(z_score), dof)
p_value10 = 2 * stats.t.sf(np.abs(z_score10), dof10)
p_value20 = 2 * stats.t.sf(np.abs(z_score20), dof20)
p_value40 = 2 * stats.t.sf(np.abs(z_score40), dof40)

print('5 days:',  p_value)
print('10 days:', p_value10)
print('20 days:',  p_value20)
print('40 days:', p_value40)

# correlation coefficient of the first group (rotation period <= 5 days)
print(r_value)