In [77]:
import requests
import bs4
import pandas as pd
import numpy as np
from grb.config import DATA_PATH
import pickle
import datetime

# Get data for crossmatching

Get the final Konus-WIND solar flare list matched with GOES

In [3]:
# Download the Konus-WIND solar flare list, discard rows with missing values
# and remove the '#' column.
konus_sf_data = (
    pd.read_table('http://www.ioffe.ru/LEA/Solar/KonusWIND_SolarFlares.txt')
    .dropna()
    .drop(columns='#')
)
# Join the trigger date and trigger time strings into one timestamp column.
konus_sf_data['UT'] = pd.to_datetime(
    konus_sf_data['TriggerDate'] + ' ' + konus_sf_data['TriggerTime']
)
# Store a local copy of the catalogue and preview its last rows.
konus_sf_data.to_csv(f'{DATA_PATH}KONUS_SF+GOES_catalog.csv')
konus_sf_data.tail()

Unnamed: 0,TriggerDate,TriggerTime,TriggerTime.1,Class,GOESTbegin,GOESTmax,GOESTend,UT
1012,2023-01-11,08:32:14.125,30734.125,M3.1,08:25,08:33,08:37,2023-01-11 08:32:14.125
1013,2023-01-13,10:12:44.322,36764.322,M3.9,10:05,10:15,10:19,2023-01-13 10:12:44.322
1014,2023-02-11,15:45:01.041,56701.041,X1.1,15:40,15:48,15:54,2023-02-11 15:45:01.041
1015,2023-02-23,06:13:12.583,22392.583,M1.5,06:11,06:14,06:18,2023-02-23 06:13:12.583
1016,2023-02-23,08:09:11.819,29351.819,C7.2,07:58,08:12,08:19,2023-02-23 08:09:11.819


Load trigger events in waiting mode

In [75]:
def parse_html_table(html_string):
    """Convert the first ``<table>`` of an HTML document into a DataFrame.

    Parameters
    ----------
    html_string : str
        Markup handed directly to ``bs4.BeautifulSoup``.

    Returns
    -------
    pandas.DataFrame or None
        ``None`` when the document contains no ``<table>``. Otherwise a frame
        with one row per ``<tr>`` that holds no ``<th>`` cell; the values are
        the texts of the row's ``<td>`` cells and the column names are the
        texts of every ``<th>`` found anywhere in the table.
    """
    # No parser is named here, so bs4 selects one on its own.
    soup = bs4.BeautifulSoup(html_string)
    first_table = soup.find('table')
    if first_table is None:
        return None

    header_names = [cell.get_text() for cell in first_table.find_all('th')]
    # Rows containing header cells are skipped; every other row contributes
    # the text of its data cells.
    body_rows = [
        [cell.get_text() for cell in row.find_all('td')]
        for row in first_table.find_all('tr')
        if not row.find_all('th')
    ]
    return pd.DataFrame(body_rows, columns = header_names)

# Scrape the yearly Konus-WIND waiting-mode pages and merge them into one
# catalogue.
yearly_tables = []
for year in range(2002,2024):
    # The timeout keeps a stalled connection from blocking the notebook forever.
    r = requests.get(f'http://www.ioffe.ru/LEA/kw/wm/{year}/index.html', timeout=60)
    table = parse_html_table(r.text)
    # Pages without a <table> yield None and contribute nothing.
    if table is not None:
        yearly_tables.append(table)
# Concatenate once: growing the frame inside the loop re-copies all previously
# collected rows on every iteration.
konus_triggers_data = pd.concat(yearly_tables)

# The last space-separated token of 'UT', with its first and last characters
# removed, is stored as 'Trigger seconds'; the remaining tokens form the
# timestamp that is parsed into 'UT'.
konus_triggers_data['Trigger seconds'] = konus_triggers_data['UT'].apply(lambda x: x.split(' ')[-1][1:-1])
konus_triggers_data['UT'] = konus_triggers_data['UT'].apply(lambda x: ' '.join(x.split(' ')[:-1]))
konus_triggers_data['UT'] = pd.to_datetime(konus_triggers_data['UT'])
konus_triggers_data['Dur.'] = konus_triggers_data['Dur.'].astype(int)
konus_triggers_data = konus_triggers_data.dropna().reset_index(drop=True)
konus_triggers_data.to_csv(f'{DATA_PATH}KONUS_waitin_mode_events_catalog.csv')
konus_triggers_data.tail()

Unnamed: 0,UT,Dur.,Name,Type,Det.,Channels,KW,Others,Comment,Trigger seconds
16887,2019-06-05 23:22:27,3,,,S2,G2,,FER,,84147
16888,2019-06-06 01:55:03,3,GRB 190606A,sGRB,"S1,S2","G1,G2,G3",TRIG,,KW GCN 24784,6903
16889,2019-06-06 09:34:07,18,,,S2,"G1,G2",,,,34447
16890,2019-06-06 13:21:39,38,,,S2,"G1,G2",,,,48099
16891,2019-06-07 01:42:45,35,,,"S1,S2","G1,G2",,FER,,6165


Konus GRBs in waiting mode

In [68]:
# Export the events whose comment mentions 'GRB' as the GRB reference list.
# NOTE(review): the filter looks at 'Comment' only, so an event identified as a
# GRB solely through 'Name'/'Type' (e.g. Name 'GRB 190606A' with comment
# 'KW GCN 24784') is not exported -- confirm this is intended.
konus_triggers_data.loc[
    konus_triggers_data['Comment'].map(lambda text: 'GRB' in text)
].to_csv(f'{DATA_PATH}KONUS_GRBS.csv')
konus_triggers_data.head()

Unnamed: 0.1,Unnamed: 0,UT,Dur.,Name,Type,Det.,Channels,KW,Others,Comment,Trigger seconds,left,right
0,58,2002-01-27 07:07:11,253,,GRB,"S1,S2","G1,G2,G3",TRIG,,notice: ~180 s long GRB,25631,2002-01-27 07:07:11,2002-01-27 07:11:24
1,61,2002-01-28 02:00:37,26,,,S2,"G1,G2,G3",,,GRB candidate,7237,2002-01-28 02:00:37,2002-01-28 02:01:03
2,166,2002-02-15 18:37:16,91,,,S2,"G1,G2,G3",,,GRB candidate,67036,2002-02-15 18:37:16,2002-02-15 18:38:47
3,182,2002-02-18 19:45:33,262,,GRB,"S1,S2","G1,G2,G3",TRIG,,notice: ~50 s long GRB,71133,2002-02-18 19:45:33,2002-02-18 19:49:55
4,290,2002-03-05 11:58:39,59,,,"S1,S2","G1,G2,G3",,,GRB candidate,43119,2002-03-05 11:58:39,2002-03-05 11:59:38


Hurley masterlist

In [80]:
def _hurley_timestamp(entry):
    """Parse one masterlist entry into a ``datetime``.

    Whitespace-separated fields 1-3 are read as day, abbreviated month name
    and two-digit year; field 5 is read as ``HH:MM:SS``.
    """
    fields = entry.split()
    return datetime.datetime.strptime(' '.join(fields[1:4] + [fields[5]]), '%d %b %y %H:%M:%S')


# One timestamp per row of the 'data' column of the spreadsheet.
hurley = pd.DataFrame(
    [
        _hurley_timestamp(entry)
        for entry in pd.read_excel(f'{DATA_PATH}Hurley catalog.xlsx')['data'].to_list()
    ],
    columns=['datetime'],
)

RHESSI and GOES flares

In [162]:
# Load the GOES flare list, dropping the saved index column and rows that
# repeat an already seen 't_max'.
goes_cat = (
    pd.read_csv(f'{DATA_PATH}goes_flares.csv')
    .drop(columns='Unnamed: 0')
    .drop_duplicates('t_max')
)
# Parse the three time columns; unparseable values become NaT.
for time_column in ('t_start', 't_max', 't_finish'):
    goes_cat[time_column] = pd.to_datetime(goes_cat[time_column], errors='coerce')
# Discard rows with any missing value and renumber the index.
goes_cat = goes_cat.dropna().reset_index(drop=True)
goes_cat.head()

Unnamed: 0,id,t_start,t_max,t_finish,type
0,6020,2002-01-01 05:33:00,2002-01-01 05:33:00,2002-01-01 05:38:00,SF
1,6030,2002-01-01 08:59:00,2002-01-01 09:10:00,2002-01-01 09:17:00,C4.5
2,6040,2002-01-01 10:18:00,2002-01-01 10:18:00,2002-01-01 10:18:00,170
3,6050,2002-01-01 12:09:00,2002-01-01 12:09:00,2002-01-01 12:10:00,72
4,6080,2002-01-02 01:15:00,2002-01-02 01:23:00,2002-01-02 01:43:00,SF


In [127]:
def _parse_rhessi_line(line):
    """Return ``(t_start, t_finish)`` for one line of the RHESSI flare list.

    Whitespace-separated field 1 is the date, field 2 the start time and
    field 4 the finish time. The line carries a single date, so a finish
    time that falls before the start time is taken to be on the next day.
    """
    fields = line.split()
    t_start = datetime.datetime.strptime(fields[1]+' '+fields[2],'%d-%b-%Y %H:%M:%S')
    t_finish = datetime.datetime.strptime(fields[1]+' '+fields[4],'%d-%b-%Y %H:%M:%S')
    # Without this correction a flare crossing midnight would end ~24 h before
    # it starts and could never overlap anything.
    if t_finish < t_start:
        t_finish += datetime.timedelta(days=1)
    return t_start, t_finish


with open(f'{DATA_PATH}RHESSI SF catalog.txt') as file:
    rhessi_intervals = [_parse_rhessi_line(line) for line in file]
RHESSI = pd.DataFrame(rhessi_intervals,columns=['t_start','t_finish'])
RHESSI.head()

Unnamed: 0,t_start,t_finish
0,2002-02-12 21:29:56,2002-02-12 21:41:48
1,2002-02-12 21:44:08,2002-02-12 21:48:56
2,2002-02-13 00:53:24,2002-02-13 00:57:00
3,2002-02-13 04:22:52,2002-02-13 04:26:56
4,2002-02-13 07:03:52,2002-02-13 07:07:48


# Crossmatching

In [128]:
def find_closest_event(time,list_of_events):
    """Return the position of the event closest to ``time``.

    Parameters
    ----------
    time
        Reference value; ``event - time`` must be defined and support ``abs``.
    list_of_events
        Iterable of events (list, pandas Series, one-shot iterator, ...).

    Returns
    -------
    int
        Zero-based position, in iteration order, of the event with the
        smallest ``abs(event - time)``. The first such event wins on ties.

    Raises
    ------
    ValueError
        If ``list_of_events`` is empty.
    """
    # Single pass over (position, event) pairs: no second scan to recover the
    # index and no list copy of the input.
    position, _ = min(enumerate(list_of_events), key=lambda item: abs(item[1] - time))
    return position

def is_intersected(left_1,right_1,left_2,right_2):
    """Tell whether interval 1 and interval 2 touch or overlap.

    The result is true when an endpoint of one interval lies inside the other
    interval, bounds included. For well-formed intervals (left <= right) this
    is the closed-interval overlap test.
    """
    def _inside(low, point, high):
        # Inclusive containment of ``point`` in [low, high].
        return low <= point <= high

    return (
        _inside(left_1, left_2, right_1)
        or _inside(left_1, right_2, right_1)
        or _inside(left_2, left_1, right_2)
        or _inside(left_2, right_1, right_2)
    )

In [129]:

# SECURITY NOTE: pickle.load can execute arbitrary code stored in the file;
# only load pickles that come from a trusted source.
with open(f'{DATA_PATH}fixed_durations.pkl','rb') as f:
    final_catalog = pickle.load(f)
df_fixed_durs = pd.DataFrame(final_catalog)

df_candidates = pd.read_excel(f'{DATA_PATH}candidates_crossmatched_final (2).xlsx',parse_dates=['datetime'])

candidates = df_candidates.merge(df_fixed_durs,left_on='datetime',right_on='datetime').sort_values('datetime')
# duration_x comes from the spreadsheet, duration_y from the pickle (merge
# suffixes). The spreadsheet value is used only when it is less than half of
# the pickled one.
candidates['duration'] = np.where(2*candidates['duration_x']<candidates['duration_y'],candidates['duration_x'],candidates['duration_y'])

# Konus solar-flare triggers are instants, i.e. zero-length intervals.
konus_sf_data['left'] = konus_sf_data['right'] = konus_sf_data['UT']
candidates['Konus_SF'] = 0
for i,event in candidates.iterrows():
    # Candidate window; the duration is truncated to whole seconds.
    ev_left = event['datetime']
    ev_right = ev_left + np.timedelta64(int(event['duration']),'s')
    # Test every trigger, not only the one nearest to the candidate start:
    # the nearest trigger may lie just before the window while another one
    # falls inside it.
    overlaps = (konus_sf_data['left'] <= ev_right) & (konus_sf_data['right'] >= ev_left)
    if overlaps.any():
        candidates.at[i,'Konus_SF'] = 1
candidates.to_csv(f'{DATA_PATH}candidates_after_konus_flares.csv')

In [167]:
# Reload the flagged candidates from disk, keeping only the listed columns in
# this order.
candidates = pd.read_csv(
    f'{DATA_PATH}candidates_after_konus_flares.csv',
    parse_dates=['datetime'],
).loc[:, [
    'datetime',
    'duration',
    'resolution',
    'distance',
    'acs_flux',
    'chi_2',
    'bins',
    'Konus_SF',
]]
candidates.head()

Unnamed: 0,datetime,duration,resolution,distance,acs_flux,chi_2,bins,Konus_SF
0,2003-02-12 03:38:53.978,21.875011,3.125,0.55,5042.429,3.4,[0.09088023 0.10083312 0.10944655 0.15154681 0...,0
1,2003-02-14 04:04:43.816,540.005252,20.0,0.93,263855.0,1.7,[0.12794046 0.17681547 0.16667482 0.19639431 0...,0
2,2003-02-14 09:49:43.816,194.807866,15.0,1.17,101094.0,1.8,[0.11574815 0.17832571 0.12355629 0.17196501 0...,0
3,2003-02-14 09:52:14.990,1399.115153,100.0,1.16,201425.5,1.3,[0.10375708 0.41574303 0.10659213 0.16520606 0...,0
4,2003-02-15 11:12:13.993,399.0,100.0,0.23,33155560.0,57.9,[0.28050062 0.21936404 0.1671369 0.12351538 0...,0


Crossmatch with self to remove duplicates

In [168]:
# The sweep below needs the candidates ordered by start time; a stable sort
# leaves already ordered data untouched.
candidates = candidates.sort_values('datetime', kind='stable')

# Every candidate spans [left, right]; the duration is truncated to whole
# seconds.
candidates['left'] = candidates['datetime']
candidates['right'] = candidates['left'] + pd.to_timedelta(candidates['duration'].astype(int), unit='s')

# Group overlapping candidates. 'duplicated' is 0 for isolated events and a
# positive group number for every member of an overlapping group.
# The running end time of the open group is tracked so that a long event is
# also grouped with later events that overlap it but not its immediate
# neighbour (comparing adjacent rows only would miss those).
candidates['duplicated'] = 0
pair_index = 0
cluster_right = None   # latest end time within the currently open group
prev_label = None      # index label of the previously visited row
for label, event in candidates.iterrows():
    if cluster_right is not None and event['left'] <= cluster_right:
        # The previous row always belongs to the open group; if it is still
        # unnumbered the group had a single member so far and gets a new id.
        if candidates.at[prev_label, 'duplicated'] == 0:
            pair_index += 1
            candidates.at[prev_label, 'duplicated'] = pair_index
        candidates.at[label, 'duplicated'] = pair_index
        cluster_right = max(cluster_right, event['right'])
    else:
        # No overlap with anything seen so far: start a new group.
        cluster_right = event['right']
    prev_label = label

# Keep isolated events plus the shortest member of every group.
group_representatives = (
    candidates[candidates['duplicated'] != 0]
    .sort_values('duration', kind='stable')
    .drop_duplicates('duplicated', keep='first')
)
filtered_candidates = pd.concat((candidates[candidates['duplicated'] == 0], group_representatives))
filtered_candidates = filtered_candidates.sort_values('datetime').drop(columns=['duplicated'])

Crossmatch with GRBs

In [171]:
# NOTE: from here on konus_triggers_data holds only the GRB subset read back
# from KONUS_GRBS.csv, not the full waiting-mode catalogue.
konus_triggers_data = pd.read_csv(f'{DATA_PATH}KONUS_GRBS.csv',parse_dates=['UT'])
# Each trigger spans [UT, UT + Dur. seconds].
konus_triggers_data['left'] = konus_triggers_data['UT']
konus_triggers_data['right'] = konus_triggers_data['UT'] + pd.to_timedelta(konus_triggers_data['Dur.'], unit='s')

filtered_candidates['grb'] = 0
filtered_candidates['comment'] = ''
for i,event in filtered_candidates.iterrows():
    # Candidate window; the duration is truncated to whole seconds.
    ev_left = event['datetime']
    ev_right = ev_left + np.timedelta64(int(event['duration']),'s')
    # Test every trigger interval, not only the one whose start is nearest to
    # the candidate start: a longer trigger that began earlier may still cover
    # the window while a nearer one does not.
    overlaps = (konus_triggers_data['left'] <= ev_right) & (konus_triggers_data['right'] >= ev_left)
    if overlaps.any():
        matches = konus_triggers_data[overlaps]
        # Among the overlapping triggers, report the comment of the one that
        # starts closest to the candidate.
        nearest = (matches['UT'] - ev_left).abs().idxmin()
        filtered_candidates.at[i,'grb'] = 1
        filtered_candidates.at[i,'comment'] = matches.at[nearest,'Comment']

In [172]:
# Masterlist entries are instants, i.e. zero-length intervals.
hurley['left'] = hurley['right'] = hurley['datetime']

filtered_candidates['hurley'] = 0
for i,event in filtered_candidates.iterrows():
    # Candidate window; the duration is truncated to whole seconds.
    ev_left = event['datetime']
    ev_right = ev_left + np.timedelta64(int(event['duration']),'s')
    # Test every entry, not only the one nearest to the candidate start: the
    # nearest entry may lie just before the window while another one falls
    # inside it.
    overlaps = (hurley['left'] <= ev_right) & (hurley['right'] >= ev_left)
    if overlaps.any():
        filtered_candidates.at[i,'hurley'] = 1

Crossmatch with solar flares

In [173]:
# Flag candidates that overlap a GOES or RHESSI flare. Both catalogues give
# every flare as the interval [t_start, t_finish].
for flag, catalog in (('goes', goes_cat), ('rhessi', RHESSI)):
    filtered_candidates[flag] = 0
    for i,event in filtered_candidates.iterrows():
        # Candidate window; the duration is truncated to whole seconds.
        ev_left = event['datetime']
        ev_right = ev_left + np.timedelta64(int(event['duration']),'s')
        # Test every flare, not only the one whose start is nearest to the
        # candidate start: a long flare that began earlier may still be in
        # progress while a nearer one does not overlap at all.
        overlaps = (catalog['t_start'] <= ev_right) & (catalog['t_finish'] >= ev_left)
        if overlaps.any():
            filtered_candidates.at[i,flag] = 1

# Tag remaining events

Apply background criteria

In [183]:
# Background flag: chi_2 must exceed 3 when duration*3 <= 3000 and 4.5
# otherwise; stored as 0/1.
filtered_candidates['bkg'] = (
    filtered_candidates['chi_2']
    > np.where(filtered_candidates['duration']*3 <= 3000, 3, 4.5)
).astype(int)

The rest are candidates

In [186]:
# An event remains a candidate (1) only when none of the rejection flags is
# set, i.e. when the flags add up to zero.
filtered_candidates['candidates'] = (
    filtered_candidates[['Konus_SF', 'grb', 'hurley', 'goes', 'rhessi', 'bkg']]
    .sum(axis=1, skipna=False)
    == 0
).astype(int)

In [187]:
# Preview the first five tagged events (five is the default row count).
filtered_candidates.head()

Unnamed: 0,datetime,duration,resolution,distance,acs_flux,chi_2,bins,Konus_SF,left,right,grb,comment,hurley,goes,rhessi,bkg,candidates
0,2003-02-12 03:38:53.978,21.875011,3.125,0.55,5042.429,3.4,[0.09088023 0.10083312 0.10944655 0.15154681 0...,0,2003-02-12 03:38:53.978,2003-02-12 03:39:14.978,0,,0,0,0,1,0
1,2003-02-14 04:04:43.816,540.005252,20.0,0.93,263855.0,1.7,[0.12794046 0.17681547 0.16667482 0.19639431 0...,0,2003-02-14 04:04:43.816,2003-02-14 04:13:43.816,0,,0,1,1,0,0
2,2003-02-14 09:49:43.816,194.807866,15.0,1.17,101094.0,1.8,[0.11574815 0.17832571 0.12355629 0.17196501 0...,0,2003-02-14 09:49:43.816,2003-02-14 09:52:57.816,0,,0,0,0,0,1
6,2003-02-15 11:14:03.816,80.001214,5.0,0.23,622118.1,8.5,[0.00057672 0. 0.005727 0.11288872 0...,0,2003-02-15 11:14:03.816,2003-02-15 11:15:23.816,0,,0,0,0,1,0
9,2003-02-15 15:43:43.816,360.003661,20.0,0.04,1400081.0,3.0,[0. 0.04308538 0.16866584 0.11304555 0...,0,2003-02-15 15:43:43.816,2003-02-15 15:49:43.816,0,,0,0,1,0,0


In [188]:
# Persist the tagged table as a pickle next to the other data files.
filtered_candidates.to_pickle(f'{DATA_PATH}filtered_candidates.pkl')