In [40]:
import requests
import bs4
import pandas as pd
import numpy as np
from grb.config import DATA_PATH

# Get KONUS-Wind data

Download the final KONUS-Wind solar-flare catalog, which already includes the matching GOES flare class and times.

In [36]:
# Fetch the KONUS-Wind solar-flare list, drop every row that has a missing
# value, and remove the '#' column before keeping a local CSV copy.
konus_sf_data = (
    pd.read_table('http://www.ioffe.ru/LEA/Solar/KonusWIND_SolarFlares.txt')
    .dropna()
    .drop(columns='#')
)
konus_sf_data.to_csv(f'{DATA_PATH}KONUS_SF+GOES_catalog.csv')
# Show the last rows as the cell output.
konus_sf_data.tail()

Unnamed: 0,TriggerDate,TriggerTime,TriggerTime.1,Class,GOESTbegin,GOESTmax,GOESTend
1012,2023-01-11,08:32:14.125,30734.125,M3.1,08:25,08:33,08:37
1013,2023-01-13,10:12:44.322,36764.322,M3.9,10:05,10:15,10:19
1014,2023-02-11,15:45:01.041,56701.041,X1.1,15:40,15:48,15:54
1015,2023-02-23,06:13:12.583,22392.583,M1.5,06:11,06:14,06:18
1016,2023-02-23,08:09:11.819,29351.819,C7.2,07:58,08:12,08:19


In [58]:
def parse_html_table(html_string, parser=None):
    """Convert the first ``<table>`` of an HTML document into a DataFrame.

    Parameters
    ----------
    html_string : str
        HTML markup to parse.
    parser : str, optional
        Name of the Beautiful Soup parser to use (for example
        ``'html.parser'`` or ``'lxml'``). The default ``None`` keeps the
        previous behaviour of letting Beautiful Soup pick the best parser
        that is installed; pass an explicit name to get the same parse on
        every machine.

    Returns
    -------
    pandas.DataFrame or None
        One row per ``<tr>`` that holds ``<td>`` cells, with one column per
        ``<th>`` found anywhere in the table. ``None`` if the document has
        no ``<table>`` element.
    """
    doc = bs4.BeautifulSoup(html_string, parser)
    table = doc.find('table')
    if table is None:
        return None

    # Column names come from every <th> cell of the table, in document order.
    column_names = [th.get_text() for th in table.find_all('th')]

    rows = []
    for tr in table.find_all('tr'):
        # Header rows only provide column names, never data.
        if tr.find_all('th'):
            continue
        cells = tr.find_all('td')
        # A row without any <td> would otherwise become an all-None record
        # and break later string processing of the columns.
        if not cells:
            continue
        rows.append([td.get_text() for td in cells])
    return pd.DataFrame(rows, columns=column_names)

# Scrape the yearly KONUS-Wind trigger pages and merge them into one catalog.
yearly_tables = []
for year in range(2002,2024):
    # The timeout keeps one unresponsive request from hanging the notebook.
    r = requests.get(f'http://www.ioffe.ru/LEA/kw/wm/{year}/index.html', timeout=60)
    table = parse_html_table(r.text)
    # Pages without a <table> contribute nothing (same as before, where
    # pd.concat silently ignored the None entry).
    if table is not None:
        yearly_tables.append(table)

# Concatenate once instead of re-copying the growing frame on every iteration.
konus_triggers_data = pd.concat(yearly_tables, ignore_index=True)

# The last space-separated token of the raw 'UT' text, minus its first and
# last character, becomes 'Trigger seconds'; the remaining tokens are parsed
# as the timestamp.
# NOTE(review): presumably the raw cell looks like "<date> <time> (<seconds>)" - confirm.
ut_tokens = konus_triggers_data['UT'].str.split(' ')
konus_triggers_data['Trigger seconds'] = ut_tokens.str[-1].str[1:-1]
konus_triggers_data['UT'] = pd.to_datetime(ut_tokens.str[:-1].str.join(' '))
konus_triggers_data['Dur.'] = konus_triggers_data['Dur.'].astype(int)
konus_triggers_data = konus_triggers_data.dropna().reset_index(drop=True)
konus_triggers_data.to_csv(f'{DATA_PATH}KONUS_SF_catalog.csv')
konus_triggers_data.tail()

Unnamed: 0,UT,Dur.,Name,Type,Det.,Channels,KW,Others,Comment,Trigger seconds
16887,2019-06-05 23:22:27,3,,,S2,G2,,FER,,84147
16888,2019-06-06 01:55:03,3,GRB 190606A,sGRB,"S1,S2","G1,G2,G3",TRIG,,KW GCN 24784,6903
16889,2019-06-06 09:34:07,18,,,S2,"G1,G2",,,,34447
16890,2019-06-06 13:21:39,38,,,S2,"G1,G2",,,,48099
16891,2019-06-07 01:42:45,35,,,"S1,S2","G1,G2",,FER,,6165


In [57]:
def find_closest_event(time,list_of_events):
    """Return the position of the event closest to ``time``.

    Parameters
    ----------
    time
        Reference value; must support ``event - time`` and ``abs()`` of the
        result (numbers, datetimes, pandas Timestamps, ...).
    list_of_events : iterable
        Events to search. A pandas Series is iterated over its values, so the
        returned index is positional.

    Returns
    -------
    int
        Zero-based position of the first event with the smallest absolute
        difference to ``time``.

    Raises
    ------
    ValueError
        If ``list_of_events`` is empty.
    """
    # A single pass over (position, event) pairs; min() keeps the first pair
    # on ties, which matches looking up the first occurrence of the closest
    # value, without copying the input or scanning it a second time.
    position, _ = min(enumerate(list_of_events), key=lambda pair: abs(pair[1] - time))
    return position

In [42]:
# Read the candidate table from the Excel workbook stored under DATA_PATH.
candidates = pd.read_excel(io=f'{DATA_PATH}candidates_crossmatched_final (2).xlsx')
# Display the loaded frame as the cell output.
candidates

Unnamed: 0.1,Unnamed: 0,datetime,distance,duration,duration_err,integral_flux,integral_flux_err,significance,bkg_chi2,Bkg,Hurley,RHESSI,Konus,GOES,Candidate
0,0,2003-02-14 09:49:43.816,1.17,380,60,102959,4944,20.8,4.8,1,0,0,0,0,0
1,1,2003-02-15 11:13:44.816,0.23,260,60,618852,3481,177.8,34.0,1,1,0,0,0,0
2,2,2003-02-15 15:41:43.816,0.04,560,60,1413011,6509,217.1,2.9,0,0,0,0,0,1
3,3,2003-02-16 10:33:43.816,0.74,440,60,56108,5475,10.2,2.0,0,0,0,0,0,1
4,4,2003-02-17 02:42:43.816,1.42,260,60,375627,3453,108.8,17.0,1,1,0,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
5475,5475,2020-11-09 23:12:08.992,0.48,1400,200,63982,13505,4.7,1.3,0,0,0,0,0,1
5476,5476,2020-11-23 13:25:29.996,0.13,200,200,16241,6002,2.7,1.1,0,0,0,0,0,1
5477,5477,2020-11-23 13:58:49.998,0.12,200,200,13160,6002,2.2,1.0,0,0,0,0,0,1
5478,5478,2020-11-25 12:42:08.996,1.20,200,200,12309,6015,2.0,1.1,0,0,0,0,0,1


In [None]:
# Each KONUS trigger covers the closed interval [left, right] = [UT, UT + Dur. seconds].
konus_triggers_data['left'] = konus_triggers_data['UT']
konus_triggers_data['right'] = konus_triggers_data['UT'] + pd.to_timedelta(konus_triggers_data['Dur.'], unit='s')

# Each candidate covers the closed interval [datetime, datetime + duration seconds].
candidate_start = candidates['datetime'].to_numpy()
candidate_end = (candidates['datetime'] + pd.to_timedelta(candidates['duration'], unit='s')).to_numpy()

# Sort the triggers by start time so that "all triggers that started no later
# than the candidate's end" is a prefix of the arrays below.
triggers_by_start = konus_triggers_data.sort_values('left')
trigger_left = triggers_by_start['left'].to_numpy()
trigger_right = triggers_by_start['right'].to_numpy()

if len(trigger_left):
    # latest_right[k] is the latest end time among the first k + 1 triggers.
    latest_right = np.maximum.accumulate(trigger_right)
    # Number of triggers whose start is <= the candidate's end.
    n_started = np.searchsorted(trigger_left, candidate_end, side='right')
    # Two closed intervals overlap iff trigger.left <= candidate.end and
    # trigger.right >= candidate.start. Checking every trigger (not only the
    # one nearest to the candidate start) also catches triggers that lie
    # inside a long candidate window or that fully contain the candidate.
    has_overlap = (n_started > 0) & (latest_right[np.maximum(n_started - 1, 0)] >= candidate_start)
else:
    has_overlap = np.zeros(len(candidates), dtype=bool)

# Write the flag into the frame itself; the previous
# `candidates.iloc[i]['crossmatched'] = 1` only modified a temporary row copy,
# so no flag was ever stored.
candidates['crossmatched'] = has_overlap.astype(int)