## Setup

In [83]:
import pandas as pd
import numpy as np
import os
from dotenv import load_dotenv

In [84]:
load_dotenv()

# Root directories of the per-year CSV files, read from the environment
# (presumably populated by load_dotenv() above from a .env file).
events_csv_path = os.getenv("EVENTS_CSV_PATH")
xrays_csv_path = os.getenv("XRAY_V2_CSV_PATH")

# Fail fast with a readable message: os.getenv returns None for an unset
# variable, which would otherwise only surface later as a TypeError raised
# from os.path.join.
_unset_vars = [
    name
    for name, value in (
        ("EVENTS_CSV_PATH", events_csv_path),
        ("XRAY_V2_CSV_PATH", xrays_csv_path),
    )
    if value is None
]
if _unset_vars:
    raise RuntimeError(
        f"Missing required environment variable(s): {', '.join(_unset_vars)}"
    )

# Per-year container filled by the loading cells: data[year][...] -> DataFrame.
data = {}

begin_year = 1983
end_year = 2024
# NOTE(review): range() excludes its stop value, so year_range covers
# begin_year .. end_year - 1 and 2024 is never iterated, even though the
# expected-missing file lists below include 2024 -- confirm this is intended.
year_range = range(begin_year, end_year)

## Reading CSVs

In [85]:
# Paths whose absence is expected: the loading cell only prints a
# FileNotFoundError when the failing path is not in this collection.
# NOTE(review): the next cell rebuilds `missing_files` as a set, so this
# list is overwritten before it is used -- this cell may be redundant.
missing_files = []
for y in range(1983, 1995+1):
    # Same order as before: the events file first, then the DSD file.
    for file_type in ("events", "DSD"):
        missing_files.append(os.path.join(events_csv_path, str(y), f"{y}_{file_type}.csv"))
for y in range(2021, 2024+1):
    missing_files.append(os.path.join(xrays_csv_path, str(y), f"{y}_xrays.csv"))

In [86]:
# Events/DSD files for 1983-1995 and x-ray files for 2021-2024 are treated as
# expected gaps; the loading loop below stays silent when one of these paths
# raises FileNotFoundError.
missing_events_dsd = [
    os.path.join(events_csv_path, f"{year}", f"{year}_{suffix}.csv")
    for year in range(1983, 1996)
    for suffix in ('events', 'DSD')
]
missing_xrays = [
    os.path.join(xrays_csv_path, f"{year}", f"{year}_xrays.csv")
    for year in range(2021, 2025)
]
# A set gives constant-time membership checks and drops any duplicate paths.
missing_files = {*missing_events_dsd, *missing_xrays}

def _read_csv_or_empty(csv_path):
    """Read ``csv_path`` with ``pd.read_csv``; fall back to an empty DataFrame.

    Parameters
    ----------
    csv_path : str
        Path of the CSV file to load.

    Returns
    -------
    pandas.DataFrame
        The parsed file, or an empty DataFrame when the file does not exist.

    Notes
    -----
    A FileNotFoundError is printed unless ``csv_path`` is listed in the
    module-level ``missing_files`` collection. Any other exception propagates.
    """
    try:
        return pd.read_csv(csv_path)
    except FileNotFoundError as e:
        # Compare the path we asked for (the same string form used to build
        # `missing_files`) rather than relying on how the exception was filled in.
        if csv_path not in missing_files: print(e)
        return pd.DataFrame()


# Load the three per-year CSVs into data[y]; the read order is events, DSD, xrays.
for y in year_range:
    data[y] = {}
    data[y]['events'] = {}
    events_year_dir = os.path.join(events_csv_path, str(y))
    xrays_year_dir = os.path.join(xrays_csv_path, str(y))

    data[y]['events']['all'] = _read_csv_or_empty(os.path.join(events_year_dir, f"{y}_events.csv"))
    data[y]['events']['DSD'] = _read_csv_or_empty(os.path.join(events_year_dir, f"{y}_DSD.csv"))
    data[y]['xrays'] = _read_csv_or_empty(os.path.join(xrays_year_dir, f"{y}_xrays.csv"))

## Categorizing Different Event Types

In [87]:
# Type codes to split out of each year's 'all' events frame; each code is
# matched against that frame's 'type' column.
event_types = ("BSL", "DSF", "EPL", "FIL", "FLA", "FOR", "GLE", "LPS", "PCA", "RBR", "RNS", "RSP", "XRA")

for y in year_range:
    df_all = data[y]['events']['all']
    # Years whose events file was not found hold an empty frame without a
    # 'type' column. Checking once per year replaces the former per-type
    # try/except KeyError, so unrelated KeyErrors are no longer swallowed.
    has_type_column = 'type' in df_all.columns
    for t in event_types:
        if not has_type_column:
            data[y]['events'][t] = pd.DataFrame()
            continue
        # Keep each row's index label from the unsplit frame as 'original_index'
        # so a per-type row can be traced back to data[y]['events']['all'].
        data[y]['events'][t] = (
            df_all[df_all['type'] == t]
            .reset_index(drop=False)
            .rename(columns={'index': 'original_index'})
        )

## Inspecting the Loaded Data (2009 Sample)

In [88]:
# Display the 2009 frame read from 2009_DSD.csv (an empty frame if that file was not found).
data[2009]['events']['DSD']

Unnamed: 0,ds,radio_flux_10.7cm,sunspot_number,sunspot_area,new_regions,mean_solar_field,goes_xray_bkgd_flux,flares_c,flares_m,flares_x,flares_optical_s,flares_optical_1,flares_optical_2,flares_optical_3
0,2009-01-01,69,0,0,0,,A0.0,0,0,0,0,0,0,0
1,2009-01-02,70,0,0,0,,A0.0,0,0,0,0,0,0,0
2,2009-01-03,70,0,0,0,,A0.0,0,0,0,0,0,0,0
3,2009-01-04,69,0,0,0,,A0.0,0,0,0,0,0,0,0
4,2009-01-05,69,0,0,0,,A0.0,0,0,0,0,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
360,2009-12-27,77,17,0,0,,A2.0,0,0,0,0,0,0,0
361,2009-12-28,76,17,90,0,,A1.4,0,0,0,0,0,0,0
362,2009-12-29,75,17,80,0,,A1.2,0,0,0,0,0,0,0
363,2009-12-30,77,15,50,0,,A1.4,0,0,0,0,0,0,0


In [89]:
# Display every 2009 event row as read from 2009_events.csv, before the split by 'type'.
data[2009]['events']['all']

Unnamed: 0,date,event,begin,max,end,obs,q,type,loc_frq,particulars,reg#
0,2009-01-09,210+,2009-01-09 23:58:00,2009-01-10 00:03:00,2009-01-10 00:05:00,G10,5,XRA,1-8A,B4.0 7.6E-05,1010.0
1,2009-01-10,220+,2009-01-10 00:47:00,2009-01-10 00:51:00,2009-01-10 00:55:00,G10,5,XRA,1-8A,B1.2 4.0E-05,1010.0
2,2009-01-10,230+,2009-01-10 02:33:00,2009-01-10 02:36:00,2009-01-10 02:39:00,G10,5,XRA,1-8A,B1.0 2.3E-05,1010.0
3,2009-01-18,280,2009-01-18 05:03:00,,2009-01-18 05:03:00,LEA,C,RSP,025-059,III/1,
4,2009-01-18,290,2009-01-18 05:04:00,,2009-01-18 05:04:00,CUL,C,RSP,20-90,III/1,
...,...,...,...,...,...,...,...,...,...,...,...
444,2009-12-31,4210,2009-12-31 10:54:00,2009-12-31 10:59:00,2009-12-31 11:06:00,G14,5,XRA,1-8A,B1.6 9.3E-05,
445,2009-12-31,4220,2009-12-31 11:15:00,2009-12-31 11:20:00,2009-12-31 11:31:00,G14,5,XRA,1-8A,B1.9 1.6E-04,
446,2009-12-31,4230,2009-12-31 14:29:00,2009-12-31 14:32:00,2009-12-31 14:51:00,G14,5,XRA,1-8A,B1.3 1.4E-04,
447,2009-12-31,4240,2009-12-31 15:37:00,2009-12-31 15:42:00,2009-12-31 15:46:00,G14,5,XRA,1-8A,B1.5 6.1E-05,


In [90]:
# Display the 2009 frame read from 2009_xrays.csv (an empty frame if that file was not found).
data[2009]['xrays']

Unnamed: 0,ds,xs,xl
0,2009-01-01 00:00:00,3.570000e-09,3.720000e-09
1,2009-01-01 00:01:00,3.540000e-09,3.720000e-09
2,2009-01-01 00:02:00,3.520000e-09,3.720000e-09
3,2009-01-01 00:03:00,3.560000e-09,3.720000e-09
4,2009-01-01 00:04:00,3.540000e-09,3.720000e-09
...,...,...,...
525595,2009-12-31 23:55:00,,
525596,2009-12-31 23:56:00,,
525597,2009-12-31 23:57:00,,
525598,2009-12-31 23:58:00,,


In [93]:
# Display the 2009 events whose 'type' is "FLA"; 'original_index' holds each
# row's index label in data[2009]['events']['all'].
data[2009]['events']['FLA']

Unnamed: 0,original_index,date,event,begin,max,end,obs,q,type,loc_frq,particulars,reg#
0,76,2009-07-04,940,2009-07-04 02:08:00,2009-07-04 02:12:00,2009-07-04 02:13:00,LEA,3,FLA,S27E15,SF DSD,1024.0
1,80,2009-07-04,970,2009-07-04 04:37:00,2009-07-04 04:38:00,2009-07-04 04:42:00,LEA,3,FLA,S26E11,SF DSD,1024.0
2,88,2009-07-04,1050,2009-07-04 15:09:00,,2009-07-04 15:16:00,HOL,3,FLA,S27E06,SF ERU,1024.0
3,98,2009-07-05,1140,2009-07-05 07:11:00,2009-07-05 07:13:00,2009-07-05 07:21:00,LEA,3,FLA,S27W02,SF,1024.0
4,110,2009-07-06,1250,2009-07-06 17:03:00,2009-07-06 17:04:00,2009-07-06 17:25:00,HOL,3,FLA,S25W23,SF ERU,1024.0
5,114,2009-07-07,1300,2009-07-07 17:07:00,2009-07-07 17:08:00,2009-07-07 17:10:00,HOL,3,FLA,S25W37,SF DSD,1024.0
6,137,2009-09-24,1530,2009-09-24 23:45:00,2009-09-24 23:53:00,2009-09-25 00:01:00,HOL,4,FLA,S32E28,SF ERU,1026.0
7,170,2009-10-26,1850,2009-10-26 18:42:00,2009-10-26 19:03:00,2009-10-26 19:43:00,HOL,3,FLA,N16W29,SF ERU,1029.0
8,171,2009-10-26,1860,2009-10-26 19:52:00,2009-10-26 19:55:00,2009-10-26 19:59:00,HOL,3,FLA,N16W30,SF ERU,1029.0
9,177,2009-10-26,1900,2009-10-26 22:41:00,2009-10-26 22:50:00,2009-10-26 23:00:00,HOL,3,FLA,N16W30,SF ERU,1029.0


In [95]:
# Display the 2009 events whose 'type' is "XRA"; 'original_index' holds each
# row's index label in data[2009]['events']['all'].
data[2009]['events']['XRA']

Unnamed: 0,original_index,date,event,begin,max,end,obs,q,type,loc_frq,particulars,reg#
0,0,2009-01-09,210+,2009-01-09 23:58:00,2009-01-10 00:03:00,2009-01-10 00:05:00,G10,5,XRA,1-8A,B4.0 7.6E-05,1010.0
1,1,2009-01-10,220+,2009-01-10 00:47:00,2009-01-10 00:51:00,2009-01-10 00:55:00,G10,5,XRA,1-8A,B1.2 4.0E-05,1010.0
2,2,2009-01-10,230+,2009-01-10 02:33:00,2009-01-10 02:36:00,2009-01-10 02:39:00,G10,5,XRA,1-8A,B1.0 2.3E-05,1010.0
3,8,2009-02-10,330+,2009-02-10 23:00:00,2009-02-10 23:11:00,2009-02-10 23:42:00,G10,5,XRA,1-8A,B1.3 2.8E-04,1012.0
4,9,2009-02-12,340+,2009-02-12 16:10:00,2009-02-12 16:19:00,2009-02-12 16:31:00,G10,5,XRA,1-8A,B4.1 3.5E-04,1012.0
...,...,...,...,...,...,...,...,...,...,...,...,...
251,444,2009-12-31,4210,2009-12-31 10:54:00,2009-12-31 10:59:00,2009-12-31 11:06:00,G14,5,XRA,1-8A,B1.6 9.3E-05,
252,445,2009-12-31,4220,2009-12-31 11:15:00,2009-12-31 11:20:00,2009-12-31 11:31:00,G14,5,XRA,1-8A,B1.9 1.6E-04,
253,446,2009-12-31,4230,2009-12-31 14:29:00,2009-12-31 14:32:00,2009-12-31 14:51:00,G14,5,XRA,1-8A,B1.3 1.4E-04,
254,447,2009-12-31,4240,2009-12-31 15:37:00,2009-12-31 15:42:00,2009-12-31 15:46:00,G14,5,XRA,1-8A,B1.5 6.1E-05,
