# Load times from GFOC and SWMA Catalogue

In [1]:
import pandas as pd

# Locations of the GFOC and SWMA catalogue CSV files.
GFOC_dir = "/home/dschwarz/Documents/MT/Dataset/Dataset_MSc/GFOC_RDCDFI.csv"
SWMA_dir = "/home/dschwarz/Documents/MT/Dataset/Dataset_MSc/SWMA_RDAWFI.csv"

# Read both catalogues into DataFrames, GFOC first and SWMA second.
# low_memory=True is spelled out explicitly but is also the pandas default:
# the file is processed in chunks internally while parsing.
GFOC_data, SWMA_data = (
    pd.read_csv(csv_path, low_memory=True) for csv_path in (GFOC_dir, SWMA_dir)
)

In [2]:
# Time conversion: parse the 'time' column of each dataset into datetimes.
GFOC_time = pd.to_datetime(GFOC_data['time'], format='%Y-%m-%d %H:%M:%S')
SWMA_time = pd.to_datetime(SWMA_data['time'], format='%Y-%m-%d %H:%M:%S')

# First and last sample of each series, taken by position (.iloc) so the lookup
# does not depend on the Series carrying a default 0..N-1 index.
# NOTE(review): "start"/"end" are the first/last rows, which equals the covered
# time range only if the files are sorted chronologically — confirm.
GFOC_start = GFOC_time.iloc[0]
GFOC_end = GFOC_time.iloc[-1]
SWMA_start = SWMA_time.iloc[0]
SWMA_end = SWMA_time.iloc[-1]

# Load Shock & both ICME catalogues

In [3]:
import pandas as pd

# Read the three event catalogues in one go:
#   Shock       - shock list
#   Helios_Wind - helio4cast ICME catalogue
#   RC          - RC ICME date list
Shock, Helios_Wind, RC = map(
    pd.read_csv,
    (
        './Dataset_IPshocks/shocks_20250514_121541.csv',
        './Dataset_ICMECAT/helio4cast_icmecat.csv',
        './Dataset_ICMECAT/RC_icmecat_dates.csv',
    ),
)

# Optional (disabled): restrict the shock list to a single spacecraft, e.g.
#   Shock_Wind = Shock[Shock['Spacecraft'] == 'Wind']   # or 'ACE', 'DSCOVR', 'OMNI'

# Create Flags

### First I want to check if all the times are in the correct order

In [23]:
def report_time_order(event_times, ascending=True):
    """Print whether ``event_times`` is sorted in the requested direction.

    Parameters
    ----------
    event_times : array-like of timestamps
        Converted with ``pd.DatetimeIndex`` before checking.
    ascending : bool, default True
        True  -> expect earliest-to-latest order,
        False -> expect latest-to-earliest order.
        Equal neighbouring timestamps are accepted in both cases.

    Prints
    ------
    On the first out-of-order neighbouring pair (scanning from the top):
    a failure line, the position of the first element of the pair, and both
    timestamps. The success line is printed only when no such pair exists.
    """
    times = pd.DatetimeIndex(event_times)
    for pos in range(len(times) - 1):
        current, following = times[pos], times[pos + 1]
        out_of_order = current > following if ascending else current < following
        if out_of_order:
            print("The times are not in the correct order")
            print(pos)
            print(current)
            print(following)
            # Return here so the success message below is never printed as well.
            return
    print("The times are in the correct order")


# ======================================================================================
# Event times ordered from the latest to the earliest (disabled)
# ======================================================================================

# report_time_order(Shock['Time'], ascending=False)
# report_time_order(Helios_Wind['icme_start_time'], ascending=False)

# ======================================================================================
# Event times ordered from the earliest to the latest
# ======================================================================================

event_time_dt = pd.DatetimeIndex(RC['ICME_Start'])
report_time_order(event_time_dt)

The times are in the correct order


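The times are in the correct order. However, I still need to think about the sort direction: the check above treats the Shock and Helios catalogues as latest-to-earliest and the RC catalogue as earliest-to-latest.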

## Creating Flags for GFOC

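- For ICME events, both the start and the end time of the event matter: the flag is 1 for samples between start and end (inclusive) and 0 otherwise.
- For Shock events there is only a single time, so the flag is 1 for samples within ±15 seconds of that time and 0 otherwise.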

In [2]:
import numpy as np
from datetime import timedelta

# Half-width of the window flagged around each shock time (a shock has only one time stamp).
shock_half_window = timedelta(seconds=15)

# Start and end of every catalogue event, parsed once per column.
shock_time = pd.to_datetime(Shock['Time'], format='%Y-%m-%d %H:%M:%S')
shock_start = shock_time - shock_half_window
shock_end = shock_time + shock_half_window
helios_start = pd.to_datetime(Helios_Wind['icme_start_time'], format='%Y-%m-%d %H:%M:%S')
rc_start = pd.to_datetime(RC['ICME_Start'], format='%Y-%m-%d %H:%M:%S')
# NOTE(review): the end columns are parsed without an explicit format (as before);
# confirm they are formatted consistently within each catalogue.
helios_end = pd.to_datetime(Helios_Wind['mo_end_time'])
rc_end = pd.to_datetime(RC['ICME_End'])

# Keep every event whose interval OVERLAPS the GFOC coverage. Filtering on the
# start time alone would drop an event that began before the first GFOC sample
# but was still ongoing, leaving the samples inside it wrongly flagged 0.
# Events with a missing (NaT) end never match a sample and are excluded here.
shock_in_GFOC = (shock_end >= GFOC_start) & (shock_start <= GFOC_end)
helios_in_GFOC = (helios_end >= GFOC_start) & (helios_start <= GFOC_end)
rc_in_GFOC = (rc_end >= GFOC_start) & (rc_start <= GFOC_end)

Shock_GFOC = Shock[shock_in_GFOC]
Helios_GFOC = Helios_Wind[helios_in_GFOC]
RC_GFOC = RC[rc_in_GFOC]

# One flag per GFOC sample and catalogue: 1 inside an event interval, 0 outside.
Shock_flag_GFOC = np.zeros(len(GFOC_time), dtype=int)
Helios_flag_GFOC = np.zeros(len(GFOC_time), dtype=int)
RC_flag_GFOC = np.zeros(len(GFOC_time), dtype=int)

# Shock events: flag all samples in the ±15 second window (bounds inclusive)
for start, end in zip(shock_start[shock_in_GFOC], shock_end[shock_in_GFOC]):
    Shock_flag_GFOC[((GFOC_time >= start) & (GFOC_time <= end)).to_numpy()] = 1

# Helios ICMEs: flag all samples between icme_start_time and mo_end_time (inclusive)
for start, end in zip(helios_start[helios_in_GFOC], helios_end[helios_in_GFOC]):
    Helios_flag_GFOC[((GFOC_time >= start) & (GFOC_time <= end)).to_numpy()] = 1

# RC ICMEs: flag all samples between ICME_Start and ICME_End (inclusive)
for start, end in zip(rc_start[rc_in_GFOC], rc_end[rc_in_GFOC]):
    RC_flag_GFOC[((GFOC_time >= start) & (GFOC_time <= end)).to_numpy()] = 1


GFOC_flag_df = pd.DataFrame({
    'GFOC_time': GFOC_time,
    'Shock_flag': Shock_flag_GFOC,
    'Helios_flag': Helios_flag_GFOC,
    'RC_flag': RC_flag_GFOC
})

# Save the DataFrame to a CSV file
GFOC_flag_df.to_csv('GFOC_flags.csv', index=False)

NameError: name 'GFOC_start' is not defined

## Creating Flags for SWMA

In [36]:
# Half-width of the window flagged around each shock time (a shock has only one time stamp).
shock_half_window = timedelta(seconds=15)

# Start and end of every catalogue event, parsed once per column.
shock_time = pd.to_datetime(Shock['Time'], format='%Y-%m-%d %H:%M:%S')
shock_start = shock_time - shock_half_window
shock_end = shock_time + shock_half_window
helios_start = pd.to_datetime(Helios_Wind['icme_start_time'], format='%Y-%m-%d %H:%M:%S')
rc_start = pd.to_datetime(RC['ICME_Start'], format='%Y-%m-%d %H:%M:%S')
# NOTE(review): the end columns are parsed without an explicit format (as before);
# confirm they are formatted consistently within each catalogue.
helios_end = pd.to_datetime(Helios_Wind['mo_end_time'])
rc_end = pd.to_datetime(RC['ICME_End'])

# Keep every event whose interval OVERLAPS the SWMA coverage. Filtering on the
# start time alone would drop an event that began before the first SWMA sample
# but was still ongoing, leaving the samples inside it wrongly flagged 0.
# Events with a missing (NaT) end never match a sample and are excluded here.
shock_in_SWMA = (shock_end >= SWMA_start) & (shock_start <= SWMA_end)
helios_in_SWMA = (helios_end >= SWMA_start) & (helios_start <= SWMA_end)
rc_in_SWMA = (rc_end >= SWMA_start) & (rc_start <= SWMA_end)

Shock_SWMA = Shock[shock_in_SWMA]
Helios_SWMA = Helios_Wind[helios_in_SWMA]
RC_SWMA = RC[rc_in_SWMA]

# One flag per SWMA sample and catalogue: 1 inside an event interval, 0 outside.
Shock_flag_SWMA = np.zeros(len(SWMA_time), dtype=int)
Helios_flag_SWMA = np.zeros(len(SWMA_time), dtype=int)
RC_flag_SWMA = np.zeros(len(SWMA_time), dtype=int)

# Shock events: flag all samples in the ±15 second window (bounds inclusive)
for start, end in zip(shock_start[shock_in_SWMA], shock_end[shock_in_SWMA]):
    Shock_flag_SWMA[((SWMA_time >= start) & (SWMA_time <= end)).to_numpy()] = 1

# Helios ICMEs: flag all samples between icme_start_time and mo_end_time (inclusive)
for start, end in zip(helios_start[helios_in_SWMA], helios_end[helios_in_SWMA]):
    Helios_flag_SWMA[((SWMA_time >= start) & (SWMA_time <= end)).to_numpy()] = 1

# RC ICMEs: flag all samples between ICME_Start and ICME_End (inclusive)
for start, end in zip(rc_start[rc_in_SWMA], rc_end[rc_in_SWMA]):
    RC_flag_SWMA[((SWMA_time >= start) & (SWMA_time <= end)).to_numpy()] = 1

SWMA_flag_df = pd.DataFrame({
    'SWMA_time': SWMA_time,
    'Shock_flag': Shock_flag_SWMA,
    'Helios_flag': Helios_flag_SWMA,
    'RC_flag': RC_flag_SWMA
})

# Save the DataFrame to a CSV file
SWMA_flag_df.to_csv('SWMA_flags.csv', index=False)