This notebook is based on the following Colab notebook: https://colab.research.google.com/drive/1_zJMGJnX3XJx7FCHD04SC3Y0KCMdDGMz#scrollTo=chBHYxkg4hFp

In [10]:
import pickle
import pandas as pd
import numpy as np
import astropy
import sunpy
from sunpy.time import parse_time
import matplotlib.pyplot as plt
import seaborn as sns
#make figures bigger
plt.rcParams["figure.figsize"] = (10,5)

In [11]:
# Location of the HELIO4CAST ICMECAT catalogue (file tagged v23) on the remote server
url = 'https://helioforecast.space/static/sync/icmecat/HELIO4CAST_ICMECAT_v23.csv'

# Download the full dataset; the CSV carries an extra index column
# ('Unnamed: 0') that is discarded right away
ic = pd.read_csv(url).drop(columns='Unnamed: 0')

# Keep a local copy of the catalogue (without the DataFrame index)
ic.to_csv('HELIO4CAST_ICMECAT_v23.csv', index=False)

In [12]:
# Display every catalogue row that has at least one missing (NaN) value
ic.loc[ic.isna().any(axis='columns')]

Unnamed: 0,icmecat_id,sc_insitu,icme_start_time,mo_start_time,mo_end_time,mo_sc_heliodistance,mo_sc_long_heeq,mo_sc_lat_heeq,icme_duration,icme_bmax,...,mo_density_mean,mo_density_std,mo_temperature_mean,mo_temperature_std,sheath_speed_mean,sheath_speed_std,sheath_density_mean,sheath_density_std,sheath_pdyn_mean,sheath_pdyn_std
1,ICME_STEREO_A_MOESTL_20241218_01,STEREO-A,2024-12-18T06:17Z,2024-12-18T22:32Z,2024-12-20T02:00Z,0.9672,28.54,-4.79,43.72,24.8,...,2.2,0.7,0.0,0.0,338.7,23.2,14.1,5.1,2.6,0.8
3,ICME_BEPI_MOESTL_20241216_01,BepiColombo,2024-12-16T04:02Z,2024-12-16T06:39Z,2024-12-16T15:39Z,0.3343,51.21,-1.93,11.62,87.8,...,,,,,,,,,,
4,ICME_Wind_MOESTL_20241207_01,Wind,2024-12-07T23:14Z,2024-12-07T23:14Z,2024-12-08T17:50Z,0.9746,-0.17,0.02,18.60,10.2,...,7.8,3.6,65012.1,33690.2,,,,,,
6,ICME_BEPI_MOESTL_20241126_01,BepiColombo,2024-11-26T23:28Z,2024-11-27T05:59Z,2024-11-27T19:39Z,0.3333,-43.74,3.84,20.18,70.5,...,,,,,,,,,,
7,ICME_BEPI_MOESTL_20241122_01,BepiColombo,2024-11-22T21:38Z,2024-11-22T22:34Z,2024-11-23T00:46Z,0.3569,-61.22,3.70,3.13,65.2,...,,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1896,ICME_ULY_RICHARDSON_19910305_01,ULYSSES,1991-03-05T00:00Z,1991-03-05T00:00Z,1991-03-08T20:00Z,2.2833,-41.33,-3.42,92.00,4.7,...,1.3,1.1,25501.7,11677.5,,,,,,
1897,ICME_ULY_RICHARDSON_19910226_01,ULYSSES,1991-02-26T22:00Z,1991-02-26T22:00Z,1991-02-27T17:00Z,2.2166,-36.80,-3.26,19.00,4.7,...,0.9,0.4,65951.4,24933.2,,,,,,
1898,ICME_ULY_RICHARDSON_19910116_01,ULYSSES,1991-01-16T16:00Z,1991-01-16T16:00Z,1991-01-18T11:00Z,1.7583,-9.09,-1.75,43.00,3.1,...,0.9,0.1,55306.4,21032.7,,,,,,
1899,ICME_ULY_RICHARDSON_19901225_01,ULYSSES,1990-12-25T14:00Z,1990-12-25T14:00Z,1990-12-28T12:00Z,1.5160,2.18,-0.49,70.00,5.4,...,1.8,1.1,52144.3,36250.8,,,,,,


In [13]:
# Print the column names, then show the first rows as the cell output
print(ic.columns)
ic.head()

Index(['icmecat_id', 'sc_insitu', 'icme_start_time', 'mo_start_time',
       'mo_end_time', 'mo_sc_heliodistance', 'mo_sc_long_heeq',
       'mo_sc_lat_heeq', 'icme_duration', 'icme_bmax', 'icme_bmean',
       'icme_bstd', 'icme_speed_mean', 'icme_speed_std', 'mo_duration',
       'mo_bmax', 'mo_bmean', 'mo_bstd', 'mo_bzmean', 'mo_bzmin', 'mo_bzstd',
       'mo_bymean', 'mo_bystd', 'mo_speed_mean', 'mo_speed_std',
       'mo_expansion_speed', 'mo_pdyn_mean', 'mo_pdyn_std', 'mo_density_mean',
       'mo_density_std', 'mo_temperature_mean', 'mo_temperature_std',
       'sheath_speed_mean', 'sheath_speed_std', 'sheath_density_mean',
       'sheath_density_std', 'sheath_pdyn_mean', 'sheath_pdyn_std'],
      dtype='object')


Unnamed: 0,icmecat_id,sc_insitu,icme_start_time,mo_start_time,mo_end_time,mo_sc_heliodistance,mo_sc_long_heeq,mo_sc_lat_heeq,icme_duration,icme_bmax,...,mo_density_mean,mo_density_std,mo_temperature_mean,mo_temperature_std,sheath_speed_mean,sheath_speed_std,sheath_density_mean,sheath_density_std,sheath_pdyn_mean,sheath_pdyn_std
0,ICME_STEREO_A_MOESTL_20241220_01,STEREO-A,2024-12-20T11:32Z,2024-12-20T14:44Z,2024-12-20T22:34Z,0.9672,28.58,-4.96,11.03,12.8,...,3.0,1.4,0.0,0.0,363.6,7.7,8.9,2.7,2.0,0.6
1,ICME_STEREO_A_MOESTL_20241218_01,STEREO-A,2024-12-18T06:17Z,2024-12-18T22:32Z,2024-12-20T02:00Z,0.9672,28.54,-4.79,43.72,24.8,...,2.2,0.7,0.0,0.0,338.7,23.2,14.1,5.1,2.6,0.8
2,ICME_Wind_MOESTL_20241217_01,Wind,2024-12-17T04:28Z,2024-12-17T05:28Z,2024-12-17T14:16Z,0.9732,-0.1,-1.17,9.8,32.0,...,20.3,10.0,297500.0,323800.5,541.5,19.5,42.2,6.7,20.9,4.5
3,ICME_BEPI_MOESTL_20241216_01,BepiColombo,2024-12-16T04:02Z,2024-12-16T06:39Z,2024-12-16T15:39Z,0.3343,51.21,-1.93,11.62,87.8,...,,,,,,,,,,
4,ICME_Wind_MOESTL_20241207_01,Wind,2024-12-07T23:14Z,2024-12-07T23:14Z,2024-12-08T17:50Z,0.9746,-0.17,0.02,18.6,10.2,...,7.8,3.6,65012.1,33690.2,,,,,,


In [14]:
# Distinct spacecraft names found in the sc_insitu column
print(pd.unique(ic['sc_insitu']))


['STEREO-A' 'Wind' 'BepiColombo' 'SolarOrbiter' 'PSP' 'MAVEN' 'Juno'
 'MESSENGER' 'VEX' 'STEREO-B' 'ULYSSES']


# Convert Spacecraft coordinates from HEEQ to GSE

In [15]:
# Preview the inputs of the coordinate conversion: the magnetic-obstacle
# start time and the spacecraft position columns (distance, HEEQ lon/lat)
print(
    ic.loc[
        :,
        ['mo_start_time', 'mo_sc_heliodistance', 'mo_sc_long_heeq', 'mo_sc_lat_heeq'],
    ].head()
)

       mo_start_time  mo_sc_heliodistance  mo_sc_long_heeq  mo_sc_lat_heeq
0  2024-12-20T14:44Z               0.9672            28.58           -4.96
1  2024-12-18T22:32Z               0.9672            28.54           -4.79
2  2024-12-17T05:28Z               0.9732            -0.10           -1.17
3  2024-12-16T06:39Z               0.3343            51.21           -1.93
4  2024-12-07T23:14Z               0.9746            -0.17            0.02


In [16]:
import pandas as pd
import numpy as np
from astropy.time import Time
from astropy.coordinates import SkyCoord
import sunpy.coordinates
from sunpy.coordinates import frames
from astropy import units as u

# Convert the catalogue time columns to tz-naive datetimes.
# utc=True keeps this cell re-runnable: on the first run the columns hold
# strings such as '2024-12-20T14:44Z'; on later runs they already hold
# tz-naive datetimes, for which a plain `.dt.tz_convert(None)` would raise
# "Cannot convert tz-naive timestamps". With utc=True both cases are first
# made UTC-aware and then stripped of the timezone, giving the same values.
for time_col in ('icme_start_time', 'mo_start_time', 'mo_end_time'):
    ic[time_col] = pd.to_datetime(ic[time_col], utc=True).dt.tz_convert(None)

# The spacecraft position is evaluated at the magnetic-obstacle start time
ic['sc_time'] = ic['mo_start_time']
times = Time(ic['sc_time'].values)

# spherical HEEQ (r, lon, lat), with units attached for astropy
r = ic['mo_sc_heliodistance'].values * u.AU
lon = ic['mo_sc_long_heeq'].values * u.deg
lat = ic['mo_sc_lat_heeq'].values * u.deg

# Define HEEQ using HeliographicStonyhurst
# see https://docs.sunpy.org/en/stable/reference/coordinates/index.html for reference
heeq = SkyCoord(
    lon=lon,
    lat=lat,
    radius=r,
    frame=frames.HeliographicStonyhurst,
    obstime=times,
    representation_type='spherical'
)

# Convert to GSE, using each event's own observation time for the target frame
gse = heeq.transform_to(frames.GeocentricSolarEcliptic(obstime=times))
gse_cartesian = gse.cartesian

# Extract the Cartesian components in Earth radii
x_gse_RE = gse_cartesian.x.to(u.R_earth).value
y_gse_RE = gse_cartesian.y.to(u.R_earth).value
z_gse_RE = gse_cartesian.z.to(u.R_earth).value

# Store in DataFrame
ic['mo_sc_GSE_x'] = x_gse_RE
ic['mo_sc_GSE_y'] = y_gse_RE
ic['mo_sc_GSE_z'] = z_gse_RE

# Preview the result
print(ic[['sc_time', 'mo_sc_GSE_x', 'mo_sc_GSE_y', 'mo_sc_GSE_z']].head())

              sc_time   mo_sc_GSE_x   mo_sc_GSE_y  mo_sc_GSE_z
0 2024-12-20 14:44:00   3179.626525 -10900.886565   -51.258397
1 2024-12-18 22:32:00   3176.694630 -10890.584955   -49.613606
2 2024-12-17 05:28:00    254.059836     41.829447    13.441353
3 2024-12-16 06:39:00  18168.807162  -6081.640008   593.532185
4 2024-12-07 23:14:00    245.660403     69.997092    12.775244


# Save to .csv

In [17]:
# Keep only the events whose in-situ spacecraft (sc_insitu) is 'Wind'
# and write that subset to its own CSV file (without the DataFrame index)
filtered_ic = ic.loc[ic['sc_insitu'].eq('Wind')]
filtered_ic.to_csv('helio4cast_icmecat.csv', index=False)

Restrict the catalogue to the time range of the GFOC data

In [1]:
import pandas as pd

# Reload the Wind-only catalogue written by the save step
filtered_ic = pd.read_csv('helio4cast_icmecat.csv')

# Time window of the GFOC data
GFOC_start = pd.Timestamp('2023-01-03 12:00:00')
GFOC_end = pd.Timestamp('2024-06-30 11:59:30')

# Timestamp layout of the time columns in the CSV
csv_time_format = '%Y-%m-%d %H:%M:%S'

# Keep events that overlap the GFOC window:
# first drop everything that ended before the window opens ...
event_end = pd.to_datetime(filtered_ic['mo_end_time'], format=csv_time_format)
ic_GFOC = filtered_ic[event_end >= GFOC_start]

# ... then drop everything that started after the window closes
event_start = pd.to_datetime(ic_GFOC['icme_start_time'], format=csv_time_format)
ic_GFOC = ic_GFOC[event_start <= GFOC_end]

# Write the selection to a new CSV file
ic_GFOC.to_csv('helio4cast_icmecat_GFOC.csv', index=False)