In [2]:
import numpy as np
import pandas as pd
import astropy.constants as c
from sw_functions import download_sc_dataset, get_trajectory

  from .autonotebook import tqdm as notebook_tqdm


Download the data for the entire PSP, SolO, and ACE missions and save them as ready-to-use pandas DataFrames. The saved quantities include $B_r$, $B_t$, $B_n$, $B$, $V_{sw}$, $T_p$, and $N_p$.

In [2]:
# Keep this here for now, bad practice
import warnings
warnings.filterwarnings("ignore")

# PSP data

## Magnetic field

In [None]:
# Date window requested for the PSP magnetic-field download
psp_start = '2018-10-01'
psp_end = '2025-08-31'

# Fetch the whole window in a single call; the (start, end) pair is passed as a tuple
psp_mag = download_sc_dataset('psp', 'mag', (psp_start, psp_end))

In [None]:
# Short names for the three RTN component columns of the PSP product
psp_mag_renames = {
    'psp_fld_l2_mag_RTN_1min_0': 'B_r',
    'psp_fld_l2_mag_RTN_1min_1': 'B_t',
    'psp_fld_l2_mag_RTN_1min_2': 'B_n',
}
psp_mag.rename(columns=psp_mag_renames, inplace=True)

# Field magnitude: row-wise Euclidean norm of the three components
psp_mag['total_B'] = np.sqrt(
    psp_mag['B_r'].pow(2) + psp_mag['B_t'].pow(2) + psp_mag['B_n'].pow(2)
)

# Write the native-cadence (1 min) frame first ...
psp_mag.to_pickle('data/psp_mag_rtn_1min')

# ... then its hourly mean (the magnitude column is averaged like any other column)
psp_mag_hour = psp_mag.resample('h').mean()
psp_mag_hour.to_pickle('data/psp_mag_rtn_1hour')

## Solar Wind

In [18]:
def download_by_year(year, sc_name):
    """Download one year of solar-wind data in monthly chunks and pickle it.

    Every calendar month kept for the given spacecraft/year is fetched with
    ``download_sc_dataset(sc_name, 'sw', (first_day, last_day))``. The monthly
    results are concatenated with ``pd.concat`` and written to
    ``data/{sc_name}_sw{year}``.

    Parameters
    ----------
    year : str or int
        Four-digit year, e.g. ``'2024'`` or ``2024``.
    sc_name : str
        Spacecraft key forwarded to ``download_sc_dataset``. ``'psp'`` and
        ``'solo'`` have hard-coded month selections for some years (see the
        inline comments); any other value gets all twelve months (five in
        2025).

    Returns
    -------
    The object returned by ``pd.concat`` over the monthly downloads
    (presumably a DataFrame -- depends on ``download_sc_dataset``).

    Raises
    ------
    ValueError
        If ``year`` cannot be interpreted as an integer.
    """
    # Function-scope imports keep this cell self-contained.
    import calendar
    import os

    # Normalise so that 2024 and '2024' hit the same special cases below.
    year_str = str(year)
    year_num = int(year_str)

    month_numbers = list(range(1, 13))

    # End Date: only January-May are requested for 2025
    if year_str == '2025':
        month_numbers = month_numbers[:5]

    # PSP specific dates
    if sc_name == 'psp':
        # Start Date: October onwards
        if year_str == '2018':
            month_numbers = month_numbers[9:]

        # No June, No November
        if year_str == '2019':
            month_numbers = [m for m in range(1, 13) if m not in (6, 11)]

    # SolO specific dates
    if sc_name == 'solo':
        # Only July-August-September-October
        if year_str == '2020':
            month_numbers = [7, 8, 9, 10]

        # Starts April
        if year_str == '2021':
            month_numbers = list(range(4, 13))

    # calendar.monthrange gives the last day of each month, so leap years are
    # handled for any year instead of a hard-coded list.
    months = [
        (
            f'{year_str}-{m:02d}-01',
            f'{year_str}-{m:02d}-{calendar.monthrange(year_num, m)[1]:02d}',
        )
        for m in month_numbers
    ]

    # Make sure the output directory exists before starting the downloads, so
    # a missing folder cannot discard a year's worth of fetched data.
    os.makedirs('data', exist_ok=True)

    month_list = []
    for month in months:
        month_list.append(download_sc_dataset(sc_name, 'sw', month))
        print(month, 'Done')

    year_df = pd.concat(month_list, sort=False)
    year_df.to_pickle(f'data/{sc_name}_sw{year}')
    print(year, 'Done')

    return year_df

def format_psp_data(psp_data):
    """Add the derived columns ``Vp``, ``Tp`` and ``Np`` to a PSP solar-wind frame.

    The frame is modified in place and the same object is returned.

    Parameters
    ----------
    psp_data : pandas.DataFrame
        Must contain ``vp_moment_RTN_0``, ``vp_moment_RTN_1``,
        ``vp_moment_RTN_2``, ``wp_moment`` and ``np_moment``.

    Returns
    -------
    pandas.DataFrame
        ``psp_data`` itself, with the three new columns appended.
    """
    # Bulk speed: Euclidean norm of the three RTN velocity components
    v_r, v_t, v_n = (
        psp_data[name]
        for name in ('vp_moment_RTN_0', 'vp_moment_RTN_1', 'vp_moment_RTN_2')
    )
    psp_data['Vp'] = np.sqrt(v_r**2 + v_t**2 + v_n**2)

    # Proton temperature in eV from the thermal speed w:
    # T = m_p * w**2 / (2 * k_B), then multiplied by k_B / e.
    proton_mass = c.m_p.value
    boltzmann = c.k_B.value
    boltzmann_ev = boltzmann * 1/(c.e.value)

    # The factor 1000 presumably converts km/s to m/s -- confirm the units of wp_moment
    thermal_speed = psp_data['wp_moment']*1000
    psp_data['Tp'] = thermal_speed**2*proton_mass/(2*boltzmann) * boltzmann_ev

    # Proton density is exposed under the short name used elsewhere in the notebook
    psp_data['Np'] = psp_data['np_moment']

    return psp_data

In [None]:
# Fetch the PSP solar-wind data year by year. download_by_year pulls monthly
# chunks and writes one pickle per year; the mission-wide frame is saved here.
psp_sw_years = ['2018', '2019', '2020', '2021', '2022', '2023', '2024', '2025']
psp_sw_lst = [download_by_year(year, 'psp') for year in psp_sw_years]

# Stitch the yearly frames together and save the global file
psp_sw = pd.concat(psp_sw_lst, sort=False)
psp_sw.to_pickle('data/psp_sweap_l3i_30sec')

In [None]:
# Rebuild the mission-wide frame from the yearly pickles on disk
psp_sw_yearly = []
for year in ['2018', '2019', '2020', '2021', '2022', '2023', '2024', '2025']:
    psp_sw_yearly.append(pd.read_pickle(f'data/psp_sw{year}'))
psp_sw = pd.concat(psp_sw_yearly, sort=False)

# Add the derived Vp / Tp / Np columns
psp_sw_formatted = format_psp_data(psp_sw)

# Overwrite the global (30ish seconds) file with the formatted version
psp_sw_formatted.to_pickle('data/psp_sweap_l3i_30sec')

In [None]:
# Build the hourly-averaged PSP solar-wind file: resample each yearly pickle to
# hourly means, concatenate, then add the derived columns.
psp_sw_hour_lst = []
for year in ['2018', '2019', '2020', '2021', '2022', '2023', '2024', '2025']:
    # Loop-local name: reusing `psp_sw` here would silently replace the
    # mission-wide frame with the last yearly slice for every later cell.
    psp_sw_year = pd.read_pickle(f'data/psp_sw{year}')
    psp_sw_hour_lst.append(psp_sw_year.resample('h').mean())

psp_sw_hour = pd.concat(psp_sw_hour_lst, sort=False)

# Formatting runs after resampling, so Vp / Tp / Np are computed from the
# hourly-mean inputs rather than averaged from the high-cadence values.
psp_sw_formatted = format_psp_data(psp_sw_hour)

# Pickle the frame returned by the formatter instead of relying on the
# formatter having mutated `psp_sw_hour` in place.
psp_sw_formatted.to_pickle('data/psp_sweap_l3i_1hour')

# SolO Data

## Magnetic Field

In [6]:
# Date window requested for the SolO magnetic-field download
solo_start = '2020-04-15'
solo_end = '2025-05-31'

# Fetch the whole window in a single call; the (start, end) pair is passed as a tuple
solo_mag = download_sc_dataset('solo', 'mag', (solo_start, solo_end))

python(14572) MallocStackLogging: can't turn off malloc stack logging because it was not enabled.
Files Downloaded:   0%|          | 0/1741 [00:00<?, ?file/s]

[A[A
[A


[A[A[A



[A[A[A[A

[A[A


[A[A[A

[A[A


[A[A[A

[A[A



[A[A[A[A
Files Downloaded:   0%|          | 1/1741 [00:00<22:20,  1.30file/s]



[A[A[A[A


[A[A[A

[A[A

[A[A



[A[A[A[A


[A[A[A
[A
[A

[A[A



[A[A[A[A


[A[A[A
[A


[A[A[A

[A[A



[A[A[A[A


[A[A[A



Files Downloaded:   0%|          | 6/1741 [00:01<04:08,  6.99file/s]
[A
[A

[A[A

[A[A


[A[A[A



[A[A[A[A
[A

[A[A


[A[A[A


Files Downloaded:   1%|          | 11/1741 [00:01<02:35, 11.12file/s]



[A[A[A[A



[A[A[A[A
[A
[A

[A[A

Files Downloaded:   1%|          | 14/1741 [00:01<02:03, 13.95file/s]


[A[A[A



[A[A[A[A
[A

[A[A


[A[A[A



[A[A[A[A


[A[A[A



Files Downloaded:   1%|          | 17/1741 [00:01<01:52, 15.28file/s]
[A
[A


In [None]:
# Short names for the three RTN component columns of the SolO product
solo_mag_renames = {'B_RTN_0': 'B_r', 'B_RTN_1': 'B_t', 'B_RTN_2': 'B_n'}
solo_mag.rename(columns=solo_mag_renames, inplace=True)

# Field magnitude: row-wise Euclidean norm of the three components
solo_mag['total_B'] = np.sqrt(
    solo_mag['B_r'].pow(2) + solo_mag['B_t'].pow(2) + solo_mag['B_n'].pow(2)
)

# Write the native-cadence (1 min) frame first ...
solo_mag.to_pickle('data/solo_mag_rtn_1min')

# ... then its hourly mean
solo_mag_hour = solo_mag.resample('h').mean()
solo_mag_hour.to_pickle('data/solo_mag_rtn_1hour')

## Solar Wind

In [6]:
def format_solo_data(solo_data):
    """Give a SolO solar-wind frame the notebook's common column names.

    Renames ``T`` -> ``Tp`` and ``N`` -> ``Np`` and adds the bulk speed
    magnitude ``Vp``. The frame is modified in place and the same object is
    returned.

    Parameters
    ----------
    solo_data : pandas.DataFrame
        Must contain ``V_RTN_0``, ``V_RTN_1`` and ``V_RTN_2``; ``T`` and ``N``
        are renamed when present.

    Returns
    -------
    pandas.DataFrame
        ``solo_data`` itself.
    """
    # Temperature and proton density take the short names
    short_names = {'T': 'Tp', 'N': 'Np'}
    solo_data.rename(columns=short_names, inplace=True)

    # Bulk speed: Euclidean norm of the three RTN velocity components
    v_r, v_t, v_n = (solo_data[name] for name in ('V_RTN_0', 'V_RTN_1', 'V_RTN_2'))
    solo_data['Vp'] = np.sqrt(v_r**2 + v_t**2 + v_n**2)

    return solo_data

In [None]:
# Fetch the SolO solar-wind data year by year. download_by_year pulls monthly
# chunks and writes one pickle per year.
solo_sw_years = ['2020', '2021', '2022', '2023', '2024', '2025']
solo_sw_lst = [download_by_year(year, 'solo') for year in solo_sw_years]

# Stitch the yearly frames together into the mission-wide frame
solo_sw = pd.concat(solo_sw_lst, sort=False)

Files Downloaded: 100%|██████████| 20/20 [00:00<00:00, 29.59file/s]


('2020-07-01', '2020-07-31') Done


Files Downloaded: 100%|██████████| 18/18 [00:00<00:00, 27.36file/s]


('2020-08-01', '2020-08-31') Done


Files Downloaded: 100%|██████████| 21/21 [00:00<00:00, 27.14file/s]


('2020-09-01', '2020-09-30') Done


Files Downloaded: 100%|██████████| 25/25 [00:00<00:00, 32.19file/s]


('2020-10-01', '2020-10-31') Done
2020 Done


Files Downloaded: 100%|██████████| 13/13 [00:00<00:00, 23.70file/s]


('2021-04-01', '2021-04-30') Done


Files Downloaded: 100%|██████████| 29/29 [00:00<00:00, 30.93file/s]


('2021-05-01', '2021-05-31') Done


Files Downloaded: 100%|██████████| 9/9 [00:00<00:00, 20.33file/s]


('2021-06-01', '2021-06-30') Done


Files Downloaded: 100%|██████████| 26/26 [00:00<00:00, 29.22file/s]


('2021-07-01', '2021-07-31') Done


Files Downloaded: 100%|██████████| 30/30 [00:00<00:00, 33.94file/s]


('2021-08-01', '2021-08-31') Done


Files Downloaded: 100%|██████████| 11/11 [00:00<00:00, 19.80file/s]


('2021-09-01', '2021-09-30') Done


Files Downloaded: 100%|██████████| 27/27 [00:00<00:00, 30.09file/s]


('2021-10-01', '2021-10-31') Done


Files Downloaded: 100%|██████████| 30/30 [00:00<00:00, 33.17file/s]


('2021-11-01', '2021-11-30') Done


Files Downloaded: 100%|██████████| 28/28 [00:00<00:00, 31.42file/s]


('2021-12-01', '2021-12-31') Done
2021 Done


Files Downloaded: 100%|██████████| 25/25 [00:00<00:00, 31.74file/s]


('2022-01-01', '2022-01-31') Done


Files Downloaded: 100%|██████████| 22/22 [00:00<00:00, 27.77file/s]


('2022-02-01', '2022-02-28') Done


Files Downloaded: 100%|██████████| 30/30 [00:00<00:00, 33.12file/s]


('2022-03-01', '2022-03-31') Done


Files Downloaded: 100%|██████████| 17/17 [00:00<00:00, 25.20file/s]


('2022-04-01', '2022-04-30') Done


Files Downloaded: 100%|██████████| 21/21 [00:00<00:00, 25.54file/s]


('2022-05-01', '2022-05-31') Done


Files Downloaded: 100%|██████████| 29/29 [00:01<00:00, 28.60file/s]


('2022-06-01', '2022-06-30') Done


Files Downloaded: 100%|██████████| 30/30 [00:01<00:00, 27.51file/s]


('2022-07-01', '2022-07-31') Done


Files Downloaded: 100%|██████████| 30/30 [00:01<00:00, 26.39file/s]


('2022-08-01', '2022-08-31') Done


Files Downloaded: 100%|██████████| 28/28 [00:01<00:00, 24.75file/s]


('2022-09-01', '2022-09-30') Done


Files Downloaded: 100%|██████████| 28/28 [00:00<00:00, 31.76file/s]


('2022-10-01', '2022-10-31') Done


Files Downloaded: 100%|██████████| 20/20 [00:00<00:00, 29.95file/s]


('2022-11-01', '2022-11-30') Done


Files Downloaded: 100%|██████████| 27/27 [00:00<00:00, 30.57file/s]


('2022-12-01', '2022-12-31') Done
2022 Done


Files Downloaded: 100%|██████████| 29/29 [00:00<00:00, 32.93file/s]


('2023-01-01', '2023-01-31') Done


Files Downloaded: 100%|██████████| 25/25 [00:00<00:00, 32.07file/s]


('2023-02-01', '2023-02-28') Done


Files Downloaded: 100%|██████████| 27/27 [00:00<00:00, 30.06file/s]


('2023-03-01', '2023-03-31') Done


Files Downloaded: 100%|██████████| 29/29 [00:01<00:00, 23.70file/s]


('2023-04-01', '2023-04-30') Done


Files Downloaded: 100%|██████████| 9/9 [00:00<00:00, 13.77file/s]


('2023-05-01', '2023-05-31') Done


Files Downloaded: 100%|██████████| 28/28 [00:01<00:00, 20.17file/s]


('2023-06-01', '2023-06-30') Done


Files Downloaded: 100%|██████████| 29/29 [00:01<00:00, 23.74file/s]


('2023-07-01', '2023-07-31') Done


Files Downloaded: 100%|██████████| 21/21 [00:00<00:00, 21.64file/s]


('2023-08-01', '2023-08-31') Done


Files Downloaded: 100%|██████████| 29/29 [00:00<00:00, 32.02file/s]


('2023-09-01', '2023-09-30') Done


Files Downloaded: 100%|██████████| 30/30 [00:00<00:00, 33.73file/s]


('2023-10-01', '2023-10-31') Done


Files Downloaded: 100%|██████████| 25/25 [00:00<00:00, 31.99file/s]


('2023-11-01', '2023-11-30') Done


Files Downloaded: 100%|██████████| 29/29 [00:00<00:00, 33.01file/s]


('2023-12-01', '2023-12-31') Done
2023 Done


Files Downloaded: 100%|██████████| 21/21 [00:00<00:00, 27.72file/s]


('2024-01-01', '2024-01-31') Done


Files Downloaded: 100%|██████████| 29/29 [00:00<00:00, 31.89file/s]


('2024-02-01', '2024-02-29') Done


Files Downloaded: 100%|██████████| 29/29 [00:00<00:00, 32.35file/s]


('2024-03-01', '2024-03-31') Done


Files Downloaded: 100%|██████████| 30/30 [00:01<00:00, 24.24file/s]


('2024-04-01', '2024-04-30') Done


Files Downloaded: 100%|██████████| 31/31 [00:01<00:00, 23.10file/s]


('2024-05-01', '2024-05-31') Done


Files Downloaded: 100%|██████████| 29/29 [00:01<00:00, 23.05file/s]


('2024-06-01', '2024-06-30') Done


Files Downloaded: 100%|██████████| 31/31 [00:00<00:00, 31.16file/s]


('2024-07-01', '2024-07-31') Done


Files Downloaded: 100%|██████████| 31/31 [00:01<00:00, 29.88file/s]


('2024-08-01', '2024-08-31') Done


Files Downloaded: 100%|██████████| 30/30 [00:00<00:00, 33.02file/s]


('2024-09-01', '2024-09-30') Done


Files Downloaded: 100%|██████████| 31/31 [00:01<00:00, 30.74file/s]


('2024-10-01', '2024-10-31') Done


Files Downloaded: 100%|██████████| 29/29 [00:00<00:00, 29.33file/s]


('2024-11-01', '2024-11-30') Done


Files Downloaded: 100%|██████████| 31/31 [00:01<00:00, 30.61file/s]


('2024-12-01', '2024-12-31') Done
2024 Done


Files Downloaded: 100%|██████████| 31/31 [00:01<00:00, 30.54file/s]


('2025-01-01', '2025-01-31') Done


Files Downloaded: 100%|██████████| 28/28 [00:00<00:00, 31.48file/s]


('2025-02-01', '2025-02-28') Done


Files Downloaded: 100%|██████████| 31/31 [00:01<00:00, 23.13file/s]


('2025-03-01', '2025-03-31') Done


Files Downloaded: 100%|██████████| 29/29 [00:01<00:00, 24.87file/s]


('2025-04-01', '2025-04-30') Done


Files Downloaded: 100%|██████████| 31/31 [00:01<00:00, 23.60file/s]


('2025-05-01', '2025-05-31') Done
2025 Done


In [None]:
# Rebuild the mission-wide frame from the yearly pickles on disk
solo_sw_yearly = []
for year in ['2020', '2021', '2022', '2023', '2024', '2025']:
    solo_sw_yearly.append(pd.read_pickle(f'data/solo_sw{year}'))
solo_sw = pd.concat(solo_sw_yearly, sort=False)

# Rename T / N and add the Vp column
solo_sw_formatted = format_solo_data(solo_sw)

# Save the formatted global (4 sec) file
solo_sw_formatted.to_pickle('data/solo_swa_pas_grnd_mom_l2_4sec')

In [None]:
# Build the hourly-averaged SolO solar-wind file: resample each yearly pickle
# to hourly means, concatenate, then rename / add the derived columns.
solo_sw_hour_lst = []
for year in ['2020', '2021', '2022', '2023', '2024', '2025']:
    # Loop-local name: reusing `solo_sw` here would silently replace the
    # mission-wide frame with the last yearly slice for every later cell.
    solo_sw_year = pd.read_pickle(f'data/solo_sw{year}')
    solo_sw_hour_lst.append(solo_sw_year.resample('h').mean())

solo_sw_hour = pd.concat(solo_sw_hour_lst, sort=False)

# Formatting runs after resampling, so Vp is the magnitude of the hourly-mean
# velocity components rather than the hourly mean of the magnitude.
solo_sw_formatted = format_solo_data(solo_sw_hour)

# Pickle the frame returned by the formatter instead of relying on the
# formatter having mutated `solo_sw_hour` in place.
solo_sw_formatted.to_pickle('data/solo_swa_pas_grnd_mom_l2_1hour')

# ACE Data

## Magnetic Field

## Solar Wind