In [2]:
import numpy as np
import pandas as pd
import astropy.constants as c
from sw_functions import download_sc_dataset, get_trajectory

  from .autonotebook import tqdm as notebook_tqdm


Download all data for the entire missions of PSP, SolO, and ACE and save them as usable pandas DataFrames. Includes $B_r$, $B_t$, $B_n$, $B$, $V_{sw}$, $T_p$, $N_p$.

In [2]:
# Temporary: silence every Python warning for the rest of the session.
# This is bad practice because it can hide real problems (e.g. deprecations).
# TODO: narrow the filter to specific warning categories, or remove it.
import warnings
warnings.filterwarnings("ignore")

# PSP Data

## Magnetic field

In [None]:
# Time span of the PSP magnetic-field download (start and end dates)
psp_start = '2018-10-01'
psp_end = '2025-08-31'

# Fetch the PSP 'mag' dataset over that span
psp_mag = download_sc_dataset('psp', 'mag', (psp_start, psp_end))

In [None]:
# Map the raw RTN component column names onto short names (B_r, B_t, B_n)
rtn_column_names = {
    'psp_fld_l2_mag_RTN_1min_0': 'B_r',
    'psp_fld_l2_mag_RTN_1min_1': 'B_t',
    'psp_fld_l2_mag_RTN_1min_2': 'B_n',
}
psp_mag.rename(columns=rtn_column_names, inplace=True)

# Total field magnitude from the three components
b_squared = psp_mag['B_r']**2 + psp_mag['B_t']**2 + psp_mag['B_n']**2
psp_mag['total_B'] = np.sqrt(b_squared)

# Write the 1 min data to disk
psp_mag.to_pickle('data/psp_mag_rtn_1min')

# Hourly means of every column, written to their own file
psp_mag_hour = psp_mag.resample('h').mean()
psp_mag_hour.to_pickle('data/psp_mag_rtn_1hour')

## Solar Wind

In [None]:
def _psp_sw_month_ranges(year):
    """Return the (first_day, last_day) date-string pairs to download for `year`.

    Month lengths come from the calendar (via pandas), so every leap year gets
    a 29-day February without having to be listed by hand.
    """
    year = str(year)
    month_numbers = list(range(1, 13))

    # Start Date: 2018 only covers October to December
    if year == '2018':
        month_numbers = month_numbers[9:]

    # No June, No November in 2019 (these months were skipped in the original
    # month list; presumably no data is available for them)
    if year == '2019':
        month_numbers = [m for m in month_numbers if m not in (6, 11)]

    # End Date: 2025 only covers January to May
    if year == '2025':
        month_numbers = month_numbers[:5]

    ranges = []
    for m in month_numbers:
        last_day = pd.Timestamp(year=int(year), month=m, day=1).days_in_month
        ranges.append((f'{year}-{m:02d}-01', f'{year}-{m:02d}-{last_day:02d}'))
    return ranges


def download_by_year(year):
    """Download one year of PSP solar-wind ('sw') data in monthly chunks.

    Parameters
    ----------
    year : str or int
        Calendar year to download, e.g. '2021'. Integers are accepted and
        converted to their string form.

    Returns
    -------
    pandas.DataFrame
        Concatenation (in month order) of the frames returned by
        `download_sc_dataset('psp', 'sw', (first_day, last_day))`.

    Side effects
    ------------
    Prints a progress line after each month and after the year, and pickles
    the yearly frame to `data/psp_sw{year}` (the `data/` directory is expected
    to exist already).
    """
    year = str(year)

    month_list = []
    for month in _psp_sw_month_ranges(year):
        month_list.append(download_sc_dataset('psp', 'sw', month))
        print(month, 'Done')

    year_df = pd.concat(month_list, sort=False)
    year_df.to_pickle(f'data/psp_sw{year}')
    print(year, 'Done')

    return year_df

def format_psp_data(psp_data):
    """Add the derived 'Vp', 'Tp' and 'Np' columns to a PSP solar-wind frame.

    The input frame is modified in place and is also returned.

    Columns read: 'vp_moment_RTN_0', 'vp_moment_RTN_1', 'vp_moment_RTN_2',
    'wp_moment' and 'np_moment'.
    """
    # Bulk speed: magnitude of the three RTN velocity components
    v_r = psp_data['vp_moment_RTN_0']
    v_t = psp_data['vp_moment_RTN_1']
    v_n = psp_data['vp_moment_RTN_2']
    psp_data['Vp'] = np.sqrt(v_r**2 + v_t**2 + v_n**2)

    # Constants for turning the proton thermal speed into a temperature in eV
    proton_mass = c.m_p.value
    boltzmann = c.k_B.value
    boltzmann_ev = boltzmann / c.e.value

    # The factor 1000 presumably converts km/s to m/s (TODO confirm the unit
    # of 'wp_moment'); T = m * w**2 / (2 * k_B), then scaled to eV
    thermal_speed = psp_data['wp_moment']*1000
    psp_data['Tp'] = thermal_speed**2*proton_mass/(2*boltzmann) * boltzmann_ev

    # Proton density is copied under its short name
    psp_data['Np'] = psp_data['np_moment']

    return psp_data

In [None]:
# Fetch the PSP solar-wind data one year at a time (download_by_year works
# month by month and writes one file per year), then merge every year into a
# single global file
psp_sw_lst = []

for year in map(str, range(2018, 2026)):
    psp_sw_lst.append(download_by_year(year))

psp_sw = pd.concat(psp_sw_lst, sort=False)
psp_sw.to_pickle('data/psp_sweap_l3i_30sec')

In [None]:
# Rebuild the global frame from the yearly files saved on disk
yearly_frames = [
    pd.read_pickle(f'data/psp_sw{year}')
    for year in map(str, range(2018, 2026))
]
psp_sw = pd.concat(yearly_frames, sort=False)

# Add the derived columns
psp_sw_formatted = format_psp_data(psp_sw)

# Save the ~30 second data, replacing the unformatted global file
psp_sw_formatted.to_pickle('data/psp_sweap_l3i_30sec')

In [None]:
# Resample each yearly file to hourly means, merge the years and format the result.
# The per-year frame gets its own name (`psp_sw_year`) so the loop does not
# overwrite the mission-wide `psp_sw` DataFrame with a single raw year.
psp_sw_hour_lst = []
for year in ['2018', '2019', '2020', '2021', '2022', '2023', '2024', '2025']:
    psp_sw_year = pd.read_pickle(f'data/psp_sw{year}')
    psp_sw_hour_lst.append(psp_sw_year.resample('h').mean())

psp_sw_hour = pd.concat(psp_sw_hour_lst, sort=False)

# format_psp_data adds Vp, Tp and Np to psp_sw_hour in place and returns that same frame
psp_sw_formatted = format_psp_data(psp_sw_hour)

# Save the formatted 1 hour data (saving the returned frame makes the intent explicit)
psp_sw_formatted.to_pickle('data/psp_sweap_l3i_1hour')

# SolO Data

## Magnetic Field

## Solar Wind

# ACE Data

## Magnetic Field

## Solar Wind