# Alpha Particle Data Integration with PySpedas

This notebook demonstrates how to download Parker Solar Probe (PSP) alpha particle data with pyspedas and inspect the downloaded CDF files with cdflib.
We'll use the same date range as the WIND MFI test: 2022/06/01 20:00:00.000 to 2022/06/02 02:00:00.000.


In [3]:
# Cell 1: Download alpha data and determine file paths
import os
import traceback

import cdflib
import numpy as np
import pyspedas

# Time window shared with the WIND MFI test, and the PSP SPI product to fetch.
trange = ['2022/06/01 20:00:00.000', '2022/06/02 02:00:00.000']
spi_sf0a_datatype = 'spi_sf0a_l3_mom'  # Alpha particle moments

print(f"Downloading PSP alpha data for time range: {trange}")
print(f"Datatype: {spi_sf0a_datatype}")

# Fetch the files only: nothing is loaded into memory and nothing is plotted.
# The returned value is kept in `downloaded_files` for the cells below.
downloaded_files = pyspedas.psp.spi(
    trange=trange,
    datatype=spi_sf0a_datatype,
    level='l3',
    time_clip=True,
    downloadonly=True,
    notplot=True,
)

returned_count = len(downloaded_files) if downloaded_files else 0
print(f"\nDownload completed. Files returned: {returned_count}")

if not downloaded_files:
    print("No files were downloaded or found.")
else:
    # Report, for each returned path: the path as returned, its absolute form,
    # whether it exists on disk (with size in MB), and its parent directory.
    for file_number, returned_path in enumerate(downloaded_files, start=1):
        resolved_path = os.path.abspath(returned_path)
        print(f"File {file_number}: {returned_path}")
        print(f"  Absolute path: {resolved_path}")

        if os.path.exists(resolved_path):
            size_mb = os.path.getsize(resolved_path) / (1024 * 1024)
            print(f"  File size: {size_mb:.2f} MB")
            print("  File exists: Yes")
        else:
            print("  File exists: No")

        print(f"  Directory: {os.path.dirname(resolved_path)}")
        print()


27-Jun-25 12:04:37: Downloading remote index: https://spdf.gsfc.nasa.gov/pub/data/psp/sweap/spi/l3/spi_sf0a_l3_mom/2022/


Downloading PSP alpha data for time range: ['2022/06/01 20:00:00.000', '2022/06/02 02:00:00.000']
Datatype: spi_sf0a_l3_mom


27-Jun-25 12:04:38: File is current: psp_data/sweap/spi/l3/spi_sf0a_l3_mom/2022/psp_swp_spi_sf0a_l3_mom_20220601_v04.cdf
27-Jun-25 12:04:38: File is current: psp_data/sweap/spi/l3/spi_sf0a_l3_mom/2022/psp_swp_spi_sf0a_l3_mom_20220602_v04.cdf



Download completed. Files returned: 2
File 1: psp_data/sweap/spi/l3/spi_sf0a_l3_mom/2022/psp_swp_spi_sf0a_l3_mom_20220601_v04.cdf
  Absolute path: /Users/robertalexander/GitHub/Plotbot/psp_data/sweap/spi/l3/spi_sf0a_l3_mom/2022/psp_swp_spi_sf0a_l3_mom_20220601_v04.cdf
  File size: 30.40 MB
  File exists: Yes
  Directory: /Users/robertalexander/GitHub/Plotbot/psp_data/sweap/spi/l3/spi_sf0a_l3_mom/2022

File 2: psp_data/sweap/spi/l3/spi_sf0a_l3_mom/2022/psp_swp_spi_sf0a_l3_mom_20220602_v04.cdf
  Absolute path: /Users/robertalexander/GitHub/Plotbot/psp_data/sweap/spi/l3/spi_sf0a_l3_mom/2022/psp_swp_spi_sf0a_l3_mom_20220602_v04.cdf
  File size: 21.65 MB
  File exists: Yes
  Directory: /Users/robertalexander/GitHub/Plotbot/psp_data/sweap/spi/l3/spi_sf0a_l3_mom/2022



In [6]:
# Cell 2: Extract variable names from the CDF file
#
# Relies on `downloaded_files` from the download cell and on the imports in the
# first code cell (os, traceback, cdflib).

# Substrings, matched case-insensitively against variable names.
# 'na' and 'va' are very short, so the "alpha-related" list can also pick up
# variables that merely contain those letters; treat it as a coarse filter.
ALPHA_KEYWORDS = ('alpha', 'na', 'va')
TIME_KEYWORDS = ('epoch', 'time')


def _variable_summary(cdf, var_name):
    """Return a one-line description (name, data type, dimension sizes) of a CDF variable.

    If the variable cannot be inspected, the returned line carries the error
    message instead, so one bad variable does not abort the whole listing.
    """
    try:
        var_info = cdf.varinq(var_name)
        return f"{var_name:30s} - {var_info.Data_Type_Description:15s} - Shape: {var_info.Dim_Sizes}"
    except Exception as e:
        return f"{var_name:30s} - Error getting info: {e}"


def _print_numbered(cdf, var_names):
    """Print every variable in ``var_names`` as a 1-based numbered list."""
    for position, var_name in enumerate(var_names, start=1):
        print(f"  {position:2d}. {_variable_summary(cdf, var_name)}")


def _print_matching(cdf, var_names, keywords, empty_message):
    """Print, as bullets, the variables whose lower-cased name contains any keyword.

    Prints ``empty_message`` when nothing matches. Returns the list of matching
    names in their original order.
    """
    matches = [
        var_name for var_name in var_names
        if any(keyword in var_name.lower() for keyword in keywords)
    ]
    for var_name in matches:
        print(f"  • {_variable_summary(cdf, var_name)}")
    if not matches:
        print(empty_message)
    return matches


if downloaded_files:
    # Only the first downloaded file is inspected.
    cdf_file_path = downloaded_files[0]
    abs_cdf_path = os.path.abspath(cdf_file_path)

    print(f"Analyzing CDF file: {os.path.basename(abs_cdf_path)}")
    print(f"Full path: {abs_cdf_path}")
    print("="*80)

    try:
        with cdflib.CDF(abs_cdf_path) as cdf:
            cdf_info = cdf.cdf_info()

            # File-level metadata; optional fields fall back to a default.
            print("CDF File Info:")
            print(f"  CDF Version: {cdf_info.Version}")
            print(f"  Encoding: {getattr(cdf_info, 'Encoding', 'Unknown')}")
            print(f"  Majority: {getattr(cdf_info, 'Majority', 'Unknown')}")
            print(f"  Number of rDimensions: {getattr(cdf_info, 'Num_rdim', 0)}")
            print(f"  rDimension sizes: {getattr(cdf_info, 'rDim_sizes', [])}")
            print(f"  Number of zVariables: {len(cdf_info.zVariables)}")
            print(f"  Number of rVariables: {len(cdf_info.rVariables)}")
            print(f"  Compressed: {getattr(cdf_info, 'Compressed', 'Unknown')}")
            print(f"  Checksum: {getattr(cdf_info, 'Checksum', 'Unknown')}")
            print()

            # List all zVariables (most data variables)
            print("zVariables (data variables):")
            _print_numbered(cdf, cdf_info.zVariables)
            print()

            # List all rVariables (usually metadata)
            if cdf_info.rVariables:
                print("rVariables (metadata variables):")
                _print_numbered(cdf, cdf_info.rVariables)
            else:
                print("No rVariables found.")
            print()

            all_vars = cdf_info.zVariables + cdf_info.rVariables

            # Look specifically for alpha-related variables
            print("Alpha-related variables (containing 'alpha', 'na', or 'va'):")
            alpha_vars = _print_matching(
                cdf, all_vars, ALPHA_KEYWORDS,
                "  No obvious alpha-related variables found.",
            )
            print()

            # Look for time variables
            print("Time variables (containing 'epoch' or 'time'):")
            time_vars = _print_matching(
                cdf, all_vars, TIME_KEYWORDS,
                "  No time variables found.",
            )

            print()
            print(f"Total variables found: {len(all_vars)}")
            print(f"Alpha-related variables: {len(alpha_vars)}")
            print(f"Time variables: {len(time_vars)}")

    except Exception as e:
        # Show the full traceback in the cell output so failures are diagnosable.
        print(f"Error reading CDF file: {e}")
        print(traceback.format_exc())

else:
    print("No CDF files available to analyze. Please run the download cell first.")


Analyzing CDF file: psp_swp_spi_sf0a_l3_mom_20220601_v04.cdf
Full path: /Users/robertalexander/GitHub/Plotbot/psp_data/sweap/spi/l3/spi_sf0a_l3_mom/2022/psp_swp_spi_sf0a_l3_mom_20220601_v04.cdf
CDF File Info:
  CDF Version: 3.7.1
  Encoding: 6
  Majority: Column_major
  Number of rDimensions: 0
  rDimension sizes: []
  Number of zVariables: 47
  Number of rVariables: 0
  Compressed: False
  Checksum: False

zVariables (data variables):
   1. Epoch                          - CDF_TIME_TT2000 - Shape: []
   2. TIME                           - CDF_DOUBLE      - Shape: []
   3. MET                            - CDF_DOUBLE      - Shape: []
   4. APID                           - CDF_UINT2       - Shape: []
   5. SEQN                           - CDF_UINT2       - Shape: []
   6. SEQN_DELTA                     - CDF_UINT2       - Shape: []
   7. SEQN_GROUP                     - CDF_UINT1       - Shape: []
   8. PKT_SIZE                       - CDF_UINT4       - Shape: []
   9. SOURCE_APID       

In [7]:
# Cell 3: Examine DENS and TEMP variables specifically
#
# Relies on `downloaded_files` from the download cell and on the imports in the
# first code cell (os, traceback, cdflib, numpy as np).


def _summarize_variable(cdf, var_name, title):
    """Print NaN-aware summary statistics and selected attributes of one CDF variable.

    Parameters
    ----------
    cdf : open cdflib CDF reader
    var_name : name of the variable to read (e.g. "DENS")
    title : human-readable label shown in the section header

    Returns
    -------
    (values, attrs) : the array returned by ``varget`` and the attribute mapping
        returned by ``varattsget``. Either element is ``None`` if it could not be
        read; the error is printed rather than raised.
    """
    values = None
    attrs = None
    print(f"{var_name} ({title}):")
    try:
        values = cdf.varget(var_name)
        print(f"  Data type: {type(values)}")
        print(f"  Array shape: {values.shape}")
        print(f"  Data length: {len(values) if hasattr(values, '__len__') else 'N/A'}")
        print(f"  Min value: {np.nanmin(values):.6f}")
        print(f"  Max value: {np.nanmax(values):.6f}")
        print(f"  Mean value: {np.nanmean(values):.6f}")

        # Build the NaN mask once and reuse it for both counts.
        nan_mask = np.isnan(values)
        print(f"  Number of valid (non-NaN) values: {np.sum(~nan_mask)}")
        print(f"  Number of NaN values: {np.sum(nan_mask)}")
        print(f"  First 10 values: {values[:10]}")

        # Get variable attributes; only the ones present are printed.
        attrs = cdf.varattsget(var_name)
        for attr_key, label in (("UNITS", "Units"), ("FIELDNAM", "Field name")):
            if attr_key in attrs:
                print(f"  {label}: {attrs[attr_key]}")

    except Exception as e:
        print(f"  Error reading {var_name}: {e}")

    return values, attrs


if downloaded_files:
    # Only the first downloaded file is inspected.
    cdf_file_path = downloaded_files[0]
    abs_cdf_path = os.path.abspath(cdf_file_path)

    print(f"Examining DENS and TEMP variables in: {os.path.basename(abs_cdf_path)}")
    print("="*80)

    try:
        with cdflib.CDF(abs_cdf_path) as cdf:
            dens_data, dens_attrs = _summarize_variable(cdf, "DENS", "Alpha Particle Density")
            print()

            temp_data, temp_attrs = _summarize_variable(cdf, "TEMP", "Alpha Particle Temperature")
            print()

            # Also check the time variable for context
            print("Epoch (Time variable for reference):")
            epoch_data = None
            try:
                epoch_data = cdf.varget("Epoch")
                print(f"  Data type: {type(epoch_data)}")
                print(f"  Array shape: {epoch_data.shape}")
                print(f"  Data length: {len(epoch_data)}")

                # Convert each endpoint once and reuse it for the span.
                first_time = cdflib.cdfepoch.to_datetime(epoch_data[0])
                last_time = cdflib.cdfepoch.to_datetime(epoch_data[-1])
                print(f"  First timestamp: {first_time}")
                print(f"  Last timestamp: {last_time}")
                print(f"  Total time span: {last_time - first_time}")

            except Exception as e:
                print(f"  Error reading Epoch: {e}")

            print()

            # Check that DENS and TEMP have the same number of records as Epoch,
            # using the arrays already loaded above instead of re-reading the file.
            loaded = {"Epoch": epoch_data, "DENS": dens_data, "TEMP": temp_data}
            unavailable = [name for name, array in loaded.items() if array is None]
            if unavailable:
                print(f"  Error checking data consistency: could not read {', '.join(unavailable)}")
            else:
                try:
                    epoch_len = len(epoch_data)
                    dens_len = len(dens_data)
                    temp_len = len(temp_data)

                    print("Data consistency check:")
                    print(f"  Epoch length: {epoch_len}")
                    print(f"  DENS length: {dens_len}")
                    print(f"  TEMP length: {temp_len}")
                    print(f"  All lengths match: {epoch_len == dens_len == temp_len}")

                except Exception as e:
                    print(f"  Error checking data consistency: {e}")

    except Exception as e:
        # Show the full traceback in the cell output so failures are diagnosable.
        print(f"Error opening CDF file: {e}")
        print(traceback.format_exc())

else:
    print("No CDF files available to analyze. Please run the download cell first.")


Examining DENS and TEMP variables in: psp_swp_spi_sf0a_l3_mom_20220601_v04.cdf
DENS (Alpha Particle Density):
  Data type: <class 'numpy.ndarray'>
  Array shape: (49438,)
  Data length: 49438
  Min value: 0.000000
  Max value: 192.568024
  Mean value: 22.501341
  Number of valid (non-NaN) values: 49437
  Number of NaN values: 1
  First 10 values: [30.092505 28.764273 25.335985 22.930447 15.495516 16.270725 17.464218
 20.046217 16.262999 24.364222]
  Units: cm^-3
  Field name: Density

TEMP (Alpha Particle Temperature):
  Data type: <class 'numpy.ndarray'>
  Array shape: (49438,)
  Data length: 49438
  Min value: -0.001357
  Max value: 6600.656738
  Mean value: 835.324280
  Number of valid (non-NaN) values: 49030
  Number of NaN values: 408
  First 10 values: [528.13574 423.38474 396.92    315.21683 353.25986 334.30505 457.11783
 413.87183 273.8746  369.0258 ]
  Units: eV
  Field name: Temperature

Epoch (Time variable for reference):
  Data type: <class 'numpy.ndarray'>
  Array shape: 