# PSP FIELDS Electric Field Data Integration with PySpedas

This notebook demonstrates how to download and work with PSP FIELDS electric field spectral data using pyspedas.
Electric field spectra provide crucial information about plasma waves and electromagnetic phenomena.
We'll use the same time range as the other integration tests: 2022/06/01 20:00:00.000 to 2022/06/02 02:00:00.000. This six-hour window crosses midnight, so two daily files are downloaded per data product.


In [1]:
# Cell 1: Download Electric Field data and determine file paths
#
# Fetches the PSP FIELDS `dfb_ac_spec` Level-2 CDF files covering `trange`
# and reports where each file landed on disk. Nothing is loaded into memory
# here; later cells open the files directly with cdflib.
import pyspedas
import os
import cdflib

# Define the same date range as other integration tests
trange = ['2022/06/01 20:00:00.000', '2022/06/02 02:00:00.000']
dfb_ac_datatype = 'dfb_ac_spec'  # Electric field AC spectrum data

print(f"Downloading PSP FIELDS Electric Field data for time range: {trange}")
print(f"Datatype: {dfb_ac_datatype}")

# downloadonly=True makes pyspedas fetch the files and return their local
# paths without loading any variables. Load-time options such as time_clip,
# get_support_data and notplot only matter when data is actually loaded
# (notplot switches the return value to plain data dictionaries; it has
# nothing to do with plotting), so they are deliberately not passed here.
downloaded_files = pyspedas.psp.fields(
    trange=trange,
    datatype=dfb_ac_datatype,
    level='l2',
    downloadonly=True,
)

print(f"\nDownload completed. Files returned: {len(downloaded_files) if downloaded_files else 0}")

if downloaded_files:
    for file_number, file_path in enumerate(downloaded_files, start=1):
        print(f"File {file_number}: {file_path}")

        # pyspedas returns paths relative to the working directory; resolve
        # them so the report is unambiguous.
        abs_path = os.path.abspath(file_path)
        print(f"  Absolute path: {abs_path}")

        # Confirm the file is really on disk before asking for its size.
        if os.path.exists(abs_path):
            file_size = os.path.getsize(abs_path) / (1024 * 1024)  # bytes -> MB
            print(f"  File size: {file_size:.2f} MB")
            print(f"  File exists: Yes")
        else:
            print(f"  File exists: No")

        # Show where the file sits in the local data tree
        directory = os.path.dirname(abs_path)
        print(f"  Directory: {directory}")

        print()
else:
    print("No files were downloaded or found.")


30-Jun-25 14:50:02: Downloading remote index: https://spdf.gsfc.nasa.gov/pub/data/psp/fields/l2/dfb_ac_spec/dv12hg/2022/


Downloading PSP FIELDS Electric Field data for time range: ['2022/06/01 20:00:00.000', '2022/06/02 02:00:00.000']
Datatype: dfb_ac_spec


30-Jun-25 14:50:02: Downloading https://spdf.gsfc.nasa.gov/pub/data/psp/fields/l2/dfb_ac_spec/dv12hg/2022/psp_fld_l2_dfb_ac_spec_dv12hg_20220601_v01.cdf to psp_data/fields/l2/dfb_ac_spec/dv12hg/2022/psp_fld_l2_dfb_ac_spec_dv12hg_20220601_v01.cdf
30-Jun-25 14:50:04: Download complete: psp_data/fields/l2/dfb_ac_spec/dv12hg/2022/psp_fld_l2_dfb_ac_spec_dv12hg_20220601_v01.cdf
30-Jun-25 14:50:04: Downloading https://spdf.gsfc.nasa.gov/pub/data/psp/fields/l2/dfb_ac_spec/dv12hg/2022/psp_fld_l2_dfb_ac_spec_dv12hg_20220602_v01.cdf to psp_data/fields/l2/dfb_ac_spec/dv12hg/2022/psp_fld_l2_dfb_ac_spec_dv12hg_20220602_v01.cdf
30-Jun-25 14:50:05: Download complete: psp_data/fields/l2/dfb_ac_spec/dv12hg/2022/psp_fld_l2_dfb_ac_spec_dv12hg_20220602_v01.cdf
30-Jun-25 14:50:05: Downloading remote index: https://spdf.gsfc.nasa.gov/pub/data/psp/fields/l2/dfb_ac_spec/dv34hg/2022/
30-Jun-25 14:50:06: Downloading https://spdf.gsfc.nasa.gov/pub/data/psp/fields/l2/dfb_ac_spec/dv34hg/2022/psp_fld_l2_dfb_ac_spec_


Download completed. Files returned: 8
File 1: psp_data/fields/l2/dfb_ac_spec/dv12hg/2022/psp_fld_l2_dfb_ac_spec_dv12hg_20220601_v01.cdf
  Absolute path: /Users/robertalexander/GitHub/Plotbot/psp_data/fields/l2/dfb_ac_spec/dv12hg/2022/psp_fld_l2_dfb_ac_spec_dv12hg_20220601_v01.cdf
  File size: 8.58 MB
  File exists: Yes
  Directory: /Users/robertalexander/GitHub/Plotbot/psp_data/fields/l2/dfb_ac_spec/dv12hg/2022

File 2: psp_data/fields/l2/dfb_ac_spec/dv12hg/2022/psp_fld_l2_dfb_ac_spec_dv12hg_20220602_v01.cdf
  Absolute path: /Users/robertalexander/GitHub/Plotbot/psp_data/fields/l2/dfb_ac_spec/dv12hg/2022/psp_fld_l2_dfb_ac_spec_dv12hg_20220602_v01.cdf
  File size: 8.05 MB
  File exists: Yes
  Directory: /Users/robertalexander/GitHub/Plotbot/psp_data/fields/l2/dfb_ac_spec/dv12hg/2022

File 3: psp_data/fields/l2/dfb_ac_spec/dv34hg/2022/psp_fld_l2_dfb_ac_spec_dv34hg_20220601_v01.cdf
  Absolute path: /Users/robertalexander/GitHub/Plotbot/psp_data/fields/l2/dfb_ac_spec/dv34hg/2022/psp_fld_l

In [2]:
# Cell 2: Extract variable names from the CDF file
#
# Opens the first downloaded CDF, prints its global file info, lists every
# variable, and then groups variables by name keywords (electric field,
# frequency/spectral, time). Requires `downloaded_files` and `os` from Cell 1.
import traceback

import cdflib

# Substrings matched against lower-cased variable names. The printed section
# headers are generated from these tuples so they always describe exactly
# what is being matched. Note that short keywords such as 'ac' and 'spec' are
# broad: a time variable like 'epoch_ac_spec_...' also matches the electric
# field group.
EFIELD_KEYWORDS = ('electric', 'e_field', 'dfb', 'ac', 'spec', 'efield', 'voltage')
FREQ_KEYWORDS = ('freq', 'hz', 'frequency', 'bins', 'spectral')
TIME_KEYWORDS = ('epoch', 'time')


def _describe_variable(cdf, var_name, prefix):
    """Print one summary line for ``var_name``: data type and Dim_Sizes.

    Args:
        cdf: An open ``cdflib.CDF`` reader.
        var_name: Name of the variable to describe.
        prefix: Text printed before the name (numbering or a bullet).

    A failure to inspect one variable is reported on its own line so the
    rest of the listing can continue.
    """
    try:
        var_info = cdf.varinq(var_name)
        print(f"{prefix}{var_name:30s} - {var_info.Data_Type_Description:15s} - Shape: {var_info.Dim_Sizes}")
    except Exception as e:
        print(f"{prefix}{var_name:30s} - Error getting info: {e}")


def _report_matching(cdf, var_names, keywords, label, none_message):
    """Print and return the variables whose name contains any keyword.

    Args:
        cdf: An open ``cdflib.CDF`` reader.
        var_names: Variable names to search, in file order.
        keywords: Lower-case substrings to look for in the lower-cased name.
        label: Section title printed before the keyword list.
        none_message: Line printed when nothing matches.

    Returns:
        list[str]: Matching variable names, in the order they were given.
    """
    keyword_list = ", ".join(repr(keyword) for keyword in keywords)
    print(f"{label} (name contains any of: {keyword_list}):")

    matches = [
        name for name in var_names
        if any(keyword in name.lower() for keyword in keywords)
    ]
    for name in matches:
        _describe_variable(cdf, name, "  • ")
    if not matches:
        print(f"  {none_message}")

    print()
    return matches


if downloaded_files and len(downloaded_files) > 0:
    # Use the first downloaded file
    cdf_file_path = downloaded_files[0]
    abs_cdf_path = os.path.abspath(cdf_file_path)

    print(f"Analyzing CDF file: {os.path.basename(abs_cdf_path)}")
    print(f"Full path: {abs_cdf_path}")
    print("="*80)

    try:
        # Open the CDF file
        with cdflib.CDF(abs_cdf_path) as cdf:
            cdf_info = cdf.cdf_info()

            # Version and the variable lists are read directly; the remaining
            # fields use getattr with a fallback so a missing field does not
            # abort the report.
            print(f"CDF File Info:")
            print(f"  CDF Version: {cdf_info.Version}")
            print(f"  Encoding: {getattr(cdf_info, 'Encoding', 'Unknown')}")
            print(f"  Majority: {getattr(cdf_info, 'Majority', 'Unknown')}")
            print(f"  Number of rDimensions: {getattr(cdf_info, 'Num_rdim', 0)}")
            print(f"  rDimension sizes: {getattr(cdf_info, 'rDim_sizes', [])}")
            print(f"  Number of zVariables: {len(cdf_info.zVariables)}")
            print(f"  Number of rVariables: {len(cdf_info.rVariables)}")
            print(f"  Compressed: {getattr(cdf_info, 'Compressed', 'Unknown')}")
            print(f"  Checksum: {getattr(cdf_info, 'Checksum', 'Unknown')}")
            print()

            # List all zVariables (most data variables)
            print("zVariables (data variables):")
            for position, var_name in enumerate(cdf_info.zVariables, start=1):
                _describe_variable(cdf, var_name, f"  {position:2d}. ")

            print()

            # List all rVariables (usually metadata)
            if cdf_info.rVariables:
                print("rVariables (metadata variables):")
                for position, var_name in enumerate(cdf_info.rVariables, start=1):
                    _describe_variable(cdf, var_name, f"  {position:2d}. ")
            else:
                print("No rVariables found.")

            print()

            all_vars = cdf_info.zVariables + cdf_info.rVariables

            # Group variables by name keywords. The groups may overlap.
            efield_vars = _report_matching(
                cdf, all_vars, EFIELD_KEYWORDS,
                "Electric Field-related variables",
                "No obvious electric field-related variables found.",
            )
            freq_vars = _report_matching(
                cdf, all_vars, FREQ_KEYWORDS,
                "Frequency/Spectral variables",
                "No frequency/spectral variables found.",
            )
            time_vars = _report_matching(
                cdf, all_vars, TIME_KEYWORDS,
                "Time variables",
                "No time variables found.",
            )

            print(f"Total variables found: {len(all_vars)}")
            print(f"Electric field-related variables: {len(efield_vars)}")
            print(f"Frequency/spectral variables: {len(freq_vars)}")
            print(f"Time variables: {len(time_vars)}")

    except Exception as e:
        # Keep the notebook running, but show the full traceback so the
        # failure is not hidden.
        print(f"Error reading CDF file: {e}")
        print(traceback.format_exc())

else:
    print("No CDF files available to analyze. Please run the download cell first.")


Analyzing CDF file: psp_fld_l2_dfb_ac_spec_dv12hg_20220601_v01.cdf
Full path: /Users/robertalexander/GitHub/Plotbot/psp_data/fields/l2/dfb_ac_spec/dv12hg/2022/psp_fld_l2_dfb_ac_spec_dv12hg_20220601_v01.cdf
CDF File Info:
  CDF Version: 3.8.0
  Encoding: 1
  Majority: Column_major
  Number of rDimensions: 0
  rDimension sizes: []
  Number of zVariables: 6
  Number of rVariables: 0
  Compressed: False
  Checksum: False

zVariables (data variables):
   1. epoch_ac_spec_dV12hg           - CDF_TIME_TT2000 - Shape: []
   2. psp_fld_l2_dfb_ac_spec_dV12hg_frequency_bins - CDF_FLOAT       - Shape: [54]
   3. psp_fld_l2_dfb_ac_spec_dV12hg  - CDF_FLOAT       - Shape: [54]
   4. psp_fld_l2_dfb_ac_spec_dV12hg_saturation_flags - CDF_INT2        - Shape: []
   5. epoch_quality_flags            - CDF_TIME_TT2000 - Shape: []
   6. psp_fld_l2_quality_flags       - CDF_UINT4       - Shape: []

No rVariables found.

Electric Field-related variables (containing 'electric', 'e_field', 'dfb', 'ac', or 'spec'

In [None]:
# Cell 3: Examine electric field and spectral variables specifically
#
# Reads the candidate spectral variables from the first downloaded CDF and
# prints basic statistics, then reports the frequency-bin variables and the
# time coverage of the file. Requires `downloaded_files`, `os` and `cdflib`
# from the earlier cells.
import traceback

import numpy as np

# Data-type descriptions that cdflib reports for CDF time variables.
CDF_TIME_TYPES = ('CDF_EPOCH', 'CDF_EPOCH16', 'CDF_TIME_TT2000')


def _print_attributes(cdf, var_name, labelled_attrs, indent):
    """Print selected attributes of ``var_name`` if they are present.

    Args:
        cdf: An open ``cdflib.CDF`` reader.
        var_name: Variable whose attributes are read.
        labelled_attrs: Iterable of ``(attribute_name, printed_label)`` pairs.
        indent: Leading whitespace for each printed line.

    A failure to read the attributes is reported rather than silently
    ignored, and does not interrupt the rest of the cell.
    """
    try:
        var_attrs = cdf.varattsget(var_name)
    except Exception as e:
        print(f"{indent}Could not read attributes: {e}")
        return
    for attr_name, label in labelled_attrs:
        if attr_name in var_attrs:
            print(f"{indent}{label}: {var_attrs[attr_name]}")


def _find_time_variable(cdf, var_names):
    """Return the name of the variable to use as the time axis, or None.

    A variable literally named "Epoch" wins if it exists. Otherwise the first
    variable whose CDF data type is one of ``CDF_TIME_TYPES`` is used. These
    PSP files name their time variables per product (e.g.
    'epoch_ac_spec_dV12hg'), so a hard-coded "Epoch" lookup fails.
    """
    if "Epoch" in var_names:
        return "Epoch"
    for name in var_names:
        try:
            type_description = cdf.varinq(name).Data_Type_Description
        except Exception:
            # A variable that cannot be inspected cannot be the time axis.
            continue
        if type_description in CDF_TIME_TYPES:
            return name
    return None


if downloaded_files and len(downloaded_files) > 0:
    cdf_file_path = downloaded_files[0]
    abs_cdf_path = os.path.abspath(cdf_file_path)

    print(f"Examining electric field and spectral variables in: {os.path.basename(abs_cdf_path)}")
    print("="*80)

    try:
        with cdflib.CDF(abs_cdf_path) as cdf:
            # Get all variable names to search for relevant ones
            cdf_info = cdf.cdf_info()
            all_vars = cdf_info.zVariables + cdf_info.rVariables

            # Look for common electric field spectral variable names
            potential_vars = ['psp_fld_l2_dfb_ac_spec', 'dfb_ac_spec', 'electric_spectral_density',
                              'ac_spec', 'spectral_density', 'voltage_spec', 'E_spec']
            found_vars = [var for var in potential_vars if var in all_vars]

            # If no exact matches, fall back to a keyword search on the names
            if not found_vars:
                print("No exact matches found, searching for variables containing electric field keywords...")
                efield_keywords = ['dfb', 'ac', 'spec', 'electric', 'voltage', 'efield']
                found_vars = [
                    var_name for var_name in all_vars
                    if any(keyword in var_name.lower() for keyword in efield_keywords)
                ]

            if found_vars:
                for var_name in found_vars[:5]:  # Limit to first 5 variables to avoid too much output
                    print(f"{var_name} (Potential Electric Field Spectral Variable):")
                    try:
                        var_data = cdf.varget(var_name)
                        print(f"  Data type: {type(var_data)}")
                        print(f"  Array shape: {var_data.shape}")
                        print(f"  Data length: {len(var_data) if hasattr(var_data, '__len__') else 'N/A'}")

                        if hasattr(var_data, 'dtype') and np.issubdtype(var_data.dtype, np.number):
                            # NaN-aware statistics. Fill values stored as
                            # ordinary numbers are NOT excluded here.
                            print(f"  Min value: {np.nanmin(var_data):.6e}")
                            print(f"  Max value: {np.nanmax(var_data):.6e}")
                            print(f"  Mean value: {np.nanmean(var_data):.6e}")
                            nan_mask = np.isnan(var_data)
                            print(f"  Number of valid (non-NaN) values: {np.sum(~nan_mask)}")
                            print(f"  Number of NaN values: {np.sum(nan_mask)}")

                            # Show a small sample appropriate to the array rank
                            if len(var_data.shape) == 1 and len(var_data) > 0:
                                print(f"  First 10 values: {var_data[:10]}")
                            elif len(var_data.shape) == 2 and len(var_data) > 0:
                                print(f"  2D array - First row shape: {var_data[0].shape}")
                                print(f"  First row values (first 5): {var_data[0][:5]}")
                            else:
                                print(f"  Multi-dimensional array - shape: {var_data.shape}")
                        else:
                            print(f"  Data type: {var_data.dtype if hasattr(var_data, 'dtype') else 'Unknown'}")
                            if hasattr(var_data, '__len__') and len(var_data) > 0:
                                print(f"  First few values: {var_data[:5]}")

                        # Get variable attributes
                        _print_attributes(
                            cdf, var_name,
                            [("UNITS", "Units"), ("FIELDNAM", "Field name"), ("CATDESC", "Description")],
                            "  ",
                        )

                    except Exception as e:
                        print(f"  Error reading {var_name}: {e}")

                    print()

            else:
                print("No electric field or spectral variables found.")
                print("Available variables:")
                for var in all_vars[:15]:  # Show first 15 variables
                    print(f"  - {var}")
                if len(all_vars) > 15:
                    print(f"  ... and {len(all_vars) - 15} more variables")

            print()

            # Also check for frequency bins
            print("Frequency information (looking for frequency bins/ranges):")
            freq_keywords = ['freq', 'frequency', 'hz', 'bins']
            freq_found = False

            for var_name in all_vars:
                if not any(keyword in var_name.lower() for keyword in freq_keywords):
                    continue
                freq_found = True
                try:
                    freq_data = cdf.varget(var_name)
                    print(f"  {var_name}:")
                    print(f"    Shape: {freq_data.shape}")
                    if hasattr(freq_data, 'dtype') and np.issubdtype(freq_data.dtype, np.number):
                        print(f"    Range: {np.nanmin(freq_data):.3e} to {np.nanmax(freq_data):.3e}")
                        if len(freq_data.shape) == 1 and len(freq_data) > 0:
                            print(f"    First few values: {freq_data[:5]}")

                    # Get units
                    _print_attributes(cdf, var_name, [("UNITS", "Units")], "    ")
                    print()

                except Exception as e:
                    print(f"  Error reading {var_name}: {e}")

            if not freq_found:
                print("  No frequency variables found")

            print()

            # Also check the time variable for context
            print("Epoch (Time variable for reference):")
            epoch_var = _find_time_variable(cdf, all_vars)
            if epoch_var is None:
                print("  No CDF time variable found")
            else:
                print(f"  Variable: {epoch_var}")
                try:
                    epoch_data = cdf.varget(epoch_var)
                    if epoch_data is None or np.size(epoch_data) == 0:
                        # Nothing to index; avoid an IndexError on [0] / [-1].
                        print("  Variable contains no records")
                    else:
                        epoch_data = np.atleast_1d(epoch_data)
                        print(f"  Data type: {type(epoch_data)}")
                        print(f"  Array shape: {epoch_data.shape}")
                        print(f"  Data length: {len(epoch_data)}")

                        # Convert only the two end points. atleast_1d lets the
                        # results be indexed as scalars regardless of the
                        # container type to_datetime returns.
                        end_points = np.atleast_1d(
                            cdflib.cdfepoch.to_datetime(epoch_data[[0, -1]])
                        )
                        first_timestamp, last_timestamp = end_points[0], end_points[-1]
                        print(f"  First timestamp: {first_timestamp}")
                        print(f"  Last timestamp: {last_timestamp}")
                        print(f"  Total time span: {last_timestamp - first_timestamp}")

                except Exception as e:
                    print(f"  Error reading {epoch_var}: {e}")

            print()

    except Exception as e:
        # Keep the notebook running, but show the full traceback so the
        # failure is not hidden.
        print(f"Error opening CDF file: {e}")
        print(traceback.format_exc())

else:
    print("No CDF files available to analyze. Please run the download cell first.")
