Search for solar type II radio bursts in this catalog

In [4]:
import pandas as pd
import requests
from bs4 import BeautifulSoup
from io import StringIO

In [5]:
# Base URL of the directory
base_url = 'https://soleil.i4ds.ch/solarradio/data/BurstLists/2010-yyyy_Monstein'
year = 2023

In [15]:
# Get the HTML content
response = requests.get(f'{base_url}/{year}')
soup = BeautifulSoup(response.text, 'html.parser')

# Extract all .txt file links
txt_links = [a['href'] for a in soup.find_all('a') if a['href'].endswith('.txt')]

In [16]:
txt_links

['e-CALLISTO_2023_01.txt',
 'e-CALLISTO_2023_02.txt',
 'e-CALLISTO_2023_03.txt',
 'e-CALLISTO_2023_04.txt',
 'e-CALLISTO_2023_05.txt',
 'e-CALLISTO_2023_06.txt',
 'e-CALLISTO_2023_07.txt',
 'e-CALLISTO_2023_08.txt',
 'e-CALLISTO_2023_09.txt',
 'e-CALLISTO_2023_10.txt',
 'e-CALLISTO_2023_11.txt',
 'e-CALLISTO_2023_12.txt']

In [17]:
# Dictionary to hold filename -> DataFrame
dataframes = {}

for link in txt_links:
    file_url = f'{base_url}/{year}/{link}'
    print(f'Downloading: {file_url}')

    # Get text content
    txt = requests.get(file_url).text

    # Remove comments and blank lines
    lines = [line for line in txt.splitlines() if line.strip() and not line.startswith('#')]

    # Skip if not enough data
    if len(lines) < 5:
        print(f'Skipping {link}: too few valid lines')
        continue

    # Guess delimiter using csv.Sniffer
    sample = '\n'.join(lines[:10])
    try:
        dialect = csv.Sniffer().sniff(sample)
        delimiter = dialect.delimiter
    except Exception:
        delimiter = None

    try:
        df = pd.read_csv(StringIO('\n'.join(lines)), sep=delimiter or r'\s+', engine='python', header=None)
        dataframes[link] = df
    except Exception as e:
        print(f'Failed to parse {link}: {e}')

print(f'\n✅ Loaded {len(dataframes)} files into memory.')

Downloading: https://soleil.i4ds.ch/solarradio/data/BurstLists/2010-yyyy_Monstein/2023/e-CALLISTO_2023_01.txt
Failed to parse e-CALLISTO_2023_01.txt: Expected 5 fields in line 2, saw 9. Error could possibly be due to quotes being ignored when a multi-char delimiter is used.
Downloading: https://soleil.i4ds.ch/solarradio/data/BurstLists/2010-yyyy_Monstein/2023/e-CALLISTO_2023_02.txt
Failed to parse e-CALLISTO_2023_02.txt: Expected 4 fields in line 2, saw 10. Error could possibly be due to quotes being ignored when a multi-char delimiter is used.
Downloading: https://soleil.i4ds.ch/solarradio/data/BurstLists/2010-yyyy_Monstein/2023/e-CALLISTO_2023_03.txt
Failed to parse e-CALLISTO_2023_03.txt: Expected 4 fields in line 2, saw 13. Error could possibly be due to quotes being ignored when a multi-char delimiter is used.
Downloading: https://soleil.i4ds.ch/solarradio/data/BurstLists/2010-yyyy_Monstein/2023/e-CALLISTO_2023_04.txt
Failed to parse e-CALLISTO_2023_04.txt: Expected 6 fields in li

In [None]:
# Example: Show keys and preview one dataframe
print(f'Downloaded {len(dataframes)} files.')
print(dataframes.keys())
print(dataframes['2023-01.txt'].head())