In [45]:
import numpy as np
from pds4_tools import pds4_read
import pandas as pd
import spiceypy as spice
from datetime import datetime, timedelta
import glob
import requests
from bs4 import BeautifulSoup
import os
from pathlib import Path
import urllib.parse

In [38]:
# Read a sample ODF label; pds4_read logs each Table_Binary structure it processes.
structures = pds4_read('../data/pds4/mess_rs_07155_156_10s_odf.xml')

Processing label: ../data/pds4/mess_rs_07155_156_10s_odf.xml
Now processing a Table_Binary structure: ODF File Label Group Header
Now processing a Table_Binary structure: ODF File Label Group Data
Now processing a Table_Binary structure: ODF Identifier Group Header
Now processing a Table_Binary structure: ODF Identifier Group Data
Now processing a Table_Binary structure: ODF Orbit Data Group Header
Now processing a Table_Binary structure: ODF Orbit Data Group Data
Now processing a Table_Binary structure: ODF Ramp Group Header (Station 63)
Now processing a Table_Binary structure: ODF Ramp Group Data (Station 63)
Now processing a Table_Binary structure: ODF Ramp Group Header (Station 14)
Now processing a Table_Binary structure: ODF Ramp Group Data (Station 14)
Now processing a Table_Binary structure: ODF Ramp Group Header (Station 43)
Now processing a Table_Binary structure: ODF Ramp Group Data (Station 43)
Now processing a Table_Binary structure: ODF End-of-File Group


In [5]:
# List every structure in the label with its index, name and field/record counts.
structures.info()

0   Table_Binary  'ODF File Label Group Header'         4 fields x 1 records
1   Table_Binary  'ODF File Label Group Data'           7 fields x 1 records
2   Table_Binary  'ODF Identifier Group Header'         4 fields x 1 records
3   Table_Binary  'ODF Identifier Group Data'           3 fields x 1 records
4   Table_Binary  'ODF Orbit Data Group Header'         4 fields x 1 records
5   Table_Binary  'ODF Orbit Data Group Data'           7 fields x 13099 records
6   Table_Binary  'ODF Ramp Group Header (Station 63)'  4 fields x 1 records
7   Table_Binary  'ODF Ramp Group Data (Station 63)'    9 fields x 97 records
8   Table_Binary  'ODF Ramp Group Header (Station 14)'  4 fields x 1 records
9   Table_Binary  'ODF Ramp Group Data (Station 14)'    9 fields x 48 records
10  Table_Binary  'ODF Ramp Group Header (Station 43)'  4 fields x 1 records
11  Table_Binary  'ODF Ramp Group Data (Station 43)'    9 fields x 24 records
12  Table_Binary  'ODF End-of-File Group'               4 fields x 1 

In [39]:
# Index 5 is 'ODF Orbit Data Group Data' in the info() listing for this file.
orbit_data_table = structures[5]
# .data is the record array inspected in the next cell.
structured_array = orbit_data_table.data

In [40]:
# Show the container type, shape, dtype and field names of the orbit data table.
print(f"Тип данных: {type(structured_array)}")
print(f"Форма данных: {structured_array.shape}")
print(f"dtype: {structured_array.dtype}")
print(f"Имена полей: {structured_array.dtype.names}")

Тип данных: <class 'pds4_tools.reader.data.PDS_ndarray'>
Форма данных: (13099,)
dtype: (numpy.record, [('Record Time Tag, integer part', '>u4'), ('Items 2-3', 'S4'), ('Observable, integer part', '>i4'), ('Observable, fractional part', '>i4'), ('Items 6-14', 'S4'), ('Items 15-19', 'S8'), ('Items 20-22', 'S8')])
Имена полей: ('Record Time Tag, integer part', 'Items 2-3', 'Observable, integer part', 'Observable, fractional part', 'Items 6-14', 'Items 15-19', 'Items 20-22')


In [77]:
def extract_bits_from_bytes(byte_data, start_bit, num_bits):
    """Extract an unsigned bit field from a big-endian packed byte string.

    Bits are numbered from 1, starting at the most significant bit of the
    first byte, so ``start_bit=1, num_bits=3`` selects the top three bits.

    Bits that lie beyond the end of ``byte_data`` are read as zeros. This
    matters for values taken from NumPy fixed-width ``S`` fields, whose
    scalars come back with trailing NUL bytes removed and are therefore
    shorter than the declared field width.

    Args:
        byte_data: ``bytes`` or ``numpy.bytes_`` holding the packed word.
        start_bit: 1-based position of the field's first (most significant) bit.
        num_bits: Width of the field in bits.

    Returns:
        The field as a non-negative ``int``; 0 when ``byte_data`` is not a
        bytes object or is empty.
    """
    if not isinstance(byte_data, (bytes, np.bytes_)) or len(byte_data) == 0:
        return 0

    value = int.from_bytes(byte_data, byteorder='big', signed=False)
    mask = (1 << num_bits) - 1

    total_bits = len(byte_data) * 8
    # Number of bits to the right of the field inside byte_data.
    shift = total_bits - (start_bit + num_bits - 1)
    if shift >= 0:
        return (value >> shift) & mask

    # The field ends past the available bytes: pad the missing low-order
    # bits with zeros instead of discarding the part that is present.
    return (value << -shift) & mask

def extract_items_6_14(packed_data):
    """Unpack Items 6-14 from their packed byte string into a dict.

    Args:
        packed_data: ``bytes`` or ``numpy.bytes_`` holding the packed items.

    Returns:
        Dict mapping each item name to its unsigned integer value. Every
        value is 0 when ``packed_data`` is not a bytes object or is empty.
    """
    # (output key, 1-based first bit, width in bits), in packing order.
    layout = (
        ('format_id', 1, 3),                 # Item 6: Format ID (bits 1-3)
        ('receiving_station_id', 4, 7),      # Item 7: Receiving Station ID (bits 4-10)
        ('transmitting_station_id', 11, 7),  # Item 8: Transmitting Station ID (bits 11-17)
        ('network_id', 18, 2),               # Item 9: Network ID (bits 18-19)
        ('data_type_id', 20, 6),             # Item 10: Data Type ID (bits 20-25)
        ('downlink_band_id', 26, 2),         # Item 11: Downlink Band ID (bits 26-27)
        ('uplink_band_id', 28, 2),           # Item 12: Uplink Band ID (bits 28-29)
        ('ref_freq_band_id', 30, 2),         # Item 13: Reference Frequency Band ID (bits 30-31)
        ('data_validity', 32, 1),            # Item 14: Data Validity Indicator (bit 32)
    )

    usable = isinstance(packed_data, (bytes, np.bytes_)) and len(packed_data) > 0
    if not usable:
        return {key: 0 for key, _, _ in layout}

    return {
        key: extract_bits_from_bytes(packed_data, first_bit, width)
        for key, first_bit, width in layout
    }

def extract_items_15_19(packed_data):
    """Unpack Items 15-19 from their packed byte string into a dict.

    Args:
        packed_data: ``bytes`` or ``numpy.bytes_`` holding the packed items.

    Returns:
        Dict with keys ``item_15`` .. ``item_19`` holding unsigned integers.
        Every value is 0 when ``packed_data`` is not a bytes object or is empty.
    """
    # (output key, 1-based first bit, width in bits), in packing order.
    layout = (
        ('item_15', 1, 7),
        ('item_16', 8, 10),
        ('item_17', 18, 1),
        ('item_18', 19, 22),
        ('item_19', 41, 24),
    )

    usable = isinstance(packed_data, (bytes, np.bytes_)) and len(packed_data) > 0
    if not usable:
        return {key: 0 for key, _, _ in layout}

    return {
        key: extract_bits_from_bytes(packed_data, first_bit, width)
        for key, first_bit, width in layout
    }

def extract_items_20_22(packed_data):
    """Unpack Items 20-22 from their packed byte string into a dict.

    Args:
        packed_data: ``bytes`` or ``numpy.bytes_`` holding the packed items.

    Returns:
        Dict with keys ``item_20`` .. ``item_22`` holding unsigned integers.
        Every value is 0 when ``packed_data`` is not a bytes object or is empty.
    """
    # (output key, 1-based first bit, width in bits), in packing order.
    layout = (
        ('item_20', 1, 20),
        ('item_21', 21, 22),
        ('item_22', 43, 22),
    )

    usable = isinstance(packed_data, (bytes, np.bytes_)) and len(packed_data) > 0
    if not usable:
        return {key: 0 for key, _, _ in layout}

    return {
        key: extract_bits_from_bytes(packed_data, first_bit, width)
        for key, first_bit, width in layout
    }

In [64]:
def parse_to_df(file_name):
    """Read one ODF label and return its selected orbit-data records.

    A record is kept only when its unpacked Items 6-14 have
    ``network_id == 0``, ``data_type_id == 11``, ``data_validity == 0`` and
    ``format_id == 2``.

    Args:
        file_name: Path of the PDS4 ``.xml`` label passed to ``pds4_read``.

    Returns:
        ``pandas.DataFrame`` with one row per kept record. The columns are
        always present and in a fixed order, even when no record is kept, so
        CSV files written from the result always carry a header row.
    """
    # NOTE(review): 'record_tine' looks like a typo for 'record_time'. It is
    # kept unchanged because it is the column name of the returned frame.
    columns = [
        'time_tag_seconds',
        'record_tine',
        'full_observable',
        'observable_int',
        'observable_frac',
        'receiving_station_id',
        'transmitting_station_id',
        'data_type_id',
        'data_validity',
        'format_id',
    ]

    structures = pds4_read(file_name)
    # The orbit data table is taken by position (structure index 5).
    orbit_data_table = structures[5]
    structured_array = orbit_data_table.data

    # Record time tags are converted as seconds counted from this date.
    spice_epoch = datetime(1950, 1, 1)

    extracted_data = []
    for record in structured_array:
        items_6_14 = extract_items_6_14(record[4])

        keep = (
            items_6_14['network_id'] == 0
            and items_6_14['data_type_id'] == 11
            and items_6_14['data_validity'] == 0
            and items_6_14['format_id'] == 2
        )
        if not keep:
            continue

        # Only kept records pay for the time and observable conversion.
        time_tag_int = record[0]
        observable_int = record[2]
        observable_frac = record[3]

        extracted_data.append({
            'time_tag_seconds': time_tag_int,
            'record_tine': spice_epoch + timedelta(seconds=int(time_tag_int)),
            # The fractional part is stored as an integer scaled by 1e9.
            'full_observable': observable_int + observable_frac * 1e-9,
            'observable_int': observable_int,
            'observable_frac': observable_frac,
            'receiving_station_id': items_6_14['receiving_station_id'],
            'transmitting_station_id': items_6_14['transmitting_station_id'],
            'data_type_id': items_6_14['data_type_id'],
            'data_validity': items_6_14['data_validity'],
            'format_id': items_6_14['format_id'],
        })

    # Explicit columns keep the schema stable when extracted_data is empty.
    return pd.DataFrame(extracted_data, columns=columns)

In [78]:
# Convert every ODF label in data/pds4/<year>/ to data/csv/<year>/<stem>.csv.
current_dir = Path.cwd()
data_dir = current_dir.parent / "data"
pds4_dir = data_dir / "pds4"
csv_dir = data_dir / "csv"

for year in range(2011, 2016):
    current_csv_dir = csv_dir / str(year)
    current_pds4_dir = pds4_dir / str(year)

    # to_csv does not create missing directories, so make the target first.
    current_csv_dir.mkdir(parents=True, exist_ok=True)

    # Sorted so the files are processed in the same order on every run.
    xml_files = sorted(glob.glob(str(current_pds4_dir / "*_odf.xml")))

    for xml_file in xml_files:
        # glob already returns the full path; no need to join it again.
        xml_path = xml_file

        base_name = Path(xml_file).stem
        csv_filename = f"{base_name}.csv"
        csv_path = str(current_csv_dir / csv_filename)

        df_xml = parse_to_df(xml_path)
        df_xml.to_csv(csv_path, index=False)

    print(f'End of parsing year {year}')

print('End of parsing')

Processing label: /home/sun/PycharmProjects/odf_parse/Messenger-Orbit/data/pds4/2011/mess_rs_11007_008_odf.xml
Now processing a Table_Binary structure: ODF File Label Group Header
Now processing a Table_Binary structure: ODF File Label Group Data
Now processing a Table_Binary structure: ODF Identifier Group Header
Now processing a Table_Binary structure: ODF Identifier Group Data
Now processing a Table_Binary structure: ODF Orbit Data Group Header
Now processing a Table_Binary structure: ODF Orbit Data Group Data
Now processing a Table_Binary structure: ODF Ramp Group Header (Station 45)
Now processing a Table_Binary structure: ODF Ramp Group Data (Station 45)
Now processing a Table_Binary structure: ODF End-of-File Group
Processing label: /home/sun/PycharmProjects/odf_parse/Messenger-Orbit/data/pds4/2011/mess_rs_11137_138_odf.xml
Now processing a Table_Binary structure: ODF File Label Group Header
Now processing a Table_Binary structure: ODF File Label Group Data
Now processing a Tabl

In [75]:
# Download files
def check_page_content(year):
    """Download the ``.xml`` and ``.dat`` files linked from one year's index page.

    Files are stored in ``<cwd>/../data/pds4/<year>/``. A file that already
    exists there is skipped. Each download is written to a ``.part`` file and
    renamed only after the write completes, so an interrupted run cannot leave
    a truncated file that later runs would skip as already present.

    Args:
        year: Year whose index page is fetched; also the target sub-directory.

    Returns:
        Dict with the number of files ``'downloaded'``, ``'skipped'`` and
        ``'failed'``. All counts are 0 when the index page cannot be fetched.
    """
    current_dir = Path.cwd()
    data_dir = current_dir.parent / "data"
    pds4_dir = data_dir / "pds4" / str(year)
    pds4_dir.mkdir(parents=True, exist_ok=True)

    base_url = "https://pds-geosciences.wustl.edu/messenger/urn-nasa-pds-mess-rs-raw/data-odf/" + str(year) + '/'

    headers = {
        'User-Agent': 'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36'
    }

    counts = {'downloaded': 0, 'skipped': 0, 'failed': 0}

    # One session serves the index page and every file, reusing connections.
    with requests.Session() as session:
        session.headers.update(headers)

        try:
            response = session.get(base_url, timeout=60)
            response.raise_for_status()
        except requests.RequestException as e:
            # Report the failed index page and let the caller move on.
            print(f"Ошибка: {base_url} - {e}")
            return counts

        soup = BeautifulSoup(response.content, 'html.parser')

        all_files = []
        for link in soup.find_all('a'):
            href = link.get('href')
            if href and href.endswith(('.xml', '.dat')):
                all_files.append((href, href.split('/')[-1]))

        number_files = len(all_files)

        for idx, (file_url_suffix, filename) in enumerate(all_files, start=1):
            file_url = urllib.parse.urljoin(base_url, file_url_suffix)
            file_path = pds4_dir / filename

            if file_path.exists():
                print(f"Пропуск (уже существует): {filename}")
                counts['skipped'] += 1
                continue

            print(f"Скачивание {idx}/{number_files} of year {year}: {filename}")

            tmp_path = file_path.with_name(filename + '.part')
            try:
                file_response = session.get(file_url, timeout=60)
                file_response.raise_for_status()

                with open(tmp_path, 'wb') as f:
                    f.write(file_response.content)
                # Publish under the final name only once fully written.
                tmp_path.replace(file_path)

                counts['downloaded'] += 1
                print(f"Успешно: {filename} ({len(file_response.content)} bytes)")

            except (requests.RequestException, OSError) as e:
                print(f"Ошибка: {filename} - {e}")
                counts['failed'] += 1
                # Drop the partial file so nothing incomplete is left behind.
                if tmp_path.exists():
                    tmp_path.unlink()

    return counts

# Fetch the files for every year from 2007 through 2015.
for year in range(2007, 2016):
    check_page_content(year)

Найдено файлов для скачивания: 452
✓ Пропускаем (уже существует): mess_rs_08002_003_10s_odf.dat
✓ Пропускаем (уже существует): mess_rs_08002_003_10s_odf.xml
✓ Пропускаем (уже существует): mess_rs_08002_003_odf.dat
✓ Пропускаем (уже существует): mess_rs_08002_003_odf.xml
✓ Пропускаем (уже существует): mess_rs_08003_005_10s_odf.dat
✓ Пропускаем (уже существует): mess_rs_08003_005_10s_odf.xml
✓ Пропускаем (уже существует): mess_rs_08004_007_odf.dat
✓ Пропускаем (уже существует): mess_rs_08004_007_odf.xml
✓ Пропускаем (уже существует): mess_rs_08005_006_10s_odf.dat
✓ Пропускаем (уже существует): mess_rs_08005_006_10s_odf.xml
✓ Пропускаем (уже существует): mess_rs_08007_008_10s_odf.dat
✓ Пропускаем (уже существует): mess_rs_08007_008_10s_odf.xml
✓ Пропускаем (уже существует): mess_rs_08007_010_odf.dat
✓ Пропускаем (уже существует): mess_rs_08007_010_odf.xml
✓ Пропускаем (уже существует): mess_rs_08008_010_10s_odf.dat
✓ Пропускаем (уже существует): mess_rs_08008_010_10s_odf.xml
↓ Скачиваем 1