In [1]:
import os
import sys
import asyncio
import json
from pathlib import Path

# Make the project importable from the notebook.
# NOTE: despite its name, `current_file` is the notebook's working *directory*
# (Path.cwd()); the project root is taken to be that directory's parent.
current_file = Path.cwd()
project_root = current_file.parent
backend_dir = project_root / "backend"

# Append the project root first, then the backend directory. Each entry is
# added only if missing so that re-running this cell does not keep growing
# sys.path with duplicates.
for import_root in (project_root, backend_dir):
    import_root_str = str(import_root)
    if import_root_str not in sys.path:
        sys.path.append(import_root_str)

from dotenv import load_dotenv
load_dotenv()  # presumably loads a local .env file into the environment — see python-dotenv docs
try:
    from backend.src.tools.satellite_crawler import SatelliteCrawler
    from backend.src.tools.satellite_data_processor import SatelliteDataProcessor
    print("✅ 成功导入爬虫模块")
except ImportError as e:
    # Nothing below can run without these two classes, so stop here.
    print(f"❌ 导入失败: {e}")
    sys.exit(1)


✅ 成功导入爬虫模块


In [2]:
# Smoke test: construct the crawler with its default arguments.
# `crawler` is reused by the cells below.
print("\n1️⃣ 测试爬虫初始化...")
crawler = SatelliteCrawler()
print("✅ 爬虫初始化成功")


1️⃣ 测试爬虫初始化...
✅ 爬虫初始化成功


In [3]:
print("\n2️⃣ 测试获取主页内容...")
main_page_content = await crawler.fetch_page(crawler.base_url)
if main_page_content:
    print(f"✅ 成功获取主页内容，长度: {len(main_page_content)} 字符")
else:
    print("❌ 获取主页内容失败")


2️⃣ 测试获取主页内容...
✅ 成功获取主页内容，长度: 39504 字符


In [4]:
# Crawl the recent-launch list and report how many records were returned.
# `recent_satellites` is the input for every later cell in this notebook.
# NOTE(review): len() assumes the crawler returns a sized collection, never None — confirm.
print("\n3️⃣ 测试解析最近发射列表...")
recent_satellites = await crawler.crawl_recent_satellites()
print(f"✅ 解析到 {len(recent_satellites)} 个最近发射的卫星")


3️⃣ 测试解析最近发射列表...
✅ 解析到 6 个最近发射的卫星


In [5]:
# Show which fields the first crawled record carries.
# NOTE(review): indexing [0] fails if the crawl returned no records.
print(recent_satellites[0].keys())

dict_keys(['launch_id', 'launch_date', 'satellite_name', 'satellite_url', 'vehicle', 'site', 'remark', 'source_url', 'crawl_time', 'description', 'short_name', 'detailed_specs', 'mass_kg', 'primary_application', 'applications', 'raw_content_sample'])


In [6]:
# Print a report for the first crawled record: the basic launch fields,
# followed by the enhanced fields.

# Fields printed verbatim under the "basic information" heading.
basic_fields = ['launch_date', 'vehicle', 'site', 'remark']

# Fields printed under the "enhanced information" heading; a field missing from
# the record is reported as "未获取到".
enhanced_fields = [
    'description', 'short_name', 'full_name',
    'detailed_specs', 'orbit_parameters', 'primary_application'
]

# Fields expected to hold a dict. They are expanded one entry per line under a
# "包含 N 个<label>" summary; the mapping gives the label used in that summary.
nested_field_labels = {
    'detailed_specs': '技术参数',
    'orbit_parameters': '轨道参数',
}

# Descriptions can be long; only this many leading characters are shown.
description_preview_chars = 100

for i, satellite in enumerate(recent_satellites[:1], 1):
    print(f"\n📡 卫星 {i}: {satellite.get('satellite_name', 'Unknown')}")
    print(f"   🔗 详情链接: {satellite.get('source_url', 'N/A')}")

    print("   📋 基本信息:")
    for field in basic_fields:
        print(f"      {field}: {satellite.get(field, 'N/A')}")

    print("   🔍 增强信息:")
    for field in enhanced_fields:
        if field not in satellite:
            print(f"      {field}: 未获取到")
            continue

        value = satellite[field]

        if field in nested_field_labels:
            # A non-dict value is reported as having 0 entries, so the field
            # always shows up in the report instead of silently disappearing.
            entries = value if isinstance(value, dict) else {}
            print(f"      {field}: 包含 {len(entries)} 个{nested_field_labels[field]}")
            for entry_key, entry_value in entries.items():
                print(f"         {entry_key}: {entry_value}")
            continue

        if field == 'description':
            # Measure and slice the same string form, so a non-string value
            # cannot raise when it is truncated.
            text = str(value)
            if len(text) > description_preview_chars:
                display_value = text[:description_preview_chars] + "..."
            else:
                display_value = text
        else:
            display_value = value

        print(f"      {field}: {display_value}")


📡 卫星 1: QPS-SAR 12 (Kushinada 1)
   🔗 详情链接: https://space.skyrocket.de/doc_sdat/qps-sar-3.htm
   📋 基本信息:
      launch_date: 05.08.2025
      vehicle: Electron KS
      site: OnS LC-1B
      remark: 
   🔍 增强信息:
      description: QPS-SAR 3 [iQPS] QPS-SAR is a series of small high-resolution X-band SAR (synthetic aperture radar) ...
      short_name: QPS-SAR
      full_name: 未获取到
      detailed_specs: 包含 7 个技术参数
         nation: Japan
         type_application: Earth observation, radar
         operator: iQPS
         contractors: iQPS
         equipment: X-band SAR
         power: 2 deployable solar arrays, solar cells, batteries
         mass: ~100 kg
      orbit_parameters: 未获取到
      primary_application: Earth observation, radar


In [8]:
print(f"\n2️⃣ 测试增强的数据处理器...")
processor = SatelliteDataProcessor()

# 格式化数据
formatted_data = await processor.clean_and_format_data(recent_satellites[:2])

if formatted_data:
    print(f"✅ 成功格式化 {len(formatted_data)} 个卫星数据")
    
    # 显示格式化后的第一个卫星
    if len(formatted_data) > 0:
        first_formatted = formatted_data[0]
        print(f"\n📊 格式化后的数据示例 ({first_formatted.get('satelliteName', 'Unknown')}):")
        
        # 显示关键字段
        key_fields = [
            'satelliteName', 'alternateNames', 'COSPARId', 'NORADId',
            'satelliteAgencies', 'owner', 'launchDate', 'launchSite',
            'period', 'inclination', 'apogee', 'perigee', 'dryMass',
            'orbitType', 'applications', 'isEO'
        ]
        
        for field in key_fields:
            if field in first_formatted:
                value = first_formatted[field]
                # 格式化显示
                if isinstance(value, list):
                    display_value = f"[{', '.join(map(str, value))}]"
                elif isinstance(value, (int, float)):
                    display_value = f"{value}"
                else:
                    display_value = str(value)[:50] + "..." if len(str(value)) > 50 else str(value)
                print(f"      {field}: {display_value}")
        
        # 显示爬取元数据
        crawl_metadata = first_formatted.get('_crawl_metadata', {})
        if crawl_metadata:
            print(f"\n   🔧 爬取元数据:")
            for key, value in crawl_metadata.items():
                if key not in ['description', 'detailed_specs']:  # 这些字段太长
                    print(f"      {key}: {value}")
else:
    print("❌ 数据格式化失败")


2️⃣ 测试增强的数据处理器...
✅ 成功格式化 2 个卫星数据

📊 格式化后的数据示例 (QPS-SAR 12 (Kushinada 1)):
      satelliteName: QPS-SAR 12 (Kushinada 1)
      alternateNames: [QPS-SAR]
      COSPARId: 
      NORADId: None
      satelliteAgencies: iQPS
      owner: Japan
      launchDate: 2025-08-05
      launchSite: OnS LC-1B
      period: None
      inclination: None
      apogee: None
      perigee: None
      dryMass: 100.0
      orbitType: 
      applications: [Earth observation, radar]
      isEO: Earth observation


In [12]:
# Dump every field of the first formatted record, one "key: value" per line.
for field_name in first_formatted:
    print(f'{field_name}: {first_formatted[field_name]}')


satelliteName: QPS-SAR 12 (Kushinada 1)
alternateNames: ['QPS-SAR']
COSPARId: 
NORADId: None
objectType: PAY
operStatusCode: Operational
satelliteAgencies: iQPS
owner: Japan
launchDate: 2025-08-05
launchSite: OnS LC-1B
eolDate: 
period: None
inclination: None
apogee: None
perigee: None
rcs: None
dryMass: 100.0
launchMass: 100.0
orbitCenter: 
orbitType: 
orbitAltitude: 
repeatCycle: 
ect: 
orbitLongitude: 
orbitSense: 
applications: ['Earth observation', 'radar']
webInfo: []
dataPortal: []
instrumentNames: ['X-band SAR']
instrumentIds: []
isEO: Earth observation
relatedSatIds: []
eoPortal: 
hasInstrumentId: []


In [12]:

def secret_status(name):
    """Report whether environment variable `name` is set, without revealing it.

    Args:
        name: Name of the environment variable to check.

    Returns:
        "<name>: set" when the variable holds a non-empty value,
        otherwise "<name>: not set".
    """
    return f"{name}: {'set' if os.getenv(name) else 'not set'}"


# Never print the key itself: cell output is saved with the notebook and would
# leak the credential to anyone who can read the file.
print(secret_status("DEEPSEEK_API_KEY"))

sk-******************************** (redacted — this key was exposed in saved output and should be rotated)
