# dataset_schema_exploration.ipynb
# 📡 Satellite Communications Dataset – Schema Exploration

## Overview
This notebook explores the SatNOGS satellite communications dataset schema.
Goal: Understand the structure of all tables, their columns, and row counts,
so we can later prepare the dataset for machine learning tasks.

## Objectives:
- Inspect each table in the database (satnogs)
- Document column names and row counts
- Preview sample rows for context
- Identify relationships between tables
- Save schema summaries for reuse

In [1]:
# --- Install dependencies ---
%pip install sqlalchemy pandas pymysql tabulate

Collecting tabulate
  Downloading tabulate-0.9.0-py3-none-any.whl.metadata (34 kB)
Downloading tabulate-0.9.0-py3-none-any.whl (35 kB)
Installing collected packages: tabulate
Successfully installed tabulate-0.9.0
Note: you may need to restart the kernel to use updated packages.


In [2]:
# --- Imports ---
import getpass
import os

import pandas as pd
import sqlalchemy
from sqlalchemy import create_engine, inspect
from sqlalchemy.engine import URL

# Report the library versions this run uses, one line per library.
version_report = [
    ("SQLAlchemy version:", sqlalchemy.__version__),
    ("Pandas version:", pd.__version__),
]
for label, version in version_report:
    print(label, version)

SQLAlchemy version: 2.0.44
Pandas version: 2.3.3


In [4]:
# --- Database connection ---
# Connection settings are read from SATNOGS_DB_* environment variables so that
# credentials are never stored in (or committed with) the notebook. Non-secret
# settings fall back to the local development defaults; the password has no
# default and is prompted for interactively when the variable is not set.
DB_USER = os.environ.get("SATNOGS_DB_USER", "root")
DB_PASSWORD = os.environ.get("SATNOGS_DB_PASSWORD") or getpass.getpass("MySQL password: ")
DB_HOST = os.environ.get("SATNOGS_DB_HOST", "127.0.0.1")
DB_PORT = os.environ.get("SATNOGS_DB_PORT", "3306")
DB_NAME = os.environ.get("SATNOGS_DB_NAME", "satnogs")

# URL.create escapes special characters (e.g. "@", "/", ":") in the user name
# and password; a hand-formatted URL string would be misparsed for such values.
connection_url = URL.create(
    "mysql+pymysql",
    username=DB_USER,
    password=DB_PASSWORD,
    host=DB_HOST,
    port=int(DB_PORT),
    database=DB_NAME,
)
engine = create_engine(connection_url)
inspector = inspect(engine)

# --- List all tables ---
tables = inspector.get_table_names()
print(f"Tables in database ({len(tables)}):")
for i, table in enumerate(tables, 1):
    print(f"  {i:2d}. {table}")

Tables in database (15):
   1. base_antenna
   2. base_antennatype
   3. base_frequencyrange
   4. base_launch
   5. base_mode
   6. base_observation
   7. base_operator
   8. base_satellite
   9. base_satelliteentry
  10. base_satelliteidentifier
  11. base_station
  12. base_stationstatuslog
  13. base_stationtype
  14. base_telemetry
  15. base_transmitterentry


In [5]:
# --- Helper function to explore a table ---
def explore_table(table_name, preview_rows=5):
    """Summarise one table: its column names, row count and a small preview.

    Uses the notebook-level ``inspector`` and ``engine`` objects.

    Args:
        table_name: Name of the table to inspect.
        preview_rows: Maximum number of rows to fetch for the preview
            (non-negative integer, default 5).

    Returns:
        dict with the keys ``"table"`` (the given name), ``"columns"`` (list
        of column names), ``"row_count"`` (int) and ``"preview"`` (DataFrame
        holding at most ``preview_rows`` rows).

    Raises:
        ValueError: If ``preview_rows`` is negative or cannot be parsed as an
            integer.
        TypeError: If ``preview_rows`` is not number-like (e.g. ``None``).
    """
    # Coerce up front so that only a plain integer is ever placed in the SQL.
    preview_rows = int(preview_rows)
    if preview_rows < 0:
        raise ValueError(f"preview_rows must be >= 0, got {preview_rows}")

    cols = [c['name'] for c in inspector.get_columns(table_name)]

    # Identifiers cannot be sent as bound parameters, so let the dialect quote
    # the name; this keeps reserved words and special characters from breaking
    # (or altering) the statement.
    quoted_name = engine.dialect.identifier_preparer.quote(table_name)

    count_df = pd.read_sql(f"SELECT COUNT(*) AS total_rows FROM {quoted_name}", engine)
    # Convert the numpy scalar to a plain int so the summary serialises cleanly.
    count = int(count_df['total_rows'].iloc[0])
    df_preview = pd.read_sql(f"SELECT * FROM {quoted_name} LIMIT {preview_rows}", engine)

    summary = {
        "table": table_name,
        "columns": cols,
        "row_count": count,
        "preview": df_preview
    }
    return summary

In [7]:
# --- Explore all relevant tables ---
# Tables to summarise, in the order they are reported.
tables_to_explore = [
    "base_antenna",
    "base_antennatype",
    "base_frequencyrange",
    "base_launch",
    "base_mode",
    "base_observation",
    "base_operator",
    "base_satellite",
    "base_satelliteentry",
    "base_satelliteidentifier",
    "base_station",
    "base_stationstatuslog",
    "base_stationtype",
    "base_telemetry",
    "base_transmitterentry"
]

# Collect one summary dict per table; each table is reported as soon as it
# has been explored.
schema_summaries = []
for tbl in tables_to_explore:
    summary = explore_table(tbl)
    schema_summaries.append(summary)

    print(f"\n=== {tbl} ===")
    print(f"Row count: {summary['row_count']:,}")

    # Wide tables show only their first five column names plus the total count.
    column_names = summary["columns"]
    if len(column_names) > 5:
        print(f"Columns ({len(column_names)}): {', '.join(column_names[:5])}...")
    else:
        print(f"Columns: {', '.join(column_names)}")

    print("Preview:")
    display(summary["preview"].head())


=== base_antenna ===
Row count: 4,437
Columns: id, antenna_type_id, station_id
Preview:


Unnamed: 0,id,antenna_type_id,station_id
0,1,12,256
1,2,12,1536
2,7,10,771
3,8,13,1285
4,9,15,6



=== base_antennatype ===
Row count: 17
Columns: id, name
Preview:


Unnamed: 0,id,name
0,6,Cross Yagi
1,1,Dipole
2,3,Discone
3,12,Eggbeater
4,4,Ground Plane



=== base_frequencyrange ===
Row count: 5,311
Columns: id, min_frequency, max_frequency, antenna_id
Preview:


Unnamed: 0,id,min_frequency,max_frequency,antenna_id
0,1,135000000,148000000,1
1,2,430000000,440000000,2
2,7,135000000,148000000,7
3,8,430000000,440000000,8
4,9,400000000,470000000,9



=== base_launch ===
Row count: 0
Columns: id, name, forum_thread_url, created, created_by_id
Preview:


Unnamed: 0,id,name,forum_thread_url,created,created_by_id



=== base_mode ===
Row count: 56
Columns: id, name
Preview:


Unnamed: 0,id,name
0,90,4FSK
1,49,AFSK
2,78,AFSK TUBiX10
3,17,AHRPT
4,19,AM



=== base_observation ===
Row count: 12,546,241
Columns (48): id, start, end, author_id, ground_station_id...
Preview:


Unnamed: 0,id,start,end,author_id,ground_station_id,max_altitude,rise_azimuth,set_azimuth,waterfall_status_datetime,vetted_status,...,station_antennas,station_lat,station_lng,audio_zipped,payload,waterfall,center_frequency,transmitter_status,transmitter_unconfirmed,sat_id
0,23,2015-10-12 15:13:16,2015-10-12 15:20:01,165,2,,,,2017-05-26 08:04:08,bad,...,,,,1,,,,,,UTXU-4881-3195-9394-3367
1,25,2015-10-12 17:03:08,2015-10-12 17:15:37,165,2,,,,2017-05-26 08:18:15,bad,...,,,,1,,,,,,FVYN-9469-5031-2236-7972
2,27,2015-10-12 17:30:27,2015-10-12 17:43:51,165,2,,,,2017-05-26 08:22:41,bad,...,,,,1,,,,,,HXCH-9043-9893-2952-4877
3,28,2015-10-12 18:08:16,2015-10-12 18:21:54,165,2,,,,2017-07-22 11:14:03,bad,...,,,,1,,,,,,IRES-5964-9687-1982-0089
4,33,2015-10-13 17:08:24,2015-10-13 17:21:56,165,2,,,,2017-09-10 19:00:05,bad,...,,,,1,,,,,,ZRIM-9073-8711-5268-6171



=== base_operator ===
Row count: 6
Columns: id, name, names, description, website
Preview:


Unnamed: 0,id,name,names,description,website
0,1,UVG,Universidad del Valle de Guatemala,,https://www.uvg.edu.gt/
1,2,LSF,Libre Space Foundation,,https://libre.space
2,3,ESA,European Space Agency,,https://www.esa.int/
3,4,ISRO,Indian Space Research Organisation,The Indian Space Research Organisation is the ...,https://www.isro.gov.in/
4,5,CIOMP,"Changchun Institute of Optics, Fine Mechanics ...",,http://english.ciomp.cas.cn/



=== base_satellite ===
Row count: 2,903
Columns: id, last_modified, associated_satellite_id, satellite_entry_id, satellite_identifier_id
Preview:


Unnamed: 0,id,last_modified,associated_satellite_id,satellite_entry_id,satellite_identifier_id
0,1,2021-07-21 10:11:41.947596,,7538,1
1,2,2021-07-21 10:11:41.949944,,1843,2
2,3,2021-07-21 10:11:41.951757,,1836,3
3,4,2021-07-21 10:11:41.953531,,1930,4
4,5,2021-07-21 10:11:41.955319,,1835,5



=== base_satelliteentry ===
Row count: 9,759
Columns (25): id, norad_cat_id, name, image, names...
Preview:


Unnamed: 0,id,norad_cat_id,name,image,names,status,description,decayed,dashboard_url,countries,...,approved,citation,created,created_by_id,reviewed,reviewer_id,satellite_identifier_id,launch_id,receive_review_update,review_message
0,1,7530,OSCAR 7,satellites/AO-7-Model-300x180.gif,AO-7,alive,This satellite was a small communications sate...,,,,...,1,CITATION NEEDED - https://xkcd.com/285/,2021-07-21 10:11:41.973567,,2021-07-21 10:11:41.973567,,16,,0,
1,2,14781,UOSAT 2,satellites/UoSat-2.jpg,UO-11\r OSCAR-11,alive,"Also known as OSCAR 11, this British built sat...",,,,...,1,CITATION NEEDED - https://xkcd.com/285/,2021-07-21 10:11:41.977018,,2021-07-21 10:11:41.977018,,18,,0,
2,3,20442,LUSAT,satellites/LUSAT-1.gif,LO-19,alive,Lusat was launched by the same Ariane vehicle ...,,,,...,1,CITATION NEEDED - https://xkcd.com/285/,2021-07-21 10:11:41.982120,,2021-07-21 10:11:41.982120,,21,,0,
3,4,22826,ITAMSAT,satellites/itamsat_1.jpg,IO-26,alive,Italy's first amateur radio satellite that use...,,,,...,1,CITATION NEEDED - https://xkcd.com/285/,2021-07-21 10:11:41.985554,,2021-07-21 10:11:41.985554,,23,,0,
4,5,23439,RADIO ROSTO,satellites/radio-rosto__1.jpg,RS-15,alive,Also known as RS 15. Built by a group of radio...,,,,...,1,CITATION NEEDED - https://xkcd.com/285/,2021-07-21 10:11:41.987219,,2021-07-21 10:11:41.987219,,24,,0,



=== base_satelliteidentifier ===
Row count: 2,920
Columns: id, sat_id, created
Preview:


Unnamed: 0,id,sat_id,created
0,1,SCHX-0895-2361-9925-0309,2021-07-21 10:11:41.945257
1,2,AMOM-6643-5608-9156-4431,2021-07-21 10:11:41.948554
2,3,KEFJ-8497-6394-9368-1937,2021-07-21 10:11:41.950438
3,4,FBFQ-2056-7966-4855-0749,2021-07-21 10:11:41.952251
4,5,BIRW-7828-0822-0647-1194,2021-07-21 10:11:41.954037



=== base_station ===
Row count: 3,912
Columns (20): id, name, image, alt, lat...
Preview:


Unnamed: 0,id,name,image,alt,lat,lng,featured_date,owner_id,created,qthlocator,last_seen,horizon,description,status,testing,client_version,target_utilization,violator_scheduling,client_id,active_configuration_changed
0,1,Hackerspace.gr 1,ground_stations/269750681_642106180166521_5486...,104,38.01697,23.7314,2015-10-11,848,2015-07-22 13:26:49,KM18ua,2022-10-05 12:49:26,40,"Yaesu 5500, usrp b200, 2x X-Quad Antenna 432 ...",0,1,1.6,100,1,,
1,2,KB9JHU,ground_stations/kb9jhu_P52k3jV.png,280,39.236,-86.305,2017-07-11,165,2015-07-22 14:24:10,EM69uf,2025-07-29 22:39:46,5,Yaesu G-5500 with M2 cross yagi antennas and S...,0,0,1.8.1,100,0,,
2,4,SV1IYO,,150,38.024,23.733,,168,2015-10-11 13:59:38,KM18ua,2024-12-24 09:37:11,0,,0,1,1.9.2+0.g4da08be.dirty,100,1,,
3,5,oe6xug,ground_stations/oe6xug.jpg,330,47.058979,15.460038,,170,2015-11-23 12:12:51,JN77rb,2025-03-24 10:09:13,0,2025-02-11: back in business triggered by fram...,0,1,1.0,0,0,,
4,6,Apomahon,,104,38.048,23.739,2016-04-25,172,2016-01-17 19:28:26,KM18ub,2025-11-10 09:53:03,20,"Patch 435 MHz , RTL-SDR V3",2,0,1.8.1,100,0,,



=== base_stationstatuslog ===
Row count: 298,893
Columns: id, status, changed, station_id
Preview:


Unnamed: 0,id,status,changed,station_id
0,1,2,2018-04-02 13:55:58,6
1,3,2,2018-04-02 13:55:58,12
2,4,2,2018-04-02 13:55:58,13
3,5,2,2018-04-02 13:55:58,15
4,6,2,2018-04-02 13:55:58,16



=== base_stationtype ===
Row count: 1
Columns: id, name
Preview:


Unnamed: 0,id,name
0,1,RF



=== base_telemetry ===
Row count: 185
Columns: id, name, decoder, satellite_id
Preview:


Unnamed: 0,id,name,decoder,satellite_id
0,1,ISS AX.25,iss,28
1,2,STRAND-1 Telemetry,strand,87
2,3,UNISAT-6 Telemetry,us6,132
3,4,FOX-1A Telemetry,fox,178
4,5,QBEE Telemetry,qbee,227



=== base_transmitterentry ===
Row count: 9,869
Columns (29): id, uuid, description, uplink_low, uplink_high...
Preview:


Unnamed: 0,id,uuid,description,uplink_low,uplink_high,downlink_low,downlink_high,invert,baud,approved,...,uplink_mode_id,reviewed,reviewer_id,satellite_id,iaru_coordination,iaru_coordination_url,itu_notification,unconfirmed,receive_review_update,review_message
0,1,ZAKErADdWKpMiDjvKKhmmB,Mode U TLM,,,437125000,,0,12.0,1,...,,2019-04-18 05:39:53.343316,,21,,,"{""urls"": []}",0,0,
1,3,ybJ86zjXzQxDReZ5skY56B,Mode H TLM,,,29352000,,0,0.0,1,...,,2019-04-18 05:39:53.343316,,24,,,"{""urls"": []}",0,0,
2,5,Zqa2ebzyRRBffvwkLnjTVc,Mode U CW Beacon,,,435795000,,0,0.0,1,...,,2019-04-18 05:39:53.343316,,25,,,"{""urls"": []}",0,0,
3,6,c4T33yxNiE8EAEc7V6LMQk,"Mode V/U APRS,BBS 9K6 FSK",145930000.0,,435225000,,1,9600.0,1,...,72.0,2019-04-18 05:39:53.343316,,27,,,"{""urls"": []}",0,0,
4,7,maYGaaMWsSBeDDDMpcM9ES,Mode V/U BBS1 9K6 FSK,145850000.0,,435225000,,1,9600.0,1,...,72.0,2019-04-18 05:39:53.343316,,27,,,"{""urls"": []}",0,0,


In [8]:
# --- Save schema summaries ---
# Writes two reports from `schema_summaries`: a flat CSV (one row per table)
# and a Markdown document that also embeds each table's preview rows.
# `os` is imported in the notebook's import cell.

# Create reports directory if it doesn't exist
os.makedirs("../reports", exist_ok=True)

# Flatten into a DataFrame for CSV; the column list is joined into one string
# so that each table occupies a single row.
schema_records = [
    {
        "table": s["table"],
        "row_count": s["row_count"],
        "columns": ", ".join(s["columns"]),
    }
    for s in schema_summaries
]
schema_df = pd.DataFrame(schema_records)

# Save to CSV
csv_path = "../reports/schema_summary.csv"
schema_df.to_csv(csv_path, index=False)
print(f"✅ Schema summary saved to: {csv_path}")

# Save to Markdown (DataFrame.to_markdown needs the `tabulate` package)
md_path = "../reports/schema_summary.md"
with open(md_path, "w", encoding="utf-8") as f:
    f.write("# 📊 SatNOGS Dataset Schema Summary\n\n")
    for s in schema_summaries:
        f.write(f"## Table: {s['table']}\n")
        f.write(f"- Row count: {s['row_count']:,}\n")
        f.write(f"- Columns: {', '.join(s['columns'])}\n\n")
        f.write("### Preview (first 5 rows)\n")
        f.write(s["preview"].head().to_markdown(index=False))
        f.write("\n\n---\n\n")
print(f"✅ Markdown report saved to: {md_path}")

‚úÖ Schema summary saved to: ../reports/schema_summary.csv
‚úÖ Markdown report saved to: ../reports/schema_summary.md


In [9]:
# --- Quick Analysis of Key Tables ---
# Prints headline figures: the observation count and time span, plus the
# number of rows in the station and satellite-entry tables.
print("📊 Key Table Analysis:")
print("=" * 50)

# Observation table stats: earliest/latest `start` value and total row count,
# computed in a single query.
obs_stats = pd.read_sql("""
SELECT
    MIN(start) AS first_observation,
    MAX(start) AS last_observation,
    COUNT(*) AS total_observations
FROM base_observation
""", engine)

print(f"📈 Observations: {obs_stats['total_observations'][0]:,} records")
print(f"📅 Date range: {obs_stats['first_observation'][0]} to {obs_stats['last_observation'][0]}")

# Station count
station_count = pd.read_sql("SELECT COUNT(*) FROM base_station", engine).iloc[0,0]
print(f"🏭 Stations: {station_count:,}")

# Satellite count
# NOTE(review): this counts rows of base_satelliteentry (9,759 in the schema
# exploration above), while base_satellite has 2,903 rows; presumably one
# satellite can have several entries - confirm which figure is intended here.
sat_count = pd.read_sql("SELECT COUNT(*) FROM base_satelliteentry", engine).iloc[0,0]
print(f"🛰️ Satellites: {sat_count:,}")

üìä Key Table Analysis:
üìà Observations: 12,546,241 records
üìÖ Date range: 2015-10-12 15:13:16 to 2025-11-12 09:59:41
üè≠ Stations: 3,912
üõ∞Ô∏è Satellites: 9,759
