# Prepare NANOGrav 15-year pulsar timing data

Extracts the official archive `NANOGrav15yr_PulsarTiming_v2.1.0.tar.gz` into the `Datasets_UTMF/` directory.

Run this notebook **once** after placing the tar.gz file in the repository root. The notebook treats the current working directory as the repository root, so start it from there.

In [None]:
import os
import tarfile
from datetime import datetime, timezone
from pathlib import Path

# ------------------------------------------------------------------
# One-shot preparation script: unpack the NANOGrav 15-year archive that sits
# in the repository root into Datasets_UTMF/.
#
# The current working directory is treated as the repository root, so the
# notebook has to be started from there.
repo_root     = Path.cwd()
nanograv_file = repo_root / "NANOGrav15yr_PulsarTiming_v2.1.0.tar.gz"
target_dir    = repo_root / "Datasets_UTMF"

# datetime.utcnow() is deprecated (Python 3.12+) and yields a naive datetime;
# an aware UTC "now" prints exactly the same timestamp text.
print(f"[{datetime.now(timezone.utc).strftime('%Y-%m-%d %H:%M:%S UTC')}] Starting NANOGrav 15yr data preparation\n")

# Check presence of the archive; fail early with download instructions.
if not nanograv_file.exists():
    raise FileNotFoundError(
        f"File not found: {nanograv_file.name}\n\n"
        "Please download the official NANOGrav 15-year narrowband dataset from:\n"
        "https://data.nanograv.org → Narrowband Timing Data Release\n"
        "and place NANOGrav15yr_PulsarTiming_v2.1.0.tar.gz in the repository root."
    )

# Create target directory (no error if it is already there).
target_dir.mkdir(exist_ok=True)
print(f"Target directory: {target_dir}\n")

# Check if already extracted.
# NOTE(review): any sub-directory inside Datasets_UTMF/ counts as "already
# extracted" - including one left behind by an interrupted run or one that
# belongs to a different dataset. Delete the directory to force a re-extract.
if any(p.is_dir() for p in target_dir.iterdir()):
    print("NANOGrav 15yr data already extracted – nothing to do.")
else:
    print(f"Extracting {nanograv_file.name} → {target_dir}/")
    with tarfile.open(nanograv_file, "r:gz") as tar:
        # Use the "data" extraction filter where the interpreter supports it:
        # it refuses members that would land outside target_dir (absolute
        # paths, ".." components, escaping links). Older interpreters without
        # extraction filters keep the previous unfiltered behaviour.
        if hasattr(tarfile, "data_filter"):
            tar.extractall(path=target_dir, filter="data")
        else:
            tar.extractall(path=target_dir)
    print("Extraction completed successfully.")

# Final status: report how many top-level folders now exist in the target.
folders = [p.name for p in target_dir.iterdir() if p.is_dir()]
print(f"\nReady! Found {len(folders)} top-level folders in Datasets_UTMF/")
print("You can now run UTMF_main.ipynb – it will automatically discover all pulsars.")