In [47]:
import os
from dotenv import load_dotenv
import requests
import time

load_dotenv()

# API token taken from the process environment; None when the variable is unset.
API_TOKEN = os.environ.get("API_TOKEN")
# Authorization header sent with every observation-listing request.
header = {'Authorization': f'Token {API_TOKEN}'}

# Observation listings filtered by waterfall_status: 1 is used for the
# "with signal" set and 0 for the "without signal" set.
signalurl = "https://network.satnogs.org/api/observations/?waterfall_status=1&format=json"
nosignalurl = "https://network.satnogs.org/api/observations/?waterfall_status=0&format=json"

# Page budget handed to download_waterfalls for each of the two listings.
NUM_PAGES = 2 * 800

In [48]:
# Toggle: set to False to skip creating the dataset folders.
prepare_directores = True
if prepare_directores:
    # exist_ok=True makes this a no-op for folders that are already present.
    for dataset_dir in ("data", "data/with_signal", "data/without_signal"):
        os.makedirs(dataset_dir, exist_ok=True)

In [49]:
# Module-level alias for the "with signal" listing URL. download_waterfalls
# assigns its own local `link`, so the function never reads this global.
link = signalurl

def download_waterfalls(url, folder, num_pages, timeout=30, retry_wait=60, max_retries=5):
    """Download the waterfall image of every observation in a paginated listing.

    Each page is expected to be a JSON list of observation dicts carrying the
    keys ``'id'``, ``'waterfall'`` (image URL) and ``'transmitter_mode'``.
    Every image is saved as ``{folder}/{id}_{transmitter_mode}.png``; a missing
    mode is written as ``UNKNWN``. Pagination follows the ``next`` entry of
    ``response.links`` and stops early when there is none.

    Args:
        url: URL of the first listing page.
        folder: Directory that receives the images (created if missing).
        num_pages: Maximum number of listing pages to process.
        timeout: Seconds to wait on each HTTP request before giving up.
        retry_wait: Seconds to sleep after an HTTP 429 before retrying.
        max_retries: Maximum number of retries of one page after HTTP 429.

    Raises:
        Whatever ``response.raise_for_status()`` raises when a listing page
        still answers with an HTTP error once the retries are used up.
    """
    os.makedirs(folder, exist_ok=True)

    link = url
    # range(num_pages), not num_pages - 1: process exactly the requested count.
    for page_index in range(num_pages):
        response = requests.get(link, headers=header, timeout=timeout)

        # Keep retrying while rate limited instead of giving up after one try.
        retries = 0
        while response.status_code == 429 and retries < max_retries:
            print("Rate limit exceeded. Waiting...")
            time.sleep(retry_wait)
            response = requests.get(link, headers=header, timeout=timeout)
            retries += 1
        # Fail loudly here rather than on a confusing parse error further down.
        response.raise_for_status()

        observations = response.json()  # parse the page body once
        print(f"Page {page_index+1}: Downloading {len(observations)} waterfalls")
        for observation in observations:
            obs_id = observation['id']
            waterfall_url = observation['waterfall']
            transmitter_mode = observation['transmitter_mode'] or 'UNKNWN'

            if not waterfall_url:
                # Nothing to fetch for this observation.
                continue

            image_response = requests.get(waterfall_url, timeout=timeout)
            if image_response.status_code != 200:
                # Do not store an error page under a .png name.
                print(f"Skipping observation {obs_id}: HTTP {image_response.status_code}")
                continue

            with open(f"{folder}/{obs_id}_{transmitter_mode}.png", "wb") as f:
                f.write(image_response.content)

        link = response.links.get('next', {}).get('url', None)
        if link is None:
            break

In [50]:
# The network download is opt-in; both listings go through the same routine.
if False:  # Change to True to download data
    for listing_url, target_dir in (
        (signalurl, "data/with_signal"),
        (nosignalurl, "data/without_signal"),
    ):
        download_waterfalls(listing_url, target_dir, NUM_PAGES)

In [51]:
import cv2
import numpy as np
from pathlib import Path

def crop_waterfall(image_path, output_path=None,
                   black_threshold=40,
                   min_horizontal_fraction=0.7):
    """Crop an image to the area enclosed by its dark border.

    The border is searched on the grayscale image, where a pixel counts as
    black when its value is below ``black_threshold``:

    1. Scan down the column at one quarter of the image width; the first
       black pixel gives the top border row.
    2. Extend left and right along that row for as long as the pixels stay
       black; this gives the horizontal extent of the border.
    3. The bottom border is the lowest row whose share of black pixels over
       that extent is at least ``min_horizontal_fraction``.

    The crop keeps only the pixels strictly inside those four limits.

    Args:
        image_path: Path of the image to read.
        output_path: Where to write the cropped image; nothing is written
            when None.
        black_threshold: Grayscale values below this are treated as black.
        min_horizontal_fraction: Minimum black fraction for a row to be
            accepted as the bottom border.

    Returns:
        ``(width, height)`` of the cropped region in pixels.

    Raises:
        ValueError: The image cannot be read, a border cannot be found, or
            the resulting crop is empty.
        OSError: ``output_path`` was given but the image could not be written.
    """
    image_path = Path(image_path)
    img = cv2.imread(str(image_path))
    if img is None:
        raise ValueError(f"Could not read image at {image_path}")

    gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
    h, w = gray.shape
    is_black = gray < black_threshold

    # Top border: first black pixel going down the probe column.
    x_probe = w // 4
    probe_hits = np.flatnonzero(is_black[:, x_probe])
    if probe_hits.size == 0:
        raise ValueError("Top border not found.")
    y_top = int(probe_hits[0])

    # Horizontal extent: the contiguous black run around x_probe on that row,
    # bounded by the nearest non-black pixel on each side (or the image edge).
    top_row = is_black[y_top]
    gaps_left = np.flatnonzero(~top_row[:x_probe])
    x_left = int(gaps_left[-1]) + 1 if gaps_left.size else 0
    gaps_right = np.flatnonzero(~top_row[x_probe + 1:])
    x_right = x_probe + int(gaps_right[0]) if gaps_right.size else w - 1

    # Bottom border: lowest row that is black enough across the extent.
    black_fraction = is_black[:, x_left:x_right + 1].mean(axis=1)
    border_rows = np.flatnonzero(black_fraction >= min_horizontal_fraction)
    if border_rows.size == 0:
        raise ValueError("Bottom border not found.")
    y_bottom = int(border_rows[-1])

    # Slice ends are exclusive, so both border rows and columns are dropped.
    y0, y1 = y_top + 1, y_bottom
    x0, x1 = x_left + 1, x_right
    dimensions = (x1 - x0, y1 - y0)

    cropped = img[y0:y1, x0:x1]
    if cropped.size == 0:
        raise ValueError("Computed crop is empty; check thresholds.")

    if output_path is not None:
        output_path = Path(output_path)
        # imwrite reports failure through its return value, not an exception.
        if not cv2.imwrite(str(output_path), cropped):
            raise OSError(f"Could not write image to {output_path}")

    return dimensions

In [52]:
import glob

# Crop every "with signal" image in place. Disabled by default.
if False:
    dimensions_with = []

    datadir_with = 'data/with_signal'
    datadir_without = 'data/without_signal'
    for filepath in glob.glob(os.path.join(datadir_with, '*.png')):
        try:
            dim = crop_waterfall(filepath, output_path=filepath)
        except Exception as e:
            # Images are overwritten in place, so on a re-run the already
            # cropped files no longer have a border; skip those quietly
            # instead of aborting the whole batch.
            if str(e) in (
                "Top border not found.",
                "Bottom border not found.",
                "Computed crop is empty; check thresholds.",
            ):
                continue
            print(f"Error processing {filepath}: {e}")
            continue
        dimensions_with.append(dim)

    print(f"Processed {len(dimensions_with)} images with waterfalls.")

In [53]:
# Crop every "without signal" image in place. Disabled by default.
if False:
    dimensions_without = []

    datadir_with = 'data/with_signal'
    datadir_without = 'data/without_signal'
    # Errors raised by crop_waterfall for images that were already processed.
    already_processed_errors = (
        "Top border not found.",
        "Bottom border not found.",
        "Computed crop is empty; check thresholds.",
    )
    for filepath in glob.glob(os.path.join(datadir_without, '*.png')):
        try:
            dim = crop_waterfall(filepath, output_path=filepath)
            dimensions_without.append(dim)
        except Exception as e:
            # Report only the unexpected failures; the known ones are skipped.
            if str(e) not in already_processed_errors:
                print(f"Error processing {filepath}: {e}")

    print(f"Processed {len(dimensions_without)} images without waterfalls.")