In [47]:
import os
from dotenv import load_dotenv
import requests
import time

load_dotenv()

API_TOKEN = os.getenv("API_TOKEN")
header = {'Authorization': f'Token {API_TOKEN}'}
signalurl = "https://network.satnogs.org/api/observations/?waterfall_status=1&format=json"
nosignalurl = "https://network.satnogs.org/api/observations/?waterfall_status=0&format=json"
NUM_PAGES = 800*2

In [48]:
prepare_directores = True
if prepare_directores:
    os.makedirs("data", exist_ok=True)
    os.makedirs("data/with_signal", exist_ok=True)
    os.makedirs("data/without_signal", exist_ok=True)

In [49]:
link = signalurl

def download_waterfalls(url, folder, num_pages):
    link = url
    for i in range(num_pages - 1):
        response = requests.get(link, headers=header)
        if response.status_code == 429:
            print("Rate limit exceeded. Waiting...")
            time.sleep(60)
            response = requests.get(link, headers=header)

        observation_ids = [k['id'] for k in response.json()]
        waterfall_urls = [k['waterfall'] for k in response.json()]
        transmitter_modes = [k['transmitter_mode'] or 'UNKNWN' for k in response.json()]
        print(f"Page {i+1}: Downloading {len(waterfall_urls)} waterfalls")
        for obs_id, waterfall_url, transmitter_mode in zip(observation_ids, waterfall_urls, transmitter_modes):
            waterfall_data = requests.get(waterfall_url).content
            with open(f"{folder}/{obs_id}_{transmitter_mode}.png", "wb") as f:
                f.write(waterfall_data)
        link = response.links.get('next', {}).get('url', None)
        if link is None:
            break

In [50]:
if False:  # Change to True to download data
    download_waterfalls(signalurl, "data/with_signal", NUM_PAGES)
    download_waterfalls(nosignalurl, "data/without_signal", NUM_PAGES)

In [51]:
import cv2
import numpy as np
from pathlib import Path

def crop_waterfall(image_path, output_path=None,
                   black_threshold=40,
                   min_horizontal_fraction=0.7):
    image_path = Path(image_path)
    img = cv2.imread(str(image_path))
    if img is None:
        raise ValueError(f"Could not read image at {image_path}")

    gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
    h, w = gray.shape
    is_black = gray < black_threshold
    x_probe = w // 4
    y_top = None
    for y in range(h):
        if is_black[y, x_probe]:
            y_top = y
            break
    if y_top is None:
        raise ValueError("Top border not found.")

    x_left = x_probe
    while x_left > 0 and is_black[y_top, x_left - 1]:
        x_left -= 1

    x_right = x_probe
    while x_right < w - 1 and is_black[y_top, x_right + 1]:
        x_right += 1

    span_width = x_right - x_left + 1
    y_bottom = None
    for y in range(h - 1, -1, -1):
        frac_black = is_black[y, x_left:x_right + 1].mean()
        if frac_black >= min_horizontal_fraction:
            y_bottom = y
            break
    if y_bottom is None:
        raise ValueError("Bottom border not found.")

    y0 = max(y_top + 1, 0)
    y1 = min(y_bottom, h - 1)
    x0 = max(x_left + 1, 0)
    x1 = min(x_right, w - 1)

    dimensions = (x1 - x0, y1 - y0)

    cropped = img[y0:y1, x0:x1]
    if cropped.size == 0:
        raise ValueError("Computed crop is empty; check thresholds.")

    if output_path is not None:
        output_path = Path(output_path)
        cv2.imwrite(str(output_path), cropped)

    return dimensions

In [52]:
import glob

if False:
    dimensions_with = []

    datadir_with = 'data/with_signal'
    datadir_without = 'data/without_signal'
    for filepath in glob.glob(os.path.join(datadir_with, '*.png')):
        dim = crop_waterfall(filepath, output_path=filepath)
        dimensions_with.append(dim)

    print(f"Processed {len(dimensions_with)} images with waterfalls.")

In [53]:
if False:
    dimensions_without = []

    datadir_with = 'data/with_signal'
    datadir_without = 'data/without_signal'
    for filepath in glob.glob(os.path.join(datadir_without, '*.png')):
        try:
            dim = crop_waterfall(filepath, output_path=filepath)
            dimensions_without.append(dim)
        except Exception as e:
            if str(e) == "Top border not found." or str(e) == "Bottom border not found." or str(e) == "Computed crop is empty; check thresholds.":
                # skip already processed img
                continue
            print(f"Error processing {filepath}: {e}")

    print(f"Processed {len(dimensions_without)} images without waterfalls.")

In [75]:
def grayscale(img):
    gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
    return gray

def downscale_in_half(img):
    height, width = img.shape[:2]
    new_dimensions = (width // 2, height // 2)
    downscaled = cv2.resize(img, new_dimensions, interpolation=cv2.INTER_AREA)
    return downscaled

def remove_rl_edges(img, edge_width=50):
    h, w = img.shape[:2]
    cropped = img[0:h, edge_width:w - edge_width]
    return cropped

def grayscale_downscale_normalize(im_path, output_path=None):
    img = cv2.imread(im_path)
    if img is None:
        raise ValueError("Input image is None")
    
    h, w = img.shape[:2]
    if h >= 1500 and w >= 600:
        # Large image, crop it down to 1500x600 relative to center
        center_y, center_x = h // 2, w // 2
        half_h, half_w = 1500 // 2, 600 // 2
        y0 = max(center_y - half_h, 0)
        y1 = min(center_y + half_h, h)
        x0 = max(center_x - half_w, 0)
        x1 = min(center_x + half_w, w)
        img = img[y0:y1, x0:x1]
    else:
        print(f"Image {im_path} too small ({w}x{h}), deleting.")
        if input_path == output_path:
            os.remove(im_path)
        return None
    gray = grayscale(img)
    downscaled = remove_rl_edges(downscale_in_half(gray))
    if output_path is not None:
        output_path = Path(output_path)
        cv2.imwrite(str(output_path), downscaled)
    return downscaled

In [76]:
if True:
    datadir_with = 'data/with_signal'
    datadir_without = 'data/without_signal'

    for filepath in glob.glob(os.path.join(datadir_with, '*.png')):
        try:
            grayscale_downscale_normalize(filepath, output_path=filepath)
        except Exception as e:
            print(f"Error processing {filepath}: {e}")
    print("Finished processing with-signal images.")

    for filepath in glob.glob(os.path.join(datadir_without, '*.png')):
        try:
            grayscale_downscale_normalize(filepath, output_path=filepath)
        except Exception as e:
            print(f"Error processing {filepath}: {e}")
    print("Finished processing without-signal images.")

Finished processing with-signal images.
Image data/without_signal/12833378_FSK AX.100 Mode 5.png too small (2x1543), deleting.
Error processing data/without_signal/12833378_FSK AX.100 Mode 5.png: name 'input_path' is not defined
Image data/without_signal/12789005_FM.png too small (1x1405), deleting.
Error processing data/without_signal/12789005_FM.png: name 'input_path' is not defined
Image data/without_signal/12802333_BPSK.png too small (4x1398), deleting.
Error processing data/without_signal/12802333_BPSK.png: name 'input_path' is not defined
Image data/without_signal/12845138_FSK AX.100 Mode 5.png too small (4x843), deleting.
Error processing data/without_signal/12845138_FSK AX.100 Mode 5.png: name 'input_path' is not defined
Image data/without_signal/12775189_SSTV.png too small (4x1511), deleting.
Error processing data/without_signal/12775189_SSTV.png: name 'input_path' is not defined
Image data/without_signal/12860964_BPSK.png too small (1x138), deleting.
Error processing data/wit

In [79]:
def final_dimension_cleanup():
    datadir_with = 'data/with_signal'
    datadir_without = 'data/without_signal'

    for filepath in glob.glob(os.path.join(datadir_with, '*.png')):
        img = cv2.imread(filepath)
        if img is None:
            continue
        h, w = img.shape[:2]
        if h != 750 and w != 200:
            print(f"Deleting unusually small image: {filepath} with size {w}x{h}")
            os.remove(filepath)

    print("Cleaned up with-signal images.")

    for filepath in glob.glob(os.path.join(datadir_without, '*.png')):
        img = cv2.imread(filepath)
        if img is None:
            continue
        h, w = img.shape[:2]
        if h != 750 and w != 200:
            print(f"Deleting unusually small image: {filepath} with size {w}x{h}")
            os.remove(filepath)
    print("Cleaned up without-signal images.")

In [80]:
if True: final_dimension_cleanup()

Cleaned up with-signal images.
Deleting unusually small image: data/without_signal/12833378_FSK AX.100 Mode 5.png with size 2x1543
Deleting unusually small image: data/without_signal/12789005_FM.png with size 1x1405
Deleting unusually small image: data/without_signal/12802333_BPSK.png with size 4x1398
Deleting unusually small image: data/without_signal/12845138_FSK AX.100 Mode 5.png with size 4x843
Deleting unusually small image: data/without_signal/12775189_SSTV.png with size 4x1511
Deleting unusually small image: data/without_signal/12860964_BPSK.png with size 1x138
Deleting unusually small image: data/without_signal/12898200_BPSK.png with size 1x1467
Deleting unusually small image: data/without_signal/12980085_BPSK.png with size 4x1512
Deleting unusually small image: data/without_signal/13011850_FM.png with size 2x1515
Deleting unusually small image: data/without_signal/12834085_MSK AX.100 Mode 5.png with size 1x1530
Deleting unusually small image: data/without_signal/13012875_CW.pn