## "Raw" notebook
This is the raw Jupyter notebook used for the analysis. To reproduce our results, run all cells of this notebook in a properly set-up EMR notebook.

In [None]:
# Set up the Python environment through the default Spark context `sc`.
# A pinned pip release goes first to resolve compatibility issues, followed by
# requests (fetching terrain tile data) and matplotlib (visualisation).
for package_spec in ("pip==23.2.1", "requests", "matplotlib"):
    sc.install_pypi_package(package_spec)

In [None]:
# stdlib imports
from time import time
from itertools import product
from typing import Tuple, List, NamedTuple
from collections import namedtuple

# util libraries imports
import numpy as np
import matplotlib.pyplot as plt

# PySpark related imports
from pyspark.sql.types import IntegerType
import pyspark.sql.functions as spark_func


In [None]:
# Constants used throughout the notebook

# Record identifying a single map tile by zoom level and x/y tile indices.
# Deliberately left unannotated: `Tile` is a class, not a NamedTuple instance.
Tile = namedtuple("Tile", ("zoom", "x", "y"))

TEMP_DIR: str = "data"
# Base S3 prefix of the elevation tiles; generate_links appends
# "/{zoom}/{x}/{y}.png" to it.
DATASET_URL_FORMAT: str = "s3://elevation-tiles-prod/terrarium"
ZOOM: int = 3
# Two corner points of the analysed area.
# NOTE(review): presumably ordered (lat1, lon1, lat2, lon2) to match the
# get_tiles signature - confirm at the call site.
BOUNDS: Tuple[float, float, float, float] = (71.691037, -163.084981, -58.205805, -28.365288)
TILE_WIDTH: int = 256
TILE_HEIGHT: int = 256
CHANNELS_NUM: int = 3
# NOTE(review): presumably the per-channel weights and offset used when
# decoding an RGB pixel into an elevation value - confirm against
# calculate_elevation.
RED_CHANNEL_MULTIPLIER: int = 256
GREEN_CHANNEL_MULTIPLIER: int = 1
BLUE_CHANNEL_MULTIPLIER: float = 1/256
INCREMENT_OFFSET: int = 32768
# Maps a group index to a numeric threshold.
# NOTE(review): presumably gradient bucket limits - confirm against usage.
GRADIENT_GROUPS: dict[int, int] = {
    0: 2,
    1: 10,
    2: 50,
    3: 150,
    4: 500,
    5: 1500,
}

## Functions related to transforming data, later used as map functions on the RDD 

In [None]:
def mercator_projection(lat: float, lon: float, zoom: int) -> Tuple[int, int, int]:
    """
    Convert latitude, longitude to z/x/y tile coordinate at given zoom.

    Latitudes beyond the Web Mercator limit (about +/-85.0511 degrees) are
    clamped to that limit, and the resulting tile indices are clamped to the
    valid range [0, 2 ** zoom - 1]. Without this, the poles produce infinite
    or negative values and lon = 180 maps to a tile one past the last column.

    Parameters:
    - lat (float): Latitude in degrees.
    - lon (float): Longitude in degrees.
    - zoom (int): Zoom level.

    Returns:
    - Tuple[int, int, int]: A tuple containing the zoom level, x and y tile coordinates.
    """
    # Latitude at which the projected y coordinate reaches +/-pi, i.e. the
    # top/bottom edge of the square tile grid
    max_lat = np.degrees(np.arctan(np.sinh(np.pi)))
    lat = min(max(lat, -max_lat), max_lat)

    # Convert latitude and longitude to radians
    x1: float = lon * np.pi / 180.
    y1: float = lat * np.pi / 180.

    # Mercator projection https://en.wikipedia.org/wiki/Mercator_projection
    x2 = x1
    y2 = np.log(np.tan(0.25 * np.pi + 0.5 * y1))

    # Transform to tile space
    tiles = 2 ** zoom
    diameter = 2 * np.pi
    x3 = int(tiles * (x2 + np.pi) / diameter)
    y3 = int(tiles * (np.pi - y2) / diameter)

    # Keep indices on the grid: the far edges (lon = 180, southern latitude
    # limit) would otherwise land exactly on `tiles`, which does not exist
    last = tiles - 1
    x3 = min(max(x3, 0), last)
    y3 = min(max(y3, 0), last)

    return zoom, x3, y3


def get_elevation(tile: np.ndarray) -> np.ndarray:
    """
    Compute the elevation of every element of a map tile.

    Each row of the tile is traversed and calculate_elevation is applied to
    every element of that row; the per-row results are stacked into a new array.

    Parameters:
    - tile (np.ndarray): A NumPy array representing the tile.

    Returns:
    - np.ndarray: A NumPy array containing the elevation values.
    """
    elevation_rows = [
        [calculate_elevation(pixel) for pixel in row]
        for row in tile
    ]
    return np.array(elevation_rows)


## Functions related to fetching data and other miscellaneous transformations

In [None]:
def get_tiles(zoom: int, lat1: float, lon1: float, lat2: float, lon2: float) -> List[Tile]:
    """
    List every tile covering a geographic bounding box at the given zoom.

    The two corner points may be passed in any order. Tiles are returned row
    by row: y is the outer loop and x the inner loop, both ascending.

    Parameters:
    - zoom (int): Zoom level.
    - lat1, lon1, lat2, lon2 (float): Geographic bounds.

    Returns:
    - List[Tile]: A list of Tile namedtuples.
    """
    # Normalise the two corners into north/south and west/east edges
    north, south = max(lat1, lat2), min(lat1, lat2)
    west, east = min(lon1, lon2), max(lon1, lon2)

    # Tile y grows southwards, so the north-west corner yields the smallest
    # x/y indices and the south-east corner the largest
    _, first_col, first_row = mercator_projection(north, west, zoom)
    _, last_col, last_row = mercator_projection(south, east, zoom)

    return [
        Tile(zoom, col, row)
        for row in range(first_row, last_row + 1)
        for col in range(first_col, last_col + 1)
    ]

def generate_links(tiles: List[Tile]) -> List[str]:
    """
    Build the S3 object link of each given tile.

    Every link has the form "<DATASET_URL_FORMAT>/<zoom>/<x>/<y>.png" and the
    output preserves the order of the input tiles.

    Parameters:
    - tiles (List[Tile]): A list of Tile namedtuples.

    Returns:
    - List[str]: A list of URLs.
    """
    def tile_link(tile: Tile) -> str:
        # Path layout under the dataset prefix: zoom / x / y.png
        return f"{DATASET_URL_FORMAT}/{tile.zoom}/{tile.x}/{tile.y}.png"

    return list(map(tile_link, tiles))


## Functions related to elevation: coloring the map and calculating the color of each pixel to be displayed