In [None]:
import pandas as pd
import numpy as np
import seaborn as sns
import sys
sys.path.append('./utils')
from utils_imports import *
from gpmap import GPMap, recalc

# required RAM

In [None]:
%load_ext memory_profiler

In [None]:
# Profile the memory use of one `recalc()` call with memory_profiler's %memit
# (the extension is loaded in the previous cell).
# Author's measurement: we require 7GB of RAM.
%memit recalc()

# TODO: exclude pixels that lie in the sea from the recalculation

In [None]:
# Run configuration for the low-resolution pass.
region = "world"
method = "TransformedTargetRegressorWithUncertainty"
# Passed as `resolution=` to the map models and the numpy-map loaders below.
res = 1
# Divided by the grid's pixel spacing to obtain the window radius in pixels.
recalc_radius = 800_000
# Absolute tolerance (`atol`) when comparing two rasters with np.isclose.
tolerance = 3.0

In [None]:
# Points from the 2024-01-30 .. 2024-03-30 window; the map is recalculated
# around these points further down.
window_begin = pd.Timestamp("2024-1-30")
window_until = pd.Timestamp("2024-3-30")
points = get_points("dump.sqlite", begin=window_begin, until=window_until)

# Expose the geometry coordinates as plain columns for `pixel_from_point`.
point_geometry = points.geometry
points["lon"] = point_geometry.x
points["lat"] = point_geometry.y
points.head()

In [None]:
# Build the map model and its coordinate grid, then express the recalculation
# radius as a number of pixels.
# NOTE(review): the name `map` shadows the Python builtin; later cells
# (including `pixel_from_point`) read this global, so it is kept as is.
map = MapBasedModel(method=method, region="world", resolution=res, version="diff_mar")
map.get_map_grid()

first_grid_row = map.grid[0][0]
# Spacing between the first two entries of the first grid row.
pixel_step = first_grid_row[0] - first_grid_row[1]
recalc_radius_pixels = int(np.ceil(abs(recalc_radius / pixel_step)))
recalc_radius_pixels

In [None]:
# Have the model compute its landmass raster and keep a reference to it.
# It is multiplied element-wise with the `recalc` mask further down
# (presumably non-zero on land and zero at sea — TODO confirm).
map.get_landmass_raster()
landmass = map.landmass_raster

In [None]:
# Visual check of the landmass raster. Uses an explicit figure/axes pair and a
# title that names the plotted array (every plot in this notebook previously
# carried the same generic "Heatmap of 2D Array" title).
fig, ax = plt.subplots()
image = ax.imshow(landmass, cmap='viridis', interpolation='nearest')

# Colour bar shows the value scale of the raster
fig.colorbar(image, ax=ax)

ax.set_title("Landmass raster")
ax.set_xlabel("Pixel column")
ax.set_ylabel("Pixel row")

plt.show()
landmass.shape

In [None]:
# Write a raster of ones with the grid's 2-D shape, then mask it with the
# country shapes so that everything outside the shapes becomes `nodata`.
m = MapBasedModel(method=method, region=region, resolution=res, version="tif")

m.raw_raster = np.ones(map.grid.shape[1:])

m.save_as_raster()

nodata = 0
with rasterio.open(m.rasterio_path) as heatmap:
    start = time.time()
    # Read the raster once; it was previously read in full twice, once for the
    # maximum and once for the minimum.
    heatmap_values = heatmap.read()
    max_map_wait = heatmap_values.max()
    min_map_wait = heatmap_values.min()

    out_image, out_transform = rasterio.mask.mask(
        heatmap, country_shapes, nodata=nodata
    )
    out_meta = heatmap.meta
    print(f"Time elapsed to transform heatmap: {time.time() - start}")

# First band of the masked raster
out_image[0]

In [None]:
# Open the GeoTIFF file
# Open the GeoTIFF file.
# NOTE(review): `new_map_path` is not assigned anywhere in the visible
# notebook; it presumably comes from `utils_imports` or from leftover kernel
# state — confirm this cell survives Restart & Run All.
with rasterio.open(new_map_path) as src:
    # Read the first band (assuming single-band raster, adjust for multi-band)
    numpy_array = src.read(1)

In [None]:
# Visual check of the band read from the GeoTIFF. Explicit figure/axes and a
# title that names the data instead of the generic "Heatmap of 2D Array".
fig, ax = plt.subplots()
image = ax.imshow(numpy_array, cmap='viridis', interpolation='nearest')

# Colour bar shows the value scale of the band
fig.colorbar(image, ax=ax)

ax.set_title("First band of the GeoTIFF")
ax.set_xlabel("Pixel column")
ax.set_ylabel("Pixel row")

plt.show()
numpy_array.shape

In [None]:
def pixel_from_point(point) -> tuple[int, int]:
    """Return the (row, column) index of the grid cell containing ``point``.

    Reads the notebook-level ``map`` object: the first column of ``map.Y``
    supplies the latitude of each grid row and the first row of ``map.X`` the
    longitude of each grid column. A cell ``i`` spans the interval between
    entry ``i`` and entry ``i + 1``; both ends are inclusive and the first
    matching cell wins. The latitude test expects entry ``i`` to be the larger
    value, the longitude test expects it to be the smaller one.

    Args:
        point: Record with ``"lat"`` and ``"lon"`` entries, e.g. a row yielded
            by ``points.iterrows()``.

    Returns:
        ``(lat_index, lon_index)``. An index is ``None`` when the coordinate
        lies outside the grid. Previously a coordinate beyond the last grid
        line raised ``IndexError`` (from ``lats[i+1]`` / ``lons[i+1]``) while
        one before the first grid line returned ``None``; both now return
        ``None``.
    """
    lats = map.Y.transpose()[0]
    # Pair every grid line with its successor so the last entry is never
    # indexed past the end.
    lat_index = next(
        (
            i
            for i, (upper, lower) in enumerate(zip(lats[:-1], lats[1:]))
            if upper >= point["lat"] >= lower
        ),
        None,
    )

    lons = map.X[0]
    lon_index = next(
        (
            i
            for i, (left, right) in enumerate(zip(lons[:-1], lons[1:]))
            if left <= point["lon"] <= right
        ),
        None,
    )

    return (lat_index, lon_index)

In [None]:
# Mask with the 2-D shape of the grid, initially all zero; pixels to be
# recalculated are set to 1 in the next cell.
# NOTE(review): this rebinds `recalc`, which the first cell imports from
# `gpmap` as a function (used by `%memit recalc()`); re-running that cell
# after this one will fail.
raster_shape = map.grid.shape[1:]
recalc = np.zeros(raster_shape)
recalc.shape

In [None]:
# Mark a square window around every point for recalculation.
# Changes from the per-pixel nested loops: the window is written with one
# clamped slice assignment, the outer loop index is no longer shadowed by the
# inner one, and points that fall outside the grid are skipped instead of
# failing on a `None` index.
n_rows, n_cols = recalc.shape
for _, point in points.iterrows():
    lat_pixel, lon_pixel = pixel_from_point(point)
    if lat_pixel is None or lon_pixel is None:
        # Point lies outside the grid; there is no pixel to centre a window on.
        continue

    # NOTE(review): the window is half-open, [p - r, p + r), exactly as the
    # original ranges were; widen by one if pixel p + r should be included.
    row_start = max(lat_pixel - recalc_radius_pixels, 0)
    row_stop = min(lat_pixel + recalc_radius_pixels, n_rows)
    col_start = max(lon_pixel - recalc_radius_pixels, 0)
    col_stop = min(lon_pixel + recalc_radius_pixels, n_cols)
    recalc[row_start:row_stop, col_start:col_stop] = 1

In [None]:
# Show which pixels were marked for recalculation. Explicit figure/axes and a
# title that names the data instead of the generic "Heatmap of 2D Array".
fig, ax = plt.subplots()
image = ax.imshow(recalc, cmap='viridis', interpolation='nearest')

# Colour bar: 1 = marked for recalculation, 0 = untouched
fig.colorbar(image, ax=ax)

ax.set_title("Recalculation windows around the new points")
ax.set_xlabel("Pixel column (lon index)")
ax.set_ylabel("Pixel row (lat index)")

plt.show()

In [None]:
# Element-wise product: a pixel stays marked only where the landmass raster
# is non-zero as well.
recalc_landmass = np.multiply(recalc, landmass)

In [None]:
# Show the recalculation mask after multiplying it with the landmass raster.
# Explicit figure/axes and a title that names the data instead of the generic
# "Heatmap of 2D Array".
fig, ax = plt.subplots()
image = ax.imshow(recalc_landmass, cmap='viridis', interpolation='nearest')

# Colour bar shows the value scale of the mask
fig.colorbar(image, ax=ax)

ax.set_title("Recalculation windows restricted to the landmass raster")
ax.set_xlabel("Pixel column (lon index)")
ax.set_ylabel("Pixel row (lat index)")

plt.show()

In [None]:
# For each raster print: sum of its values, total pixel count, and the ratio
# of the two (order: recalc, landmass, recalc restricted to landmass).
for raster in (recalc, landmass, recalc_landmass):
    pixel_count = raster.shape[0] * raster.shape[1]
    print(raster.sum(), pixel_count, raster.sum() / pixel_count)

In [None]:
# NOTE(review): `stop` is not defined in the visible notebook; presumably a
# deliberate NameError that halts "Run All" before the expensive cells below —
# confirm, and consider an explicit `raise` with a message instead.
stop

In [None]:
# Training data: every point up to 2024-03-30.
# NOTE(review): this rebinds `points`, which the earlier cells use for the
# 2024-01-30 .. 2024-03-30 window; re-running those cells afterwards will mark
# windows around all points.
points = get_points("dump.sqlite", until=pd.Timestamp("2024-03-30"))
points["lon"] = points.geometry.x
points["lat"] = points.geometry.y

X = points[["lon", "lat"]].values
y = points["wait"].values
# Was a bare expression in the middle of the cell, which displays nothing.
print(X.shape, y.shape)

# SECURITY: pickle.load can execute arbitrary code; only load model files that
# were produced by this project.
with open("models/kernel.pkl", "rb") as file:
    gpr = pickle.load(file)

# Show the optimizer before and after clearing it. Clearing presumably keeps
# the pickled kernel hyperparameters fixed during the refit — TODO confirm
# against the regressor's documentation.
print(gpr.regressor.optimizer)
gpr.regressor.optimizer = None
print(gpr.regressor.optimizer)

gpr = fit_gpr_silent(gpr, X, y)

In [None]:
# Update the January raster in place: only pixels marked in `recalc_landmass`
# are re-predicted with the refitted model, everything else keeps its January
# value. The result is stored as version "mar_updated".
version = "mar_updated"
model = gpr
verbose = True

model_name = type(model).__name__

raster_maker = MapBasedModel(
    method=model_name,
    region=region,
    resolution=res,
    version=version,
    verbose=verbose,
)

# NOTE: `raw_raster` and `jan_map` are the same array object, so the writes
# below also modify `jan_map`.
jan_map = load_numpy_map(resolution=res, method=method, version="jan")
raster_maker.raw_raster = jan_map

raster_maker.get_map_grid()

batch_size = 1000


def _predict_batch(batch_coords, batch_pixels):
    """Predict one batch of lon-lat pairs and write the values into the raster."""
    print(f"Predicting {len(batch_coords)} pixels...")
    batch_prediction = model.predict(np.array(batch_coords), return_std=False)
    for value, (row, col) in zip(batch_prediction, batch_pixels):
        raster_maker.raw_raster[row][col] = value


# transposing the grid enables us to iterate over it vertically
# and single elements become lon-lat pairs that can be fed into the model
print("Compute rows of pixels...")
start = time.time()
grid_columns = raster_maker.grid.transpose()  # computed once, was computed twice
to_predict = []
pixels_to_predict = []
# `row` / `col` replace the former loop names `y` / `x`: `y` overwrote the
# training targets of the previous cell, and both were rebound inside the
# loop that iterates over them.
for col, vertical_line in tqdm(enumerate(grid_columns), total=len(grid_columns)):
    for row, coords in enumerate(vertical_line):
        if recalc_landmass[row][col] == 0:
            continue
        to_predict.append([float(coords[0]), float(coords[1])])
        pixels_to_predict.append((row, col))
        # batching the model calls
        if len(to_predict) == batch_size:
            _predict_batch(to_predict, pixels_to_predict)
            to_predict = []
            pixels_to_predict = []

# Flush the remainder. Skipped when empty: with zero marked pixels, or a count
# that is an exact multiple of the batch size, the model would otherwise be
# handed an empty 1-D array.
if to_predict:
    _predict_batch(to_predict, pixels_to_predict)

elapsed = time.time() - start
total_pixels = raster_maker.raw_raster.shape[0] * raster_maker.raw_raster.shape[1]
recalculated_pixels = recalc_landmass.sum()

print(f"Time elapsed to compute full map: {elapsed}")
print(
    f"For map of shape: {raster_maker.raw_raster.shape} that is {total_pixels} pixels and an effective time per pixel of {elapsed / total_pixels} seconds"
)
print((f"Only {recalculated_pixels} pixels were recalculated. That is {recalculated_pixels / total_pixels * 100}% of the map."))
print(f"And time per recalculated pixel was {elapsed / recalculated_pixels} seconds")

save_numpy_map(
    raster_maker.raw_raster,
    region=region,
    method=model_name,
    resolution=res,
    version=version,
)

raster_maker.save_as_raster()
raster_maker.build_map() # for march
a = raster_maker.raw_raster

In [None]:
# January baseline: load the stored "jan" raster, write it out again and build
# its map; `b` is the raster compared against `a` in the next cell.
version = "jan"
verbose = True

model_name = type(model).__name__
jan_map = load_numpy_map(resolution=res, method=method, version="jan")

january_settings = dict(
    method=model_name,
    region=region,
    resolution=res,
    version=version,
    verbose=verbose,
)
raster_maker = MapBasedModel(**january_settings)
raster_maker.get_map_grid()
raster_maker.raw_raster = jan_map
raster_maker.save_as_raster()
raster_maker.build_map()
b = raster_maker.raw_raster

In [None]:
# Boolean mask: True where the updated raster `a` and the January raster `b`
# are not close (absolute tolerance `tolerance`, plus np.isclose's default
# relative tolerance).
rasters_close = np.isclose(a, b, atol=tolerance)
diff = np.logical_not(rasters_close)

In [None]:
# Show where the updated raster differs from the January raster. Explicit
# figure/axes and a title that names the data instead of the generic
# "Heatmap of 2D Array".
fig, ax = plt.subplots()
image = ax.imshow(diff, cmap='viridis', interpolation='nearest')

# Colour bar: True (1) = pixel differs, False (0) = within tolerance
fig.colorbar(image, ax=ax)

ax.set_title('Pixels where "mar_updated" differs from "jan"')
ax.set_xlabel("Pixel column")
ax.set_ylabel("Pixel row")

plt.show()

In [None]:
# NOTE(review): `stop` is not defined in the visible notebook; presumably a
# deliberate NameError that halts "Run All" before the high-resolution section
# below — confirm, and consider an explicit `raise` with a message instead.
stop

# High resolution (res = 10)

In [None]:
# Build the map for whatever `map` currently refers to.
# NOTE(review): at this point `map` is the low-resolution model created
# earlier; the high-resolution `map` is only created a few cells below.
map.build_map()

In [None]:
# Run configuration for the high-resolution pass (only `res` differs from the
# low-resolution configuration).
region = "world"
method = "TransformedTargetRegressorWithUncertainty"
# Passed as `resolution=` to the map models and the numpy-map loaders below.
res = 10
# Divided by the grid's pixel spacing to obtain the window radius in pixels.
recalc_radius = 800_000
# Absolute tolerance (`atol`) when comparing two rasters with np.isclose.
tolerance = 3.0

In [None]:
# Points from the 2024-01-30 .. 2024-03-30 window; the map is recalculated
# around these points further down.
window_begin = pd.Timestamp("2024-1-30")
window_until = pd.Timestamp("2024-3-30")
points = get_points("dump.sqlite", begin=window_begin, until=window_until)

# Expose the geometry coordinates as plain columns for `pixel_from_point`.
point_geometry = points.geometry
points["lon"] = point_geometry.x
points["lat"] = point_geometry.y
points.head(), len(points)

In [None]:
# Build the GPMap and its coordinate grid, then express the recalculation
# radius as a number of pixels.
# NOTE(review): the name `map` shadows the Python builtin; later cells
# (including `pixel_from_point`) read this global, so it is kept as is.
map = GPMap()
map.get_map_grid()

first_grid_row = map.grid[0][0]
# Spacing between the first two entries of the first grid row.
pixel_step = first_grid_row[0] - first_grid_row[1]
recalc_radius_pixels = int(np.ceil(abs(recalc_radius / pixel_step)))
recalc_radius_pixels

In [None]:
# Have the map compute its landmass raster and keep a reference to it.
# It is multiplied element-wise with the `recalc` mask further down
# (presumably non-zero on land and zero at sea — TODO confirm).
map.get_landmass_raster()
landmass = map.landmass_raster

In [None]:
def pixel_from_point(point) -> tuple[int, int]:
    """Return the (row, column) index of the grid cell containing ``point``.

    Reads the notebook-level ``map`` object: the first column of ``map.Y``
    supplies the latitude of each grid row and the first row of ``map.X`` the
    longitude of each grid column. A cell ``i`` spans the interval between
    entry ``i`` and entry ``i + 1``; both ends are inclusive and the first
    matching cell wins. The latitude test expects entry ``i`` to be the larger
    value, the longitude test expects it to be the smaller one.

    Args:
        point: Record with ``"lat"`` and ``"lon"`` entries, e.g. a row yielded
            by ``points.iterrows()``.

    Returns:
        ``(lat_index, lon_index)``. An index is ``None`` when the coordinate
        lies outside the grid. Previously a coordinate beyond the last grid
        line raised ``IndexError`` (from ``lats[i+1]`` / ``lons[i+1]``) while
        one before the first grid line returned ``None``; both now return
        ``None``.
    """
    lats = map.Y.transpose()[0]
    # Pair every grid line with its successor so the last entry is never
    # indexed past the end.
    lat_index = next(
        (
            i
            for i, (upper, lower) in enumerate(zip(lats[:-1], lats[1:]))
            if upper >= point["lat"] >= lower
        ),
        None,
    )

    lons = map.X[0]
    lon_index = next(
        (
            i
            for i, (left, right) in enumerate(zip(lons[:-1], lons[1:]))
            if left <= point["lon"] <= right
        ),
        None,
    )

    return (lat_index, lon_index)

In [None]:
# Mask with the 2-D shape of the grid, initially all zero; pixels to be
# recalculated are set to 1 in the next cell.
# NOTE(review): this rebinds `recalc`, which the first cell imports from
# `gpmap` as a function (used by `%memit recalc()`).
raster_shape = map.grid.shape[1:]
recalc = np.zeros(raster_shape)
recalc.shape

In [None]:
# Mark a square window around every point for recalculation.
# Changes from the per-pixel nested loops: the window is written with one
# clamped slice assignment (the Python loops are very slow at this
# resolution), the outer loop index is no longer shadowed by the inner one,
# and points that fall outside the grid are skipped instead of failing on a
# `None` index.
n_rows, n_cols = recalc.shape
for _, point in points.iterrows():
    lat_pixel, lon_pixel = pixel_from_point(point)
    if lat_pixel is None or lon_pixel is None:
        # Point lies outside the grid; there is no pixel to centre a window on.
        continue

    # NOTE(review): the window is half-open, [p - r, p + r), exactly as the
    # original ranges were; widen by one if pixel p + r should be included.
    row_start = max(lat_pixel - recalc_radius_pixels, 0)
    row_stop = min(lat_pixel + recalc_radius_pixels, n_rows)
    col_start = max(lon_pixel - recalc_radius_pixels, 0)
    col_stop = min(lon_pixel + recalc_radius_pixels, n_cols)
    recalc[row_start:row_stop, col_start:col_stop] = 1

In [None]:
# Show which pixels were marked for recalculation. Explicit figure/axes and a
# title that names the data instead of the generic "Heatmap of 2D Array".
fig, ax = plt.subplots()
image = ax.imshow(recalc, cmap='viridis', interpolation='nearest')

# Colour bar: 1 = marked for recalculation, 0 = untouched
fig.colorbar(image, ax=ax)

ax.set_title("Recalculation windows around the new points (high res)")
ax.set_xlabel("Pixel column (lon index)")
ax.set_ylabel("Pixel row (lat index)")

plt.show()

In [None]:
# Element-wise product: a pixel stays marked only where the landmass raster
# is non-zero as well.
recalc_landmass = np.multiply(recalc, landmass)

In [None]:
# Show the recalculation mask after multiplying it with the landmass raster.
# Explicit figure/axes and a title that names the data instead of the generic
# "Heatmap of 2D Array".
fig, ax = plt.subplots()
image = ax.imshow(recalc_landmass, cmap='viridis', interpolation='nearest')

# Colour bar shows the value scale of the mask
fig.colorbar(image, ax=ax)

ax.set_title("Recalculation windows restricted to the landmass raster (high res)")
ax.set_xlabel("Pixel column (lon index)")
ax.set_ylabel("Pixel row (lat index)")

plt.show()

In [None]:
# For each raster print: sum of its values, total pixel count, and the ratio
# of the two (order: recalc, landmass, recalc restricted to landmass).
for raster in (recalc, landmass, recalc_landmass):
    pixel_count = raster.shape[0] * raster.shape[1]
    print(raster.sum(), pixel_count, raster.sum() / pixel_count)

In [None]:
# Training data: every point up to 2024-03-30.
# NOTE(review): this rebinds `points`, which the earlier cells use for the
# 2024-01-30 .. 2024-03-30 window; re-running those cells afterwards will mark
# windows around all points.
points = get_points("dump.sqlite", until=pd.Timestamp("2024-03-30"))
points["lon"] = points.geometry.x
points["lat"] = points.geometry.y

X = points[["lon", "lat"]].values
y = points["wait"].values
# Was a bare expression in the middle of the cell, which displays nothing.
print(X.shape, y.shape)

# SECURITY: pickle.load can execute arbitrary code; only load model files that
# were produced by this project.
with open("models/kernel.pkl", "rb") as file:
    gpr = pickle.load(file)

# Show the optimizer before and after clearing it. Clearing presumably keeps
# the pickled kernel hyperparameters fixed during the refit — TODO confirm
# against the regressor's documentation.
print(gpr.regressor.optimizer)
gpr.regressor.optimizer = None
print(gpr.regressor.optimizer)

gpr = fit_gpr_silent(gpr, X, y)

In [None]:
# Update the January raster in place: only pixels marked in `recalc_landmass`
# are re-predicted with the refitted model, everything else keeps its January
# value. The result is stored as version "mar_updated".
version = "mar_updated"
model = gpr
verbose = True

model_name = type(model).__name__

raster_maker = MapBasedModel(
    method=model_name,
    region=region,
    resolution=res,
    version=version,
    verbose=verbose,
)

# NOTE: `raw_raster` and `jan_map` are the same array object, so the writes
# below also modify `jan_map`.
jan_map = load_numpy_map(resolution=res, method=method, version="jan")
raster_maker.raw_raster = jan_map

raster_maker.get_map_grid()

batch_size = 10000


def _predict_batch(batch_coords, batch_pixels):
    """Predict one batch of lon-lat pairs and write the values into the raster."""
    batch_prediction = model.predict(np.array(batch_coords), return_std=False)
    for value, (row, col) in zip(batch_prediction, batch_pixels):
        raster_maker.raw_raster[row][col] = value


print("Compute pixels that are expected to differ...")
start = time.time()
# Transposed once (was computed twice): iterating the transposed grid walks it
# column by column, and each element is a lon-lat pair for the model.
grid_columns = raster_maker.grid.transpose()
to_predict = []
pixels_to_predict = []
# `row` / `col` replace the former loop names `y` / `x`: `y` overwrote the
# training targets of the previous cell, and both were rebound inside the
# loop that iterates over them.
for col, vertical_line in tqdm(enumerate(grid_columns), total=len(grid_columns)):
    for row, coords in enumerate(vertical_line):
        if recalc_landmass[row][col] == 0:
            continue
        to_predict.append([float(coords[0]), float(coords[1])])
        pixels_to_predict.append((row, col))
        # batching the model calls
        if len(to_predict) == batch_size:
            _predict_batch(to_predict, pixels_to_predict)
            to_predict = []
            pixels_to_predict = []

# Flush the remainder. Skipped when empty: with zero marked pixels, or a count
# that is an exact multiple of the batch size, the model would otherwise be
# handed an empty 1-D array.
if to_predict:
    _predict_batch(to_predict, pixels_to_predict)

elapsed = time.time() - start
total_pixels = raster_maker.raw_raster.shape[0] * raster_maker.raw_raster.shape[1]
recalculated_pixels = recalc_landmass.sum()

print(f"Time elapsed to compute full map: {elapsed}")
print(
    f"For map of shape: {raster_maker.raw_raster.shape} that is {total_pixels} pixels and an effective time per pixel of {elapsed / total_pixels} seconds"
)
print((f"Only {recalculated_pixels} pixels were recalculated. That is {recalculated_pixels / total_pixels * 100}% of the map."))
print(f"And time per recalculated pixel was {elapsed / recalculated_pixels} seconds")

save_numpy_map(
    raster_maker.raw_raster,
    region=region,
    method=model_name,
    resolution=res,
    version=version,
)

raster_maker.save_as_raster()
raster_maker.build_map() # for march
a = raster_maker.raw_raster

In [None]:
# January baseline: load the stored "jan" raster, write it out again and build
# its map; `b` is the raster compared against `a` in the next cell.
version = "jan"
verbose = True

model_name = type(model).__name__
jan_map = load_numpy_map(resolution=res, method=method, version="jan")

january_settings = dict(
    method=model_name,
    region=region,
    resolution=res,
    version=version,
    verbose=verbose,
)
raster_maker = MapBasedModel(**january_settings)
raster_maker.get_map_grid()
raster_maker.raw_raster = jan_map
raster_maker.save_as_raster()
raster_maker.build_map()
b = raster_maker.raw_raster

In [None]:
# Boolean mask: True where the updated raster `a` and the January raster `b`
# are not close (absolute tolerance `tolerance`, plus np.isclose's default
# relative tolerance).
rasters_close = np.isclose(a, b, atol=tolerance)
diff = np.logical_not(rasters_close)

In [None]:
# Show where the updated raster differs from the January raster. Explicit
# figure/axes and a title that names the data instead of the generic
# "Heatmap of 2D Array".
fig, ax = plt.subplots()
image = ax.imshow(diff, cmap='viridis', interpolation='nearest')

# Colour bar: True (1) = pixel differs, False (0) = within tolerance
fig.colorbar(image, ax=ax)

ax.set_title('Pixels where "mar_updated" differs from "jan" (high res)')
ax.set_xlabel("Pixel column")
ax.set_ylabel("Pixel row")

plt.show()

# determine area of map to update around newly added points

In [None]:
# For every pair of consecutive months: load both stored rasters, mark the
# pixels that differ by more than the tolerance, and build a map of that mask
# together with the points recorded between the two dates.
months = ["jan", "mar", "may", "jul", "sep", "nov"]
dates = ["2024-1-30", "2024-3-30", "2024-5-30", "2024-7-30", "2024-9-30", "2024-11-30"]

# Pairing consecutive entries keeps the loop in step with the lists; the
# iteration count was previously hard-coded as range(5).
periods = list(zip(months, dates))
for (month_from, date_from), (month_to, date_to) in zip(periods, periods[1:]):
    print(f"Months: {month_from} - {month_to}")
    points = get_points("dump.sqlite", begin=pd.Timestamp(date_from), until=pd.Timestamp(date_to))
    points["lon"] = points.geometry.x
    points["lat"] = points.geometry.y

    map1 = load_numpy_map(resolution=res, method=method, version=month_from)
    map2 = load_numpy_map(resolution=res, method=method, version=month_to)
    diff = ~np.isclose(map1, map2, atol=tolerance)
    # Scale the boolean mask to 0 / 100 before it is used as a raster.
    diff = diff * 100

    # NOTE(review): every pair is saved under version "diff_mar", so each
    # iteration presumably overwrites the previous raster — confirm whether a
    # per-month version name was intended.
    map = MapBasedModel(method=method, region="world", resolution=res, version="diff_mar")
    map.raw_raster = diff
    map.get_map_grid()
    map.save_as_raster()
    map.build_map(points=points, show_points=True)