# Notebook prep

In [1]:
# Enable IPython's autoreload extension so edits to imported modules are
# picked up live, without restarting the kernel.
%load_ext autoreload
%autoreload 2

## Import packages

In [2]:
# Force GeoPandas onto Shapely only by switching PyGEOS off through the
# USE_PYGEOS environment variable; this avoids a Shapely/PyGEOS conflict.
# NOTE: this cell has to run before geopandas is imported.
import os

os.environ.update(USE_PYGEOS="0")

In [3]:
# Configure the root logger to emit INFO-level (and higher) messages.
import logging

logging.basicConfig(level="INFO")

# Load Data

In [4]:
import pandas as pd

In [5]:
# Load the request points and keep a small subset for quick testing.
# NOTE: the subset is the FIRST N_TEST_POINTS rows of the file, not a random sample.
# NOTE: despite the `_gdf` suffix, both frames are plain pandas DataFrames
# (they are read with pd.read_csv).
POINTS_CSV_PATH = "../data/01_preprocessed/mosaiks_request_points/focus_shrid_centroids.csv"
N_TEST_POINTS = 10  # number of leading rows used by the checks below

points_gdf = pd.read_csv(POINTS_CSV_PATH, index_col=0)
# points_gdf = pd.read_csv("../tests/data/test_points.csv")
test_points_gdf = points_gdf.head(N_TEST_POINTS)

In [6]:
# Display the test subset; the "Lat" and "Lon" columns are what get passed to get_features.
test_points_gdf

Unnamed: 0,pc11_s_id,pc11_d_id,pc11_sd_id,pc11_tv_id,is_urban,shrid,Lon,Lat
64240,8,99,457,64088,0,11-08-064088,73.411615,29.931021
64241,8,99,457,64089,0,11-08-064089,73.432416,29.94573
64242,8,99,457,64090,0,11-08-064090,73.455975,29.956586
64243,8,99,457,64091,0,11-08-064091,73.481895,29.964403
64244,8,99,457,64092,0,11-08-064092,73.505497,29.970132
64245,8,99,457,64093,0,11-08-064093,73.527965,29.980572
64246,8,99,457,64094,0,11-08-064094,73.551026,29.990531
64247,8,99,457,64095,0,11-08-064095,73.57125,29.997605
64248,8,99,457,64096,0,11-08-064096,73.590768,30.001459
64249,8,99,457,64097,0,11-08-064097,73.612898,30.00663


## Check pipeline run

In [7]:
from mosaiks import get_features

In [None]:
# Baseline run: fetch features for the test points with parallelisation switched off
df = get_features(test_points_gdf["Lat"], test_points_gdf["Lon"], parallelize=False)
df

In [8]:
# Start a local Dask cluster (2 workers, 1 thread each) and connect a client to it.
from dask.distributed import Client, LocalCluster

cluster = LocalCluster(
    n_workers=2,
    threads_per_worker=1,
)
client = Client(cluster)

INFO:distributed.http.proxy:To route to workers diagnostics web server please install jupyter-server-proxy: python -m pip install jupyter-server-proxy
INFO:distributed.scheduler:State start
INFO:distributed.diskutils:Found stale lock file and directory '/var/folders/99/cwxq7sdx5d94rqxj57_crqbc0000gn/T/dask-scratch-space/worker-g1i98b9o', purging
INFO:distributed.diskutils:Found stale lock file and directory '/var/folders/99/cwxq7sdx5d94rqxj57_crqbc0000gn/T/dask-scratch-space/worker-o7gnnm17', purging
INFO:distributed.diskutils:Found stale lock file and directory '/var/folders/99/cwxq7sdx5d94rqxj57_crqbc0000gn/T/dask-scratch-space/scheduler-1jo_qyxa', purging
INFO:distributed.scheduler:  Scheduler at:     tcp://127.0.0.1:60010
INFO:distributed.scheduler:  dashboard at:  http://127.0.0.1:8787/status
INFO:distributed.nanny:        Start Nanny at: 'tcp://127.0.0.1:60013'
INFO:distributed.nanny:        Start Nanny at: 'tcp://127.0.0.1:60014'
INFO:distributed.scheduler:Register worker <Worke

In [15]:
# Parallel run: same test points, submitted through the existing Dask `client`
df = get_features(
    test_points_gdf["Lat"], test_points_gdf["Lon"],
    parallelize=True, dask_client=client, dask_chunksize=50,
)
df

INFO:root:Checking inputs...
INFO:root:Formatting data and creating model...
INFO:root:Getting MOSAIKS features...
INFO:root:Temporary directory: /Users/amiremami/IDinsight Repos/mosaiks/playground/dask_2023-07-20_16-48-47
INFO:root:Running 1 partition of 50 points each.
INFO:root:20-Jul 16:48:47 Running batch: 0 to 0
INFO:root:Loading and combining checkpoint files...


Unnamed: 0,mosaiks_0,mosaiks_1,mosaiks_2,mosaiks_3,mosaiks_4,mosaiks_5,mosaiks_6,mosaiks_7,mosaiks_8,mosaiks_9,...,mosaiks_3991,mosaiks_3992,mosaiks_3993,mosaiks_3994,mosaiks_3995,mosaiks_3996,mosaiks_3997,mosaiks_3998,mosaiks_3999,stac_id
64240,5.1e-05,0.748541,0.0,0.0,2.9e-05,2.932055,0.0,0.0,0.0,0.009794,...,6.717982,2.625506,0.215642,1.239383,1.40081,7e-06,8.744119,5.2e-05,6.55844,"[LC08_L2SP_149039_20131212_02_T1, LC08_L2SP_14..."
64241,0.000225,0.291302,0.0,0.0,6.8e-05,1.993396,0.0,0.0,0.0,0.003603,...,4.684032,1.938867,0.39584,1.319809,1.394013,0.000584,6.378771,0.000155,5.293884,"[LC08_L2SP_149039_20131212_02_T1, LC08_L2SP_14..."
64242,0.001348,0.227962,0.0,0.0,7.7e-05,1.86157,0.0,0.0,0.0,0.002462,...,4.458595,1.812618,0.441989,1.3474,1.405281,0.000689,6.135809,0.000835,5.091711,"[LC08_L2SP_149039_20131212_02_T1, LC08_L2SP_14..."
64243,0.001145,0.324577,1.7e-05,0.0,0.000125,2.077808,0.0,0.0,0.0,0.006601,...,4.781091,1.943018,0.35642,1.34322,1.418509,0.000915,6.569235,0.000452,5.365047,"[LC08_L2SP_149039_20131212_02_T1, LC08_L2SP_14..."
64244,0.001422,0.317001,0.0,0.0,0.000305,2.068417,0.0,0.0,0.0,0.002483,...,4.326528,1.769958,0.294729,1.422448,1.476887,0.000939,6.235669,0.000353,5.339759,"[LC08_L2SP_149039_20131212_02_T1, LC08_L2SP_14..."
64245,0.003111,0.421574,0.0,0.0,0.00074,2.260952,0.0,0.0,0.0,0.009871,...,4.988877,1.987712,0.297145,1.364083,1.451255,0.000973,6.912478,0.001,5.599003,"[LC08_L2SP_149039_20131212_02_T1, LC08_L2SP_14..."
64246,0.001872,0.378558,0.0,0.0,0.000852,2.199734,0.0,0.0,0.0,0.007018,...,4.956841,1.975404,0.344086,1.379479,1.446168,0.001448,6.855894,0.002131,5.53933,"[LC08_L2SP_149039_20131212_02_T1, LC08_L2SP_14..."
64247,0.0048,0.357218,2.6e-05,0.0,0.001592,2.180335,0.0,0.0,0.0,0.004628,...,4.692131,1.884654,0.318314,1.432377,1.477475,0.001446,6.659574,0.003158,5.490084,"[LC08_L2SP_149039_20131212_02_T1, LC08_L2SP_14..."
64248,0.00362,0.248236,3e-06,0.0,0.000643,1.926588,0.0,0.0,0.0,0.002313,...,4.11611,1.663922,0.344542,1.454411,1.484828,0.001205,6.012654,0.002457,5.135149,"[LC08_L2SP_149039_20131212_02_T1, LC08_L2SP_14..."
64249,0.004253,0.158255,2.1e-05,0.0,0.001312,1.727535,0.0,0.0,0.0,0.000456,...,3.787818,1.538426,0.401735,1.468349,1.477377,0.002091,5.608218,0.004336,4.85715,"[LC08_L2SP_149039_20131212_02_T1, LC08_L2SP_14..."


# Check wrapper function


In [None]:
from mosaiks.extras import load_and_save_features

In [13]:
%time
load_and_save_features(
    input_file_path="../data/01_preprocessed/mosaiks_request_points/focus_shrid_centroids.csv",
    output_folderpath="COMBINED_features.csv",
    context_cols_to_keep_from_input=["Lat", "Lon", "shrid"],
    index_col=0,
    parallelize=True,
)

CPU times: user 2 µs, sys: 0 ns, total: 2 µs
Wall time: 6.91 µs


NameError: name 'load_and_save_features' is not defined

In [None]:
# # Check wrapper function
# from mosaiks.extras import load_and_save_features

# load_and_save_features(
#     input_file_path="../tests/data/test_points.csv",
#     output_folderpath="../tests/data/test_features.csv",
#     context_cols_to_keep_from_input=["Lat", "Lon"],
# )