# Notebook prep

In [1]:
# Auto-reload imported modules before each cell runs, so edits to the
# package source are picked up without restarting the kernel.
%load_ext autoreload
%autoreload 2

## Import packages

In [2]:
# Make GeoPandas use Shapely only, which avoids a conflict between
# Shapely and PyGEOS.
# NOTE: this must run before geopandas is imported.
import os

os.environ.update(USE_PYGEOS="0")

In [3]:
# Emit INFO-level (and above) log records so pipeline progress is visible
# in the cell output.
import logging
logging.basicConfig(level=logging.INFO)

# Load Data

In [4]:
import pandas as pd

In [5]:
# Load the request points and keep the first N_TEST_POINTS rows as a small,
# deterministic subset for testing (this is a head slice, not a random sample).
# Alternative input: pd.read_csv("../tests/data/test_points.csv")
N_TEST_POINTS = 100

points_gdf = pd.read_csv(
    "../data/01_preprocessed/mosaiks_request_points/focus_shrid_centroids.csv",
    index_col=0,
)
test_points_gdf = points_gdf.iloc[:N_TEST_POINTS]

In [9]:
# Preview the test subset (pandas truncates the rendered table).
test_points_gdf

Unnamed: 0,pc11_s_id,pc11_d_id,pc11_sd_id,pc11_tv_id,is_urban,shrid,Lon,Lat
64240,8,99,457,64088,0,11-08-064088,73.411615,29.931021
64241,8,99,457,64089,0,11-08-064089,73.432416,29.945730
64242,8,99,457,64090,0,11-08-064090,73.455975,29.956586
64243,8,99,457,64091,0,11-08-064091,73.481895,29.964403
64244,8,99,457,64092,0,11-08-064092,73.505497,29.970132
...,...,...,...,...,...,...,...,...
64335,8,99,457,64183,0,11-08-064183,73.569569,29.894126
64336,8,99,457,64184,0,11-08-064184,73.590315,29.894766
64337,8,99,457,64185,0,11-08-064185,73.608573,29.898986
64338,8,99,457,64186,0,11-08-064186,73.630569,29.893151


# Check pipeline run

In [7]:
from mosaiks import get_features

In [None]:
# Sequential run: compute features with parallelisation switched off
latitudes = test_points_gdf["Lat"]
longitudes = test_points_gdf["Lon"]
df = get_features(latitudes, longitudes, parallelize=False)
df

In [12]:
# Parallel run over the same points (parallelize=True, dask_chunksize=50)
parallel_kwargs = {"parallelize": True, "dask_chunksize": 50}
df = get_features(
    test_points_gdf["Lat"],
    test_points_gdf["Lon"],
    **parallel_kwargs,
)
df

INFO:root:Checking inputs...
INFO:root:Formatting data and creating model...
INFO:root:Getting MOSAIKS features...
INFO:distributed.scheduler:State start
INFO:distributed.scheduler:  Scheduler at:     tcp://127.0.0.1:57282
INFO:distributed.scheduler:  dashboard at:  http://127.0.0.1:8787/status
INFO:distributed.nanny:        Start Nanny at: 'tcp://127.0.0.1:57287'
INFO:distributed.nanny:        Start Nanny at: 'tcp://127.0.0.1:57288'
INFO:distributed.nanny:        Start Nanny at: 'tcp://127.0.0.1:57285'
INFO:distributed.nanny:        Start Nanny at: 'tcp://127.0.0.1:57286'
INFO:distributed.scheduler:Register worker <WorkerState 'tcp://127.0.0.1:57294', name: 2, status: init, memory: 0, processing: 0>
INFO:distributed.scheduler:Starting worker compute stream, tcp://127.0.0.1:57294
INFO:distributed.core:Starting established connection to tcp://127.0.0.1:57303
INFO:distributed.scheduler:Register worker <WorkerState 'tcp://127.0.0.1:57293', name: 1, status: init, memory: 0, processing: 0>


Unnamed: 0,mosaiks_0,mosaiks_1,mosaiks_2,mosaiks_3,mosaiks_4,mosaiks_5,mosaiks_6,mosaiks_7,mosaiks_8,mosaiks_9,...,mosaiks_3991,mosaiks_3992,mosaiks_3993,mosaiks_3994,mosaiks_3995,mosaiks_3996,mosaiks_3997,mosaiks_3998,mosaiks_3999,stac_id
64240,0.000051,0.748541,0.000000,0.0,0.000029,2.932055,0.0,0.0,0.0,0.009794,...,6.717982,2.625506,0.215642,1.239383,1.400810,0.000007,8.744119,0.000052,6.558440,"[LC08_L2SP_149039_20131212_02_T1, LC08_L2SP_14..."
64241,0.000225,0.291302,0.000000,0.0,0.000068,1.993396,0.0,0.0,0.0,0.003603,...,4.684032,1.938867,0.395840,1.319809,1.394013,0.000584,6.378771,0.000155,5.293884,"[LC08_L2SP_149039_20131212_02_T1, LC08_L2SP_14..."
64242,0.001348,0.227962,0.000000,0.0,0.000077,1.861570,0.0,0.0,0.0,0.002462,...,4.458595,1.812618,0.441989,1.347400,1.405281,0.000689,6.135809,0.000835,5.091711,"[LC08_L2SP_149039_20131212_02_T1, LC08_L2SP_14..."
64243,0.001145,0.324577,0.000017,0.0,0.000125,2.077808,0.0,0.0,0.0,0.006601,...,4.781091,1.943018,0.356420,1.343220,1.418509,0.000915,6.569235,0.000452,5.365047,"[LC08_L2SP_149039_20131212_02_T1, LC08_L2SP_14..."
64244,0.001422,0.317001,0.000000,0.0,0.000305,2.068417,0.0,0.0,0.0,0.002483,...,4.326528,1.769958,0.294729,1.422448,1.476887,0.000939,6.235669,0.000353,5.339759,"[LC08_L2SP_149039_20131212_02_T1, LC08_L2SP_14..."
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
64335,0.004655,0.239240,0.000008,0.0,0.000868,1.938740,0.0,0.0,0.0,0.000643,...,3.994800,1.572857,0.340763,1.470960,1.503547,0.002476,5.905729,0.002246,5.131596,"[LC08_L2SP_149039_20131212_02_T1, LC08_L2SP_14..."
64336,0.006553,0.340233,0.000016,0.0,0.002029,2.144833,0.0,0.0,0.0,0.001593,...,4.210586,1.664815,0.262020,1.487726,1.531238,0.002996,6.271256,0.001169,5.415467,"[LC08_L2SP_149039_20131212_02_T1, LC08_L2SP_14..."
64337,0.007216,0.267534,0.000029,0.0,0.002331,1.993466,0.0,0.0,0.0,0.001269,...,3.783626,1.500657,0.290552,1.538478,1.550738,0.005016,5.840925,0.001871,5.205710,"[LC08_L2SP_149039_20131212_02_T1, LC08_L2SP_14..."
64338,0.006962,0.246378,0.000034,0.0,0.001422,1.965580,0.0,0.0,0.0,0.002750,...,4.059294,1.607396,0.340050,1.466098,1.500963,0.003116,5.985090,0.002377,5.189353,"[LC08_L2SP_149039_20131212_02_T1, LC08_L2SP_14..."


In [13]:
# Re-display the features table currently held in `df`
# (whichever get_features cell above was run last).
df

Unnamed: 0,mosaiks_0,mosaiks_1,mosaiks_2,mosaiks_3,mosaiks_4,mosaiks_5,mosaiks_6,mosaiks_7,mosaiks_8,mosaiks_9,...,mosaiks_3991,mosaiks_3992,mosaiks_3993,mosaiks_3994,mosaiks_3995,mosaiks_3996,mosaiks_3997,mosaiks_3998,mosaiks_3999,stac_id
64240,0.000051,0.748541,0.000000,0.0,0.000029,2.932055,0.0,0.0,0.0,0.009794,...,6.717982,2.625506,0.215642,1.239383,1.400810,0.000007,8.744119,0.000052,6.558440,"[LC08_L2SP_149039_20131212_02_T1, LC08_L2SP_14..."
64241,0.000225,0.291302,0.000000,0.0,0.000068,1.993396,0.0,0.0,0.0,0.003603,...,4.684032,1.938867,0.395840,1.319809,1.394013,0.000584,6.378771,0.000155,5.293884,"[LC08_L2SP_149039_20131212_02_T1, LC08_L2SP_14..."
64242,0.001348,0.227962,0.000000,0.0,0.000077,1.861570,0.0,0.0,0.0,0.002462,...,4.458595,1.812618,0.441989,1.347400,1.405281,0.000689,6.135809,0.000835,5.091711,"[LC08_L2SP_149039_20131212_02_T1, LC08_L2SP_14..."
64243,0.001145,0.324577,0.000017,0.0,0.000125,2.077808,0.0,0.0,0.0,0.006601,...,4.781091,1.943018,0.356420,1.343220,1.418509,0.000915,6.569235,0.000452,5.365047,"[LC08_L2SP_149039_20131212_02_T1, LC08_L2SP_14..."
64244,0.001422,0.317001,0.000000,0.0,0.000305,2.068417,0.0,0.0,0.0,0.002483,...,4.326528,1.769958,0.294729,1.422448,1.476887,0.000939,6.235669,0.000353,5.339759,"[LC08_L2SP_149039_20131212_02_T1, LC08_L2SP_14..."
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
64335,0.004655,0.239240,0.000008,0.0,0.000868,1.938740,0.0,0.0,0.0,0.000643,...,3.994800,1.572857,0.340763,1.470960,1.503547,0.002476,5.905729,0.002246,5.131596,"[LC08_L2SP_149039_20131212_02_T1, LC08_L2SP_14..."
64336,0.006553,0.340233,0.000016,0.0,0.002029,2.144833,0.0,0.0,0.0,0.001593,...,4.210586,1.664815,0.262020,1.487726,1.531238,0.002996,6.271256,0.001169,5.415467,"[LC08_L2SP_149039_20131212_02_T1, LC08_L2SP_14..."
64337,0.007216,0.267534,0.000029,0.0,0.002331,1.993466,0.0,0.0,0.0,0.001269,...,3.783626,1.500657,0.290552,1.538478,1.550738,0.005016,5.840925,0.001871,5.205710,"[LC08_L2SP_149039_20131212_02_T1, LC08_L2SP_14..."
64338,0.006962,0.246378,0.000034,0.0,0.001422,1.965580,0.0,0.0,0.0,0.002750,...,4.059294,1.607396,0.340050,1.466098,1.500963,0.003116,5.985090,0.002377,5.189353,"[LC08_L2SP_149039_20131212_02_T1, LC08_L2SP_14..."


# Check wrapper function


In [None]:
from mosaiks.extras import load_and_save_features

In [None]:
%time
load_and_save_features(
    input_file_path="../data/01_preprocessed/mosaiks_request_points/focus_shrid_centroids.csv",
    path_to_save_data="COMBINED_features.csv",
    context_cols_to_keep_from_input=["Lat", "Lon", "shrid"],
    index_col=0,
    parallelize=True,
)

In [None]:
# Disabled alternative (kept for reference): run the wrapper function on
# ../tests/data/test_points.csv instead of the focus_shrid_centroids.csv file.
# from mosaiks.extras import load_and_save_features

# load_and_save_features(
#     input_file_path="../tests/data/test_points.csv",
#     path_to_save_data="../tests/data/test_features.csv",
#     context_cols_to_keep_from_input=["Lat", "Lon"],
# )