## 1. Import Dependencies

In [1]:
import os

# Opt GeoPandas out of the PyGEOS backend. Per the original note, this resolves
# a conflict inside GeoPandas and avoids a slower code path.
# The variable is set here, ahead of the `mosaiks` import further down --
# presumably it has to be in place before GeoPandas is first imported.
os.environ.update({"USE_PYGEOS": "0"})

## 2. Create test data
In this case, we define a small set of example GPS coordinates by hand instead of loading them from a file.

In [2]:
# Five example points in Uttar Pradesh, India.
# The two lists are parallel: lats[i] and lons[i] together describe point i.
lats = [
    26.51268717,
    26.55187804,
    26.54949092,
    26.54105597,
    26.54843896,
]
lons = [
    80.51489844,
    80.54864309,
    80.57813289,
    80.51412136,
    80.52254959,
]

## 3. Execute a default run of the `get_features` function

In [3]:
from mosaiks import get_features

# Default (non-parallel) run: featurise every (lat, lon) pair, searching the
# 2013 calendar year (search_start / search_end) with image_width set to 3000.
df_featurised = get_features(
    lats, lons, image_width=3000, search_start="2013-01-01", search_end="2013-12-31"
)

# Leave the frame as the cell's last expression so the notebook renders it.
df_featurised

Unnamed: 0,mosaiks_0,mosaiks_1,mosaiks_2,mosaiks_3,mosaiks_4,mosaiks_5,mosaiks_6,mosaiks_7,mosaiks_8,mosaiks_9,...,mosaiks_3991,mosaiks_3992,mosaiks_3993,mosaiks_3994,mosaiks_3995,mosaiks_3996,mosaiks_3997,mosaiks_3998,mosaiks_3999,stac_id
0,1.6e-05,0.647995,0.0,0.0,1.1e-05,3.135374,0.0,0.0,0.0,0.005591,...,6.090156,2.323138,0.327668,1.336866,1.492111,0.001369,8.252589,0.0,7.045659,"[LC08_L2SP_144042_20130429_02_T1, LC08_L2SP_14..."
1,0.0,0.551128,0.0,0.0,0.0,2.930943,0.0,0.0,0.0,0.004028,...,5.771402,2.192217,0.373875,1.346451,1.481802,0.00017,7.859435,5.7e-05,6.742852,"[LC08_L2SP_144042_20130429_02_T1, LC08_L2SP_14..."
2,0.0,0.362864,0.0,0.0,0.0,2.636176,0.0,0.0,0.0,0.002734,...,5.688252,2.175198,0.5712,1.30489,1.418789,0.001191,7.534529,0.0,6.409752,"[LC08_L2SP_144042_20130429_02_T1, LC08_L2SP_14..."
3,0.0,0.55164,0.0,0.0,0.0,2.817587,0.0,0.0,0.0,0.003701,...,5.643971,2.212207,0.328474,1.318264,1.460062,5e-05,7.646603,0.0,6.575275,"[LC08_L2SP_144042_20130429_02_T1, LC08_L2SP_14..."
4,0.0,0.181617,0.0,0.0,0.0,1.728694,0.0,0.0,0.0,0.001331,...,4.216232,1.82592,0.481479,1.241069,1.338898,0.002317,5.678021,0.002192,4.964864,"[LC08_L2SP_144042_20130429_02_T1, LC08_L2SP_14..."


## 4. Run `get_features` with Dask parallelization

To run the code with the built-in Dask parallelization, set `parallelize` to `True` and `dask_chunksize` to a suitable integer given the size of your dataset.

In [None]:
# Same request as the default run, now with the built-in Dask parallelization
# switched on. `dask_chunksize` should be chosen to suit the size of the dataset.
df_featurised = get_features(
    lats,
    lons,
    # Parallelization settings
    parallelize=True,
    dask_chunksize=100,
    # Same image and search-window settings as the default run
    image_width=3000,
    search_start="2013-01-01",
    search_end="2013-12-31",
)

# Leave the frame as the cell's last expression so the notebook renders it.
df_featurised

## 5. Run the utility function to load data and save features
When you want to load data, run featurisation, and save the features to disk in a single quiet step, you can use the `load_and_save_features` function:

In [None]:
# Save the test coordinates to a CSV file so they can be loaded back later.
import pandas as pd

df = pd.DataFrame({"lat": lats, "lon": lons})

# index=False keeps the DataFrame's default integer index out of the file.
# Without it, to_csv writes the index as an extra unnamed first column, which
# comes back as "Unnamed: 0" when the CSV is read again. The file should hold
# only the "lat" and "lon" columns.
df.to_csv("test_data.csv", index=False)

In [None]:
from mosaiks.extras import load_and_save_features

# One-step pipeline: load the points from "test_data.csv", run featurisation,
# and save the features to disk, keeping the input "lat"/"lon" columns as context.
# NOTE(review): `output_folderpath` is given a ".csv" file name here -- confirm
# whether the function expects a folder or a file path.
load_and_save_features(
    # Input file, output location, and which columns hold the coordinates
    input_file_path="test_data.csv",
    output_folderpath="test_features.csv",
    lat_col="lat",
    lon_col="lon",
    context_cols_to_keep_from_input=["lat", "lon"],
    # Same featurisation settings as the `get_features` runs above
    image_width=3000,
    search_start="2013-01-01",
    search_end="2013-12-31",
)