In [None]:
# Install the notebook's dependencies into the kernel's environment.
# This is the same package list passed as `requirements` to the download-data
# functions registered further down; only pandas carries a version pin.
%pip install rasterio xarray rioxarray owslib osmnx h3 folium pandas==1.3.5

In [1]:
import mlrun

In [2]:
# NOTE(review): `query` is not referenced by any later cell visible in this
# notebook — presumably meant as a pipeline/function parameter; confirm or remove.
query = "Bologna"

# Create Project

In [3]:
# Get (or create) the "mobility-data" MLRun project, using the notebook's
# directory as the project context.
project = mlrun.get_or_create_project(
    "mobility-data",
    context="./",
)

> 2024-06-13 14:18:43,348 [info] Project loaded successfully: {'project_name': 'mobility-data'}


# Define and Build Functions: Download OSM-related Data

In [4]:
# The three OSM/DEM jobs live in the same source file and need the same
# dependencies; only the function name and handler differ.
download_data_requirements = [
    "rasterio",
    "xarray",
    "rioxarray",
    "owslib",
    "osmnx",
    "h3",
    "folium",
    "pandas==1.3.5",
]
download_data_handlers = (
    ("download-dem", "download_dem"),
    ("download-osm", "download_osm"),
    ("merge-osm-dem", "merge_osm_dem"),
)
for function_name, handler_name in download_data_handlers:
    project.set_function(
        "src/download-data.py",
        name=function_name,
        kind="job",
        image="mlrun/mlrun",
        handler=handler_name,
        # Fresh list per call, as in the original one-literal-per-call form.
        requirements=list(download_data_requirements),
    )

# The sharing job comes from its own source file and declares no extra requirements.
project.set_function(
    "src/share-data.py",
    name="share",
    kind="job",
    image="mlrun/mlrun",
    handler="share_files",
)
project.save()



<mlrun.projects.project.MlrunProject at 0x7ff458a28040>

In [None]:
# Build the three functions that were registered with extra requirements,
# in the same order as they were registered, then persist the project.
for function_name in ("download-dem", "download-osm", "merge-osm-dem"):
    project.build_function(function_name)
project.save()

# Define and Build Functions: Download Bologna Open Data

In [5]:
# Every open-data job is a handler in the same source file with the same
# geopandas pin; map each registered function name to its handler.
open_data_handlers = {
    "download-road-areas": "download_road_areas",
    "download-road-edges": "download_road_edges",
    "download-road-nodes": "download_road_nodes",
    "download-curves": "download_curves",
    "download-sidewalks": "download_sidewalks",
    "download-city30": "download_city_30",
}
for function_name, handler_name in open_data_handlers.items():
    project.set_function(
        "src/download-open-data.py",
        name=function_name,
        kind="job",
        image="mlrun/mlrun",
        handler=handler_name,
        requirements=["geopandas==0.13.2"],
    )
project.save()



<mlrun.projects.project.MlrunProject at 0x7ff458a28040>

In [None]:
# Build each open-data function in registration order, then persist the project.
open_data_function_names = (
    "download-road-areas",
    "download-road-edges",
    "download-road-nodes",
    "download-curves",
    "download-sidewalks",
    "download-city30",
)
for function_name in open_data_function_names:
    project.build_function(function_name)
project.save()

# Define and Execute Pipeline

In [6]:
# Register the pipeline workflow (entry point: `pipeline` in the workflow
# source file) under the name used later by `project.run`, then persist it.
project.set_workflow(
    "mobility-data-pipeline",
    "./src/mobility-data-pipeline.py",
    handler="pipeline",
)
project.save()

<mlrun.projects.project.MlrunProject at 0x7ff458a28040>

### Initialize Google Token for GDrive connection

In [None]:
import os

# Prefer a GOOGLE_TOKEN already exported in the kernel's environment, so the
# real credential never has to be pasted into (and saved with) the notebook
# and an existing value is not clobbered by the placeholder.
google_token_placeholder = '<GOOGLE_TOKEN_JSON>'
google_token = os.environ.get('GOOGLE_TOKEN') or google_token_placeholder

# Fail fast instead of registering the placeholder as the project secret.
if google_token == google_token_placeholder:
    raise ValueError(
        "GOOGLE_TOKEN is not configured: export GOOGLE_TOKEN before starting "
        "the kernel, or assign the token JSON to `google_token` in this cell."
    )

os.environ['GOOGLE_TOKEN'] = google_token  # for local execution
project.set_secrets({"GOOGLE_TOKEN": google_token})  # for remote execution

### Run Pipeline

In [None]:
# Run the registered workflow, storing its artifacts under the given S3 path.
pipeline_artifact_path = 's3://datalake/projects/mobility-data/artifacts/mobility-data-pipeline'
project.run(
    "mobility-data-pipeline",
    artifact_path=pipeline_artifact_path,
)

# Define and Build Functions: Download Traffic Spire

In [None]:
# Register the traffic-spire download job, persist the project, and launch
# the job as a non-local run (local=False).
spire_function_name = "download-traffic-spire"
project.set_function(
    "src/download-spire.py",
    name=spire_function_name,
    kind="job",
    image="mlrun/mlrun",
    handler="get_spire",
)
project.save()
project.run_function(spire_function_name, local=False)

> 2024-06-13 14:56:44,998 [info] Storing function: {'name': 'download-traffic-spire-get-spire', 'uid': '6a14afad918647adbed208751626b3ac', 'db': 'http://mlrun-api:8080'}
> 2024-06-13 14:56:47,166 [info] Job is running in the background, pod: download-traffic-spire-get-spire-l6vhj
