# Common Practices for Spatial Data Science
* [code carbon tool](https://mlco2.github.io/codecarbon/)

In [None]:
from arcgis.features import GeoAccessor
import arcpy
from codecarbon import track_emissions
from configparser import ConfigParser
import logging
import numpy as np
import os
import pandas as pd

In [None]:
from traffic.read import read_sqlite_to_featureclass, read_sqlite_as_featureclass

## Ramp up the environment
* [Install and set up ArcGIS](https://developers.arcgis.com/python/guide/intro/)
* set up logging
* read config file
* fail early

In [None]:
def read_emissions(filepath: str) -> pd.DataFrame:
    """Load the CSV emissions log at *filepath* into a pandas DataFrame.

    Args:
        filepath: Path of the CSV file to read.

    Returns:
        The file's contents as parsed by ``pandas.read_csv`` with default
        options.
    """
    emissions_df = pd.read_csv(filepath)
    return emissions_df

In [None]:
# Set up logging and grab the logger named "codecarbon".
logging.basicConfig()
logger = logging.getLogger("codecarbon")

# Read the user configuration. ConfigParser.read() silently skips files it
# cannot open, so a missing "config.user" simply leaves the parser empty.
config = ConfigParser()
config.read("config.user")

# Fail early: look the option up with a None fallback instead of indexing,
# which would raise KeyError before the check below could ever run. A missing
# file, a missing option and an empty value all end in the same clear error.
traffic_filepath = config["DEFAULT"].get("TrafficFilePath")
if not traffic_filepath:
    raise ValueError("Traffic file path not specified!")

## 1) Understanding where
If you don’t know where you are, you are lost. Understanding where is about
putting the world in context. Where are you? What is around you? Very similar
to when you were two years old, your journey of spatial analysis
requires an understanding of how you fit into your geography.

### The problem
The traffic data contains agent positions using latitude and longitude. We need to create geometries for mapping the agent positions.

### The solutions
* read the data using a [spatially enabled data frame](https://developers.arcgis.com/python/guide/introduction-to-the-spatially-enabled-dataframe/)

* read the data using an [insert cursor](https://pro.arcgis.com/en/pro-app/latest/arcpy/data-access/insertcursor-class.htm)

#### Two different geometry implementations
* [Geometry - ArcGIS API for Python](https://developers.arcgis.com/python/api-reference/arcgis.geometry.html#geometry)
* [Geometry - arcpy](https://pro.arcgis.com/en/pro-app/latest/arcpy/classes/geometry.htm)

In [None]:
@track_emissions(
    project_name="Urban Digital Twin Bonn - Read SDF",
    output_file="log/emissions-read.user",
    offline=True,
    country_iso_code="USA",
)
def track_read_sdf(traffic_filepath: str):
    """Read all agent positions with ``read_sqlite_to_featureclass`` under emissions tracking.

    The result of the read is discarded; the function exists only so the
    ``track_emissions`` decorator can measure the call.

    Args:
        traffic_filepath: Path of the SQLite traffic database.
    """
    # NOTE(review): the project is named after Bonn but the offline country
    # code is "USA" -- confirm this is intended.
    agent_query = "SELECT * FROM agent_pos;"
    read_sqlite_to_featureclass(traffic_filepath, agent_query)
    
@track_emissions(
    project_name="Urban Digital Twin Bonn - Read FC",
    output_file="log/emissions-read.user",
    offline=True,
    country_iso_code="USA",
)
def track_read_fc(traffic_filepath: str):
    """Read all agent positions with ``read_sqlite_as_featureclass`` under emissions tracking.

    The result of the read is discarded; the function exists only so the
    ``track_emissions`` decorator can measure the call.

    Args:
        traffic_filepath: Path of the SQLite traffic database.
    """
    # NOTE(review): the project is named after Bonn but the offline country
    # code is "USA" -- confirm this is intended.
    agent_query = "SELECT * FROM agent_pos;"
    read_sqlite_as_featureclass(traffic_filepath, agent_query)

In [None]:
# Run the tracked read; track_read_sdf is configured with
# output_file="log/emissions-read.user".
track_read_sdf(traffic_filepath)

In [None]:
# Reload the emissions log from disk and display it as the cell output.
read_emissions_df = read_emissions("log/emissions-read.user")
read_emissions_df

In [None]:
# Run the second tracked read; track_read_fc uses the same output file as
# track_read_sdf.
track_read_fc(traffic_filepath)

In [None]:
# Reload the emissions log again and display it as the cell output.
read_emissions_df = read_emissions("log/emissions-read.user")
read_emissions_df

### Deep dive

In [None]:
import pyinstrument

In [None]:
%load_ext pyinstrument

In [None]:
%%pyinstrument

# Profiled cell body: read every row of agent_pos with
# read_sqlite_to_featureclass and display what it returns.
traffic_featureclass = read_sqlite_to_featureclass(traffic_filepath, "SELECT * FROM agent_pos;")
traffic_featureclass

In [None]:
# Load the result of the previous cell back through
# GeoAccessor.from_featureclass and display it for inspection.
traffic_sdf = GeoAccessor.from_featureclass(traffic_featureclass)
traffic_sdf

In [None]:
%%pyinstrument

# Profiled cell body: the same query as before, this time through
# read_sqlite_as_featureclass, so the two profiles can be compared.
traffic_featureclass = read_sqlite_as_featureclass(traffic_filepath, "SELECT * FROM agent_pos;")
traffic_featureclass

In [None]:
# Load the result of the previous cell back through
# GeoAccessor.from_featureclass and display it for inspection.
traffic_sdf = GeoAccessor.from_featureclass(traffic_featureclass)
traffic_sdf

### The trade-offs and alternatives

#### Use numpy to feature class
Your tools of choice are *df.apply*, *df.transform* and *dtypes*

[NumPy in ArcGIS](https://pro.arcgis.com/en/pro-app/3.2/arcpy/get-started/working-with-numpy-in-arcgis.htm)

We need to construct a structured NumPy array: each record holds an integer ID field and an XY coordinate pair.

In [None]:
# Hand-written two-record sample: an int32 'idfield' plus an 'XY' field
# holding two little-endian float64 values per record.
traffic_arr = np.array(
    [
        (1, (471316.3835861763, 5000448.782036674)),
        (2, (470402.49348005146, 5000049.216449278)),
    ],
    dtype=np.dtype([('idfield', np.int32), ('XY', '<f8', 2)]),
)
traffic_arr

We need to import the necessary modules for converting NumPy arrays.

In [None]:
from arcpy.da import NumPyArrayToFeatureClass
from arcpy.management import Delete

In [None]:
def df_to_numpy(traffic_df: pd.DataFrame):
    """Convert the trip/longitude/latitude columns into a structured NumPy array.

    Args:
        traffic_df: Frame with 'trip', 'longitude' and 'latitude' columns.

    Returns:
        A one-dimensional structured array with an int32 'idfield' (the trip)
        and a two-element float64 'XY' field holding (longitude, latitude).
    """
    record_dtype = np.dtype([('idfield', np.int32), ('XY', '<f8', 2)])
    rows = traffic_df[['trip', 'longitude', 'latitude']].values

    records = []
    for trip_id, lon, lat in rows:
        # X comes first: longitude, then latitude.
        records.append((trip_id, (lon, lat)))

    return np.array(records, record_dtype)

def numpy_to_featureclass(array, out_featureclass: str):
    """Write a structured NumPy array to a feature class.

    If something already exists at *out_featureclass* it is deleted first,
    so repeated calls with the same path start from a clean slate.

    Args:
        array: Structured array to convert. Its 'XY' field is passed to
            ``NumPyArrayToFeatureClass`` as the shape field.
        out_featureclass: Path of the feature class to create.

    Returns:
        ``out_featureclass``, unchanged, for convenient chaining.
    """
    if arcpy.Exists(out_featureclass):
        Delete(out_featureclass)

    # Convert the array passed in by the caller, not a notebook-level global,
    # so the function works for any input array.
    NumPyArrayToFeatureClass(array, out_featureclass, ['XY'])
    return out_featureclass

In [None]:
# Write the current traffic_arr to 'memory/traffic_data_arr' and read it back
# through GeoAccessor.from_featureclass to inspect the result.
traffic_featureclass_arr = numpy_to_featureclass(traffic_arr, 'memory/traffic_data_arr')
GeoAccessor.from_featureclass(traffic_featureclass_arr)

In [None]:
# Rebind traffic_arr to an array built from traffic_sdf, replacing whatever
# it held before, and display it.
traffic_arr = df_to_numpy(traffic_sdf)
traffic_arr

In [None]:
# Write the current traffic_arr to 'memory/traffic_data_arr' and read it back
# through GeoAccessor.from_featureclass to inspect the result.
traffic_featureclass_arr = numpy_to_featureclass(traffic_arr, 'memory/traffic_data_arr')
GeoAccessor.from_featureclass(traffic_featureclass_arr)

In [None]:
from traffic.read import read_sqlite_as_df

In [None]:
%%pyinstrument

# Profiled cell body: SQLite -> DataFrame -> structured array -> feature class,
# then display the value returned by numpy_to_featureclass.
traffic_df = read_sqlite_as_df(traffic_filepath, "SELECT * FROM agent_pos;")
traffic_arr = df_to_numpy(traffic_df)
traffic_featureclass_arr = numpy_to_featureclass(traffic_arr, 'memory/traffic_data_arr')
traffic_featureclass_arr

In [None]:
@track_emissions(
    project_name="Urban Digital Twin Bonn - Read NP",
    output_file="log/emissions-read.user",
    offline=True,
    country_iso_code="USA",
)
def track_read_np(traffic_filepath: str):
    """Run the DataFrame -> NumPy -> feature class pipeline under emissions tracking.

    Reads all rows of ``agent_pos``, converts them with ``df_to_numpy`` and
    hands the array to ``numpy_to_featureclass`` with the target
    'memory/traffic_data_arr'. Nothing is returned.

    Args:
        traffic_filepath: Path of the SQLite traffic database.
    """
    agent_positions = read_sqlite_as_df(traffic_filepath, "SELECT * FROM agent_pos;")
    agent_records = df_to_numpy(agent_positions)
    numpy_to_featureclass(agent_records, 'memory/traffic_data_arr')

In [None]:
# Run the tracked NumPy-based read; track_read_np is configured with
# output_file="log/emissions-read.user".
track_read_np(traffic_filepath)

In [None]:
# Reload the emissions log and display it sorted ascending by the
# 'emissions' column.
read_emissions_df = read_emissions("log/emissions-read.user")
read_emissions_df.sort_values(by='emissions')

In [None]:
import geopandas as gpd
from geopandas import GeoDataFrame

In [None]:
def df_to_gpd(traffic_df: pd.DataFrame):
    """Wrap *traffic_df* in a GeoDataFrame with point geometries.

    Args:
        traffic_df: Frame with 'longitude' and 'latitude' columns.

    Returns:
        A ``GeoDataFrame`` built from *traffic_df* whose geometry is created
        by ``points_from_xy`` (x = longitude, y = latitude) with CRS
        'EPSG:4326'.
    """
    agent_points = gpd.points_from_xy(
        traffic_df['longitude'],
        traffic_df['latitude'],
        crs='EPSG:4326',
    )
    return GeoDataFrame(traffic_df, geometry=agent_points)

In [None]:
@track_emissions(project_name="Urban Digital Twin Bonn - Read GPD", output_file="log/emissions-read.user", offline=True, country_iso_code="USA")
def track_read_gpd(traffic_filepath: str):
    """Run the DataFrame -> GeoPandas -> feature class pipeline under emissions tracking.

    Reads all rows of ``agent_pos``, builds a GeoDataFrame with ``df_to_gpd``,
    spatially enables it with ``GeoAccessor.from_geodataframe`` and exports
    that frame to 'memory/traffic_data_arr'. Nothing is returned.

    Args:
        traffic_filepath: Path of the SQLite traffic database.
    """
    traffic_df = read_sqlite_as_df(traffic_filepath, "SELECT * FROM agent_pos;")
    traffic_gpd = df_to_gpd(traffic_df)
    # Keep the spatially enabled frame so it is the data that gets exported.
    traffic_sdf = GeoAccessor.from_geodataframe(traffic_gpd, inplace=True, column_name='SHAPE')
    # Export the frame built in this function rather than an unrelated
    # notebook-level array, so the measurement covers the GeoPandas path.
    # NOTE(review): assumes the spatial accessor can write to the 'memory'
    # workspace -- confirm.
    traffic_sdf.spatial.to_featureclass('memory/traffic_data_arr', overwrite=True)

In [None]:
# Run the tracked GeoPandas-based read; track_read_gpd is configured with
# output_file="log/emissions-read.user".
track_read_gpd(traffic_filepath)

In [None]:
# Reload the emissions log and display it sorted ascending by the
# 'emissions' column.
read_emissions_df = read_emissions("log/emissions-read.user")
read_emissions_df.sort_values(by='emissions')