# Expanding Madina
## Goals
* Implement a betweenness flow simulation function that can be called from a Zonal object with minimal parameters.
* Implement a function that takes count data and outputs a list of statistical models and their interpretation.
* Set up a docstring template that is compliant and complete.
* Use the code to run Somerville, run the statistics, and interpret the results.
* Publish the docstrings on cityformlab.mit.edu or on github.com.
* Use the code to run Beirut and interpret the results.
* Use the model to run Melbourne, NYC, ...

In [1]:
import sys
import pandas as pd
import geopandas as gpd
from datetime import datetime
from pathlib import Path
from shapely.ops import transform

sys.path.append("..")
from madina.zonal.zonal import Layer
from madina.zonal.zonal import Zonal
from madina.una.betweenness import parallel_betweenness
from madina.una.elastic import get_elastic_weight

In [2]:
# Show the pandas version this notebook ran with (cell output follows).
pd.__version__

'2.0.2'

In [3]:
import numpy as np
# Show the NumPy version this notebook ran with (cell output follows).
np.__version__

'1.24.3'

In [4]:
# Show the GeoPandas version this notebook ran with (cell output follows).
gpd.__version__

'0.13.2'

In [5]:
import shapely
# Show the Shapely version this notebook ran with (cell output follows).
shapely.__version__

'2.0.1'

In [6]:
class Logger:
    """Console/CSV progress log and result recorder for a betweenness simulation.

    The logger keeps three pieces of state:

    * ``log_df`` - one row per logged event, with timing columns.
    * ``betweenness_record`` - a copy of the ``streets`` layer that gains one
      betweenness column per simulated pairing/setting combination.
    * ``separate_simulation_records`` - the same kind of record, kept separately
      for every ``[network_weight][turn_penalty][elastic_weight]`` combination.

    All paths are built by string concatenation with Windows-style ``\\``
    separators, so ``output_folder`` is expected to end with a separator.
    """

    def __init__(self, output_folder):
        """Create an empty event log that will write under ``output_folder``."""
        self.output_folder = output_folder
        self.start_time = datetime.now()
        # NOTE: the "tune_penalty" spelling is the column name written to the
        # CSV logs; it is kept so existing outputs stay comparable.
        self.log_df = pd.DataFrame(
            {
                "time": pd.Series(dtype='datetime64[ns]'),
                "distance": pd.Series(dtype="string"),
                "tune_penalty": pd.Series(dtype="string"),
                "elastic_weight": pd.Series(dtype="string"),
                "origin": pd.Series(dtype="string"),
                "destination": pd.Series(dtype="string"),
                "event": pd.Series(dtype="string")
            }
        )

    @staticmethod
    def _turn_label(turn_penalty):
        """Return the folder/column tag for the turn-penalty setting."""
        return 'with_turns' if turn_penalty else 'no_turns'

    @staticmethod
    def _weight_label(elastic_weight):
        """Return the folder/column tag for the elastic-weight setting."""
        return 'elastic_weight' if elastic_weight else 'unadjusted_weight'

    def _settings_folder(self, network_weight, turn_penalty, elastic_weight):
        """Return the output sub-folder shared by one settings combination."""
        return (
            self.output_folder
            + f"{network_weight}\\{self._turn_label(turn_penalty)}\\{self._weight_label(elastic_weight)}\\"
        )

    def _flow_column(self, pairing, network_weight, turn_penalty, elastic_weight):
        """Return the result column name for one pairing under one settings combination."""
        return (
            f"{network_weight}_{self._turn_label(turn_penalty)}_"
            f"{self._weight_label(elastic_weight)}_{pairing['Between_Name']}"
        )

    def simulation_start(self, zone, network_weight_settings, turn_penalty_settings, elastic_weight_settings):
        """Initialise the result records from the ``streets`` layer of ``zone``.

        Every settings combination receives its own deep copy of the street
        table so that results can later be saved per combination.
        """
        self.betweenness_record = zone.layers['streets'].gdf.copy(deep=True)
        self.separate_simulation_records = {
            network_weight: {
                turn_penalty: {
                    elastic_weight: self.betweenness_record.copy(deep=True)
                    for elastic_weight in elastic_weight_settings
                }
                for turn_penalty in turn_penalty_settings
            }
            for network_weight in network_weight_settings
        }

    def log(self, input_dict):
        """Append one event to ``log_df`` and echo it to stdout.

        ``input_dict`` may contain any of the log columns; missing ones are
        filled with ``"---"``. The caller's dictionary is not modified.
        """
        record = dict(input_dict)
        record["time"] = datetime.now()
        if self.log_df.shape[0] == 0:
            # Header: one title per value printed in the row below.
            print(
                f"total time\tseconds elapsed\tdistance method\tturn_penalty\telastic_weight\t"
                f"{'origin':^15s}\t{'destination':^15s}\tevent"
            )
            record["seconds_elapsed"] = 0
            record["cumulative_seconds"] = 0
        else:
            # total_seconds() keeps fractions of a second and does not wrap
            # around after 24 hours the way the ``.seconds`` attribute does.
            time_elapsed = (record["time"] - self.log_df.iloc[-1]["time"]).total_seconds()
            record["seconds_elapsed"] = time_elapsed
            record["cumulative_seconds"] = self.log_df["seconds_elapsed"].sum() + time_elapsed

        for column_name in self.log_df.columns:
            record.setdefault(column_name, "---")

        self.log_df = pd.concat([self.log_df, pd.DataFrame([record])], ignore_index=True)
        print(
            f"{record['cumulative_seconds']:6.4f}\t\t"
            f"{record['seconds_elapsed']:6.4f}\t\t\t\t"
            f"{record['distance']}\t\t"
            f"{record['tune_penalty']}\t\t"
            f"{record['elastic_weight']}\t\t"
            f"{record['origin']:^15s}\t"
            f"{record['destination']:^15s}\t"
            f"{record['event']}"
        )

    def pairing_end(self, shaqra: "Zonal", pairing, network_weight, turn_penalty, elastic_weight):
        """Record and save the betweenness of one pairing under one settings combination.

        Reads ``betweenness`` and ``parent_street_id`` from ``shaqra.network.edges``,
        joins them onto the running records under a settings-specific column
        name, and writes the flows, both running records and the time log into
        a folder created for this pairing.
        """
        # creating a folder for output
        pairing_folder = (
            self._settings_folder(network_weight, turn_penalty, elastic_weight)
            + f"O({pairing['Origin_Name']})_D({pairing['Destination_Name']})\\"
        )
        Path(pairing_folder).mkdir(parents=True, exist_ok=True)

        street_gdf = shaqra.layers["streets"].gdf
        edge_gdf = shaqra.network.edges
        # Relative width column, stored on the network's edge table itself.
        edge_gdf["width"] = edge_gdf["betweenness"] / edge_gdf["betweenness"].mean() + 0.25

        flow_column = self._flow_column(pairing, network_weight, turn_penalty, elastic_weight)
        renaming = {"betweenness": flow_column}
        edge_flows = edge_gdf[['parent_street_id', 'betweenness']].set_index('parent_street_id')

        # Join first, rename second, so the incoming column is always called
        # "betweenness" at join time.
        self.betweenness_record = self.betweenness_record.join(edge_flows).rename(columns=renaming)

        settings_records = self.separate_simulation_records[network_weight][turn_penalty]
        settings_records[elastic_weight] = settings_records[elastic_weight].join(edge_flows).rename(columns=renaming)

        save_results = edge_gdf.set_index('parent_street_id').join(street_gdf, lsuffix='_from_edge')[
            ["betweenness", "__GUID", "geometry"]
        ].rename(columns=renaming)

        save_results.to_csv(pairing_folder + "flows.csv")
        self.betweenness_record.to_csv(pairing_folder + "betweenness_record_so_far.csv")
        settings_records[elastic_weight].to_csv(pairing_folder + "simulation_record_so_far.csv")

        # The time log is written before the "Output saved" event is logged.
        self.log_df.to_csv(pairing_folder + "time_log.csv")

        self.log({
            "origin": pairing["Origin_Name"],
            "destination": pairing["Destination_Name"],
            "event": "Output saved",
            "distance": network_weight,
            "tune_penalty": turn_penalty,
            "elastic_weight": elastic_weight})

    def simulation_end(self, network_weight_settings, turn_penalty_settings, elastic_weight_settings):
        """Write the final combined record, the per-settings records and the time log."""
        # Folders are normally created by pairing_end; create them here as well
        # so a run with no pairings still produces its summary files.
        Path(self.output_folder).mkdir(parents=True, exist_ok=True)
        self.betweenness_record.to_csv(self.output_folder + "betweenness_record.csv")
        for network_weight in network_weight_settings:
            for turn_penalty in turn_penalty_settings:
                for elastic_weight in elastic_weight_settings:
                    settings_folder = self._settings_folder(network_weight, turn_penalty, elastic_weight)
                    Path(settings_folder).mkdir(parents=True, exist_ok=True)
                    self.separate_simulation_records[network_weight][turn_penalty][elastic_weight].to_csv(
                        settings_folder + "betweenness_record.csv")
        self.log({"event": "All DONE."})
        self.log_df.to_csv(self.output_folder + "time_log.csv")


In [7]:
def _to_2d(x, y, z=None):
    """Coordinate callback for ``shapely.ops.transform`` that drops the Z ordinate."""
    return x, y


def _replace_layer_with_2d(zonal, layer_name, file_path, impose_crs):
    """Replace ``zonal.layers[layer_name]`` with a copy whose geometries are 2D."""
    flat_gdf = zonal.layers[layer_name].gdf.copy()
    flat_gdf["geometry"] = flat_gdf["geometry"].apply(lambda s: transform(_to_2d, s))
    # NOTE(review): original_crs is impose_crs even when that is None, as in the
    # code this replaces - confirm that Layer accepts None here.
    zonal.layers[layer_name] = Layer(
        label=layer_name,
        gdf=flat_gdf,
        show=True,
        original_crs=impose_crs,
        file_path=file_path
    )


def _load_od_layer(zonal, layer_name, file_name, data_folder, impose_crs, verbose=False):
    """Load an origin/destination layer into ``zonal`` unless it is already there.

    When ``impose_crs`` is given and differs from the loaded layer's CRS, the
    file is re-read and the CRS is overridden. Layers with Z coordinates are
    flattened to 2D. ``verbose`` enables the CRS diagnostics printed for origins.
    """
    if layer_name in zonal.layers:
        return

    file_path = data_folder + file_name
    zonal.load_layer(layer_name=layer_name, file_path=file_path)
    if verbose:
        print(f"{layer_name}\t{zonal.layers[layer_name].gdf.crs}")

    if (impose_crs is not None) and (zonal.layers[layer_name].gdf.crs != impose_crs):
        if verbose:
            print("Imposing CRS", impose_crs)
        zonal.layers[layer_name] = Layer(
            label=layer_name,
            gdf=gpd.read_file(file_path).set_crs(impose_crs, allow_override=True),
            show=True,
            original_crs=impose_crs,
            file_path=file_path
        )

    if verbose:
        print(f"{layer_name}\t{zonal.layers[layer_name].gdf.crs}\t{zonal.layers[layer_name].crs}")

    if zonal.layers[layer_name].gdf.has_z.any():
        _replace_layer_with_2d(zonal, layer_name, file_path, impose_crs)


def betweenness_flow_simulation(
        city_name="Somerville",
        data_folder=None,
        output_folder=None,
        pairings_file="Pairings.csv",
        network_weight_settings=("Perceived", "Geometric"),
        turn_penalty_settings=(False, True),
        elastic_weight_settings=(False, True),
        num_cores=8,
        turn_threshold_degree=45,
        turn_penalty_amount=30,
        impose_crs=None
    ):
    """Run betweenness flow simulations for every pairing and settings combination.

    The pairings file is read from ``data_folder``; its first row names the
    street network file (``Network_File``) and, when "Perceived" weights are
    requested, the edge cost attribute (``Network_Cost``). For every pairing row
    the origin and destination layers are loaded, inserted into the network,
    and betweenness is estimated once per combination of network weight,
    turn penalty and elastic weight. Progress and results are written through
    ``Logger`` under ``output_folder``.

    Parameters
    ----------
    city_name : str
        Used to build the default data and output folders.
    data_folder : str, optional
        Folder holding the pairings file and layers; must end with a path
        separator. Defaults to ``Cities\\<city_name>\\Data\\``.
    output_folder : str, optional
        Folder for results; must end with a path separator. Defaults to a
        time-stamped folder under ``Cities\\<city_name>\\Simulations\\``.
    pairings_file : str
        File name of the pairings table inside ``data_folder``.
    network_weight_settings : iterable of {"Perceived", "Geometric"}
        Edge weights to simulate with.
    turn_penalty_settings : iterable of bool
        Whether turns are penalised.
    elastic_weight_settings : iterable of bool
        Whether origin weights are adjusted with ``get_elastic_weight``.
    num_cores : int
        Upper bound on worker processes; each run uses at most one per origin.
    turn_threshold_degree, turn_penalty_amount
        Passed to ``Zonal.create_street_network``.
    impose_crs : optional
        When given, layers are forced into this CRS.

    Returns
    -------
    None
        All results are written to disk.
    """
    # Validate user input parameters, raise exceptions or make modifications..
    if data_folder is None:
        data_folder = "Cities\\"+city_name+"\\Data\\"
    if output_folder is None:
        start_time = datetime.now()
        output_folder = f"Cities\\{city_name}\\Simulations\\{start_time.year}_{start_time.month:02d}_{start_time.day:02d}_{start_time.hour:02d}_{start_time.minute:02d}\\"
    # Created up front: the logger and the final exports all write into it.
    Path(output_folder).mkdir(parents=True, exist_ok=True)

    logger = Logger(output_folder)
    logger.log({"event": "beginning"})

    pairings = gpd.read_file(data_folder + pairings_file)
    network_file_path = data_folder + pairings.at[0, "Network_File"]  # "Network.geojson"

    zonal = Zonal() if impose_crs is None else Zonal(projected_crs=impose_crs)

    zonal.load_layer(
        layer_name='streets',
        file_path=network_file_path
    )
    if impose_crs is not None:
        zonal.layers["streets"] = Layer(
            label="streets",
            gdf=gpd.read_file(network_file_path).set_crs(impose_crs, allow_override=True),
            show=True,
            original_crs=impose_crs,
            file_path=network_file_path
        )

    # ---- Street data cleaning ------------------------------------------------
    # Polygons are replaced by their exterior ring.
    geometry_gdf = zonal.layers["streets"].gdf
    polygon_idxs = geometry_gdf[geometry_gdf["geometry"].geom_type == "Polygon"].index
    geometry_gdf.loc[polygon_idxs, "geometry"] = geometry_gdf.loc[polygon_idxs, "geometry"].exterior
    zonal.layers["streets"].gdf = geometry_gdf

    if zonal.layers["streets"].gdf.has_z.any():
        zonal.layers["streets"].gdf["geometry"] = zonal.layers["streets"].gdf[
            "geometry"].apply(lambda s: transform(_to_2d, s))

    # Only applied when every street is a MultiLineString; keeps the first part.
    if (zonal.layers["streets"].gdf.geometry.geom_type == "MultiLineString").all():
        zonal.layers["streets"].gdf["geometry"] = zonal.layers["streets"].gdf[
            "geometry"].apply(lambda s: s.geoms[0])

    print(f'streets\t{zonal.layers["streets"].gdf.crs}')

    logger.simulation_start(zonal, network_weight_settings, turn_penalty_settings, elastic_weight_settings)

    # Setting up a street network
    zonal.create_street_network(
        source_layer="streets",
        node_snapping_tolerance=1,
        weight_attribute=None if "Perceived" not in network_weight_settings else pairings.at[0, "Network_Cost"],
        discard_redundant_edges=True,  # TODO: Expose as a parameter
        turn_threshold_degree=turn_threshold_degree,
        turn_penalty_amount=turn_penalty_amount
        )
    # Snapshot used to reset origins and destinations before each insertion.
    # TODO: implement as a Zonal function.
    clean_node_gdf = zonal.network.nodes.copy(deep=True)

    # Preparing perceived and geometric weights; perceived weights are clamped
    # to a minimum of 1.
    perceived_network_weight = zonal.network.edges["weight"].apply(lambda x: max(1, x))
    geometric_network_weight = zonal.network.edges["geometry"].length

    logger.log({"event": "Network topology created."})

    for _, pairing in pairings.iterrows():
        # Loading layers, if they're not already loaded.
        _load_od_layer(
            zonal, pairing["Origin_Name"], pairing["Origin_File"], data_folder, impose_crs, verbose=True)
        _load_od_layer(
            zonal, pairing["Destination_Name"], pairing["Destination_File"], data_folder, impose_crs)

        print(f"{pairing['Origin_Name']}\t{zonal.layers[pairing['Origin_Name']].gdf.crs}")
        print(f"{pairing['Destination_Name']}\t{zonal.layers[pairing['Destination_Name']].gdf.crs}")
        print(f"Zonal Edge Count: {len(zonal.network.edges)}")

        # iterating over network weight options..
        for network_weight in network_weight_settings:
            # Origins and destinations are inserted once per network weight, so
            # the node table is reset here rather than once per pairing.
            # NOTE(review): assumes insert_node only adds to network.nodes - confirm.
            zonal.network.nodes = clean_node_gdf.copy(deep=True)

            # Setting the proper network weight on the edges.
            if network_weight == "Perceived":
                zonal.network.edges["weight"] = perceived_network_weight
            elif network_weight == "Geometric":
                zonal.network.edges["weight"] = geometric_network_weight

            # TODO: the efficient insert should be built into the main Madina code.
            zonal.insert_node(
                label="origin",
                layer_name=pairing["Origin_Name"],
                weight_attribute=pairing["Origin_Weight"] if pairing["Origin_Weight"] != "Count" else None,
            )

            zonal.insert_node(
                label="destination",
                layer_name=pairing["Destination_Name"],
                weight_attribute=pairing["Destination_Weight"] if pairing["Destination_Weight"] != "Count" else None,
            )

            # Copied so later in-place changes to the node weights cannot leak
            # into the values restored for non-elastic runs.
            inelastic_weight = zonal.network.nodes['weight'].copy()

            logger.log({
                "origin": pairing["Origin_Name"],
                "destination": pairing["Destination_Name"],
                "event": "Origins and Destinations prepared."
                })

            zonal.create_graph(light_graph=True, od_graph=True)

            logger.log({
                "origin": pairing["Origin_Name"],
                "destination": pairing["Destination_Name"],
                "event": "Light and dense graphs prepared."
                })

            for turn_penalty in turn_penalty_settings:
                # TODO: investigate passing retained internal calculations
                # (d_idxs, paths, distances) between simulations.
                for elastic_weight in elastic_weight_settings:
                    # Node weights are overwritten by the elastic weights, so
                    # the non-elastic branch restores the saved values.
                    if elastic_weight:
                        get_elastic_weight(
                            zonal.network,
                            search_radius=800,
                            detour_ratio=0.002,
                            beta=0.002,
                            decay=True,
                            turn_penalty=turn_penalty,
                            retained_d_idxs=None
                            )

                        logger.log({
                            "origin": pairing["Origin_Name"],
                            "destination": pairing["Destination_Name"],
                            "event": "Elastic Weights generated.",
                            "distance": network_weight, "tune_penalty": turn_penalty,
                            "elastic_weight": elastic_weight})
                    else:
                        zonal.network.nodes['weight'] = inelastic_weight

                    node_gdf = zonal.network.nodes
                    origin_count = node_gdf[node_gdf["type"] == "origin"].shape[0]

                    # Per-run value: the num_cores argument itself is left
                    # untouched so a small origin set does not cap later runs.
                    run_cores = max(1, min(origin_count, num_cores))

                    parallel_betweenness(
                        zonal.network,
                        search_radius=float(pairing["Radius"]),
                        detour_ratio=float(pairing["Detour"]),
                        decay=False if elastic_weight else True,
                        decay_method="exponent",  # "power", "exponent"
                        beta=float(pairing["Beta"]),
                        path_detour_penalty="equal",  # "power", "exponent", "equal"
                        origin_weights=True,
                        closest_destination=False,
                        destination_weights=True,
                        num_cores=run_cores,
                        light_graph=True,
                        turn_penalty=turn_penalty,
                        rertain_expensive_data=False if elastic_weight else True,
                        retained_d_idxs=None,
                        retained_paths=None,
                        retained_distances=None,
                    )

                    logger.log({
                        "origin": pairing["Origin_Name"],
                        "destination": pairing["Destination_Name"],
                        "event": "Betweenness estimated.",
                        "distance": network_weight,
                        "tune_penalty": turn_penalty,
                        "elastic_weight": elastic_weight})
                    logger.pairing_end(zonal, pairing, network_weight, turn_penalty, elastic_weight)

    logger.simulation_end(network_weight_settings, turn_penalty_settings, elastic_weight_settings)
    zonal.network.edges.to_csv(output_folder + "edge_gdf.csv")
    zonal.network.nodes.to_csv(output_folder + "node_gdf.csv")

    # zonal.network.nodes.drop(columns=['nearest_street_node_distance']).to_csv(output_folder + "node_gdf_part_1.csv")
    # zonal.network.nodes['nearest_street_node_distance'].to_csv(output_folder + "node_gdf_part_2_dict.csv")
    # nearest_street_node = pd.DataFrame({
    #     'left_id': [dict(dict(d).get('left')).get('node_id') for d in zonal.network.nodes['nearest_street_node_distance'] if d is not nan],
    #     'left_weight': [dict(dict(d).get('left')).get('weight') for d in zonal.network.nodes['nearest_street_node_distance']],
    #     'right_id': [dict(dict(d).get('right')).get('node_id') for d in zonal.network.nodes['nearest_street_node_distance']],
    #     'right_weight': [dict(dict(d).get('right')).get('weight') for d in zonal.network.nodes['nearest_street_node_distance']]
    # })
    # nearest_street_node.to_csv(output_folder + "node_gdf_part_2.csv")

In [8]:
# Run the Somerville simulation with a single settings combination:
# geometric weights, turn penalties on, no elastic weights.
betweenness_flow_simulation(
    city_name="Somerville",
    data_folder='Cities\\Somerville\\Data\\',
    pairings_file="Pairings.csv",
    network_weight_settings=["Geometric"],          # ["Perceived", "Geometric"],
    turn_penalty_settings=[True],                   # [False, True]
    elastic_weight_settings=[False],          # [False, True]
    num_cores=20,
    turn_threshold_degree=45,
    turn_penalty_amount=62.3,
    impose_crs='epsg:26986'
)

total time	seconds elapsed	diatance method	elastic_weight	    origin     	  destination  	event
0.0000		0				---		---		---		      ---      	      ---      	beginning
streets	epsg:26986


  matching = point_geometries.sindex.query_bulk(


179
[  19   20   37   42   43   70   83   84  109  368  373  387  392  479
  484  485  774  779  837  849  852  875  892 1049 1202 1261 1262 1315
 1354 1355 1417 1429 1430 1461 1462 1486 1497 1498 1512 1584 1643 1655
 1689 1707 1728 1769 1785 1803 1854 1887 1888 1905 1932 1980 1994 2001
 2031 2046 2047 2118 2119 2273 2317 2410 2411 2422 2424 2526 2527 2561
 2562 2694 2695 2696 2717 2826 2828 2829 2889 2916 2930 3088 3089 3119
 3126 3127 3164 3165 3206 3207 3228 3249 3250 3344 3345 3874 3875 4037
 4038 4531 4532 4584 4586 4753 4799 4804 4805 5152 5153 5199 5218 5246
 5247 5248 5249 5267 5276 5379 5540 5548 5549 5665 5666 5712 5833 5841
 5842 5883 5899 5912 5991 6001 6017 6019 6233 6255 6262 6296 6298 6299
 6311 7013 7060 7061 7070 7073 7096 7097 7104 7154 7155 7168 7199 7200
 7230 7259 7260 7307 7375 7467 7498 7515 7716 8224 8297 8449 8834 8880
 8881 8884 8885 8911 8916 9103 9246 9287 9385 9557 9589]
80 179
{2561, 3088, 19, 7199, 8224, 5666, 37, 42, 3126, 7230, 2118, 9287, 83, 6233, 725