## Import

In [2]:
import json
import requests
import numpy as np
import regex as re
import pandas as pd
import pickle as pkl
import shapely as shp
import geopandas as gpd
from copy import deepcopy
from sodapy import Socrata
import plotly.express as px
from pyproj import Transformer
import plotly.graph_objects as go
from geopy.distance import geodesic
from plotly.subplots import make_subplots

## Utility functions

In [2]:
def extract_shop_tags(row, mapping):
    if type(row[mapping["other_tags"]])==str and '"shop"=>' in row[mapping["other_tags"]]:
        a = row[mapping["other_tags"]].split('"shop"=>')[1]
        if "," in a:
            a = a.split(",")[0]
        a = a.replace('"', '').replace(" ", "")
        return a
    else:
        return None

In [17]:
def extract_building_tags(row, mapping):
    if type(row[mapping["other_tags"]])==str and '"building"=>' in row[mapping["other_tags"]]:
        a = row[mapping["other_tags"]].split('"building"=>')[1]
        if "," in a:
            a = a.split(",")[0]
        a = a.replace('"', '').replace(" ", "")
        return a
    else:
        return None

In [16]:
def extract_food_related_categories(row, mapping, food_related_categories):
    if type(row[mapping["shop"]])==str:
        if row[mapping["shop"]] in food_related_categories:
            return row[mapping["shop"]]
    return None

In [15]:
def extract_building_related_categories(row, mapping, building_related_categories):
    if type(row[mapping["building"]])==str:
        if row[mapping["building"]] in building_related_categories:
            return row[mapping["building"]]
    return None

In [14]:
def is_supermarket(row):
    if row.shop == "supermarket" or row.shop == "convenience":
        return True
    else:
        return False

In [13]:
def food_points_contained_by_NIL(row, food_data):
    l = []
    
    if hasattr(row, "geometry"):
        for j in range(len(food_data)):
            if row.geometry.contains(food_data.iloc[j].geometry):
                l.append(j)
    
    elif hasattr(row, "contains"):
        for j in range(len(food_data)):
            if row.contains(food_data.iloc[j].geometry):
                l.append(j)
    
    else:
        print("Error: row has no geometry or contains attribute")
        
    return l

In [18]:
def building_points_contained_by_NIL(row, building_data):
    l = []
    
    if hasattr(row, "geometry"):
        for j in range(len(building_data)):
            if row.geometry.contains(building_data.iloc[j].geometry):
                l.append(j)
    
    elif hasattr(row, "contains"):
        for j in range(len(building_data)):
            if row.contains(building_data.iloc[j].geometry):
                l.append(j)
    
    else:
        print("Error: row has no geometry or contains attribute")
    
    return l

In [19]:
def split_list(lst, size):
    return [lst[i:i+size] for i in range(0, len(lst), size)]

In [20]:
# create the adjacency matrix using the OSRM API
def create_adjacency_matrix(source_coordinates, destination_coordinates, mode="distance"):
        
    n = len(source_coordinates)
    m = len(destination_coordinates)
    distances = np.zeros((n,m))
            
    # split the coordinates in chunks of 100
    if n*m > 50*50:
        print("Warning: the number of coordinates is too high, the API won't be able to process the request")
        
             
    if type(source_coordinates) == gpd.geoseries.GeoSeries:
        source_coordinates_list = [[point.xy[0][0], point.xy[1][0]] for point in source_coordinates]
    
    if type(destination_coordinates) == gpd.geoseries.GeoSeries:
        destination_coordinates_list = [[point.xy[0][0], point.xy[1][0]] for point in destination_coordinates]
        
    fix_url = "http://router.project-osrm.org/table/v1/foot/"
                    
    # define the destinations and sources URL
    sources_url = "sources="+";".join(str(k) for k in range(len(source_coordinates)))  
    destinations_url = "destinations="+";".join(str(k+len(source_coordinates)) for k in range(len(destination_coordinates)))
    
    coordinates_list = source_coordinates_list + destination_coordinates_list
    
    # define the coordinates URL
    coordinates_url = "".join(["".join(str(str([coordinates_list[i][0],coordinates_list[i][1]]))[1:-1].split())+";" for i in range(len(coordinates_list))])[:-1]
                    
    # call the OSMR API
    r = requests.get(fix_url+coordinates_url
                        + f"?annotations={mode}&"
                        + sources_url+"&"
                        + destinations_url)
    
    try:
        routes = json.loads(r.content)
    except:
        print(fix_url 
                + coordinates_url
                + f"?annotations={mode}&"
                + sources_url+"&"
                + destinations_url)
        
        return None
                 
    if "message" in routes.keys():
        print(fix_url 
                + coordinates_url
                + f"?annotations={mode}&"
                + sources_url+"&"
                + destinations_url)
                    
    distances = np.array(routes[mode+'s'])
        
    return distances

In [29]:
def refine(b, dis, food_data, i=0):
    buffer = b.buffer(dis, cap_style="round")
    food_list = food_points_contained_by_NIL(buffer, food_data)
    if (len(food_list) >=10 and len(food_list) <= 20) or (i >= 50):
        return food_list
    
    else:
        if len(food_list) > 20:
            new_dis = dis/2
        else:
            new_dis = dis*1.5
        
        return refine(b, new_dis, food_data, i+1) 
        

In [21]:
def get_tag_from_index(index, data, tag):
    return data.iloc[index][tag]

## Food Related Data Preparation

In [11]:
# Read the GeoPackage file
data = gpd.read_file(r'C:\Users\Marco\Documents\GitHub\commercial-activities-Milan\Data\015146_Milano-2023-05-23T02Z.gpkg')

In [30]:
mapping = {col: i for i, col in enumerate(data.columns)}
data["shop"] = data.apply(lambda x: extract_shop_tags(x, mapping), axis=1)

In OpenStreetMap, the "shop" tag is used to describe various types of commercial establishments. When it comes to food-related categories for the "shop" tag, there are several options commonly used to represent different types of food-related businesses.

In [31]:
food_categories = [
    "alcohol",
    "bakery",
    "beverages",
    "brewing_supplies",
    "butcher",
    "cheese",
    "chocolate",
    "coffee",
    "confectionery",
    "convenience",
    "deli",
    "dairy",
    "farm",
    "frozen_food",
    "greengrocer",
    "health_food",
    "ice_cream",
    "pasta",
    "pastry",
    "seafood",
    "spices",
    "tea",
    "wine",
    "water",
    "supermarket",
    "food",
    "farm"
]

Remove non-food-related shops.

In [32]:
mapping = {col: i for i, col in enumerate(data.columns)}
food_related_rows = data.apply(lambda x: extract_food_related_categories(x, mapping, food_categories), axis=1).dropna().index
food_data = data.iloc[food_related_rows]

Remove nan columns.

In [33]:
for col in food_data.columns:
    if food_data[col].isna().all():
        food_data.drop(col, axis=1, inplace=True)

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  food_data.drop(col, axis=1, inplace=True)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  food_data.drop(col, axis=1, inplace=True)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  food_data.drop(col, axis=1, inplace=True)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  food_data.drop(col, axis=1, inplace=True)


In [34]:
food_data.head(10)

Unnamed: 0,osm_id,name,ref,other_tags,geometry,shop
970,245055800,Pane e Dolci,,"""shop""=>""bakery""",POINT (9.15500 45.46046),bakery
1137,250969620,Lidl,,"""brand""=>""Lidl"",""brand:wikidata""=>""Q151954"",""b...",POINT (9.14200 45.44759),supermarket
1279,255137455,U2,,"""old_name""=>""Unes"",""operator""=>""Unes"",""shop""=>...",POINT (9.24180 45.48799),supermarket
1301,255508749,Iper Milano Portello,,"""addr:city""=>""Milano"",""addr:country""=>""IT"",""ad...",POINT (9.14540 45.49014),supermarket
1383,258035187,Pam,,"""addr:housenumber""=>""6"",""addr:street""=>""Viale ...",POINT (9.19804 45.45080),supermarket
1397,258076731,Carrefour Market,,"""addr:city""=>""Milano"",""addr:housenumber""=>""24/...",POINT (9.16623 45.48802),supermarket
1398,258076763,Pam local,,"""addr:city""=>""Milano"",""addr:housenumber""=>""8"",...",POINT (9.16506 45.48880),supermarket
1443,259592419,Lidl,,"""brand""=>""Lidl"",""brand:wikidata""=>""Q151954"",""b...",POINT (9.14885 45.46232),supermarket
1635,266384203,Punto SMA,,"""addr:housenumber""=>""38"",""addr:postcode""=>""201...",POINT (9.20268 45.45128),supermarket
1701,268403462,Mercato Comunale,,"""shop""=>""convenience"",""wheelchair""=>""yes""",POINT (9.16267 45.43128),convenience


#### Food density

##### by Area

In [35]:
NIL = gpd.read_file(r'C:\Users\Marco\Documents\GitHub\commercial-activities-Milan\Data\NIL_geometry.geojson')

In [36]:
NIL

Unnamed: 0,FID_1,FID_1_1,ID_NIL,NIL,AreaHA,AreaMQ,geometry
0,0,0,74,SACCO,70.84658,7.084658e+05,"POLYGON ((9.12195 45.51602, 9.12163 45.51589, ..."
1,1,1,82,COMASINA,92.67346,9.267346e+05,"POLYGON ((9.16887 45.52397, 9.16803 45.52234, ..."
2,2,2,75,STEPHENSON,56.00979,5.600979e+05,"POLYGON ((9.12933 45.50998, 9.12973 45.50939, ..."
3,3,3,66,QT 8,102.44374,1.024437e+06,"POLYGON ((9.14368 45.48474, 9.14338 45.48420, ..."
4,4,4,29,ORTOMERCATO,140.25196,1.402520e+06,"POLYGON ((9.23739 45.45588, 9.23731 45.45427, ..."
...,...,...,...,...,...,...,...
83,83,83,6,TICINESE,125.50647,1.255065e+06,"POLYGON ((9.18675 45.45235, 9.18659 45.45183, ..."
84,84,84,47,CANTALUPA,92.67168,9.267168e+05,"POLYGON ((9.15445 45.41758, 9.15433 45.41743, ..."
85,85,85,86,PARCO DEI NAVIGLI,361.78363,3.617836e+06,"POLYGON ((9.15266 45.41520, 9.15200 45.41635, ..."
86,86,86,68,PAGANO,128.97343,1.289734e+06,"POLYGON ((9.16506 45.46684, 9.16486 45.46619, ..."


In [37]:
NIL["food_count"] = [0]*len(NIL)

for i in range(len(NIL)):
    for j in range(len(food_data)):
        if NIL.iloc[i].geometry.contains(food_data.iloc[j].geometry):
            NIL.at[i, "food_count"] = NIL.iloc[i]["food_count"] + 1

In [38]:
NIL["food_density_AreaHA"] = NIL["food_count"]/NIL["AreaHA"]
NIL["food_density_AreaHA"] = (NIL["food_density_AreaHA"]-NIL["food_density_AreaHA"].min())/(NIL["food_density_AreaHA"].max()-NIL["food_density_AreaHA"].min())

##### by population

In [39]:
NIL_population = pd.read_csv(r'C:\Users\Marco\Documents\GitHub\commercial-activities-Milan\Data\ds27_pop_sto_quartiere_1999_2022.csv',
                             sep=";")

We keep only the most recent data (i.e. the 2022)

In [40]:
NIL_population = NIL_population[NIL_population["Anno"] == 2022]

It seem that the ID of the NILs is the same in both the datasets. (Despite some differences in the names of the NILs)

In [41]:
for id in NIL_population["IdNil"].unique():
    print("name1: ", NIL_population[NIL_population["IdNil"] == id].Nil.iloc[0],
          "\nname2: ", NIL[NIL["ID_NIL"] == id].NIL.iloc[0],"\n")

name1:  Duomo 
name2:  DUOMO 

name1:  Brera 
name2:  BRERA 

name1:  Giardini P.ta Venezia 
name2:  GIARDINI PORTA VENEZIA 

name1:  Guastalla 
name2:  GUASTALLA 

name1:  Porta Vigentina - Porta Lodovica 
name2:  VIGENTINA 

name1:  Porta Ticinese - Conca del Naviglio 
name2:  TICINESE 

name1:  Magenta - S.Vittore 
name2:  MAGENTA - S. VITTORE 

name1:  Porta Garibaldi - Porta Nuova 
name2:  GARIBALDI REPUBBLICA 

name1:  Stazione Centrale - Ponte Seveso 
name2:  CENTRALE 

name1:  Isola 
name2:  ISOLA 

name1:  Maciachini - Maggiolina 
name2:  MACIACHINI - MAGGIOLINA 

name1:  Greco - Segnano 
name2:  GRECO 

name1:  Niguarda - Ca’ Granda - Prato Centenaro - Q.re Fulvio Testi 
name2:  NIGUARDA - CA' GRANDA 

name1:  Bicocca 
name2:  BICOCCA 

name1:  Gorla - Precotto 
name2:  VIALE MONZA 

name1:  Adriano 
name2:  ADRIANO 

name1:  Cimiano - Rottole - Q.re Feltre 
name2:  PARCO LAMBRO - CIMIANO 

name1:  Padova - Turro - Crescenzago 
name2:  PADOVA 

name1:  Loreto - Casoretto - No

In [42]:
NIL = NIL.join(NIL_population[["IdNil", "Residenti"]].groupby("IdNil").sum(),
               on="ID_NIL")

In [43]:
NIL["food_density_Residenti"] = NIL["food_count"]/NIL["Residenti"]
NIL["food_density_Residenti"] = (NIL["food_density_Residenti"]-NIL["food_density_Residenti"].min())/(NIL["food_density_Residenti"].max()-NIL["food_density_Residenti"].min())

In [44]:
NIL["food_density_Residenti_AreaHA"] = NIL["food_density_Residenti"]*NIL["food_density_AreaHA"]
NIL["food_density_Residenti_AreaHA"] = (NIL["food_density_Residenti_AreaHA"]-NIL["food_density_Residenti_AreaHA"].min())/(NIL["food_density_Residenti_AreaHA"].max()-NIL["food_density_Residenti_AreaHA"].min())

##### Refining the density splitting supermarkets and non-supermarkets

In [45]:
food_data["is_supermarket"] = food_data.apply(lambda x: is_supermarket(x), axis=1)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  super().__setitem__(key, value)


In [46]:
supermarket_data = food_data[food_data["is_supermarket"]]
non_supermarket_data = food_data[~food_data["is_supermarket"]]

In [47]:
NIL["supermarket_count"] = [0]*len(NIL)

for i in range(len(NIL)):
    for j in range(len(food_data)):
        if NIL.iloc[i].geometry.contains(food_data.iloc[j].geometry) and food_data.iloc[j]["is_supermarket"]:
            NIL.at[i, "supermarket_count"] = NIL.iloc[i]["supermarket_count"] + 1

In [48]:
NIL["supermarket_density_AreaHA"] = NIL["supermarket_count"]/NIL["AreaHA"]
NIL["supermarket_density_AreaHA"] = (NIL["supermarket_density_AreaHA"]-NIL["supermarket_density_AreaHA"].min())/(NIL["supermarket_density_AreaHA"].max()-NIL["supermarket_density_AreaHA"].min())

In [49]:
NIL["supermarket_density_Residenti"] = NIL["supermarket_count"]/NIL["Residenti"]
NIL["supermarket_density_Residenti"] = (NIL["supermarket_density_Residenti"]-NIL["supermarket_density_Residenti"].min())/(NIL["supermarket_density_Residenti"].max()-NIL["supermarket_density_Residenti"].min())

In [50]:
NIL["non_supermarket_count"] = [0]*len(NIL)

for i in range(len(NIL)):
    for j in range(len(food_data)):
        if NIL.iloc[i].geometry.contains(food_data.iloc[j].geometry) and not food_data.iloc[j]["is_supermarket"]:
            NIL.at[i, "non_supermarket_count"] = NIL.iloc[i]["non_supermarket_count"] + 1

In [51]:
NIL["non_supermarket_density_AreaHA"] = NIL["non_supermarket_count"]/NIL["AreaHA"]
NIL["non_supermarket_density_AreaHA"] = (NIL["non_supermarket_density_AreaHA"]-NIL["non_supermarket_density_AreaHA"].min())/(NIL["non_supermarket_density_AreaHA"].max()-NIL["non_supermarket_density_AreaHA"].min())

In [52]:
NIL["non_supermarket_density_Residenti"] = NIL["non_supermarket_count"]/NIL["Residenti"]
NIL["non_supermarket_density_Residenti"] = (NIL["non_supermarket_density_Residenti"]-NIL["non_supermarket_density_Residenti"].min())/(NIL["non_supermarket_density_Residenti"].max()-NIL["non_supermarket_density_Residenti"].min())

In [53]:
NIL["supermarket_density_Residenti_AreaHA"] = NIL["supermarket_density_Residenti"]*NIL["supermarket_density_AreaHA"]
NIL["non_supermarket_density_Residenti_AreaHA"] = NIL["non_supermarket_density_Residenti"]*NIL["non_supermarket_density_AreaHA"]

In [54]:
NIL["supermarket_density_Residenti_AreaHA"] = (NIL["supermarket_density_Residenti_AreaHA"]-NIL["supermarket_density_Residenti_AreaHA"].min())/(NIL["supermarket_density_Residenti_AreaHA"].max()-NIL["supermarket_density_Residenti_AreaHA"].min())
NIL["non_supermarket_density_Residenti_AreaHA"] = (NIL["non_supermarket_density_Residenti_AreaHA"]-NIL["non_supermarket_density_Residenti_AreaHA"].min())/(NIL["non_supermarket_density_Residenti_AreaHA"].max()-NIL["non_supermarket_density_Residenti_AreaHA"].min())

## Mobility Related Data Preparation

In OpenStreetMap, houses are typically identified using the building tag. The building tag is used to describe various types of buildings, including houses, apartments, commercial buildings, and more.

In [55]:
building_tags = [
    "apartments",
    "barracks",
    "bungalow",
    "cabin",
    "detached",
    "dormitory",
    "farm",
    "ger",
    "hotel",
    "house",
    "houseboat",
    "residential",
    "semidetached_house",
    "static_caravan",
    "stilt_house",
    "terrace",
    "tree_house"
]

Remove non-building-related shops.

In [56]:
mapping = {col: i for i, col in enumerate(data.columns)}
data["building"] = data.apply(lambda x: extract_building_tags(x, mapping), axis=1)

In [57]:
mapping = {col: i for i, col in enumerate(data.columns)}
building_related_rows = data.apply(lambda x: extract_building_related_categories(x, mapping, building_tags), axis=1).dropna().index
building_data = data.iloc[building_related_rows]

As we can see there are only two row tagged with one of the building tags. We need another way to identify the houses.

In [58]:
building_data

Unnamed: 0,osm_id,name,barrier,highway,ref,address,is_in,place,man_made,other_tags,geometry,shop,building
74354,5265908345,"Casa dello Studente ""Leonardo da Vinci""",,,,,,,,"""addr:city""=>""Milano"",""addr:housenumber""=>""62""...",POINT (9.22414 45.47654),,dormitory
120974,9513191821,Studio Tiger,,,,,,,,"""building""=>""house"",""leisure""=>""sports_centre""...",POINT (9.20791 45.47953),,house


In [59]:
house_related_point_index = []

for i, e in enumerate(data.other_tags):
    if type(e) == str:
        l = re.findall(r'"addr:housenumber"=>"(\d*?)"', e)
        if len(l) > 0:
            house_related_point_index.append(i)

In [61]:
building_data = data.iloc[house_related_point_index]

### Computing the distance between the food points and the building points (by foot)

#### Within each NIL

Ghatering food_points and building_points in each NIL.

In [62]:
NIL["food_index"] = NIL.apply(lambda x: food_points_contained_by_NIL(x, food_data), axis=1)
NIL["building_index"] = NIL.apply(lambda x: building_points_contained_by_NIL(x, building_data), axis=1)

In [63]:
source_coords = []
dest_coords = []

for i in range(len(NIL)):
    source_coords.append(building_data.iloc[NIL.iloc[i].building_index].geometry)
    dest_coords.append(food_data.iloc[NIL.iloc[i].food_index].geometry)

In [69]:
distances_matrix = []

print("Computing distances matrix...")
for i in range(len(NIL)):
    print(f"NIL {i} of {len(NIL)-1}")
    n = len(source_coords[i])
    m = len(dest_coords[i])
    
    if len(source_coords[i]) > 0 and len(dest_coords[i]) > 0:
        if len(source_coords[i]) > 70:
            matrix = np.zeros((n, m))
            source_lists = split_list(source_coords[i], 70)
        
            for j, l in enumerate(source_lists):
                matrix[70*j:70*j+len(l), :] = create_adjacency_matrix(l, dest_coords[i])
            
            distances_matrix.append(np.round(matrix/60,2))
        else:
            distances_matrix.append(np.round(create_adjacency_matrix(source_coords[i], dest_coords[i])/60,2))
    else:
        distances_matrix.append(0)

Computing distances matrix...
NIL 0 of 87
NIL 1 of 87
NIL 2 of 87
NIL 3 of 87
NIL 4 of 87
NIL 5 of 87
NIL 6 of 87
NIL 7 of 87
NIL 8 of 87
NIL 9 of 87
NIL 10 of 87
NIL 11 of 87
NIL 12 of 87
NIL 13 of 87
NIL 14 of 87
NIL 15 of 87
NIL 16 of 87
NIL 17 of 87
NIL 18 of 87
NIL 19 of 87
NIL 20 of 87
NIL 21 of 87
NIL 22 of 87
NIL 23 of 87
NIL 24 of 87
NIL 25 of 87
NIL 26 of 87
NIL 27 of 87
NIL 28 of 87
NIL 29 of 87
NIL 30 of 87
NIL 31 of 87
NIL 32 of 87
NIL 33 of 87
NIL 34 of 87
NIL 35 of 87
NIL 36 of 87
NIL 37 of 87
NIL 38 of 87
NIL 39 of 87
NIL 40 of 87
NIL 41 of 87
NIL 42 of 87
NIL 43 of 87
NIL 44 of 87
NIL 45 of 87
NIL 46 of 87
NIL 47 of 87
NIL 48 of 87
NIL 49 of 87
NIL 50 of 87
NIL 51 of 87
NIL 52 of 87
NIL 53 of 87
NIL 54 of 87
NIL 55 of 87
NIL 56 of 87
NIL 57 of 87
NIL 58 of 87
NIL 59 of 87
NIL 60 of 87
NIL 61 of 87
NIL 62 of 87
NIL 63 of 87
NIL 64 of 87
NIL 65 of 87
NIL 66 of 87
NIL 67 of 87
NIL 68 of 87
NIL 69 of 87
NIL 70 of 87
NIL 71 of 87
NIL 72 of 87
NIL 73 of 87
NIL 74 of 87
NIL 7

#### One point in each NIL vs all the food points in Milan

In [4]:
path = r"C:\Users\Marco\Documents\GitHub\commercial-activities-Milan\Data\02_Processed\NIL_data.pkl"
NIL = pkl.load(open(path, "rb"))

In [6]:
path = r"C:\Users\Marco\Documents\GitHub\commercial-activities-Milan\Data\02_Processed\building_data.pkl"
building_data = pkl.load(open(path, "rb"))

In [9]:
path = r"C:\Users\Marco\Documents\GitHub\commercial-activities-Milan\Data\02_Processed\food_data.pkl"
food_data = pkl.load(open(path, "rb"))

In [7]:
np.random.seed(21932839)

sampled_building = [x[np.random.randint(0, len(x), 1)[0]] if len(x)>0 else [] for x in NIL.building_index]
sampled_building_location = [building_data.iloc[x].geometry if type(x)==int else None for x in sampled_building]

In [10]:
ch = shp.MultiPoint([point for point in food_data.geometry]).convex_hull
ch_centroid = ch.centroid

In [30]:
sampled_building_food_points = []

for i, b in enumerate(sampled_building_location):
    if b is not None:
        dis = geodesic((ch_centroid.y, ch_centroid.x), (b.y, b.x)).km
        buffer_ds = (dis)/100
        food_points = refine(b, buffer_ds, food_data)
        sampled_building_food_points.append(food_points)
    else:
        sampled_building_food_points.append([])
    
    print(f"NIL {i} of {len(NIL)-1}", f"\nSampled points: {len(sampled_building_food_points[-1])}")

NIL 0 of 87 
Sampled points: 20
NIL 1 of 87 
Sampled points: 11
NIL 2 of 87 
Sampled points: 20
NIL 3 of 87 
Sampled points: 18
NIL 4 of 87 
Sampled points: 16
NIL 5 of 87 
Sampled points: 15
NIL 6 of 87 
Sampled points: 11
NIL 7 of 87 
Sampled points: 13
NIL 8 of 87 
Sampled points: 12
NIL 9 of 87 
Sampled points: 12
NIL 10 of 87 
Sampled points: 11
NIL 11 of 87 
Sampled points: 14
NIL 12 of 87 
Sampled points: 19
NIL 13 of 87 
Sampled points: 17
NIL 14 of 87 
Sampled points: 14
NIL 15 of 87 
Sampled points: 19
NIL 16 of 87 
Sampled points: 17
NIL 17 of 87 
Sampled points: 16
NIL 18 of 87 
Sampled points: 10
NIL 19 of 87 
Sampled points: 18
NIL 20 of 87 
Sampled points: 10
NIL 21 of 87 
Sampled points: 15
NIL 22 of 87 
Sampled points: 12
NIL 23 of 87 
Sampled points: 12
NIL 24 of 87 
Sampled points: 18
NIL 25 of 87 
Sampled points: 11
NIL 26 of 87 
Sampled points: 18
NIL 27 of 87 
Sampled points: 20
NIL 28 of 87 
Sampled points: 20
NIL 29 of 87 
Sampled points: 17
NIL 30 of 87 
Sample

In [31]:
source_coords = []
dest_coords = []

for i in range(len(NIL)):
    source_coords.append(building_data.iloc[sampled_building[i]].geometry)
    dest_coords.append(food_data.iloc[sampled_building_food_points[i]].geometry)

In [32]:
distances_matrix2 = []

print("Computing distances matrix...")
for i in range(len(NIL)):
    print(f"NIL {i} of {len(NIL)-1}")
    if type(source_coords[i]) == shp.geometry.point.Point:
        coords = gpd.GeoSeries(pd.concat([pd.Series([source_coords[i]]), dest_coords[i]], ignore_index=True))
        
        if len(coords) > 70:
            matrix = np.zeros((n, m))
            source_lists = split_list(coords, 70)
        
            for j, l in enumerate(source_lists):
                matrix[70*j:70*j+len(l), :] = create_adjacency_matrix(l, coords)
            
            distances_matrix2.append(np.round(matrix/60,2))
        else:
            distances_matrix2.append(np.round(create_adjacency_matrix(coords, coords)/60,2))
    else:
        distances_matrix2.append(0)

Computing distances matrix...
NIL 0 of 87
NIL 1 of 87
NIL 2 of 87
NIL 3 of 87
NIL 4 of 87
NIL 5 of 87
NIL 6 of 87
NIL 7 of 87
NIL 8 of 87
NIL 9 of 87
NIL 10 of 87
NIL 11 of 87
NIL 12 of 87
NIL 13 of 87
NIL 14 of 87
NIL 15 of 87
NIL 16 of 87
NIL 17 of 87
NIL 18 of 87
NIL 19 of 87
NIL 20 of 87
NIL 21 of 87
NIL 22 of 87
NIL 23 of 87
NIL 24 of 87
NIL 25 of 87
NIL 26 of 87
NIL 27 of 87
NIL 28 of 87
NIL 29 of 87
NIL 30 of 87
NIL 31 of 87
NIL 32 of 87
NIL 33 of 87
NIL 34 of 87
NIL 35 of 87
NIL 36 of 87
NIL 37 of 87
NIL 38 of 87
NIL 39 of 87
NIL 40 of 87
NIL 41 of 87
NIL 42 of 87
NIL 43 of 87
NIL 44 of 87
NIL 45 of 87
NIL 46 of 87
NIL 47 of 87
NIL 48 of 87
NIL 49 of 87
NIL 50 of 87
NIL 51 of 87
NIL 52 of 87
NIL 53 of 87
NIL 54 of 87
NIL 55 of 87
NIL 56 of 87
NIL 57 of 87
NIL 58 of 87
NIL 59 of 87
NIL 60 of 87
NIL 61 of 87
NIL 62 of 87
NIL 63 of 87
NIL 64 of 87
NIL 65 of 87
NIL 66 of 87
NIL 67 of 87
NIL 68 of 87
NIL 69 of 87
NIL 70 of 87
NIL 71 of 87
NIL 72 of 87
NIL 73 of 87
NIL 74 of 87
NIL 7

In [33]:
distances_matrix2_index = pd.DataFrame({"building_index": sampled_building, "food_index": sampled_building_food_points})

In [34]:
distances_matrix2_index["building_location"] = distances_matrix2_index.building_index.apply(lambda x: [get_tag_from_index(y, building_data, "geometry") for y in [x]])
distances_matrix2_index["food_shop_location"] = distances_matrix2_index.food_index.apply(lambda x: [get_tag_from_index(y, food_data, "geometry") for y in x])
distances_matrix2_index["food_shop_tag"] = distances_matrix2_index.food_index.apply(lambda x: [get_tag_from_index(y, food_data, "shop") for y in x])

## Save data

In [72]:
sav_path = r"C:\Users\Marco\Documents\GitHub\commercial-activities-Milan\Data\02_Processed\NIL_data.pkl"
pkl.dump(NIL, open(sav_path, "wb"))

In [None]:
sav_path = r"C:\Users\Marco\Documents\GitHub\commercial-activities-Milan\Data\02_Processed\food_data.pkl"
pkl.dump(food_data, open(sav_path, "wb"))

In [70]:
sav_path = r"C:\Users\Marco\Documents\GitHub\commercial-activities-Milan\Data\02_Processed\building_data.pkl"
pkl.dump(building_data, open(sav_path, "wb"))

In [71]:
sav_path = r"C:\Users\Marco\Documents\GitHub\commercial-activities-Milan\Data\02_Processed\distances_matrix.pkl"
pkl.dump(distances_matrix, open(sav_path, "wb"))

In [35]:
sav_path = r"C:\Users\Marco\Documents\GitHub\commercial-activities-Milan\Data\02_Processed\distances_matrix3.pkl"
pkl.dump(distances_matrix2, open(sav_path, "wb"))

In [322]:
sav_path = r"C:\Users\Marco\Documents\GitHub\commercial-activities-Milan\Data\02_Processed\distances_matrix3_index.pkl"
pkl.dump(distances_matrix2_index, open(sav_path, "wb"))