# Create Pytorch Geometric Data Objects

In [3]:
import pandas as pd
import osmnx as ox
import networkx as nx
from tqdm import tqdm
import geopandas as gpd
from utils import createPytorchData

## Cargar datos de santiago y mapa de OSM

In [2]:
nodos_santiago = pd.read_csv('Data/dataset_santiago_ismt.csv')
nodos_santiago.head(2)

Unnamed: 0,latlong,beautiful,boring,depressing,lively,safe,wealthy,lat,lon,node_id,division,prom_ismt
0,"-33.323944,-70.51263428391168",-0.306948,1.565049,0.572029,-1.137733,-0.120456,-0.561887,-33.323944,-70.512634,1396118148,Lo Barnechea,0.915742
1,"-33.323944,-70.5127291",-0.421388,0.309495,0.368965,-0.098733,-0.103042,-0.162294,-33.323944,-70.512729,1396118148,Lo Barnechea,0.915742


### Cambiar nombre comuna a int

In [8]:
nodos_santiago['division_num'] = pd.Categorical(nodos_santiago['division']).codes
print(nodos_santiago[['division', 'division_num']].drop_duplicates()[:5])

          division  division_num
0     Lo Barnechea            17
3341      Vitacura            41
3856        Colina             4
3893     Quilicura            30
4817         Lampa            15


In [9]:
num2div = {}
div2num = {}
for _, row in nodos_santiago[['division', 'division_num']].drop_duplicates().iterrows():
    num2div[row.division_num] = row.division
    div2num[row.division] = row.division_num

In [10]:
import pickle
with open('../Data/Comunas/division2numbers.pkl', 'wb') as fp:
    pickle.dump(div2num, fp)
with open('../Data/Comunas/numbers2division.pkl', 'wb') as fp:
    pickle.dump(num2div, fp)

In [11]:
G = ox.load_graphml("../Maps/santiago_drive.graphml")
print(f'Number of nodes: {len(G.nodes())}')
print(f'Number of edges: {len(G.edges())}')
print(f'Number of strongly conncected components: {nx.number_strongly_connected_components(G)}')

Number of nodes: 355936
Number of edges: 673565
Number of strongly conncected components: 2450


In [12]:
components_length = [len(list(component)) for component in list(nx.strongly_connected_components(G))]
components_length.sort(reverse=True)
print(f"{round(components_length[0]/len(G.nodes()),5) * 100}% of nodes in one component")

99.259% of nodes in one component


## Add values to the complementary nodes of the OSM Graph

In [13]:
def get_nodes_attrs(G):
    attributes = {}
    for node, attrs in G.nodes.data():
        #print(f"Atributos de {node}: {', '.join(attrs.keys())}")
        for att in attrs.keys():
            if att not in attributes:
                attributes[att] = 0
            attributes[att] += 1
    return attributes

def get_edges_attrs(G):
    edges_atts = {}
    for u, v, attr in G.edges.data():
        for att in attr.keys():
            if att not in edges_atts:
                edges_atts[att] = 0
            edges_atts[att] += 1
    return edges_atts

In [14]:
# Se agregan los atributos como "boring", "lively" etc. a los nodos existentes que se encuentran en el grafo G
for _, row in nodos_santiago.iterrows():
    G.add_node(row.node_id, beautiful=row.beautiful, boring=row.boring,
               depressing=row.depressing, lively=row.lively,
               safe=row.safe, wealthy=row.wealthy, division=row.division_num, ismt=row.prom_ismt)

In [15]:
get_nodes_attrs(G)

{'y': 355936,
 'x': 355936,
 'street_count': 355936,
 'highway': 35935,
 'beautiful': 84252,
 'boring': 84252,
 'depressing': 84252,
 'lively': 84252,
 'safe': 84252,
 'wealthy': 84252,
 'division': 84252,
 'ismt': 84252,
 'ref': 296}

In [16]:
# Eliminar atributos extras de OSM
for node in G.nodes:
    if 'ref' in G.nodes[node]:
        G.nodes[node].pop('ref', None)
    if 'highway' in G.nodes[node]:
        G.nodes[node].pop('highway', None)
    if 'street_count' in G.nodes[node]:
        G.nodes[node].pop('street_count', None)

In [17]:
for u, v, attr in G.edges.data():
    for name in ['osmid', 'oneway', 'lanes', 'name', 'access', 'maxspeed', 'ref', 'bridge', 'junction', 'width', 'tunnel', 'osmid' 'oneway', 'highway', 'reversed', 'speed_kph']:
        if name in attr:
            del attr[name]

In [29]:
df_comunas = gpd.read_file('Data/Comunas/COMUNAS_2020.shp')
df_ismt = gpd.read_file('Data/ISMT/ISMT.shp')
santiago_division = df_comunas[df_comunas.CUT_REG == '13']

def getDivision(point):
    for div in santiago_division.geometry:
        if point.within(div):
            return div
        
def getIsmtProm(point):
    for com in df_ismt.geometry:
        if point.within(com):
            return com

def setDivisionNum(division: str):
    return div2num[division]

def geoDataProcess(df_extra):
    # Obtener latitud y longitug
    lats = []
    lons = []
    for _, row in df_extra.iterrows():
        node = row.node_id
        lats.append(G.nodes[node]['y'])
        lons.append(G.nodes[node]['x'])
    df_extra['lat'] = lats
    df_extra['lon'] = lons

    coords_point = gpd.GeoDataFrame(df_extra, geometry=gpd.points_from_xy(df_extra.lon,df_extra.lat))
    coords_point['division'] = coords_point.geometry.apply(getDivision)
    tqdm.pandas()
    coords_point['geo_ismt'] = coords_point.geometry.progress_apply(getIsmtProm)
    
    df_merge = coords_point.merge(santiago_division[['geometry', 'COMUNA']], left_on='division', right_on='geometry')
    df_merge = df_merge.drop(['geometry_x', 'geometry_y', 'division'], axis=1)
    df_merge = df_merge.rename({'COMUNA':'division'}, axis=1)
    df_merge['division_num'] = df_merge.division.apply(setDivisionNum)
    
    df_merge = df_merge.merge(df_ismt[['geometry', 'prom_ismt']], left_on='geo_ismt', right_on='geometry', how='left')
    df_extra = df_merge.drop(['geometry', 'geo_ismt'], axis=1)

    
    

    return df_extra

In [21]:
ls_nodos_santiago = list(nodos_santiago.node_id.unique())
extra_nodes = list(set(G.nodes()) - set(ls_nodos_santiago))
len(extra_nodes) + 84252

355936

## Zero Map

In [33]:
# Setear valores de nodos extras en 0
ls_nodos_santiago = list(nodos_santiago.node_id.unique())
extra_nodes = list(set(G.nodes()) - set(ls_nodos_santiago))
print(f"Number of nodes: {len(extra_nodes)}")
beautiful_values = [0] * len(extra_nodes)
boring_values = [0] * len(extra_nodes)
depressing_values = [0] * len(extra_nodes)
lively_values = [0] * len(extra_nodes)
safe_values = [0] * len(extra_nodes)
wealthy_values = [0] * len(extra_nodes)

columns_name = ['node_id', 'beautiful', 'boring', 'depressing', 'lively', 'safe', 'wealthy']
df_extra = pd.DataFrame(list(zip(extra_nodes, beautiful_values, boring_values, depressing_values,
                                 lively_values, safe_values, wealthy_values)), columns=columns_name)


df_extra = geoDataProcess(df_extra)

Number of nodes: 271684


100%|██████████| 271684/271684 [1:47:53<00:00, 41.97it/s]  


In [35]:
df_extra = df_extra.fillna(0)

In [36]:
for _, row in tqdm(df_extra.iterrows()):
    G.add_node(row.node_id, beautiful=row.beautiful, boring=row.boring,
               depressing=row.depressing, lively=row.lively,
               safe=row.safe, wealthy=row.wealthy, division=row.division_num, ismt=row.prom_ismt)

271684it [00:21, 12606.38it/s]


In [37]:
nodes_ls = list(G.nodes())
c=0
dic_nodes = {}
for node in nodes_ls:
    dic_nodes[node] = c
    c+=1

G = nx.relabel_nodes(G, dic_nodes)

In [38]:
get_nodes_attrs(G)

{'y': 355936,
 'x': 355936,
 'beautiful': 355936,
 'boring': 355936,
 'depressing': 355936,
 'lively': 355936,
 'safe': 355936,
 'wealthy': 355936,
 'division': 355936,
 'ismt': 355936}

In [39]:
ox.save_graphml(G, "Maps/santiago_drive_zero.graphml")

In [4]:
G = ox.load_graphml("Maps/santiago_drive_zero.graphml")

In [5]:
cont = 0
nodos_por_comuna = {}
for node, attrs in G.nodes(data=True):
    if attrs['boring'] != 0:
        cont += 1
    if attrs['division'] not in nodos_por_comuna:
        nodos_por_comuna[attrs['division']] = 0
    nodos_por_comuna[attrs['division']] += 1    
print(cont)
print(nodos_por_comuna)

355936
{'39': 8397, '32': 5333, '27': 8808, '7': 5942, '16': 20954, '42': 8183, '23': 4502, '37': 3115, '9': 2974, '21': 29320, '28': 16907, '3': 4194, '33': 8809, '30': 15328, '5': 5315, '41': 12235, '25': 13110, '11': 23402, '12': 6454, '20': 6258, '2': 6787, '14': 7727, '38': 3127, '10': 3839, '18': 3550, '8': 8638, '35': 4810, '22': 5439, '31': 4702, '29': 33428, '17': 18161, '13': 6546, '34': 18281, '6': 5216, '24': 521, '19': 4426, '26': 3451, '36': 542, '15': 2758, '0': 920, '1': 2615, '40': 736, '4': 176}


In [6]:
createPytorchData(G, file_name='santiago_zero_ismt')

Number of nodes: 355936
Number of edges: 673565
Number of edge attributes: 673565
Size of x tensor: torch.Size([355936, 10])
Size of edge_index tensor: torch.Size([2, 673565])
Size of edge_attributes tensor: torch.Size([673565, 2])
Graph saved as PyTorch in Data/santiago_zero_ismt.pt


## Mean Global

In [35]:
# Setear valores de nodos extras en media global
ls_nodos_santiago = list(nodos_santiago.node_id.unique())
extra_nodes = list(set(G.nodes()) - set(ls_nodos_santiago))

beautiful_values = [nodos_santiago['beautiful'].median()] * len(extra_nodes)
boring_values = [nodos_santiago['boring'].median()] * len(extra_nodes)
depressing_values = [nodos_santiago['depressing'].median()] * len(extra_nodes)
lively_values = [nodos_santiago['lively'].median()] * len(extra_nodes)
safe_values = [nodos_santiago['safe'].median()] * len(extra_nodes)
wealthy_values = [nodos_santiago['wealthy'].median()] * len(extra_nodes)

columns_name = ['node_id', 'beautiful', 'boring', 'depressing', 'lively', 'safe', 'wealthy']
df_extra = pd.DataFrame(list(zip(extra_nodes, beautiful_values, boring_values, depressing_values,
                                 lively_values, safe_values, wealthy_values)), columns=columns_name)


df_extra = geoDataProcess(df_extra)

In [37]:
for _, row in tqdm(df_extra.iterrows()):
    G.add_node(row.node_id, beautiful=row.beautiful, boring=row.boring,
               depressing=row.depressing, lively=row.lively,
               safe=row.safe, wealthy=row.wealthy, comuna=row.comuna_num)

268038it [00:18, 14178.12it/s]


In [38]:
ox.save_graphml(G, "Maps/santiago_drive_global_mean.graphml")

In [39]:
createPytorchData(G, file_name='santiago_global_mean')

Number of nodes: 355936
Number of edges: 673565
Number of edge attributes: 673565
Size of x tensor: torch.Size([355936, 9])
Size of edge_index tensor: torch.Size([2, 673565])
Size of edge_attributes tensor: torch.Size([673565, 2])
Graph saved as PyTorch in Data/santiago_global_mean.pt
