In [1]:
import osmnx as ox
import networkx as nx
import geopandas as gpd
import matplotlib.pyplot as plt
import contextily as cx
from contextily import Place
import folium
from shapely.geometry import Polygon
import ast
import pandas as pd
from shapely.geometry import Point, LineString
from networkx.readwrite import json_graph
import json
import numpy as np
import random
from folium.plugins import MarkerCluster
from numpy.random import choice
import seaborn as sns
from sklearn.preprocessing import MinMaxScaler
from branca.colormap import LinearColormap,StepColormap
import plotly.express as px
from branca.colormap import linear

# Notebook-wide configuration: keep inline matplotlib figures small and
# turn on OSMnx's console logging flag.
plt.rcParams.update({"figure.dpi": 50})
ox.settings.log_console = True


In [5]:
def scaled_featuresMinMax(df_coordinates,column_to_scale):
    """Rescale one column of ``df_coordinates`` with scikit-learn's ``MinMaxScaler``.

    Parameters
    ----------
    df_coordinates : pandas.DataFrame
        Frame that contains the column to rescale. It is not modified.
    column_to_scale : str
        Label of the column to rescale.

    Returns
    -------
    numpy.ndarray
        One-dimensional array of rescaled values, in row order.
    """
    # The scaler works on 2-D input, so present the column as shape (n_rows, 1).
    as_column_vector = df_coordinates[column_to_scale].values.reshape(-1, 1)

    # A default-constructed scaler is fitted on, and applied to, the same data.
    rescaled = MinMaxScaler().fit_transform(as_column_vector)

    # Collapse the (n_rows, 1) result back to one dimension for the caller.
    return rescaled.flatten()

def scaled_features(df_coordinates,column_to_scale):
    """Divide one column of ``df_coordinates`` by its Euclidean (L2) norm.

    The plain sum of the column is printed as a side effect; it is reported
    for inspection only and plays no part in the scaling.

    Parameters
    ----------
    df_coordinates : pandas.DataFrame
        Frame that contains the column to normalise. It is not modified.
    column_to_scale : str
        Label of the column to normalise.

    Returns
    -------
    numpy.ndarray
        The column values divided by their L2 norm, in row order.
    """
    raw = df_coordinates[column_to_scale].values

    # Diagnostic output only.
    print(np.sum(raw))

    euclidean_length = np.linalg.norm(raw, axis=0)
    return raw / euclidean_length



def get_color(value, color_ranges):
    """Return the position of the first range in ``color_ranges`` that contains ``value``.

    Parameters
    ----------
    value : number
        Value to classify.
    color_ranges : sequence of (start, end) pairs
        Ranges tested in order; both bounds are inclusive.

    Returns
    -------
    int
        Index of the first matching range, or ``len(color_ranges)`` when no
        range contains ``value``.
    """
    first_hit = next(
        (
            position
            for position, (lower, upper) in enumerate(color_ranges)
            if lower <= value <= upper
        ),
        None,
    )
    if first_hit is None:
        # No range matched: fall back to the "one past the end" index.
        return len(color_ranges)
    return first_hit

def plot_graph_metrics(df_scaled, scoretype, save=False, zoom=10, weightedSize=False,scaling=''):
    """Draw every row of ``df_scaled`` as a colour-coded circle on a folium map.

    Colours are assigned from the distribution of ``df_scaled[scoretype]``:

    * ``green``  - between the column minimum and the median of the
      non-outlier values (inclusive),
    * ``yellow`` - above that median, up to the upper fence ``Q3 + 1.5 * IQR``,
    * ``red``    - above the upper fence, up to the column maximum,
    * ``blue``   - anything that matches none of the above (e.g. NaN).

    The "non-outlier" values are those inside ``[Q1 - 1.5*IQR, Q3 + 1.5*IQR]``.

    Parameters
    ----------
    df_scaled : pandas.DataFrame
        Must contain ``'Latitude'``, ``'Longitude'`` and the ``scoretype`` column.
        The map is centred on the first row.
    scoretype : str
        Column whose values drive the colour (and, optionally, the size).
    save : bool, default False
        When True the map is written to
        ``<scaling>_<scoretype>_WeightedNodeSize.html`` or
        ``<scaling>_<scoretype>_UnweightedNodeSize.html``.
    zoom : number, default 10
        Marker size factor: the base radius is ``zoom / 2``. It does not
        affect the map zoom level, which is fixed at 14.
    weightedSize : bool, default False
        True  -> radius is ``value * zoom / 2`` (blue fallback markers keep
        the base radius). False -> every marker uses the base radius.
        Previously this flag was ignored and markers were always weighted.
    scaling : str, default ''
        Prefix for the saved file name.

    Returns
    -------
    folium.Map

    Raises
    ------
    ValueError
        If ``df_scaled`` has no rows (there is nothing to centre the map on).
    """
    if df_scaled.empty:
        raise ValueError("df_scaled must contain at least one row")

    # The empty matplotlib figure the function used to open and show was
    # dropped: nothing was ever drawn on it.
    scores = df_scaled[scoretype]
    first_row = df_scaled.iloc[0]
    folium_map = folium.Map(
        location=(first_row['Latitude'], first_row['Longitude']),
        zoom_start=14,
    )

    # Tukey fences and the median of the values that fall inside them.
    Q1 = np.percentile(scores, 25)
    Q3 = np.percentile(scores, 75)
    IQR = Q3 - Q1
    lower_fence = Q1 - 1.5 * IQR
    upper_fence = Q3 + 1.5 * IQR
    median = scores[(scores >= lower_fence) & (scores <= upper_fence)].median()

    # Column extremes are loop-invariant, so compute them once.
    lowest = scores.min()
    highest = scores.max()

    print(f"min:{lowest}, median:{median},maxIQR:{upper_fence},max:{highest}")

    base_radius = zoom / 2
    for latitude, longitude, value in zip(df_scaled['Latitude'], df_scaled['Longitude'], scores):
        if lowest <= value <= median:
            color = 'green'
        elif median < value <= upper_fence:
            color = 'yellow'
        elif upper_fence < value <= highest:
            color = 'red'
        else:
            color = 'blue'  # e.g. NaN scores match no band

        # Only scale the marker by its score when asked to; a value that
        # matched no band cannot be trusted as a size either.
        if weightedSize and color != 'blue':
            radius = value * base_radius
        else:
            radius = base_radius

        folium.CircleMarker(
            location=(latitude, longitude),
            radius=radius,
            color=color,
            fill=True,
            fill_color=color,
            fill_opacity=0.7).add_to(folium_map)

    if save:
        size_tag = '_WeightedNodeSize.html' if weightedSize else '_UnweightedNodeSize.html'
        folium_map.save(scaling+'_'+scoretype + size_tag)

    return folium_map

def plot_graph_metricsGradient(df_scaled, scoretype, save=False, zoom=10, weightedSize=False,scaling=''):
    """Draw every row of ``df_scaled`` on a folium map, coloured on a continuous scale.

    Marker colour comes from a ``LinearColormap`` built from plotly's
    ``IceFire`` palette and stretched between the minimum and maximum of
    ``df_scaled[scoretype]``. The colormap is also added to the map as a
    legend captioned with ``scoretype``.

    Parameters
    ----------
    df_scaled : pandas.DataFrame
        Must contain ``'Latitude'``, ``'Longitude'`` and the ``scoretype`` column.
        The map is centred on the first row.
    scoretype : str
        Column whose values drive the colour (and, optionally, the size).
    save : bool, default False
        When True the map is written to
        ``<scaling>_<scoretype>_WeightedNodeSize.html`` or
        ``<scaling>_<scoretype>_UnweightedNodeSize.html``.
    zoom : number, default 10
        Marker radius (unweighted) or radius multiplier (weighted). It does
        not affect the map zoom level, which is fixed at 14.
    weightedSize : bool, default False
        True -> radius is ``value * zoom``; False -> radius is ``zoom``.
    scaling : str, default ''
        Prefix for the saved file name.

    Returns
    -------
    folium.Map
    """
    # Indexing the column here already raises KeyError for an unknown
    # ``scoretype``, so no per-row existence check is needed further down.
    scores = df_scaled[scoretype]

    first_row = df_scaled.iloc[0]
    folium_map = folium.Map(
        location=(first_row['Latitude'], first_row['Longitude']),
        zoom_start=14,
    )

    # NOTE(review): IceFire lives under plotly's *cyclical* palettes, so the
    # two ends of the scale may look alike - confirm this is intended for a
    # min-to-max ramp.
    colormap = LinearColormap(colors=px.colors.cyclical.IceFire,
                              vmin=scores.min(),
                              vmax=scores.max())

    for latitude, longitude, value in zip(df_scaled['Latitude'], df_scaled['Longitude'], scores):
        color = colormap(value)
        folium.CircleMarker(
            location=(latitude, longitude),
            radius=value * zoom if weightedSize else zoom,
            color=color,
            fill=True,
            fill_color=color,
            fill_opacity=0.7).add_to(folium_map)

    # Show the colour scale as a legend strip on the map.
    colormap.caption = scoretype
    colormap.add_to(folium_map)

    if save:
        size_tag = '_WeightedNodeSize.html' if weightedSize else '_UnweightedNodeSize.html'
        folium_map.save(scaling+'_'+scoretype + size_tag)

    return folium_map


def get_influence_factor(node, my_dict):
    """Rate a node by the classes of road that meet at it.

    Parameters
    ----------
    node : hashable
        Key into ``my_dict``.
    my_dict : dict
        Maps each node to a list of ``(neighbour, road_type)`` pairs, where
        ``road_type`` is either a single value or a list of values.

    Returns
    -------
    float
        * ``1.0`` - every connected road is trunk / primary (or a ``_link`` of those),
        * ``0.5`` - every connected road is secondary / tertiary (or a ``_link`` of those),
        * ``0.1`` - any other mix, and also when the node has no roads at all.

    Raises
    ------
    KeyError
        If ``node`` is not a key of ``my_dict``.
    """
    major_roads = {'trunk', 'trunk_link', 'primary', 'primary_link'}
    minor_roads = {'secondary', 'secondary_link', 'tertiary', 'tertiary_link'}

    # Flatten: an edge may carry one road type or a list of them.
    connected_roads = []
    for _, road_type in my_dict[node]:
        if isinstance(road_type, list):
            connected_roads.extend(road_type)
        else:
            connected_roads.append(road_type)

    # Without this guard an empty list would satisfy "all roads are major"
    # vacuously and an unconnected node would get the highest rating.
    if not connected_roads:
        return 0.1

    if major_roads.issuperset(connected_roads):
        return 1.0
    if minor_roads.issuperset(connected_roads):
        return 0.5
    return 0.1

In [3]:
# Fetch the simplified, drivable street network of the study area with OSMnx.
city_name = "Brasov, Romania"
G = ox.graph_from_place(
    city_name,
    simplify=True,
    network_type="drive",
)
# Projection and edge-bearing annotation are currently switched off:
#G_proj = ox.project_graph(G, to_crs='epsg:4326')
#G_proj = ox.add_edge_bearings(G_proj)

In [12]:
nodes_df, edges_df = ox.graph_to_gdfs(G)
extracted_edges = list(edges_df.index)
road_type = list(edges_df['highway'].values)

# Adjacency with road classes: node -> [[neighbour, highway], ...].
# Built in ONE pass over the edges instead of scanning every edge for every
# node. Only the first two entries of each edge index tuple are endpoints;
# anything after them is never compared with a node ID.
my_dict = {node_id: [] for node_id in nodes_df.index}
for edge, highway in zip(extracted_edges, road_type):
    u, v = edge[:2]
    my_dict[u].append([v, highway])
    if u != v:
        # A self-loop is recorded once (with the node as its own neighbour).
        my_dict[v].append([u, highway])

# One row per node: ID, (x, y) tuple, and the two coordinates split out.
df_coordinates = nodes_df["geometry"].apply(lambda point: (point.x, point.y)).reset_index()
df_coordinates.columns = ["Node_ID", "Coordinates"]
df_coordinates[["Longitude","Latitude"]] = pd.DataFrame(df_coordinates["Coordinates"].tolist(), index=df_coordinates.index)

# Attach PageRank by node ID rather than by position, so the result does not
# depend on the dict and the frame happening to share the same ordering.
page_rank_by_node = nx.pagerank(G)
df_coordinates['page_rank'] = df_coordinates['Node_ID'].map(page_rank_by_node)
assert df_coordinates['page_rank'].notna().all(), "a node is missing its PageRank score"

# Weight PageRank by the road-class factor, again in explicit row order.
influence_factor_list = [get_influence_factor(node_id, my_dict) for node_id in df_coordinates['Node_ID']]
df_coordinates['influence_factor_page_rank'] = influence_factor_list * df_coordinates['page_rank'].values

In [16]:
# Reload the node table from disk and derive two rescaled versions of the
# weighted PageRank: min-max scaled and L2-normalised.
# NOTE(review): no cell visible here writes 'df_coordinates.csv' - confirm
# where it comes from so the notebook survives Restart & Run All.
# Alternative input: pd.read_csv('AugmentedPoints.csv')
df_coordinates = pd.read_csv('df_coordinates.csv').assign(
    influence_factor_page_rank_minMax=lambda frame: scaled_featuresMinMax(frame, 'influence_factor_page_rank'),
    influence_factor_page_rank_norm=lambda frame: scaled_features(frame, 'influence_factor_page_rank'),
)

0.18714178878771265


In [17]:
# Quartiles and IQR of the raw weighted PageRank.
raw_scores = df_coordinates['influence_factor_page_rank']
Q1, Q3 = raw_scores.quantile(0.25), raw_scores.quantile(0.75)
IQR = Q3 - Q1

# Keep rows inside the 1.5*IQR fences (lower bound inclusive, upper bound
# exclusive) and take the median of what is left.
raw_inlier_mask = (raw_scores >= Q1 - 1.5 * IQR) & (raw_scores < Q3 + 1.5 * IQR)
filtered_df = df_coordinates[raw_inlier_mask]
median_IQR = filtered_df['influence_factor_page_rank'].median()

In [21]:
# Render (and save) the banded map of the min-max scaled score.
plot_graph_metrics(
    df_coordinates,
    'influence_factor_page_rank_minMax',
    save=True,
    zoom=10,
    weightedSize=True,
    scaling='GoogleStyling',
)

min:0.0, median:0.06040767362840407,maxIQR:0.13966256248846615,max:0.9999999999999999


<Figure size 720x576 with 0 Axes>

## Data Augmentation

In [18]:
# Quartiles and IQR of the min-max scaled score.
minmax_scores = df_coordinates['influence_factor_page_rank_minMax']
Q1, Q3 = minmax_scores.quantile(0.25), minmax_scores.quantile(0.75)
IQR = Q3 - Q1

# Keep rows inside the 1.5*IQR fences (lower bound inclusive, upper bound
# exclusive); their median is the replacement value used for augmentation.
minmax_inlier_mask = (minmax_scores >= Q1 - 1.5 * IQR) & (minmax_scores < Q3 + 1.5 * IQR)
filtered_df = df_coordinates[minmax_inlier_mask]
median_IQR = filtered_df['influence_factor_page_rank_minMax'].median()

In [19]:
# Report the median of the non-outlier rows and the quartile statistics.
print(
    f"Median of the IQR={median_IQR}",
    f"QI={Q1},IQR={IQR} Q3={Q3}",
    sep="\n",
)

Median of the IQR=0.06040767362840407
QI=0.04445015006827212,IQR=0.040652302504562324 Q3=0.08510245257283444


In [20]:
# Node IDs whose min-max score is overwritten with ``median_IQR``; the file
# holds one integer ID per line.
# NOTE(review): absolute, machine-specific path - move it next to the
# notebook (or into a config cell) so other machines can run this.
AUGMENT_POINTS_PATH = 'C:\\Users\\Camelia\\Desktop\\app\\Disertatie\\augmentThesePoints.txt'

with open(AUGMENT_POINTS_PATH, 'r') as file:
    # Blank / whitespace-only lines (e.g. a trailing newline) are skipped
    # instead of crashing int().
    node_ids_to_augment = [
        int(text)
        for text in (line.strip() for line in file)
        if text
    ]

# Report, in file order, every requested ID that is absent from the table.
known_node_ids = set(df_coordinates['Node_ID'])
for node_id in node_ids_to_augment:
    if node_id not in known_node_ids:
        print(f"No matching rows found for Node_ID: {node_id}")

# Overwrite all matching rows in one vectorised assignment.
augment_mask = df_coordinates['Node_ID'].isin(node_ids_to_augment)
df_coordinates.loc[augment_mask, 'influence_factor_page_rank_minMax'] = median_IQR

No matching rows found for Node_ID: 2504784425
No matching rows found for Node_ID: 252551993


In [22]:
# Persist the augmented node table next to the notebook.
augmented_csv_path = 'AugmentedPoints.csv'
df_coordinates.to_csv(augmented_csv_path)