<div style="border: none; margin: 5px 0; border-top: 1px dashed #FFFFFF; border-bottom: 1px dashed #FFFFFF; height: 5px;"></div>

<h2 style="color: #FFA07A;">2. Exploring Data</h2>

In [4]:
import ipywidgets as widgets
import time

# --- Create an HTML widget to display the typing effect ---
# The widget starts as an empty <div>; its `value` is overwritten further down
# in this cell as the text is revealed.
output = widgets.HTML(value="<div></div>")
# Show the widget now so that later assignments to `output.value` update it in place.
display(output)

# --- Properly formatted text for typing effect ---
text = """
<h3>How did we prepare the data?</h3> 
<p><b>The data was pre-processed and structured into .csv files through the following procedures:</b></p> 

<p>🔸 <b>.csv services_by_category:</b> contains buildings in the municipality of Porto extracted from OpenStreetMap (OSM) and essential services identified on Google Maps. Non-residential buildings were excluded. Each building was associated with services within a 1.5 km radius (15-minute walk). The <b>K-D Tree</b> algorithm was used for proximity queries and the <b>Dijkstra</b> algorithm to find the shortest path between two points in a graph.</p> 

<p>🔸 <b>.csv average_distance_to_services:</b> includes the above procedures and adds the calculation of the average walking distance to essential services, considering paths up to 1.5 km. Each building receives an average distance to the available services.</p> 

<p>🔸 <b>.csv population_65_plus:</b> distributes the population aged 65 and over across buildings within each unit of the Geographic Reference Base for Information (BGRI), proportionally to the building area and the population of each unit.</p> 

<p><b>The study considered 31,873 buildings with 58,774 residents aged 65 or older. The following services were identified:</b></p> 

<ul> 
  <li>🔸 Banks (100)</li> 
  <li>🔸 Supermarkets (94)</li> 
  <li>🔸 Pharmacies (77)</li> 
  <li>🔸 Parks and gardens (33)</li> 
  <li>🔸 Post offices (CTT) (24)</li> 
  <li>🔸 Health centers (18)</li> 
  <li>🔸 Hospitals (2)</li> 
</ul> 

<div> 
🔸 <b><u>The unit of analysis throughout the study is the building.</u></b> For more details on the data preprocessing procedures, see the full documentation on <b><a href="https://github.com/RobertoOlivetree/Average_Distance_to_Services_by_Category.git" target="_blank">GitHub</a></b>.
</div> 
"""

# --- Create typing effect inside the HTML while keeping original formatting ---
# `text_html` accumulates the opening container plus every word revealed so far;
# the closing tag is appended only when the widget is refreshed.
text_html = """
<div style="background-color: #FFFFFF; color: #333333; padding: 15px; 
            border-left: 5px solid #FFA500; font-family: Arial, sans-serif; 
            text-align: justify; font-size: 16px; line-height: 1.6;">
"""
for word in text.split():
    # Whitespace-separated tokens are re-joined with single spaces.
    text_html = text_html + (word + " ")
    output.value = "".join((text_html, "</div>"))
    time.sleep(0.10)  # pause between tokens to produce the typing animation

# Final refresh (also covers the case where `text` contains no tokens at all).
output.value = "".join((text_html, "</div>"))

HTML(value='<div></div>')

In [5]:
from IPython.display import Javascript, display
# hide-me
# Calls `hideCells()` on a front-end object named `window.cellVisibilityManager`.
# NOTE(review): that object is not defined in this section; presumably it is
# injected elsewhere and acts on cells carrying the `hide-me` marker — confirm.
display(Javascript('window.cellVisibilityManager.hideCells();'))

# Execute the companion notebook through the `%run` line magic.
# NOTE(review): the cells below use names (os, pd, gpd, folium, dash, ...) that are
# not imported in this notebook section; presumably this run provides them — confirm.
ipython = get_ipython()
ipython.run_line_magic("run", "case_study_prep.ipynb")

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

In [8]:
from IPython.display import Javascript, display
# hide-me

# --- Load CSV Files and Convert to Tables (Spatial if Coordinates Exist) ---
def load_data(file_paths):
    """Read every CSV in ``file_paths`` and return the tables keyed by dataset name.

    A table becomes a ``GeoDataFrame`` (CRS ``EPSG:4326``) when it has either

    * a ``geometry`` column, whose non-null values are parsed with ``wkt.loads``, or
    * ``stop_lat`` / ``stop_lon`` columns, which are turned into ``Point`` objects.

    Rows without a geometry are dropped from spatial tables. Tables with no
    spatial columns at all are returned as plain ``DataFrame`` objects.

    Parameters
    ----------
    file_paths : dict
        Mapping ``dataset name -> path of the CSV file``.

    Returns
    -------
    dict
        Mapping ``dataset name -> DataFrame or GeoDataFrame``.

    Raises
    ------
    FileNotFoundError
        If one of the paths does not exist.
    ValueError
        If a file exists but cannot be parsed as CSV (the original error is
        attached as ``__cause__``).
    """
    data = {}
    for name, path in file_paths.items():
        if not os.path.exists(path):
            raise FileNotFoundError(f"The file '{path}' was not found for the dataset '{name}'")
        print(f"[INFO] Loading: {name}")
        try:
            table = pd.read_csv(path)
        except Exception as e:
            # Chain explicitly so the parser's traceback is preserved.
            raise ValueError(f"Error loading file '{path}': {e}") from e
        if 'geometry' in table.columns:
            table['geometry'] = table['geometry'].apply(lambda x: wkt.loads(x) if pd.notnull(x) else None)
            table = gpd.GeoDataFrame(table, geometry='geometry', crs='EPSG:4326')
        elif 'stop_lat' in table.columns and 'stop_lon' in table.columns:
            table['geometry'] = table.apply(
                lambda row: Point(row['stop_lon'], row['stop_lat'])
                if pd.notnull(row['stop_lon']) and pd.notnull(row['stop_lat']) else None,
                axis=1
            )
            table = gpd.GeoDataFrame(table, geometry='geometry', crs='EPSG:4326')
        # Only spatial tables have a 'geometry' column; dropping on a missing
        # column would raise KeyError for purely tabular files.
        if 'geometry' in table.columns:
            table = table.dropna(subset=['geometry'])
        data[name] = table
    return data

# --- Prepare Final Tables for Analysis, Merge Datasets and Clean Missing Values ---
def prepare_data(data):
    """Merge the loaded datasets on ``osm_id`` and return the analysis table.

    Steps, in order:

    1. Every table in ``data`` must have an ``osm_id`` column; it is cast to
       ``str`` in place (the tables inside ``data`` are modified).
    2. A copy of ``data['average_distance_to_services']`` is left-joined with
       ``services_by_category`` and ``population_65_plus`` (all their columns);
       a dataset that is absent from ``data`` is skipped with a warning.
    3. Rows lacking any of ``average_distance_to_services``,
       ``number_of_nearby_services`` or ``pop_65_plus`` are dropped.
    4. Expected output columns that are still missing are created filled with ``None``.

    Parameters
    ----------
    data : dict
        Mapping ``dataset name -> DataFrame``.

    Returns
    -------
    DataFrame
        A copy restricted to the fixed list of analysis columns.

    Raises
    ------
    KeyError
        If a table has no ``osm_id`` column, or a merge step hits a missing key.
    """
    # 1. Normalise the join key so that every table uses string ids.
    for dataset_name in data:
        frame = data[dataset_name]
        if 'osm_id' not in frame.columns:
            raise KeyError(f"Column 'osm_id' does not exist in the file '{dataset_name}'")
        frame['osm_id'] = frame['osm_id'].astype(str)

    # 2. Left-join the secondary datasets onto the distance table.
    #    (dataset, columns) pairs; ``None`` means "take every column".
    plan = (
        ('services_by_category', None),
        ('population_65_plus', None),
    )
    merged = data['average_distance_to_services'].copy()
    for dataset_name, wanted in plan:
        if dataset_name not in data:
            print(f"[WARNING] File '{dataset_name}' not found. Merge skipped.")
            continue
        try:
            right = data[dataset_name]
            if wanted is None:
                wanted = right.columns
            merged = pd.merge(merged, right[wanted], on='osm_id', how='left')
            print(f"[INFO] Merge with '{dataset_name}' complete. Total rows: {merged.shape[0]}")
        except KeyError as e:
            raise KeyError(f"Error merging with '{dataset_name}': {e}.")

    # 3. Keep only rows that have all three core measures.
    merged.dropna(
        subset=['average_distance_to_services', 'number_of_nearby_services', 'pop_65_plus'],
        inplace=True,
    )

    # 4. Guarantee the full output schema, then project onto it.
    output_columns = [
        'number_of_nearby_services', 'pop_65_plus', 'average_distance_to_services',
        'Health Centers', 'Pharmacies', 'Hospitals', 'Supermarkets',
        'Banks', 'Parks and Gardens', 'Post Offices', 'geometry'
    ]
    for absent in [name for name in output_columns if name not in merged.columns]:
        merged[absent] = None
    return merged[output_columns].copy()

# --- Class to Suppress Messages During Data Loading ---
class SuppressOutput:
    """Context manager that sends ``sys.stdout`` to ``os.devnull`` inside the block.

    On exit the stream that was active on entry is restored. Only the devnull
    handle opened by this object is closed, so code inside the block that
    re-points ``sys.stdout`` elsewhere does not get its stream closed.
    Exceptions raised inside the block are propagated unchanged.
    """
    def __enter__(self):
        self._original_stdout = sys.stdout
        # Keep our own reference: __exit__ must close this handle and not
        # whatever sys.stdout happens to be at that time.
        self._devnull = open(os.devnull, 'w', encoding='utf-8')
        sys.stdout = self._devnull
        return self
    def __exit__(self, exc_type, exc_value, traceback):
        try:
            self._devnull.close()
        finally:
            # Restore stdout even if closing the sink fails.
            sys.stdout = self._original_stdout
        return False

# Dataset name -> CSV file, resolved relative to the working directory.
file_paths = dict(
    average_distance_to_services='average_distance_to_services.csv',
    services_by_category='services_by_category.csv',
    population_65_plus='population_65_plus.csv',
)

# Load the raw tables with the progress prints silenced ...
with SuppressOutput():
    data = load_data(file_paths)
# ... then merge them into the analysis table, also silently.
with SuppressOutput():
    df_services = prepare_data(data)

# Column order of the final analysis view.
final_columns = (
    ['number_of_nearby_services', 'pop_65_plus', 'average_distance_to_services']
    + ['Health Centers', 'Pharmacies', 'Hospitals', 'Supermarkets', 'Banks']
    + ['Parks and Gardens', 'Post Offices', 'geometry']
)
df_services_final = df_services[final_columns]

# --- Functions for Interactive Charts and Maps ---
def create_corr_map(gdf):
    """Return a Dash ``html.Div`` with a heading and a correlation heatmap.

    The correlation matrix is computed with ``DataFrame.corr()`` over those of
    the ten analysis variables that are present in ``gdf``. Each cell is
    annotated with its value rounded to two decimals, on a black theme.
    """
    candidate_columns = (
        'number_of_nearby_services', 'pop_65_plus', 'average_distance_to_services',
        'Health Centers', 'Pharmacies', 'Hospitals', 'Supermarkets',
        'Banks', 'Parks and Gardens', 'Post Offices',
    )
    cols = [name for name in candidate_columns if name in gdf.columns]
    values = gdf[cols].corr().values

    trace = go.Heatmap(
        z=values,
        x=cols,
        y=cols,
        colorscale='Viridis',
        zmin=-1, zmax=1,  # fixed range so colours are comparable between runs
        colorbar=dict(title='Correlation', tickfont=dict(color='white')),
        text=np.round(values, 2),
        texttemplate='%{text}',
        textfont=dict(color='white', size=12),
    )
    heatmap = go.Figure(data=trace)

    x_axis = dict(title='Variables', tickangle=45, tickfont=dict(color='white'), side='bottom')
    # Reversed y axis so the first variable is at the top, like a printed matrix.
    y_axis = dict(title='Variables', tickfont=dict(color='white'), autorange='reversed')
    heatmap.update_layout(
        title_font=dict(size=18, color='white'),
        paper_bgcolor='black',
        plot_bgcolor='black',
        font=dict(color='white'),
        xaxis=x_axis,
        yaxis=y_axis,
        margin=dict(l=100, r=100, t=80, b=150)
    )

    heading = html.H3(
        "Correlation between variables",
        style={'color': 'white', 'text-align': 'center', 'margin-bottom': '20px'},
    )
    chart = dcc.Graph(figure=heatmap, config={'displayModeBar': False})
    return html.Div([heading, chart])

def extract_service_types(row):
    """Turn a serialized ``{category: count}`` mapping into tooltip HTML.

    ``row`` is parsed first as a Python literal and, if that fails, as JSON
    after swapping single quotes for double quotes. Parsing as a literal first
    keeps apostrophes inside keys intact, which the quote swap alone would corrupt.

    Returns ``"Total services: <sum><br><key> (<count>)<br>..."`` on success.
    Returns ``'Total services: 0'`` when ``row`` is not a string (e.g. NaN),
    cannot be parsed, is not a mapping, or has counts that cannot be summed.
    """
    import ast  # local import: the notebook's import cell is outside this section

    fallback = 'Total services: 0'
    if not isinstance(row, str):
        return fallback
    try:
        categories = ast.literal_eval(row)
    except (ValueError, SyntaxError, TypeError, MemoryError, RecursionError):
        # Not a Python literal (e.g. JSON with true/null): try the JSON route.
        try:
            categories = json.loads(row.replace("'", "\""))
        except json.JSONDecodeError:
            return fallback
    if not isinstance(categories, dict):
        return fallback
    try:
        total = sum(categories.values())
    except TypeError:
        # Non-numeric counts: treat as unusable instead of crashing the caller's .apply().
        return fallback
    details = '<br>'.join([f"{key} ({value})" for key, value in categories.items()])
    return f"Total services: {total}<br>{details}"

def format_decimal_column(table, col):
    """Round the non-null values of ``table[col]`` to one decimal, in place.

    The column is overwritten on the table that was passed in, and that same
    table object is returned. Null entries are left as they are.
    """
    def _one_decimal(value):
        if pd.isnull(value):
            return value
        return round(value, 1)

    table[col] = table[col].apply(_one_decimal)
    return table

def create_map_with_services(gdf, service_col):
    """Return the HTML of a folium map with one blue shape per row of ``gdf``.

    Hovering a shape shows the content of ``service_col`` under the label
    "Available services: ". The map also carries a scale bar, a mouse-position
    read-out and a mini map, the latter two positioned by an injected CSS block.
    """
    porto_map = folium.Map(
        location=[41.1490, -8.6291],
        zoom_start=13,
        tiles="CartoDB positron",
        control_scale=True,
    )

    def _uniform_style(feature):
        # Every feature is drawn the same way; the argument is required by folium.
        return {'fillColor': 'blue', 'color': 'black', 'weight': 0.5, 'fillOpacity': 0.7}

    hover = folium.GeoJsonTooltip(
        fields=[service_col],
        aliases=['Available services: '],
        sticky=True,
        parse_html=True,
    )
    folium.GeoJson(gdf, style_function=_uniform_style, tooltip=hover).add_to(porto_map)

    # Cursor coordinates, two decimals; longitude is printed as an absolute value with a W suffix.
    porto_map.add_child(MousePosition(
        position='topleft',
        separator=' | ',
        prefix='',
        lat_formatter="function(num) {return L.Util.formatNum(num, 2) + '° N';}",
        lng_formatter="function(num) {return L.Util.formatNum(Math.abs(num), 2) + '° W';}"
    ))
    porto_map.add_child(MiniMap(
        toggle_display=True,
        position='topleft',
        width=140,
        height=140,
        zoom_level_offset=-6,
        tile_layer="OpenStreetMap",
    ))

    # CSS overrides that pin the coordinate read-out and the mini map in the top-left corner.
    control_css = """
    <style>
      .leaflet-control-coordinate {
        top: 10px !important;
        left: 10px !important;
        z-index: 10001 !important;
      }
      .leaflet-control-minimap {
        top: 70px !important;
        left: 10px !important;
        bottom: auto !important;
        z-index: 9999 !important;
      }
    </style>
    """
    porto_map.get_root().html.add_child(folium.Element(control_css))
    return porto_map._repr_html_()

def create_map(gdf, value_col, color_scale, legend, tooltip_name):
    """Return the HTML of a folium choropleth of ``value_col``.

    Parameters
    ----------
    gdf : GeoDataFrame
        Features to draw; rows where ``value_col`` is null are ignored
        (the caller's table is not modified).
    value_col : str
        Column that drives the fill colour and is shown in the tooltip.
    color_scale : colour map exposing ``.scale(vmin, vmax)``
        Stretched over the min/max of ``value_col``.
    legend : str
        Caption of the colour legend.
    tooltip_name : str
        Label shown before the value in the tooltip.

    Returns
    -------
    str
        ``_repr_html_()`` of the map.
    """
    gdf = gdf.dropna(subset=[value_col])
    fmap = folium.Map(location=[41.1550, -8.6291], zoom_start=13, min_zoom=13, tiles="CartoDB positron", control_scale=False)
    # Metric scale bar in the top-left corner (folium's built-in control_scale is off).
    # This snippet sits in the page body, while the Leaflet map variable is created by
    # folium's own script, which is rendered after the body. Running the call
    # immediately would hit an undefined variable, so it is deferred until the
    # document has been fully parsed.
    fmap.get_root().html.add_child(folium.Element(f"""
    <script>
      document.addEventListener('DOMContentLoaded', function() {{
        L.control.scale({{position: 'topleft', metric: true, imperial: false}}).addTo({fmap.get_name()});
      }});
    </script>
    """))
    # CSS that positions the scale bar, coordinate read-out and mini map.
    fmap.get_root().html.add_child(folium.Element("""
    <style>
      .leaflet-control-scale.leaflet-control { top: 60px !important; left: 10px !important; }
      .leaflet-control-coordinate { top: 10px !important; left: 10px !important; background-color: white; padding: 2px 6px; font-size: 12px; font-family: Arial, sans-serif; border-radius: 4px; box-shadow: 0 0 4px rgba(0,0,0,0.2); z-index: 10001 !important;}
      .leaflet-control-minimap { top: 70px !important; left: 10px !important; bottom: auto !important; z-index: 9999 !important;}
    </style>
    """))
    # Cursor coordinates, two decimals; longitude is printed as an absolute value with a W suffix.
    mouse_position = MousePosition(
        position='topleft',
        separator=' | ',
        prefix='',
        lat_formatter="function(num) {return L.Util.formatNum(num, 2) + '° N';}",
        lng_formatter="function(num) {return L.Util.formatNum(Math.abs(num), 2) + '° W';}"
    )
    fmap.add_child(mouse_position)
    minimap = MiniMap(toggle_display=True, position='topleft', width=140, height=140, zoom_level_offset=-6, tile_layer="OpenStreetMap")
    fmap.add_child(minimap)
    # Stretch the colour map over the observed value range and use it as the legend.
    scale = color_scale.scale(gdf[value_col].min(), gdf[value_col].max())
    scale.caption = legend
    scale.add_to(fmap)
    folium.GeoJson(
        gdf,
        style_function=lambda feat: {
            'fillColor': scale(feat['properties'][value_col]),
            'color': 'black',
            'weight': 0.5,
            'fillOpacity': 0.7,
        },
        tooltip=folium.GeoJsonTooltip(fields=[value_col], aliases=[tooltip_name], sticky=True)
    ).add_to(fmap)
    return fmap._repr_html_()

def show_map(gdf, value_col, color_scale, legend, tooltip_name):
    """Build the choropleth with ``create_map`` and wrap it for notebook display.

    The map HTML is placed inside a ``<div>`` with a negative bottom margin and
    handed to ``HTML``; the arguments are forwarded to ``create_map`` unchanged.
    """
    wrapper = '<div style="margin-bottom:-150px;">{}</div>'
    rendered = create_map(gdf, value_col, color_scale, legend, tooltip_name)
    return HTML(wrapper.format(rendered))

# --- Read Parish Boundaries and Prepare Parish Map ---
# Stroke settings for the parish outlines drawn by create_parishes_map().
BOUNDARY_COLOR   = '#FF8C00'
BOUNDARY_WEIGHT  = 1
BOUNDARY_OPACITY = 1.0

# NOTE(review): the file name contains a double dot ('porto_parishes..csv');
# confirm that this matches the file on disk and is not a typo.
df_parishes = pd.read_csv('porto_parishes..csv', encoding='utf-8')
# The 'geometry' column holds WKT text; parse every row (no null handling here).
df_parishes['geometry'] = df_parishes['geometry'].apply(wkt.loads)
# The GeoPackage is opened only to read the CRS of its 'cont_municipios' layer.
# NOTE(review): this assumes the parish WKT is expressed in that same CRS — confirm.
with fiona.open('Continente_CAOP2024.gpkg', layer='cont_municipios') as src:
    original_crs = src.crs
gdf_parishes = gpd.GeoDataFrame(df_parishes, geometry='geometry', crs=original_crs)
# Reproject to EPSG:4326 (lon/lat) for display with folium.
gdf_parishes = gdf_parishes.to_crs(epsg=4326)
# Zero-width buffer; presumably meant to repair invalid polygons — confirm.
gdf_parishes['geometry'] = gdf_parishes['geometry'].buffer(0)
# Centre of the overall bounding box; read as globals by create_parishes_map().
minx, miny, maxx, maxy = gdf_parishes.total_bounds
center_lat = (miny + maxy) / 2
center_lon = (minx + maxx) / 2

def create_parishes_map(gdf_parishes):
    """Return the HTML of a dark folium map showing the parish outlines.

    Parameters
    ----------
    gdf_parishes : GeoDataFrame
        Parish polygons; must contain a ``parish`` column, which is shown in
        the hover tooltip.

    Returns
    -------
    str
        ``_repr_html_()`` of the map.

    Notes
    -----
    The map is centred on the module-level ``center_lat`` / ``center_lon`` and
    the outlines use the module-level ``BOUNDARY_*`` constants. Zoom buttons are
    disabled, and the attribution and layer controls are hidden via CSS.
    """
    m = folium.Map(
        location=[center_lat, center_lon],
        zoom_start=13,
        tiles=None,  # no default tiles; the dark layer is added explicitly below
        zoom_control=False,
        scrollWheelZoom=True
    )
    folium.TileLayer(
        tiles='CartoDB.DarkMatter',
        attr='©CartoDB',
        name='CartoDB Dark Matter',
        control=False
    ).add_to(m)
    folium.GeoJson(
        gdf_parishes,
        style_function=lambda feat: {
            'fill': True,
            # Fully transparent fill (alpha 00); with 'fill' enabled the
            # interior is part of the shape, not only the outline.
            'fillColor': '#00000000',
            'color':   BOUNDARY_COLOR,
            'weight':  BOUNDARY_WEIGHT,
            'opacity': BOUNDARY_OPACITY
        },
        control=False,
        tooltip=folium.GeoJsonTooltip(
            fields=['parish'],
            aliases=['Parish: '],
            sticky=True
        )
    ).add_to(m)
    # Cursor coordinates, two decimals; longitude is printed as an absolute value with a W suffix.
    mouse_position = MousePosition(
        position='topleft',
        separator=' | ',
        prefix='',
        lat_formatter="function(num) {return L.Util.formatNum(num, 2) + '° N';}",
        lng_formatter="function(num) {return L.Util.formatNum(Math.abs(num), 2) + '° W';}"
    )
    m.add_child(mouse_position)
    minimap = MiniMap(
        toggle_display=True,
        position='topleft',
        width=140,
        height=140,
        zoom_level_offset=-6,
        tile_layer="OpenStreetMap"
    )
    m.add_child(minimap)
    # CSS that pins the coordinate read-out and the mini map in the top-left corner.
    m.get_root().html.add_child(folium.Element("""
    <style>
      .leaflet-control-coordinate {
        top: 10px !important;
        left: 10px !important;
        z-index: 10001 !important;
      }
      .leaflet-control-minimap {
        top: 70px !important;
        left: 10px !important;
        bottom: auto !important;
        z-index: 9999 !important;
      }
    </style>
    """))
    css = """
    <style>
        .leaflet-control-attribution,
        .leaflet-control-layers {
            display: none !important;
        }
    </style>
    """
    # Use folium.Element like the other map builders in this file, rather than
    # a bare `Element` name that is not imported anywhere in this section.
    m.get_root().header.add_child(folium.Element(css), name='hide_ui')
    return m._repr_html_()

# --- Define Dashboard Layout and Control Panel Structure ---
app = dash.Dash(__name__)

# Page shell: black, margin-free background and borderless iframes. The
# {%...%} placeholders are filled in by Dash when the page is served.
app.index_string = '''
<!DOCTYPE html>
<html>
<head>
    <title>Dashboard</title>
    <style>
        body, html {
            margin: 0;
            padding: 0;
            width: 100%;
            height: 100%;
            background-color: #000;
            overflow-x: hidden;
            overflow-y: auto;
        }
        iframe {
            border: none;
        }
    </style>
</head>
<body>
    {%app_entry%}
    <footer>
        {%config%}
        {%scripts%}
        {%renderer%}
    </footer>
</body>
</html>
'''


def _dashboard_tab(label, value):
    """Build one black-themed tab; the selected state adds a yellow top border."""
    base_style = {'backgroundColor': '#000', 'color': 'white', 'padding': '10px'}
    return dcc.Tab(
        label=label,
        value=value,
        style=base_style,
        selected_style={**base_style, 'borderTop': '4px solid #ffcc00'},
    )


# Title, one tab per view, and the container that the tab callback fills.
app.layout = html.Div(
    [
        html.H1(
            "Data to be used in the case study",
            style={
                'text-align': 'center',
                'color': 'white',
                'background-color': '#000',
                'border': '1px solid white',
                'padding': '10px',
                'font-weight': 'bold',
                'font-size': '32px'
            }
        ),
        dcc.Tabs(
            id="tabs-maps",
            value='average_distance_to_services',  # tab selected on load
            children=[
                _dashboard_tab(tab_label, tab_value)
                for tab_label, tab_value in (
                    ('Average distance to services', 'average_distance_to_services'),
                    ('Number of nearby services', 'number_of_nearby_services'),
                    ('Services available by category', 'services_by_category'),
                    ('Population aged 65 or more', 'pop_65_plus'),
                    ('Correlation matrix', 'correlation'),
                    ('Parishes', 'parishes'),
                )
            ],
            style={'backgroundColor': '#000', 'border': '1px solid #333'}
        ),
        html.Div(id='map-container'),
    ],
    style={
        'backgroundColor': '#000',
        'color': 'white',
        'minHeight': '100vh',
        'margin': '0',
        'padding': '0',
        'overflow': 'auto'
    },
)

# --- Display Corresponding Map or Chart Based on Selected Tab ---
@app.callback(Output('map-container', 'children'), [Input('tabs-maps', 'value')])
def render_map(tab):
    """Return the content of ``map-container`` for the selected tab.

    Map tabs return an ``html.Iframe`` whose ``srcDoc`` is the folium HTML; the
    correlation tab returns the ``html.Div`` built by ``create_corr_map``.
    An unrecognised tab value yields ``None`` (empty container).

    The module-level ``data`` tables are shared by every invocation of this
    callback, so they are never modified here: derived columns and rounding are
    applied to copies.
    """
    def _as_iframe(document):
        # All map tabs share the same frame size.
        return html.Iframe(srcDoc=document, width='100%', height='600')

    if tab == 'average_distance_to_services':
        # format_decimal_column() writes into the table it receives, so pass a copy.
        gdf = format_decimal_column(
            data['average_distance_to_services'].copy(), 'average_distance_to_services'
        )
        return _as_iframe(create_map(
            gdf, 'average_distance_to_services', linear.YlOrRd_09,
            'Average distance to services per building', 'Average distance: '
        ))
    elif tab == 'number_of_nearby_services':
        gdf = data['services_by_category']
        return _as_iframe(create_map(
            gdf, 'number_of_nearby_services', linear.Purples_09,
            'Number of nearby services per building', 'Services: '
        ))
    elif tab == 'services_by_category':
        source = data['services_by_category']
        # assign() returns a new table, so the tooltip column never lands in the
        # shared dataset (and is not serialised into the other tabs' maps).
        gdf = source.assign(
            service_types_text=source['services_by_category'].apply(extract_service_types)
        )
        return _as_iframe(create_map_with_services(gdf, 'service_types_text'))
    elif tab == 'pop_65_plus':
        gdf = data['population_65_plus']
        return _as_iframe(create_map(
            gdf, 'pop_65_plus', linear.Reds_09,
            'Population aged 65 or more per building', 'Population 65+ per building: '
        ))
    elif tab == 'correlation':
        return create_corr_map(df_services)
    elif tab == 'parishes':
        return _as_iframe(create_parishes_map(gdf_parishes))
    # Unknown tab value: leave the container empty.
    return None
        
# Persist the full merged table as a pickle in the working directory before the
# dashboard port is chosen and the server is started.
# NOTE(review): presumably read back by a later notebook — confirm the consumer.
df_services.to_pickle("df_services.pkl")

# --- Find a free network port for the interactive dashboard ---
def find_free_port():
    """Return a TCP port in 8000-9000 (inclusive) that can currently be bound.

    Candidate ports are tried in random order. A port counts as free when a
    socket can be bound to it on the loopback interface, which is a direct test
    of what the server is about to do. Every port is tried at most once, so the
    search always ends.

    Returns
    -------
    int
        A port number that was free at the time of the check.

    Raises
    ------
    RuntimeError
        If no port in the range can be bound.
    """
    candidates = list(range(8000, 9001))
    random.shuffle(candidates)
    for port in candidates:
        with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as s:
            try:
                s.bind(("127.0.0.1", port))
            except OSError:
                # In use (or otherwise not bindable): try the next candidate.
                continue
            return port
    raise RuntimeError("No free TCP port available in the range 8000-9000")

# Pick the port once; it is used only by the server start below.
port = find_free_port()

# Start the Dash server only when this code runs as the main module.
# NOTE(review): presumably true inside a notebook kernel as well — confirm.
if __name__ == '__main__':
    app.run(debug=False, port=port)

# Green (ANSI colour 92) notice telling the reader they may proceed.
print("\033[92m[INFO] After analysis, you may continue.\033[0m")

[92m[INFO] After analysis, you may continue.[0m


<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<div style="border: none; margin: 5px 0; border-top: 1px dashed #FFFFFF; border-bottom: 1px dashed #FFFFFF; height: 5px;"></div>

Next: [Clustering](clustering.ipynb)

<div style="border: none; margin: 5px 0; border-top: 1px dashed #FFFFFF; border-bottom: 1px dashed #FFFFFF; height: 5px;"></div>