# OdC's 2025 Temperature Geovisor Tutorial svgs

This notebook loads temperature data for __specific cities__, processes it, and __creates the GeoDataFrames (gdfs) needed for the social media video tutorial.__

## __Import libraries__

In [2]:
from pathlib import Path

# Locate the repository root by name, starting at the current working directory.
# Name of the repository's root folder
project_dir_name = "accesibilidad-urbana"
current_path = Path().resolve()

# Check the working directory itself first, then walk up through its ancestors
# (Path.parents does not include the path itself).
project_root = next(
    (candidate for candidate in (current_path, *current_path.parents)
     if candidate.name == project_dir_name),
    None,
)

# Fail early with an explicit message instead of a NameError further down
if project_root is None:
    raise FileNotFoundError(
        f"Could not find a '{project_dir_name}' directory at or above {current_path}. "
        "Run this notebook from inside the project repository."
    )

print(project_root)

/home/jovyan/accesibilidad-urbana


In [3]:
import os
import sys

import pandas as pd
import geopandas as gpd
import numpy as np

import matplotlib.pyplot as plt

# Imports que venían en el Notebook 15-min-city > 13-15-min-kepler-test.ipynb
import io
#import boto3
from keplergl import KeplerGl

# Classify data using Natural Breaks
import mapclassify

# Correlation calc
import scipy.stats as stats

import warnings
warnings.simplefilter(action='ignore', category=FutureWarning)

# Make the project-local `aup` package importable:
# register the project root on sys.path (only once), then import it.
module_path = os.path.abspath(project_root)
if module_path not in sys.path:
    sys.path.append(module_path)
import aup

## __Config notebook__

In [4]:
# --- Cities to process (chosen using AI) ---
high_termal_variability_expected = ['CDMX','Guadalajara','Monterrey','Chihuahua','Queretaro']
low_termal_variability_expected = ['Tepic','Merida','Villahermosa','Mazatlan','Oaxaca']
# 'ZMVM' is added at the end so that the merge_capital case is triggered
city_lst = [*high_termal_variability_expected,
            *low_termal_variability_expected,
            'ZMVM']

# Test
#city_lst = ['Aguascalientes']

# --- Analysis parameters ---
# Hexagon resolution of the analysis
res = 10
# CRS the data is projected to
projected_crs = 'EPSG:6372'

# --- Processing switches ---
# Keep urban hexagons only?
filter_urban = True
# When running CDMX (capital city), also add ZMVM (metropolitan area)?
merge_capital = True
# Categorize data using manually defined bins? (Otherwise uses jenks)
manual_categorization = True

# --- Output ---
# Directory where the resulting GeoDataFrame files are saved
output_dir = f"{project_root}/data/processed/visor_temperature_tutorial-gdfs/"

## __Prepare city list__ (If merge_capital = True and both CDMX and ZMVM are listed, drops ZMVM so that it is loaded and merged together with CDMX during processing)

In [5]:
# When merging the capital, ZMVM must not be processed as a city of its own:
# it gets loaded together with CDMX in the processing loop, so take it off the list.
capital_and_metro_listed = {'CDMX', 'ZMVM'}.issubset(city_lst)
if merge_capital and capital_and_metro_listed:
    separator = "--"*30
    print(separator)
    print("merge_capital IS SET TO TRUE: City list contains both CDMX and ZMVM. Will merge both databases and create a unified temperature anomaly.")
    print(separator)
    city_lst.remove('ZMVM')

# Summary of what is about to be processed
n_cities = len(city_lst)
print(f"{n_cities} cities to run at res {res}.")
city_lst

------------------------------------------------------------
merge_capital IS SET TO TRUE: City list contains both CDMX and ZMVM. Will merge both databases and create a unified temperature anomaly.
------------------------------------------------------------
10 cities to run at res 10.


['CDMX',
 'Guadalajara',
 'Monterrey',
 'Chihuahua',
 'Queretaro',
 'Tepic',
 'Merida',
 'Villahermosa',
 'Mazatlan',
 'Oaxaca']

## __Create dissolved temperature anomaly GeoPackages (.gpkg) by city__

In [6]:
# Builds one dissolved temperature-anomaly GeoDataFrame per city and saves it as a GeoPackage.
# For each city in city_lst: loads its hexagons and temperature data from the database,
# computes each hexagon's anomaly against the city mean, classifies it into 7 classes
# (-3 coldest ... 3 hottest), dissolves the hexagons by class and writes the result to output_dir.
# Relies on the notebook configuration (city_lst, res, projected_crs, filter_urban,
# merge_capital, manual_categorization, output_dir) and on aup.gdf_from_query.

# Database locations
hex_schema = 'hexgrid'
hex_table = f'hexgrid_{res}_city_2020'
hex_type = 'urban'
temp_schema = 'raster_analysis'
temp_table = 'temperature_analysis_hex'

# Make sure the output directory exists, otherwise every save would fail
os.makedirs(output_dir, exist_ok=True)

# Progress counter starts at 1 so the log reads 1/N ... N/N
for i, city in enumerate(city_lst, start=1):

    try:
        print("--"*30)
        print(f"--- STARTING CITY {i}/{len(city_lst)}: {city}.")

        # SPECIFIC CASE - The capital (CDMX) is processed together with its metro area (ZMVM)
        is_merged_capital = merge_capital and (city == 'CDMX')
        source_cities = [city, 'ZMVM'] if is_merged_capital else [city]

        # 1.0 --- --- --- LOAD DATA --- --- ---

        # 1.1 --- LOAD URBAN HEXS
        print(f"Loading {city}'s urban hexs.")
        if is_merged_capital:
            print("MERGING CDMX + ZMVM hexs.")
        # Only the hex ids are needed (they are used to filter the temperature data),
        # so the geometries are neither reprojected nor kept.
        hexid_lst = set()
        for source_city in source_cities:
            query = f"SELECT hex_id_{res}, geometry FROM {hex_schema}.{hex_table} WHERE \"city\" = '{source_city}'"
            # Restrict to urban hexagons if required
            if filter_urban:
                query += f" AND \"type\" = '{hex_type}'"
            source_hex_gdf = aup.gdf_from_query(query, geometry_col='geometry')
            hexid_lst.update(source_hex_gdf[f'hex_id_{res}'])
            del source_hex_gdf

        # 1.2 --- LOAD TEMPERATURE DATA
        print(f"Loading {city}'s Temperature data.")
        if is_merged_capital:
            print("MERGING CDMX + ZMVM TEMPERATURE.")
        temperature_parts = []
        for source_city in source_cities:
            query = f"SELECT * FROM {temp_schema}.{temp_table} WHERE \"city\" = '{source_city}' AND \"res\" = '{res}'"
            temperature_parts.append(aup.gdf_from_query(query, geometry_col='geometry'))
        if is_merged_capital:
            # Merge capital and metro area, dropping hexs present in both (CDMX and ZMVM)
            temperature_gdf = pd.concat(temperature_parts, ignore_index=True)
            temperature_gdf = temperature_gdf.drop_duplicates(subset="hex_id")
        else:
            temperature_gdf = temperature_parts[0]
        del temperature_parts

        temperature_gdf = temperature_gdf.to_crs(projected_crs)
        # Filter for urban areas if required
        if filter_urban:
            temperature_gdf = temperature_gdf.loc[temperature_gdf.hex_id.isin(hexid_lst)].copy()
        # Inf values check
        is_inf = np.isinf(temperature_gdf['temperature_mean'])
        if is_inf.any():
            print(f"WARNING: Dropping {int(is_inf.sum())} hexs res {res} because of inf values.")
            temperature_gdf = temperature_gdf.loc[~is_inf].copy()

        # 2.0 --- --- --- CALCULATIONS AND DATA TREATMENT --- --- ---

        # 2.1 --- CALCULATE TEMPERATURE ANOMALY
        print("DATA TREATMENT - Calculating temperature anomaly.")
        # Anomaly by hex = difference between the hex mean and the city-wide mean
        mean_city_temperature = temperature_gdf.temperature_mean.mean()
        temperature_gdf['temperature_anomaly'] = temperature_gdf['temperature_mean'] - mean_city_temperature

        # 2.2 --- CATEGORIZE TEMPERATURE ANOMALY
        # In both options classif_bins ends up as 8 edges: [lower bound, 6 inner boundaries, upper bound]
        if manual_categorization:
            ########### CATEGORIZATION OPTION 1: SET MANUALLY ###########
            # Customized boundaries
            classif_bins = [-100,-3.5,-1.5,-0.5,0.5,1.5,3.5,100] #Edgar
            #classif_bins = [-15, -6.5, -3.5, -1.30, 0.35, 2.0, 4.0, 15] #Based in Monterrey
            # Classify data in bins, labelled from -3 (colder) to 3 (hotter)
            temperature_gdf['anomaly_class'] = pd.cut(temperature_gdf['temperature_anomaly'],
                                                      bins=classif_bins,
                                                      labels=[-3, -2, -1, 0, 1, 2, 3],
                                                      include_lowest=True).astype(int)
        else:
            ########### CATEGORIZATION OPTION 2: SET USING JENKS ###########
            print("DATA TREATMENT - Categorize temperature anomaly using jenks.")
            # Calculate Natural Breaks (Jenks) cuts (using 7 classes)
            classifier = mapclassify.NaturalBreaks(y=temperature_gdf['temperature_anomaly'], k=7)
            # Classes come as 0..6; shift them to be between -3 (colder) and 3 (hotter)
            temperature_gdf['anomaly_class'] = classifier.yb - 3
            # classifier.bins only holds the upper bound of each class. Prepend the minimum
            # so the array has the same edge layout as the manual bins; otherwise every
            # boundary used in the detailed labels below would be shifted by one class.
            classif_bins = np.concatenate(([temperature_gdf['temperature_anomaly'].min()],
                                           classifier.bins))

        # 2.3 --- SET CATEGORY'S NAME TO BE DISPLAYED ON GEOVISOR (Creates bins columns)
        print("DATA TREATMENT - Renaming categories.")
        # Column containing the anomaly class names
        # (Used to display the layer using a color palette and show the symbology)
        classes_dict = {3:"1. Más caliente",
                        2:"2.",
                        1:"3.",
                        0:"4.",
                        -1:"5.",
                        -2:"6.",
                        -3:"7. Más fresco"
                       }
        temperature_gdf['anomaly_bins'] = temperature_gdf['anomaly_class'].map(classes_dict)

        # Similar but more detailed column
        # (Used to display detailed information when hovering over hexs)
        decimals = 2 #Number of decimals shown for the temperature boundaries
        # Absolute, rounded value of every edge (labels already say above/below the mean)
        edge_deg = [abs(round(edge, decimals)) for edge in classif_bins]
        classes_dict_detailed = {3:f"{edge_deg[6]}° o más sobre el promedio.",
                                 2:f"{edge_deg[5]}° a {edge_deg[6]}° sobre el promedio.",
                                 1:f"{edge_deg[4]}° a {edge_deg[5]}° sobre el promedio.",
                                 0:"Cercano al promedio.",
                                 -1:f"{edge_deg[3]}° a {edge_deg[2]}° debajo del promedio.",
                                 -2:f"{edge_deg[2]}° a {edge_deg[1]}° debajo del promedio.",
                                 -3:f"{edge_deg[1]}° o más debajo del promedio."
                                }
        temperature_gdf['anomaly_details'] = temperature_gdf['anomaly_class'].map(classes_dict_detailed)

        # 2.4 --- CONVERT TO CATEGORICAL ORDER
        print("DATA TREATMENT - Setting bins as categorical values.")
        # Define order (hottest first) and convert col into ordered category
        temperature_categories = list(classes_dict.values())
        temperature_gdf['anomaly_bins'] = pd.Categorical(temperature_gdf['anomaly_bins'],
                                                         categories=temperature_categories,
                                                         ordered=True)
        # Force categorical order
        temperature_gdf = temperature_gdf.sort_values(by='anomaly_bins')

        # 2.5 --- CONSIDER TEMPERATURE ANOMALY DIRECTLY, ROUNDING VALUE
        temperature_gdf['temperature_anomaly_rounded'] = temperature_gdf['temperature_anomaly'].round(2)

        # 3.0 --- --- --- DISSOLVE GDF --- --- ---
        print("DISSOLVE GDF.")
        # Dissolve by anomaly class; only the class and its merged geometry are kept,
        # so the remaining columns are left out of the dissolve.
        temperature_gdf_diss = temperature_gdf[['anomaly_class','geometry']].dissolve('anomaly_class')
        # Reset index to get anomaly_class back as a column
        temperature_gdf_diss = temperature_gdf_diss.reset_index()[['anomaly_class','geometry']]

        # 4.0 --- --- --- SAVE GDF --- --- ---
        print("SAVE GDF.")
        # SPECIFIC CASE - Merge capital's Temperature (CDMX + ZMVM) file name
        if is_merged_capital:
            file_name = f'TemperatureGDF_{city}-ZMVM_res{res}.gpkg'
        else:
            file_name = f'TemperatureGDF_{city}_res{res}.gpkg'
        temperature_gdf_diss.to_file(os.path.join(output_dir, file_name))
        print(f"SAVING (FINAL) - Saved {city}'s gdf.")

    # In case of error while running a city: report it (with its cause) and continue with
    # the next one. KeyboardInterrupt is not caught, so the run can still be stopped manually.
    except Exception as exc:
        print("--"*30)
        print(f"ERROR: {city}.")
        print(f"{type(exc).__name__}: {exc}")
        print("--"*30)

------------------------------------------------------------
--- STARTING CITY 0/10: CDMX.
Loading CDMX's urban hexs.
MERGING CDMX + ZMVM hexs.
Loading CDMX's Temperature data.
MERGING CDMX + ZMVM TEMPERATURE.
DATA TREATMENT - Calculating temperature anomaly.
DATA TREATMENT - Renaming categories.
DATA TREATMENT - Setting bins as categorical values.
DISSOLVE GDF.
SAVE GDF.
SAVING (FINAL) - Saved CDMX's gdf.
------------------------------------------------------------
--- STARTING CITY 1/10: Guadalajara.
Loading Guadalajara's urban hexs.
Loading Guadalajara's Temperature data.
DATA TREATMENT - Calculating temperature anomaly.
DATA TREATMENT - Renaming categories.
DATA TREATMENT - Setting bins as categorical values.
DISSOLVE GDF.
SAVE GDF.
SAVING (FINAL) - Saved Guadalajara's gdf.
------------------------------------------------------------
--- STARTING CITY 2/10: Monterrey.
Loading Monterrey's urban hexs.
Loading Monterrey's Temperature data.
DATA TREATMENT - Calculating temperature anom