In [1]:
import pandas as pd
import geopandas as gp
from src.utils.map_vis import geojson_per_row
import folium
from shapely.ops import cascaded_union
from shapely.geometry import Polygon, MultiPolygon

%matplotlib inline

# Mexico

In [2]:
from src.utils import fix_spanish_encoding

In [3]:
# Shapefile source:
# http://www.inegi.org.mx/geo/contenidos/geoestadistica/m_g_0.aspx
shp_urban = gp.read_file('data/mexico/mglu2014v6_2/mglu2015v6_2.shp')
shp_state = gp.read_file('data/mexico/mge2014v6_2/mge2015v6_2.shp')
shp_munic = gp.read_file('data/mexico/mgm2014v6_2/mgm2015v6_2.shp')

# Repair the locality names, then build the urban id by concatenating the
# state, municipality and locality codes.
shp_urban['NOM_LOC'] = shp_urban['NOM_LOC'].apply(fix_spanish_encoding)
shp_urban['URBAN_ID'] = shp_urban['CVE_ENT'] + shp_urban['CVE_MUN'] + shp_urban['CVE_LOC']

# Attach the state and municipality attributes to every urban polygon.
# Their geometries are dropped first so only the urban geometry survives;
# both merges join on whatever columns the two frames have in common.
state_attrs = shp_state.drop('geometry', axis=1)
munic_attrs = shp_munic.drop('geometry', axis=1)
shp_urban_with_mun = shp_urban.merge(state_attrs).merge(munic_attrs)

In [4]:
# Preview the first rows of the merged urban / state / municipality frame.
shp_urban_with_mun.head()

Unnamed: 0,CVE_ENT,CVE_MUN,CVE_LOC,NOM_LOC,geometry,URBAN_ID,NOM_ENT,NOM_MUN,concat
0,1,1,1,Aguascalientes,POLYGON ((-102.3016558772169 21.94197733808375...,10010001,Aguascalientes,Aguascalientes,1001
1,1,1,239,General José María Morelos y Pavón (Cañada Honda),POLYGON ((-102.2040776149899 21.99715938539737...,10010239,Aguascalientes,Aguascalientes,1001
2,1,1,293,Norias de Ojocaliente,POLYGON ((-102.2119687378669 21.89791913825769...,10010293,Aguascalientes,Aguascalientes,1001
3,1,1,357,Norias del Paso Hondo,POLYGON ((-102.2023650773864 21.85918261458602...,10010357,Aguascalientes,Aguascalientes,1001
4,1,1,479,Villa Licenciado Jesús Terán (Calvillito),POLYGON ((-102.1868744152221 21.84734361541558...,10010479,Aguascalientes,Aguascalientes,1001


## metro

In [5]:
metro = gp.read_file('data/mexico/Mapa_de_delimitacion_de_zonas_metropolitanas_2010/ZM_2010.shp')

# CVE_SUN codes and display names are paired by position. `sun_names` has two
# more entries than `sun_ids`: zip() stops at the shorter list, so the last two
# names are not processed by the loop below. They are picked up by index
# (sun_names[-2], sun_names[-1]) further down in the notebook.
sun_ids = [31, 21, 32, 28, 24, 11, 12, 41, 37, 3, 17, 2, 25, 45]
sun_names = [
    'Monterrey, Nuevo León',
    'Guadalajara, Jalisco',
    'Oaxaca de Juárez, Oaxaca',
    'Cuernavaca, Morelos',
    'Toluca de Lerdo, México',
    'Juárez, Chihuahua',
    'Chihuahua, Chihuahua',
    'Villahermosa, Tabasco',
    'Cancún, Quintana Roo',
    'Mexicali, Baja California',
    'Acapulco de Juárez, Guerrero',
    'Tijuana, Baja California',
    'Morelia, Michoacán de Ocampo',
    'Nuevo Laredo, Tamaulipas',
    'Culiacán Rosales, Sinaloa',
    'Valle de Mexico, Distrito Federal',
]

# Municipality codes per city; Culiacán is seeded by hand, the rest are
# filled in from the CVE_MUN1 column of the metro layer.
sun_munic_codes = {'Culiacán Rosales, Sinaloa': ['25006']}

suns = []
for sun_code, sun_label in zip(sun_ids, sun_names):
    members = metro[metro.CVE_SUN == sun_code]
    merged_geometry = cascaded_union(members.geometry.tolist())
    total_population = members.POB_2010.sum()
    suns.append({
        'name': sun_label,
        'cve_sun': sun_code,
        'population': total_population,
        'nom_sun': members.NOM_SUN.unique()[0],
        'geometry': merged_geometry,
    })
    sun_munic_codes[sun_label] = members.CVE_MUN1.values

In [6]:
# Metro Culiacán uses the whole municipality area instead of the municipal
# head locality; the other metro areas are at municipality level as well.
# Earlier approach, kept for reference:
# culiacan_sinaloa = selected_localidades_mex[selected_localidades_mex.URBAN_ID=='250060001'].to_crs(metro.crs).iloc[0].to_dict()
# Population figure from: https://www.citypopulation.de/php/mexico-metro.php
culiacan_name = sun_names[-2]
culiacan_row = shp_munic[shp_munic['concat'] == '25006'].to_crs(metro.crs).iloc[0]
culiacan_sinaloa = culiacan_row.to_dict()
suns.append({
    'name': culiacan_name,
    'cve_sun': 99,  # hand-assigned code; presumably unused in ZM_2010 - TODO confirm
    'population': 858638,
    'nom_sun': culiacan_name,
    'geometry': culiacan_sinaloa['geometry'],
})

In [7]:

# Valle de Mexico: merge only the CVE_SUN 13 rows whose state code is '09'.
valle_rows = metro[(metro.CVE_SUN == 13) & (metro.CVE_ENT == '09')]
valle_geometry = cascaded_union(valle_rows.geometry.tolist())
valle_population = valle_rows.POB_2010.sum()
suns.append({
    'name': sun_names[-1],
    'cve_sun': 13,
    'population': valle_population,
    'nom_sun': valle_rows.NOM_SUN.unique()[0],
    'geometry': valle_geometry,
})

In [8]:
# Turn the list of records into a GeoDataFrame, tag it with the CRS of the
# metro layer, then reproject to EPSG:4326.
# Note: `suns` changes from a list to a GeoDataFrame here.
suns_gdf = gp.GeoDataFrame(suns)
suns_gdf.crs = metro.crs
suns = suns_gdf.to_crs(epsg=4326)

In [9]:
# Keep the exported columns only and order the rows from most to least populous.
exported_cols = ['name', 'population', 'cve_sun', 'geometry']
suns = suns[exported_cols].sort_values(by='population', ascending=False)

In [10]:
# Write the metro areas to disk as GeoJSON.
suns.to_file('data/mex_16_metropolitans.geojson', driver="GeoJSON")

## urban areas

In [11]:
# Locality population table; the first CSV column becomes the index.
# get_pop() reads its 'Clave de localidad' and 'Población total' columns.
pop_local = pd.read_csv('data/mexico/Localidades-population.csv', index_col=0)

Fix the holes in the urban areas of Mexico City (Distrito Federal).

In [12]:
def get_pop(shp, pop_local):
    """Return the total population of the localities listed in ``shp``.

    Args:
        shp: frame with a ``URBAN_ID`` column whose values can be cast to int.
        pop_local: frame with the columns ``'Clave de localidad'`` (locality
            key) and ``'Población total'`` (population).

    Returns:
        The sum of ``'Población total'`` over the rows of ``pop_local`` whose
        key appears in ``shp.URBAN_ID``.

    Raises:
        AssertionError: if the number of matched population rows differs from
            the number of rows in ``shp``.
    """
    locality_keys = shp['URBAN_ID'].astype(int)
    is_wanted = pop_local['Clave de localidad'].isin(locality_keys)
    matched = pop_local.loc[is_wanted]
    # Every shape must have exactly one population record.
    assert len(matched) == len(shp)
    return matched['Población total'].sum()

In [13]:
# All urban polygons of state '09', merged into one multi-part geometry.
df_urban = shp_urban_with_mun[shp_urban_with_mun.CVE_ENT=='09']
polys = cascaded_union(df_urban.geometry)

# Work on a plain list of the parts. Indexing or slicing a MultiPolygon
# directly (polys[0], polys[1:4]) was deprecated in Shapely 1.8 and removed in
# 2.0, whereas `.geoms` is available on both.
parts = list(polys.geoms)

# Part 0: keep the outer boundary plus interior ring 115 only; every other
# hole is dropped.
# NOTE(review): 115 is a hard-coded position in the interiors list and depends
# on the order the union returns them in - confirm it still selects the
# intended hole when the data or library versions change.
p0 = parts[0]
p0 = Polygon(p0.exterior, [p0.interiors[115]])
# Part 4: keep the outer boundary only (all holes dropped).
p4 = Polygon(parts[4].exterior)

mex_city_pop = get_pop(df_urban, pop_local)

# Reassemble: the two repaired parts first, then the untouched ones.
mex_city_geo = MultiPolygon([p0, p4] + parts[1:4] + parts[5:])
# NOTE(review): 'Mexico city in DC' looks like a typo for 'DF'; left untouched
# because NOM_MUN is written to the exported files.
mexico_city = {'name': 'Valle de Mexico, Distrito Federal', 'CVE_ENT': '09', 'CVE_MUN': '000', 'CVE_LOC': '0000',
               'NOM_LOC': 'Mexico city in DC', 'NOM_ENT': 'Distrito Federal',
               'NOM_MUN': 'Mexico city in DC', 'concat': '09000', 'URBAN_ID' :'090000000',
               'population': mex_city_pop, 'geometry': mex_city_geo}
mexico_city = gp.GeoDataFrame([mexico_city])

In [14]:
def get_urban_areas(keep_munic_head=False):
    """Build one merged urban-area row per city, plus the Mexico City row.

    Reads the notebook-level ``sun_munic_codes``, ``shp_urban_with_mun``,
    ``pop_local`` and ``mexico_city``.

    Args:
        keep_munic_head: when True, only localities with ``CVE_LOC == '0001'``
            are kept for each city; when False, every urban locality in the
            city's municipalities is merged.

    Returns:
        GeoDataFrame with the columns ``name``, ``population``, ``CVE_ENT``,
        ``CVE_MUN``, ``NOM_ENT``, ``NOM_MUN`` and ``geometry``, sorted by
        ``population`` in descending order. Codes and names of several
        municipalities are joined with ``'|'``.

    Raises:
        AssertionError: propagated from ``get_pop`` when a locality has no
            matching population record.
    """
    columns = ['name', 'population', 'CVE_ENT', 'CVE_MUN', 'NOM_ENT', 'NOM_MUN', 'geometry']
    rows = []

    for name, cve_mun in sun_munic_codes.items():
        # Bracket access: `concat` is a column name here, not a method.
        in_city = shp_urban_with_mun['concat'].isin(cve_mun)
        urban_in_metro = shp_urban_with_mun[in_city]
        if keep_munic_head:
            urban_in_metro = urban_in_metro[urban_in_metro.CVE_LOC == '0001']

        pop = get_pop(urban_in_metro, pop_local)
        geometry = cascaded_union(urban_in_metro.geometry)
        rows.append([name,
                     pop,
                     '|'.join(urban_in_metro.CVE_ENT.unique()),
                     '|'.join(urban_in_metro.CVE_MUN.unique()),
                     '|'.join(urban_in_metro.NOM_ENT.unique()),
                     '|'.join(urban_in_metro.NOM_MUN.unique()),
                     geometry])

    # DataFrame.append was removed in pandas 2.0; pd.concat gives the same
    # result on every pandas version.
    selected_urbans = pd.concat(
        [gp.GeoDataFrame(rows, columns=columns), mexico_city[columns]],
        ignore_index=True,
    )
    return selected_urbans.sort_values('population', ascending=False)

### municipality head

In [49]:
# Deprecated
# The `selected_munic_head` built in this cell is reassigned by the
# `get_urban_areas(keep_munic_head=True)` call that follows, so this cell is
# not needed for the exported file. `idx` and `cities` are still read by the
# other cell marked "Deprecated".

# Display names, one per row of the final frame. Names are assigned by
# position below, so the order must match: the `idx` rows first, then the
# Mexico City row appended last.
cities = ['Acapulco de Juárez, Guerrero', 'Cancún, Quintana Roo', 'Juárez, Chihuahua', 'Chihuahua, Chihuahua', 
          'Cuernavaca, Morelos', 'Culiacán Rosales, Sinaloa', 'Guadalajara, Jalisco', 'Mexicali, Baja California', 
          'Monterrey, Nuevo León', 'Morelia, Michoacán de Ocampo', 'Nuevo Laredo, Tamaulipas', 'Oaxaca de Juárez, Oaxaca', 
          'Tijuana, Baja California', 'Toluca de Lerdo, México', 'Villahermosa, Tabasco', 'Valle de Mexico, Distrito Federal']

# Hard-coded row labels of shp_urban_with_mun (15 rows; Mexico City is added separately).
idx = [742, 3396, 454, 431, 2095, 3515, 1186, 48, 2280, 1943, 3815, 2373, 71, 1771, 3709]
shp_mun_head = shp_urban_with_mun.loc[idx].copy()
# Cast the id to int so it can be joined against 'Clave de localidad'.
shp_mun_head.URBAN_ID = shp_mun_head.URBAN_ID.astype(int)
# Attach the population of each selected locality and rename it to 'population'.
shp_mun_head= shp_mun_head.merge(pop_local[['Población total', 'Clave de localidad']], 
                                 left_on='URBAN_ID', right_on='Clave de localidad'
                                ).drop('Clave de localidad', axis=1).rename(columns={'Población total': 'population'})
# Add the hand-built Mexico City row as the last row.
selected_munic_head = shp_mun_head.append(mexico_city, ignore_index=True, sort=False)

cols = ['URBAN_ID', 'NOM_ENT', 'NOM_MUN','NOM_LOC',  'CVE_ENT', 'CVE_MUN', 'CVE_LOC', 'population', 'geometry']
selected_munic_head = selected_munic_head[cols]
# Positional assignment: relies on the row order described above.
selected_munic_head['name'] = cities

# Reorder the columns and sort from most to least populous.
selected_munic_head=selected_munic_head[['name', 'population', 'URBAN_ID', 'NOM_ENT', 'NOM_MUN','NOM_LOC',  'CVE_ENT', 'CVE_MUN', 'CVE_LOC', 'geometry']].sort_values('population', ascending=False)

In [15]:
# Municipal-head variant: keep only the localities with CVE_LOC == '0001' in each city.
selected_munic_head = get_urban_areas(keep_munic_head=True)

In [16]:
# Write the municipal-head areas to disk as GeoJSON.
selected_munic_head.to_file('data/mex_16_munic_head.geojson', driver="GeoJSON")

### municipality urban areas

In [None]:
# Deprecated
# The `selected_urbans` built in this cell is reassigned by the
# `get_urban_areas(keep_munic_head=False)` call that follows. This cell also
# needs `idx` and `cities`, which are defined in the other cell marked
# "Deprecated".

# Every urban locality that shares a municipality ('concat' code) with one of the `idx` rows.
selected_urban_unmerged = shp_urban_with_mun[shp_urban_with_mun['concat'].isin(shp_urban_with_mun.loc[idx]['concat'])]

selected_urbans = []

# One merged record per `idx` row: union of all urban polygons in that row's municipality.
for i in idx:
    concat_value = shp_urban_with_mun.loc[i,'concat']
    munic = shp_urban_with_mun[shp_urban_with_mun['concat']==concat_value]
    geometry = cascaded_union(munic.geometry)
    pop = get_pop(munic, pop_local)
    selected_urbans.append([munic.CVE_ENT.iloc[0], munic.CVE_MUN.iloc[0], munic.NOM_ENT.iloc[0],
                            munic.NOM_MUN.iloc[0], geometry, pop])

columns=['CVE_ENT','CVE_MUN', 'NOM_ENT','NOM_MUN','geometry', 'population']
# Add the hand-built Mexico City row as the last row.
selected_urbans = gp.GeoDataFrame(selected_urbans, columns=columns).append(mexico_city[columns], ignore_index=True)
# Positional assignment: `cities` must follow the `idx` order, with Mexico City last.
selected_urbans['name'] = cities

# Reorder the columns and sort from most to least populous.
selected_urbans=selected_urbans[['name', 'population', 'CVE_ENT','CVE_MUN', 'NOM_ENT','NOM_MUN','geometry']].sort_values('population', ascending=False)

In [17]:
# Full urban variant: merge every urban locality in each city's municipalities.
selected_urbans = get_urban_areas(keep_munic_head=False)

In [18]:
# Write the merged municipal urban areas to disk as GeoJSON.
selected_urbans.to_file('data/mex_16_munic_urban_merge.geojson', driver="GeoJSON")

In [19]:
# Put the city names of the three frames side by side, row by row
# (column 0: municipal head, 1: municipal urban, 2: metro).
# Each frame is sorted by population, so a row compares ranks.
name_columns = [
    frame['name'].values
    for frame in (selected_munic_head, selected_urbans, suns)
]
pd.DataFrame(list(zip(*name_columns)))

Unnamed: 0,0,1,2
0,"Valle de Mexico, Distrito Federal","Valle de Mexico, Distrito Federal","Valle de Mexico, Distrito Federal"
1,"Monterrey, Nuevo León","Guadalajara, Jalisco","Guadalajara, Jalisco"
2,"Guadalajara, Jalisco","Monterrey, Nuevo León","Monterrey, Nuevo León"
3,"Tijuana, Baja California","Tijuana, Baja California","Toluca de Lerdo, México"
4,"Juárez, Chihuahua","Toluca de Lerdo, México","Tijuana, Baja California"
5,"Chihuahua, Chihuahua","Juárez, Chihuahua","Juárez, Chihuahua"
6,"Toluca de Lerdo, México","Mexicali, Baja California","Mexicali, Baja California"
7,"Cuernavaca, Morelos","Cuernavaca, Morelos","Cuernavaca, Morelos"
8,"Mexicali, Baja California","Chihuahua, Chihuahua","Acapulco de Juárez, Guerrero"
9,"Acapulco de Juárez, Guerrero","Culiacán Rosales, Sinaloa","Culiacán Rosales, Sinaloa"


## maps

In [22]:
some_map = folium.Map(location=[24.062199, -102.870324], zoom_start=5)

# Layers are added in this order: metro, municipal urban areas, municipal heads.
# Disabled layer, kept for reference:
# geojson_per_row(selected_urban_unmerged, name='munic-urban-unmerged', color='grey', tip_cols=['URBAN_ID', 'NOM_ENT', 'NOM_MUN','NOM_LOC'], some_map=some_map)
map_layers = [
    (suns.reset_index(),
     dict(name='metro', tip_cols=['name', 'population', 'cve_sun'])),
    (selected_urbans,
     dict(name='munic-urban', color='yellow', tip_cols=['NOM_ENT', 'NOM_MUN', 'name'])),
    (selected_munic_head,
     dict(name='munic-head', color='red', tip_cols=['NOM_ENT', 'NOM_MUN', 'name'])),
]
for layer_frame, layer_options in map_layers:
    geojson_per_row(layer_frame, some_map=some_map, **layer_options)

# Layer toggle control, then write the map to an HTML file.
folium.LayerControl().add_to(some_map)
some_map.save('maps/metro_urban_areas.html')

## mexico country

In [None]:
%%time
# Merge all state geometries into a single geometry (timed by the cell magic above).
mex = cascaded_union(shp_state.geometry.values)

In [None]:
# Rebuild every part of the merged geometry from its outer boundary only,
# which drops all interior holes.
# Iterate through `.geoms`: iterating a MultiPolygon directly was deprecated
# in Shapely 1.8 and removed in 2.0, while `.geoms` works on both.
exteriors_only = [Polygon(part.exterior) for part in mex.geoms]
new_polys = MultiPolygon(exteriors_only)

# One-row frames: the raw union and the hole-free version.
mex_gpdf = gp.GeoDataFrame([[mex,]], columns=['geometry'])
clean_mex = gp.GeoDataFrame([[new_polys,]], columns=['geometry'])

In [None]:
maps = folium.Map(location=[19.381495, -99.139095], zoom_start=6)

# Two layers to compare: the raw union and the version without holes.
country_layers = [
    ('raw cascad union', mex_gpdf),
    ('keep exterior only', clean_mex),
]
for layer_name, layer_frame in country_layers:
    folium.GeoJson(layer_frame.to_json(), name=layer_name).add_to(maps)

# Layer toggle control, then write the map to an HTML file.
folium.LayerControl().add_to(maps)
maps.save('maps/mexico_country.html')

In [None]:
# Write the hole-free country outline to disk as GeoJSON.
clean_mex.to_file('data/mex_country.geojson', driver='GeoJSON')