In [1]:
import os
import sys

import pandas as pd
import geopandas as gpd
import osmnx as ox
import numpy as np

from shapely import wkt

import matplotlib.pyplot as plt
import seaborn as sns

from pandas.api.types import CategoricalDtype

import warnings
# Silence FutureWarning noise from the geo stack so cell output stays readable.
warnings.simplefilter(action='ignore', category=FutureWarning)

# Put the directory three levels up on sys.path so the project-local `aup`
# package can be imported (presumably the repository root; confirm).
module_path = os.path.abspath(os.path.join('../../../'))
if module_path not in sys.path:
    sys.path.append(module_path)

# Import unconditionally: when the path is already on sys.path the branch above
# is skipped, and an import nested inside it would leave `aup` undefined.
import aup



2023-12-05 11:10:09 Configured OSMnx 1.1.2
2023-12-05 11:10:09 HTTP response caching is on


In [None]:
# Query municipalities
mun_schema = 'marco'
mun_table = 'mpos_2020'
query = f"SELECT * FROM {mun_schema}.{mun_table} "
mun_gdf = aup.gdf_from_query(query, geometry_col='geometry')

# Create Mexico shape
# Reproject before buffering so the buffer distance is expressed in the units
# of EPSG:6372 (presumably metres; confirm against the CRS definition).
mun_gdf = mun_gdf.to_crs("EPSG:6372")
# NOTE(review): the 1-unit buffer looks intended to make neighbouring
# municipalities overlap slightly so the dissolve yields one clean shape.
mun_buffer = mun_gdf.buffer(1)
# Build the frame with an explicit geometry column instead of wrapping the
# GeoSeries and renaming the implicit column `0` in place; this guarantees the
# active geometry is set and keeps the cell free of in-place mutation.
mun_gdf_buffer = gpd.GeoDataFrame(geometry=mun_buffer)
# Merge every buffered municipality into a single row, then return to lon/lat.
mexico = mun_gdf_buffer.dissolve()
mexico = mexico.to_crs("EPSG:4326")

# Show
mexico.plot()

In [None]:
# Upload the building footprints of one cell to the database, reading the CSV
# in chunks and pushing each chunk in batches of at most `limit_len` rows.
cell = '85d'
# Manually maintained bookkeeping of which cells were already processed.
#next_cells = []
#done_cells = ['80d','813','86d','815','83f','841','85b','86f','865','86b','8f5'(2522s),
# '867','869','85f','859','843']
#failed_cells = []

chunksize = 5000000   # rows read from the CSV per iteration
limit_len = 500000    # maximum rows sent to the database per upload call

directory = f"../../../data/external/building_footprints/{cell}_buildings.csv"
aup.log(f'Started reading file for cell {cell}')

for buildings_tmp in pd.read_csv(directory, chunksize=chunksize):
    # process each chunk here
    aup.log('Finished reading file')

    # The 'geometry' column holds WKT text; parse it into shapely geometries.
    buildings_tmp['geometry'] = buildings_tmp['geometry'].apply(wkt.loads)
    buildings_gdf = gpd.GeoDataFrame(buildings_tmp, crs='epsg:4326')
    del buildings_tmp
    buildings_gdf['cell'] = cell

    aup.log('Finished assigning geometry')

    n_rows = len(buildings_gdf)
    if n_rows > limit_len:
        # Index of the last non-empty batch. Using (n_rows - 1) avoids the
        # trailing empty slice that int(n_rows / limit_len) + 1 produces when
        # n_rows is an exact multiple of limit_len.
        last_k = (n_rows - 1) // limit_len
        for k in range(last_k + 1):
            aup.log(f"Starting range k = {k} of {last_k}")
            gdf_inter_upload = buildings_gdf.iloc[limit_len*k:limit_len*(k+1)].copy()
            aup.gdf_to_db_slow(gdf_inter_upload,'buildings_google_v3','google_buildings',if_exists="append")
            # Release the batch copy here, where it is guaranteed to exist;
            # deleting it after the if/else raised NameError for small chunks.
            del gdf_inter_upload
    else:
        aup.log('Starting upload of all data')
        aup.gdf_to_db_slow(buildings_gdf,'buildings_google_v3','google_buildings',if_exists="append")

    # Free the chunk before the next one is read to keep peak memory down.
    del buildings_gdf

In [None]:
# Spatially join the buildings to the dissolved Mexico shape. The inner join
# drops every building that has no match in `mexico`.
# NOTE(review): the upload loop in the previous cell deletes `buildings_gdf`
# at the end of each chunk, so this cell needs that frame re-created first.
mexico_buildings = gpd.sjoin(
    left_df=buildings_gdf,
    right_df=mexico,
    how='inner',
)

# Show
print(mexico_buildings.shape)
mexico_buildings.head(2)