# MERGING GEOJSON

In [4]:
from os import listdir
from os.path import isfile, join

import re
from itertools import combinations
from itertools import chain

import pandas as pd
import numpy as np

import geopandas as gpd
import geojson
import rtree

import folium.folium

import seaborn as sns
import matplotlib.pyplot as plt

import folium
from folium import plugins

%matplotlib inline

# Let pandas render up to 1000 rows before it truncates a displayed frame.
pd.options.display.max_rows = 1000

In [5]:
# TODO: transform this into a temp file
CLEAN_PATH = '../data/clean'

# Short dataset code for each GeoDataFrame variable name used in this notebook.
NAMES_DICT = dict(
    cad_pa_gdf='CAD_PA',
    o_build_gdf='O_CONS',
    p_build_gdf='P_BU',
    build_gdf='BU',
    cad_zo_gdf='CAD_ZO',
)

In [10]:
# Names of the regular files (sub-directories are skipped) found directly in CLEAN_PATH.
all_raw_geofiles = list(
    filter(lambda entry: isfile(join(CLEAN_PATH, entry)), listdir(CLEAN_PATH))
)
all_raw_geofiles

['cadastralparcel.geojson',
 'otherconstruction.geojson',
 'cadastralparcel_2.geojson',
 'cadastralzoning_2.geojson',
 'buildingpart.geojson',
 'cadastralzoning.geojson',
 'building.geojson',
 '28900.geojson']

In [71]:
def merge_to_main_gdf(main_gdf, merged_gdf):
    """
    Attach the attribute columns of ``merged_gdf`` to ``main_gdf``, matching rows on 'ID'.

    input:
        main_gdf: (Geo)DataFrame with an 'ID' column; all of its columns are kept.
        merged_gdf: (Geo)DataFrame with an 'ID' column. Its 'geometry',
            'yearOfConstruction' and 'label' columns (when present) are left out
            so they do not collide with the columns already in ``main_gdf``.
    output:
        A new frame: ``main_gdf`` merged (pandas default, i.e. inner join) with the
        selected columns of ``merged_gdf``. When an 'ID' occurs more than once in
        ``merged_gdf``, its rows are first collapsed to a single row per 'ID' by
        taking the column-wise maximum. Neither input frame is modified.
    """
    cols_to_keep_out = ['geometry', 'yearOfConstruction', 'label']
    cols_to_select = [col for col in merged_gdf.columns if col not in cols_to_keep_out]
    attributes = merged_gdf[cols_to_select]

    # Duplicate IDs would multiply rows in the merge, so reduce to one row per ID first.
    if not attributes['ID'].is_unique:
        attributes = attributes.groupby('ID', as_index=False).max()  # max as agg

    return main_gdf.merge(attributes, on='ID')

### LOADING BUILDING DATASET

In [61]:
# Read the building layer from CLEAN_PATH into a GeoDataFrame.
build_gdf = gpd.read_file(f"{CLEAN_PATH}/building.geojson")
# Tag the frame with its short dataset code from NAMES_DICT.
build_gdf.name = NAMES_DICT['build_gdf']
# Preview the first two rows.
build_gdf.head(2)

Unnamed: 0,ID,conditionOfConstruction,currentUse,numberOfBuildingUnits,numberOfDwellings,area_m2c,yearOfConstruction,geometry
0,VK4700H,-,,0,0,0,2014,"POLYGON ((440433.629 4470953.961, 440432.170 4..."
1,00006Z8VK4800A,-,,0,0,0,2017,"POLYGON ((440020.830 4479859.707, 440020.064 4..."


### LOADING BUILDING PART DATASET

In [64]:
# LOADING: read the building-part layer from CLEAN_PATH into a GeoDataFrame.
p_build_gdf = gpd.read_file(f"{CLEAN_PATH}/buildingpart.geojson")
# Tag the frame with its short dataset code from NAMES_DICT.
p_build_gdf.name = NAMES_DICT['p_build_gdf']
# Preview the first two rows.
p_build_gdf.head(2)

Unnamed: 0,ID,nFloors_AG,height_BG,ID_part,yearOfConstruction,geometry
0,000200100VK48E,1,0,1,2004,"POLYGON ((441657.574 4487050.292, 441658.164 4..."
1,000200500VK56E,1,0,1,2013,"POLYGON ((451584.920 4467181.410, 451586.250 4..."


In [None]:
# Bring the building-part attributes into the building frame (rows matched on ID).
build_gdf = merge_to_main_gdf(build_gdf, p_build_gdf)

### LOADING CADASTRAL PARCEL DATASET

In [None]:
# Read the cadastral-parcel layer from CLEAN_PATH into a GeoDataFrame.
cad_pa_gdf = gpd.read_file(f"{CLEAN_PATH}/cadastralparcel.geojson")
# Tag the frame with its short dataset code from NAMES_DICT.
cad_pa_gdf.name = NAMES_DICT['cad_pa_gdf']

In [None]:
# Bring the cadastral-parcel attributes into the building frame (rows matched on ID).
build_gdf = merge_to_main_gdf(build_gdf, cad_pa_gdf)

In [84]:
# Keep only the parcel area and the parcel geometry. The geometry column is renamed
# so it does not collide with the building 'geometry' column in the merge below.
# rename() returns a new frame here, so cad_pa_gdf itself is left untouched and the
# cell no longer depends on a definition that only existed in commented-out code.
cad_pa_gdf_2 = cad_pa_gdf.rename(columns={'geometry': 'geometry_cad'})[
    ['ID', 'area_m2p', 'geometry_cad']
]

# NOTE(review): the preceding cell already merges cad_pa_gdf's non-geometry columns
# into build_gdf; running both cells would bring 'area_m2p' in twice (pandas then
# suffixes the duplicated column names). Confirm which of the two merges is intended.
build_gdf = build_gdf.merge(cad_pa_gdf_2, on='ID')

In [85]:
# Inspect the merged frame; it has far more rows than display.max_rows, so the
# rendered output is a truncated preview rather than the full table.
build_gdf

Unnamed: 0,ID,conditionOfConstruction,currentUse,numberOfBuildingUnits,numberOfDwellings,area_m2c,yearOfConstruction,geometry,nFloors_AG,height_BG,ID_part,area_m2p,geometry_cad
0,000200100VK48E,functional,2_agriculture,1,0,26,2004,"POLYGON ((441657.574 4487050.292, 441658.164 4...",1,0,1,26,"MULTIPOLYGON (((441657.574 4487050.292, 441658..."
1,000200500VK56E,declined,3_industrial,1,0,646,2013,"MULTIPOLYGON (((451584.920 4467181.410, 451586...",1,0,2,1268,"MULTIPOLYGON (((451599.360 4467174.940, 451584..."
2,000205600VK56E,declined,3_industrial,1,0,202,2013,"POLYGON ((451579.400 4467169.610, 451582.370 4...",1,0,1,297,"MULTIPOLYGON (((451593.000 4467163.250, 451579..."
3,000205700VK56E,declined,3_industrial,1,0,105,2013,"POLYGON ((451576.440 4467163.260, 451576.930 4...",1,0,1,155,"MULTIPOLYGON (((451590.040 4467156.910, 451576..."
4,000205800VK56E,declined,3_industrial,1,0,120,2013,"POLYGON ((451573.050 4467156.010, 451576.440 4...",1,0,1,174,"MULTIPOLYGON (((451586.650 4467149.650, 451573..."
...,...,...,...,...,...,...,...,...,...,...,...,...,...
122148,9999717VK4799H,functional,1_residential,1,1,170,2003,"POLYGON ((449884.579 4479732.588, 449881.318 4...",2,0,3,160,"MULTIPOLYGON (((449882.159 4479739.009, 449882..."
122149,9999718VK4799H,functional,1_residential,1,1,170,2003,"POLYGON ((449890.919 4479738.238, 449886.878 4...",2,0,3,160,"MULTIPOLYGON (((449888.508 4479744.679, 449889..."
122150,9999719VK4799H,functional,1_residential,1,1,170,2003,"POLYGON ((449897.269 4479743.888, 449895.829 4...",2,0,3,160,"MULTIPOLYGON (((449892.998 4479748.679, 449894..."
122151,9999720VK4799H,functional,1_residential,1,1,170,2003,"POLYGON ((449903.599 4479749.518, 449900.988 4...",2,0,3,161,"MULTIPOLYGON (((449901.248 4479756.028, 449902..."


## aux

In [89]:
# NOTE(review): always-false assertion; presumably a deliberate stop so that
# "Run All" halts before the scratch cells below. Confirm before removing.
assert 1 == 2

AssertionError: 

In [90]:
# Field notes for the cadastral-parcel layer:
#
# yearOfConstruction doesn't indicate a construction year in this case, but when the
# parcel was included in the database.
# beginLifespanVersion: date since which the parcel has been registered in the
# cadastral database.
#
# cp:label: the parcel number; it corresponds to the number shown on the cartography.
# For urban parcels it has 2 digits and for rural parcels it can have up to 5 digits.
#
# cp:referencePoint: GML point structure with the coordinates of the parcel centroid.
# It is a point inside the parcel boundary and it is where the position of "cp:label"
# is anchored in the viewing services.

# Reload the parcels and leave out the two columns described above. drop() is chained
# (not in place) so the cell always starts from the file on disk.
cad_pa_gdf = (
    gpd.read_file(f"{CLEAN_PATH}/cadastralparcel.geojson")
    .drop(columns=['label', 'yearOfConstruction'])
)
# Set the tag after the drop so it sits on the final frame object.
cad_pa_gdf.name = NAMES_DICT['cad_pa_gdf']

# Note_________________________________________________________________________________
# Routine if I could have two geometries
# ______________________________________________________________________________________
# cad_pa_gdf['pos_x'] = cad_pa_gdf.apply(lambda x: x['pos'].split(" ")[0], axis = 1)
# cad_pa_gdf['pos_y'] = cad_pa_gdf.apply(lambda x: x['pos'].split(" ")[1], axis = 1)
#
# cad_pa_gdf['centroid'] = gpd.points_from_xy(cad_pa_gdf['pos_x'], cad_pa_gdf['pos_y'])
# cad_pa_gdf.drop(['pos', 'pos_x', 'pos_y'], axis = 1, inplace = True)

#cad_pa_gdf.to_file(f"{CLEAN_PATH}/cadastralparcel.geojson", driver='GeoJSON')
display(cad_pa_gdf.head(2))
# True when every parcel ID appears exactly once.
display(cad_pa_gdf['ID'].is_unique)

Unnamed: 0,ID,area_m2p,pos,geometry
0,000200500VK56E,1268,451607.03 4467199.27,"MULTIPOLYGON (((451599.360 4467174.940, 451584..."
1,000205600VK56E,297,451592.44 4467170.83,"MULTIPOLYGON (((451593.000 4467163.250, 451579..."


True