# MERGING GEOJSON

In [None]:
from os import listdir
from os.path import isfile, join

import re
from itertools import combinations
from itertools import chain

import pandas as pd
import numpy as np

import geopandas as gpd
import geojson
import rtree

import folium.folium

import seaborn as sns
import matplotlib.pyplot as plt

import folium
from folium import plugins

%matplotlib inline

pd.set_option('display.max_rows', 1000)

In [None]:
# Directory the GeoJSON inputs of this notebook are read from.
# TODO: transform this into temp file
CLEAN_PATH = '../data/clean'

# Short tag for each GeoDataFrame variable, keyed by the variable's name.
NAMES_DICT = dict(
    cad_pa_gdf='CAD_PA',
    o_build_gdf='O_CONS',
    p_build_gdf='P_BU',
    build_gdf='BU',
    cad_zo_gdf='CAD_ZO',
    mad_gdf='MAD',
)

In [None]:
# Names of the regular files (sub-directories are skipped) found directly
# inside CLEAN_PATH; the bare name on the last line shows the list as the
# cell output.
all_raw_geofiles = [
    entry
    for entry in listdir(CLEAN_PATH)
    if isfile(join(CLEAN_PATH, entry))
]
all_raw_geofiles

In [None]:
def merge_to_main_gdf(main_gdf, merged_gdf):
    """Merge the attribute columns of ``merged_gdf`` into ``main_gdf`` on ``ID``.

    The ``yearOfConstruction``, ``label`` and ``geometry`` columns of
    ``merged_gdf`` are left out, so only its remaining attribute columns are
    attached to ``main_gdf``.

    If ``merged_gdf`` holds several rows for the same ``ID``, they are first
    collapsed to a single row per ``ID`` by taking the column-wise maximum, so
    the merge cannot multiply the rows of ``main_gdf``.

    Args:
        main_gdf: Frame that receives the extra columns; must have an ``ID``
            column.
        merged_gdf: Frame providing the extra columns; must have an ``ID``
            column.

    Returns:
        The result of ``main_gdf.merge(..., on='ID')`` (pandas' default inner
        join, so only ``ID`` values present in both frames are kept).

    Raises:
        KeyError: If either frame has no ``ID`` column.
    """
    # These columns are dropped from the merged side: they add no information.
    cols_to_keep_out = {'yearOfConstruction', 'label', 'geometry'}
    cols_to_select = [col for col in merged_gdf.columns if col not in cols_to_keep_out]

    attributes = merged_gdf[cols_to_select]

    if not attributes['ID'].is_unique:
        # One row per ID (max as aggregate); re-select to restore the original
        # column order after ``reset_index`` moved ``ID`` to the front.
        attributes = attributes.groupby(['ID']).max().reset_index()[cols_to_select]

    return main_gdf.merge(attributes, on='ID')

### LOADING BUILDING DATASET

In [None]:
# Read the building layer and tag the frame with its short dataset name.
building_path = f"{CLEAN_PATH}/building.geojson"
build_gdf = gpd.read_file(building_path)
build_gdf.name = NAMES_DICT['build_gdf']

### LOADING BUILDING PART DATASET

In [None]:
# Read the building-part layer and tag the frame with its short dataset name.
building_part_path = f"{CLEAN_PATH}/buildingpart.geojson"
p_build_gdf = gpd.read_file(building_part_path)
p_build_gdf.name = NAMES_DICT['p_build_gdf']

### LOADING CADASTRAL PARCEL DATASET

In [None]:
# Read the cadastral-parcel layer and tag the frame with its short dataset name.
cadastral_parcel_path = f"{CLEAN_PATH}/cadastralparcel.geojson"
cad_pa_gdf = gpd.read_file(cadastral_parcel_path)
cad_pa_gdf.name = NAMES_DICT['cad_pa_gdf']

### LOADING 28900 (MADRID) DATASET

In [None]:
# Read the 28900 (Madrid) layer and tag the frame with its short dataset name.
madrid_path = f"{CLEAN_PATH}/28900.geojson"
mad_gdf = gpd.read_file(madrid_path)
mad_gdf.name = NAMES_DICT['mad_gdf']

In [None]:
def changes_to_28900(gdf):
    """Drop the ``designator`` and ``yearOfConstruction`` columns of ``gdf`` in place.

    The frame is modified in place and nothing is returned, following the
    usual convention for in-place operations.

    Columns that are already absent are ignored, so the notebook cell can be
    re-run on a frame that was cleaned before without raising ``KeyError``.

    Args:
        gdf: Frame to clean; it is mutated.

    Returns:
        None.
    """
    gdf.drop(
        columns=['designator', 'yearOfConstruction'],
        inplace=True,
        errors='ignore',
    )

changes_to_28900(mad_gdf)

# This dataset shows the doorways ("portales") of Madrid.
# Its ID differs from the one used by the rest of the datasets.
# Perhaps an sjoin() would be needed.

In [None]:
# Column whose values colour the map.
# NOTE(review): the previous note said this "corresponds grossfloorarea" -
# confirm, the same note is attached to other columns elsewhere.
col_to_draw = 'specification'

mad_gdf[['geometry', col_to_draw]].plot(
    column=col_to_draw,
    legend=True,
    figsize=(25, 35),
)
plt.show()

### LOADING CADASTRAL ZONING DATASET

### LOADING OTHER BUILDING DATASET

##  FINAL RESULT

In [None]:
# Attach the building-part attributes to the building frame (matched on ID).
build_gdf = merge_to_main_gdf(build_gdf, p_build_gdf)

In [None]:
# Attach the cadastral-parcel attributes to the building frame (matched on ID).
final_gdf = merge_to_main_gdf(build_gdf, cad_pa_gdf)

In [None]:
def split_xtr(x):
    """Return the second ``_``-separated segment of ``x``.

    For example ``"1_residential"`` gives ``"residential"``.

    Missing values are passed through as ``None``: this covers ``None``, the
    empty string and any non-string value such as a float ``NaN`` (which is
    truthy and would otherwise fail on ``.split``). A string without ``_`` has
    no second segment and also gives ``None``.

    NOTE(review): only the second segment is returned, so a value with more
    than one ``_`` (e.g. ``"4_1_office"``) gives ``"1"`` - confirm this is the
    intended result for such codes.

    Args:
        x: Value to split; normally a string.

    Returns:
        The second segment as a string, or ``None`` when there is none.
    """
    if not isinstance(x, str):
        return None

    segments = x.split('_')
    if len(segments) < 2:
        return None

    return segments[1]

def height_to_floors(gdf, floor_height=3):
    """Convert height columns to floor counts and shorten count-column names, in place.

    Every column of ``gdf`` is inspected once:

    * a name containing ``height`` followed by at least one word character
      has its values divided by ``floor_height`` and is renamed to
      ``nFloors<suffix>``;
    * otherwise, a name containing ``numberOf`` followed by at least one word
      character is renamed to ``n_<suffix>`` (values untouched);
    * any other column is left as is.

    ``<suffix>`` is the text that follows the first ``height`` / ``numberOf``
    in the column name.

    Args:
        gdf: Frame to modify; it is mutated.
        floor_height: Height of one floor, in the same unit as the height
            columns. Defaults to 3.

    Returns:
        None.
    """
    renames = {}

    # Iterate over a snapshot of the names; the renaming is applied once at
    # the end so the columns are not changed while being iterated.
    for col in list(gdf.columns):
        if re.search(r'height\w+', col):
            suffix = col.split('height')[1]
            # Assign (not compare) so the converted values are actually stored.
            gdf[col] = gdf[col] / floor_height
            renames[col] = f'nFloors{suffix}'
        elif re.search(r'numberOf\w+', col):
            suffix = col.split('numberOf')[1]
            renames[col] = f'n_{suffix}'

    gdf.rename(columns=renames, inplace=True)
    
# Replace each currentUse value by its second '_'-separated segment.
current_use = final_gdf['currentUse']
final_gdf['currentUse'] = current_use.apply(split_xtr)

# Rename the height* / numberOf* columns of the frame in place.
height_to_floors(final_gdf)

In [None]:
# Move the 'geometry' column to the last position; every other column keeps
# its relative order.
cols_to_select = [col for col in final_gdf.columns if col != 'geometry']
cols_to_select.append('geometry')

final_gdf = gpd.GeoDataFrame(
    final_gdf[cols_to_select],
    geometry=final_gdf['geometry'],
)

# Quick check of the result: first rows, type and shape.
for summary in (final_gdf.head(2), type(final_gdf), final_gdf.shape):
    display(summary)

In [None]:
# Column whose values colour the map.
# NOTE(review): the previous note said this "corresponds grossfloorarea" -
# confirm, the same note is attached to other columns elsewhere.
col_to_draw = 'yearOfConstruction'

final_gdf[['geometry', col_to_draw]].plot(
    column=col_to_draw,
    legend=True,
    figsize=(25, 35),
)
plt.show()

In [None]:
# Show the column names of final_gdf as the cell output.
final_gdf.columns
# TODO: unfinished column ordering, kept for reference:
# order_cols = ['ID', 'conditionOfConstruction', 'currentUse', 'yearOfConstruction', '']


In [None]:
# Spatially join mad_gdf with final_gdf, keeping the mad_gdf features that lie
# within a final_gdf geometry. ``predicate`` is the current name of the former
# ``op`` keyword, which geopandas deprecated in 0.10 and removed in 1.0.
final_2_gdf = gpd.sjoin(mad_gdf, final_gdf, predicate='within')

In [None]:
# Quick check of the spatial join: first rows and shape.
for summary in (final_2_gdf.head(2), final_2_gdf.shape):
    display(summary)

In [None]:
# Column whose values colour the map.
# NOTE(review): the previous note said this "corresponds grossfloorarea" -
# confirm, the same note is attached to other columns elsewhere.
col_to_draw = 'yearOfConstruction'

final_2_gdf[['geometry', col_to_draw]].plot(
    column=col_to_draw,
    legend=True,
    figsize=(25, 35),
)
plt.show()