## RESULTS FROM CLUSTERING

In [1]:
# for data
from os import listdir
from os.path import isfile, join
from functools import reduce

import re
from itertools import combinations
from itertools import chain

import pandas as pd
import numpy as np

# for geospatial
import geopandas as gpd
import geojson
import rtree
import pyproj

# for viz
import folium.folium

import seaborn as sns
import matplotlib.pyplot as plt

import folium
from folium import plugins
import geoplot

%matplotlib inline

# Let pandas display up to 1000 rows before truncating a printed frame
pd.set_option('display.max_rows', 1000)
# Seaborn "darkgrid" theme with the axes background colour overridden to ".8"
sns.set_style("darkgrid", {"axes.facecolor": ".8"})

### GLOBAL VARIABLES

In [2]:
# GLOBAL VARIABLES

# --- Data directories (relative to this notebook) ---------------------------
# Clean data: currently three files (only two are expected: catastro and
# arturo); the files have different shapes.
CLEAN_DATA_PATH = '../data/clean'
# Labelled data: two files (clusters from catastro, clusters from all data);
# the files have different shapes.
LABLD_DATA_PATH = '../data/labelled'
# Model base: one file with every column from arturo and catastro
# (covers 90.5% of the Catastro data).
MODEL_DATA_PATH = '../data/modelbase'
# Final data set that is loaded by this notebook.
FINAL_DATA_PATH = '../data/final_streamlit'

# --- File name -> short name attached to the loaded GeoDataFrame ------------
NAMES_DICT = {
    # cadastre layers
    'cadastralparcel.geojson': 'CAD_PA',
    'otherconstruction.geojson': 'O_CONS',
    'buildingpart.geojson': 'P_BU',
    'building.geojson': 'BU',
    'cadastralzoning.geojson': 'CAD_ZO',
    '28900.geojson': 'MAD',
    # derived building / arturo layers
    'building_points.geojson': 'BU_POINTS',
    'arturo.geojson': 'ARTURO_DF',
    'building_polygs.geojson': 'BU_POLYGONS',
    'building_parcls.geojson': 'BU_PARCELS',
    # final file (shares the 'BU_PARCELS' short name with the entry above)
    'bu_parcel_epsg3857.geojson': 'BU_PARCELS',
}

# --- Misc constants ---------------------------------------------------------
MADRID_EPSG = 25830  # EPSG code (ETRS89 / UTM zone 30N)
STATE = 42           # presumably a random seed; unused in the cells shown here

In [3]:
def getting_final_geoframes(geometries_path):
    """
    Load the building GeoJSON found in `geometries_path` as a GeoDataFrame.

    INPUT:
        geometries_path: directory to scan. Only regular files whose name
            contains the substring 'bu' are considered.
    OUTPUT:
        A single GeoDataFrame indexed by its 'ID' column, carrying a `name`
        attribute looked up in NAMES_DICT (the file stem is used when the
        file is not listed there). When several files match, only the first
        one in alphabetical order is loaded.
    RAISES:
        FileNotFoundError: when no file in `geometries_path` matches.
    """
    # Candidate files; sorted so the chosen file does not depend on the
    # arbitrary order returned by listdir()
    geom_bu_files = sorted(
        f for f in listdir(geometries_path)
        if isfile(join(geometries_path, f)) and 'bu' in f
    )
    print(f"\n-- Opening {len(geom_bu_files)} files in {geometries_path} --------------------------------------------------------------")

    # Fail here with a clear message instead of returning None and breaking
    # later on the first attribute access
    if not geom_bu_files:
        raise FileNotFoundError(f"No file with 'bu' in its name found in {geometries_path}")

    # The caller expects ONE GeoDataFrame, so only the first match is read
    file = geom_bu_files[0]
    if len(geom_bu_files) > 1:
        print(f"\tWARNING: only {file} is loaded, ignoring {geom_bu_files[1:]}")

    # Note: yielding directly from gpd.read_file does not work like in pandas
    geom_file = gpd.read_file(join(geometries_path, file)).set_index('ID')
    # Fall back to the file stem so an unlisted file does not raise KeyError
    geom_file.name = NAMES_DICT.get(file, file.split('.')[0])

    print(f"\t{file.split('.')[0]} \tOPENED \tMemory Usage:\t{np.round(geom_file.memory_usage().sum()/1000000, 2)} Mb \t\tShape: {geom_file.shape}")
    return geom_file


In [4]:
# Load the building GeoDataFrame found in FINAL_DATA_PATH
bu_parcel = getting_final_geoframes(FINAL_DATA_PATH)


-- Opening 1 files in ../data/final_streamlit --------------------------------------------------------------
	bu_parcel_epsg3857 	OPENED 	Memory Usage:	64.86 Mb 		Shape: (121004, 66)


In [5]:
# Preview the first row to check the available columns
bu_parcel.head(1)

Unnamed: 0_level_0,conditionOfConstruction,currentUse,n_BuildingUnits,n_Dwellings,nFloors_AG,nFloors_BG,area_m2c,area_m2p,train,land_use_mix,...,street_centrality_betweenness,street_centrality_closeness,street_hierarchy_primary,street_hierarchy_secondary,street_hierarchy_tertiary,value,centuryOfConstr,cluster_all,cluster_build,geometry
ID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
0003405VK4700C,functional,residential,22,18,6,3,2408,521,0.0,0.000278,...,9.2e-05,0.015844,0.0,0.0,1.0,22.249808,20-02,33,7,"MULTIPOLYGON (((-412710.183 4921258.904, -4127..."


In [6]:
# Inspect the coordinate reference system of the loaded geometries
bu_parcel.crs

<Projected CRS: EPSG:3857>
Name: WGS 84 / Pseudo-Mercator
Axis Info [cartesian]:
- X[east]: Easting (metre)
- Y[north]: Northing (metre)
Area of Use:
- name: World - 85°S to 85°N
- bounds: (-180.0, -85.06, 180.0, 85.06)
Coordinate Operation:
- name: Popular Visualisation Pseudo-Mercator
- method: Popular Visualisation Pseudo Mercator
Datum: World Geodetic System 1984
- Ellipsoid: WGS 84
- Prime Meridian: Greenwich

### FOLIUM ATTEMPT, VERSION 2

In [148]:
# Mean `value` per `cluster_all` label, transposed so the clusters run across the columns
bu_parcel.groupby('cluster_all')[['value']].mean().T

cluster_all,0,1,2,3,4,5,6,7,8,9,...,78,79,80,81,82,83,84,85,86,87
value,19.936374,20.822714,21.286976,21.520223,21.983482,22.017052,22.033955,22.160387,22.178808,22.267185,...,25.886461,26.386198,26.396756,26.597938,26.60461,26.85602,27.646363,28.204624,28.63743,30.142092


In [137]:
# Folium map with one toggleable GeoJson layer per cluster

# Cluster labels run from 0 to max, so there are max + 1 of them
n_clusters = int(bu_parcel['cluster_all'].max()) + 1

# One colour per cluster, taken from the reversed "cool" palette
color_p = sns.color_palette("cool", n_clusters).as_hex()[::-1]


m = folium.Map( # 'EPSG3857' by default
    location = [40.4168, -3.7038],  # lat, lon of the initial view (presumably central Madrid)
    zoom_start = 14,
    control_scale = False,
    tiles = 'cartodbdark_matter',
 )

# range() excludes its stop value: iterate up to n_clusters so the
# highest-numbered cluster also gets a layer
for val in range(n_clusters):
    fillColor = color_p[val]
    folium.GeoJson(bu_parcel[bu_parcel['cluster_all'] == val][['geometry', 'cluster_all']],
                   name = f"cluster_{val}",
                   # fillColor is bound as a default argument so every layer keeps
                   # its own colour instead of the last value of the loop
                   style_function = lambda x, fillColor=fillColor: {
                               'fillColor': fillColor,
                               'color' : '#000000',
                               'weight': 0.1,
                               'fillOpacity': 0.75}
              ).add_to(m)

# Layer switcher to toggle the individual cluster layers
folium.LayerControl().add_to(m)

m.save('tryout.html')
