# Mapa con choropleth_mapbox y Dash
# Bootstrap


In [1]:
import pandas as pd 
import numpy as np 
import matplotlib.pyplot as plt 
%matplotlib inline
import seaborn as sns 

from shapely.geometry import Point, Polygon 
import geopandas as gpd
from shapely.geometry import shape
import descartes
import json

In [2]:
import dash
import dash_bootstrap_components as dbc
import dash_core_components as dcc
import dash_html_components as html
from dash.dependencies import Input, Output
import dash_table as dt
import pandas as pd
import plotly.express as px
from sklearn.metrics import accuracy_score


In [3]:
# pd.set_option ("display.max_rows", None)
# pd.set_option ("display.max_columns", None)
# pd.set_option ("display.max_colwidth",200)

Esta función calcula la media de una variable
 - identificador indica si es district_id o vdcmun_id

In [4]:
def media_variable(df, variable, identificador):
    media=pd.DataFrame(df.groupby(identificador)[variable].mean())
    media=media.rename(columns={variable:variable+'_'+identificador})
    return(media)


Lo que hace esta función es calcular el porcentaje de una categoría dentro de una variable con varias categorías.
 - identificador indica si es district_id o vdcmun_id
 - categoría: categoría dentro de la variable de la que queremos calcular el porcentaje

In [5]:
def porcentage_variable(df, variable, identificador,categoria):
    df_tot=pd.DataFrame(df.groupby(identificador)[variable].count())
    df_tot=df_tot.rename(columns={variable: variable+'_'+identificador})
    df_categoria=pd.DataFrame(df.groupby(identificador)[variable].value_counts().unstack().fillna(0).loc[:,categoria])
    df_categoria=df_categoria.rename(columns={variable: "percentage_"+variable+'_'+categoria})  
    df_percentage=pd.DataFrame(df_categoria[categoria]/df_tot[variable+'_'+identificador]*100)                                     
    df_percentage=df_percentage.rename(columns={0: "percentage_"+variable+'_'+identificador})      
    return(df_percentage)            

Nuestro dashboard dispone de dos parte:
- Visualización de las variables: para ello cargo el dataset en que se ha generado con las variables limpias y agrupada, df_vis
- Prueba de concepto: cargaré las probabilidades sobre el conjunto de explotación que se han generado como resultado de escoger el mejor modelo, prob_explo

En ambos casos necesito cruzarlos y representarlos en el mapa de Nepal, map_nepal_json

## IMPORTACIÓN DE LOS DATOS

### MAPA

Para cargar el mapa y poderselo pasar al mapa choropleth lo abro en json.

In [6]:
with open('./Data/downloadeddata.geojson') as f:
    map_nepal_json = json.load(f)
# features = map_nepal_json['features'][0]
# features
#map_nepal_json

### DATOS

Empiezo a cargar los datos.

Disponemos de localización a nivel de municipio y distrito y las diferentes variables las tenemos a nivel de edificio, para la representación se ha realizado una media o porcentajes a nivel de distrito y municipio. Éstas variables resultantes las vamos a ir agregando a total_map, donde me quedaré con la columnas necesarias para poder cruzarlo con el map_nepal_json, formato requerido por choropleth

In [7]:
total_data=pd.read_csv("./Data/total_map.csv")

En este dataframe tengo datos que voy a necesitar para poder representar el mapa:

- N_ID es la columna pivote que voy a utilizar para relacionar mis datos con el json que he importado
- vdcmun_id lo voy a utilizar para hacer calculos de variables por municipio
- district_id lo voy a utilizar para hacer calculos de variables por distrito
- DISTRICT lo voy a utilizar para recoger el nombre de los distritos


Las variables que no utilizo las voy a eliminar.

In [8]:
total_data=total_data.drop(['Unnamed: 0', 'id','F_ID','NAME','districtId','provinceId','geometry','suma'],axis=1)

In [9]:
total_data=total_data.rename(columns={'vdcmun_id':'municipality',
                                      'district_id':'district',
    })

In [10]:
total_data.head()

Unnamed: 0,N_ID,LEVEL,DISTRICT,municipality,district
0,bhaktapur-bhaktapur,Nagarpalika,Bhaktapur,,
1,madhyapur -thimi-bhaktap,Nagarpalika,Bhaktapur,,
2,kathmandu-kathmandu,Mahanagarpalika,Kathmandu,,
3,kirtipur-kathmandu,Nagarpalika,Kathmandu,,
4,chitwan -national -park-,national-park,Makwanpur,,


In [11]:
df_vis = pd.read_csv("./Data/paso_2_agrupaciones_con_building_id.csv")

In [12]:
df_vis.head().T

Unnamed: 0,0,1,2,3,4
Unnamed: 0,0,1,2,3,4
building_id,120101000011,120101000021,120101000031,120101000041,120101000051
ward_id,120703,120703,120703,120703,120703
count_floors_pre_eq,1,1,1,1,1
age_building,2.30259,2.77259,3.04452,3.04452,3.43399
plinth_area_sq_ft,5.66643,5.8999,5.95324,5.7462,5.73334
height_ft_pre_eq,2.30259,2.30259,2.30259,2.30259,2.30259
land_surface_condition,Flat,Flat,Flat,Flat,Flat
foundation_type,others,others,others,others,others
roof_type,Bamboo/Timber-Light roof,Bamboo/Timber-Light roof,Bamboo/Timber-Light roof,Bamboo/Timber-Light roof,Bamboo/Timber-Light roof


Calculo el district_id y el vdcmun_id a partir del ward_id ya que lo necesito para los cruces

In [13]:
df_vis["district_id"]=df_vis["ward_id"].astype(str).str[:-4].astype(np.int64)
df_vis["vdcmun_id"]=df_vis["ward_id"].astype(str).str[:-2].astype(np.int64)
df_vis=df_vis.drop(['Unnamed: 0'],axis=1)

In [14]:
df_vis=df_vis.rename(columns={'vdcmun_id':'municipality',
                                      'district_id':'district'
    })

In [15]:
df_vis.head().T

Unnamed: 0,0,1,2,3,4
building_id,120101000011,120101000021,120101000031,120101000041,120101000051
ward_id,120703,120703,120703,120703,120703
count_floors_pre_eq,1,1,1,1,1
age_building,2.30259,2.77259,3.04452,3.04452,3.43399
plinth_area_sq_ft,5.66643,5.8999,5.95324,5.7462,5.73334
height_ft_pre_eq,2.30259,2.30259,2.30259,2.30259,2.30259
land_surface_condition,Flat,Flat,Flat,Flat,Flat
foundation_type,others,others,others,others,others
roof_type,Bamboo/Timber-Light roof,Bamboo/Timber-Light roof,Bamboo/Timber-Light roof,Bamboo/Timber-Light roof,Bamboo/Timber-Light roof
ground_floor_type,Mud,Mud,Mud,Mud,Mud


### CÁLCULO DE VARIABLES A REPRESENTAR DESCRIPTIVO

Vamos a calcular las siguientes variables a representar:
- Edad media del household por municipio y distrito
- Edad media del edificio por municipio y distrito
- Tamaño medio de la casa por  municipio y distrito
- Porcentaje de Población con educación por municipio y distrito
- Porcentaje de hogares cuya renta está por debajo de las 20.000 rupias por municipio y distrito
- Porcentaje de edificios cuyo suelo es barro por municipio y distrito

Todas estas variables las voy a ir uniendo al total_data para poder después representarlas todas en el mapa usando ese dataframe

#### EDAD MEDIA DEL HOUSEHOLD HEAD POR MUNICIPIO Y DISTRITO

In [16]:
age_household_media_vdcmun=media_variable(df_vis,'age_household_head','municipality')
total_data=pd.merge(total_data, age_household_media_vdcmun, how='inner',on='municipality')

In [17]:
age_household_media_district=media_variable(df_vis,'age_household_head','district')
total_data=pd.merge(total_data, age_household_media_district, how='inner',on='district')

In [18]:
total_data.head()

Unnamed: 0,N_ID,LEVEL,DISTRICT,municipality,district,age_household_head_municipality,age_household_head_district
0,chumnuwri-gorkha,Gaunpalika,Gorkha,3605.0,36.0,34.953284,44.346636
1,dharche-gorkha,Gaunpalika,Gorkha,3606.0,36.0,45.176689,44.346636
2,ajirkot-gorkha,Gaunpalika,Gorkha,3602.0,36.0,47.176659,44.346636
3,palungtar-gorkha,Nagarpalika,Gorkha,3609.0,36.0,44.488097,44.346636
4,siranchok-gorkha,Gaunpalika,Gorkha,3611.0,36.0,46.532816,44.346636


#### EDAD MEDIA DEL  BUILDING POR MUNICIPIO Y DISTRITO

In [19]:
age_building_media_vdcmun=media_variable(df_vis,'age_building','municipality')
total_data=pd.merge(total_data, age_building_media_vdcmun, how='inner',on='municipality')

In [20]:
age_building_media_district=media_variable(df_vis,'age_building','district')
total_data=pd.merge(total_data, age_building_media_district, how='inner',on='district')

In [21]:
total_data.head().T

Unnamed: 0,0,1,2,3,4
N_ID,chumnuwri-gorkha,dharche-gorkha,ajirkot-gorkha,palungtar-gorkha,siranchok-gorkha
LEVEL,Gaunpalika,Gaunpalika,Gaunpalika,Nagarpalika,Gaunpalika
DISTRICT,Gorkha,Gorkha,Gorkha,Gorkha,Gorkha
municipality,3605,3606,3602,3609,3611
district,36,36,36,36,36
age_household_head_municipality,34.9533,45.1767,47.1767,44.4881,46.5328
age_household_head_district,44.3466,44.3466,44.3466,44.3466,44.3466
age_building_municipality,3.17252,2.51168,2.84823,2.74701,2.97407
age_building_district,2.84528,2.84528,2.84528,2.84528,2.84528


#### TAMAÑO MEDIO DE LA CASA POR MUNICIPIO Y POR DISTRITO

In [22]:
house_media_vdcmun=media_variable(df_vis,'plinth_area_sq_ft','municipality')
total_data=pd.merge(total_data, house_media_vdcmun, how='inner',on='municipality')
house_media_district=media_variable(df_vis,'plinth_area_sq_ft','district')
total_data=pd.merge(total_data, house_media_district, how='inner',on='district')

In [23]:
total_data.head()

Unnamed: 0,N_ID,LEVEL,DISTRICT,municipality,district,age_household_head_municipality,age_household_head_district,age_building_municipality,age_building_district,plinth_area_sq_ft_municipality,plinth_area_sq_ft_district
0,chumnuwri-gorkha,Gaunpalika,Gorkha,3605.0,36.0,34.953284,44.346636,3.172519,2.84528,5.803752,5.921137
1,dharche-gorkha,Gaunpalika,Gorkha,3606.0,36.0,45.176689,44.346636,2.511679,2.84528,6.089501,5.921137
2,ajirkot-gorkha,Gaunpalika,Gorkha,3602.0,36.0,47.176659,44.346636,2.848226,2.84528,6.026181,5.921137
3,palungtar-gorkha,Nagarpalika,Gorkha,3609.0,36.0,44.488097,44.346636,2.747007,2.84528,5.828494,5.921137
4,siranchok-gorkha,Gaunpalika,Gorkha,3611.0,36.0,46.532816,44.346636,2.974068,2.84528,5.780081,5.921137


#### PORCENTAJE DE EDIFICIOS CON  TIPO DE SUELO BARRO DAÑADOS POR MUNICIPIO Y DISTRITO

In [24]:
df_vis['ground_floor_type'].unique()

array(['Mud', 'others', 'RC'], dtype=object)

In [25]:
df_percentage_ground_mud_vdcmun=porcentage_variable(df_vis,'ground_floor_type','municipality','Mud')
total_data=pd.merge(total_data, df_percentage_ground_mud_vdcmun, how='inner',on='municipality')
df_percentage_ground_mud_district=porcentage_variable(df_vis,'ground_floor_type','district','Mud')
total_data=pd.merge(total_data, df_percentage_ground_mud_district, how='inner',on='district')

In [26]:
total_data.head()

Unnamed: 0,N_ID,LEVEL,DISTRICT,municipality,district,age_household_head_municipality,age_household_head_district,age_building_municipality,age_building_district,plinth_area_sq_ft_municipality,plinth_area_sq_ft_district,percentage_ground_floor_type_municipality,percentage_ground_floor_type_district
0,chumnuwri-gorkha,Gaunpalika,Gorkha,3605.0,36.0,34.953284,44.346636,3.172519,2.84528,5.803752,5.921137,50.228311,80.778671
1,dharche-gorkha,Gaunpalika,Gorkha,3606.0,36.0,45.176689,44.346636,2.511679,2.84528,6.089501,5.921137,84.434546,80.778671
2,ajirkot-gorkha,Gaunpalika,Gorkha,3602.0,36.0,47.176659,44.346636,2.848226,2.84528,6.026181,5.921137,89.336384,80.778671
3,palungtar-gorkha,Nagarpalika,Gorkha,3609.0,36.0,44.488097,44.346636,2.747007,2.84528,5.828494,5.921137,84.647773,80.778671
4,siranchok-gorkha,Gaunpalika,Gorkha,3611.0,36.0,46.532816,44.346636,2.974068,2.84528,5.780081,5.921137,96.47106,80.778671


#### PORCENTAJE DE PROPIETARIOS SIN EDUCACIÓN POR MUNICIPIO Y DISTRITO

In [27]:
df_vis['education_level_household_head'].unique()

array(['Illiterate', 'Low', 'Medium', 'no_familias', 'High'], dtype=object)

In [28]:
df_percentage_illiterate_vdcmun=porcentage_variable(df_vis,'education_level_household_head','municipality','Illiterate')
total_data=pd.merge(total_data, df_percentage_illiterate_vdcmun, how='inner',on='municipality')
df_percentage_illiterate_district=porcentage_variable(df_vis,'education_level_household_head','district','Illiterate')
total_data=pd.merge(total_data, df_percentage_illiterate_district, how='inner',on='district')

In [29]:
total_data.head()

Unnamed: 0,N_ID,LEVEL,DISTRICT,municipality,district,age_household_head_municipality,age_household_head_district,age_building_municipality,age_building_district,plinth_area_sq_ft_municipality,plinth_area_sq_ft_district,percentage_ground_floor_type_municipality,percentage_ground_floor_type_district,percentage_education_level_household_head_municipality,percentage_education_level_household_head_district
0,chumnuwri-gorkha,Gaunpalika,Gorkha,3605.0,36.0,34.953284,44.346636,3.172519,2.84528,5.803752,5.921137,50.228311,80.778671,48.05058,30.961548
1,dharche-gorkha,Gaunpalika,Gorkha,3606.0,36.0,45.176689,44.346636,2.511679,2.84528,6.089501,5.921137,84.434546,80.778671,62.410294,30.961548
2,ajirkot-gorkha,Gaunpalika,Gorkha,3602.0,36.0,47.176659,44.346636,2.848226,2.84528,6.026181,5.921137,89.336384,80.778671,32.906178,30.961548
3,palungtar-gorkha,Nagarpalika,Gorkha,3609.0,36.0,44.488097,44.346636,2.747007,2.84528,5.828494,5.921137,84.647773,80.778671,25.360324,30.961548
4,siranchok-gorkha,Gaunpalika,Gorkha,3611.0,36.0,46.532816,44.346636,2.974068,2.84528,5.780081,5.921137,96.47106,80.778671,28.579067,30.961548


Hago una pequeña comprobación para ver si el número de distritos es el correcto, 11

In [30]:
total_data.DISTRICT.unique().shape

(11,)

#### PORCENTAJE DE HOGARES CUYA RENTA ESTÁ POPR DEBAJO DE 2000 RUPIAS POR MUNICIPIO Y DISTRITO

In [31]:
df_vis['income_level_household'].unique()

array(['Low', 'Medium', 'no_familias', 'High'], dtype=object)

In [32]:
df_percentage_income_vdcmun=porcentage_variable(df_vis,'income_level_household','municipality','Low')
total_data=pd.merge(total_data, df_percentage_income_vdcmun, how='inner',on='municipality')
df_percentage_ncome_district=porcentage_variable(df_vis,'income_level_household','district','Low')
total_data=pd.merge(total_data, df_percentage_ncome_district, how='inner',on='district')

In [33]:
total_data.head().T

Unnamed: 0,0,1,2,3,4
N_ID,chumnuwri-gorkha,dharche-gorkha,ajirkot-gorkha,palungtar-gorkha,siranchok-gorkha
LEVEL,Gaunpalika,Gaunpalika,Gaunpalika,Nagarpalika,Gaunpalika
DISTRICT,Gorkha,Gorkha,Gorkha,Gorkha,Gorkha
municipality,3605,3606,3602,3609,3611
district,36,36,36,36,36
age_household_head_municipality,34.9533,45.1767,47.1767,44.4881,46.5328
age_household_head_district,44.3466,44.3466,44.3466,44.3466,44.3466
age_building_municipality,3.17252,2.51168,2.84823,2.74701,2.97407
age_building_district,2.84528,2.84528,2.84528,2.84528,2.84528
plinth_area_sq_ft_municipality,5.80375,6.0895,6.02618,5.82849,5.78008


In [34]:
total_data.DISTRICT.unique()

array(['Gorkha', 'Dhading', 'Rasuwa', 'Nuwakot', 'Sindhupalchowk',
       'Makwanpur', 'Kavrepalanchok', 'Dolakha', 'Ramechhap', 'Sindhuli',
       'Okhaldhunga'], dtype=object)

In [35]:
# total_data.to_csv(r'./Data/total_map_con_media.csv')

In [36]:
df_vis.head().T

Unnamed: 0,0,1,2,3,4
building_id,120101000011,120101000021,120101000031,120101000041,120101000051
ward_id,120703,120703,120703,120703,120703
count_floors_pre_eq,1,1,1,1,1
age_building,2.30259,2.77259,3.04452,3.04452,3.43399
plinth_area_sq_ft,5.66643,5.8999,5.95324,5.7462,5.73334
height_ft_pre_eq,2.30259,2.30259,2.30259,2.30259,2.30259
land_surface_condition,Flat,Flat,Flat,Flat,Flat
foundation_type,others,others,others,others,others
roof_type,Bamboo/Timber-Light roof,Bamboo/Timber-Light roof,Bamboo/Timber-Light roof,Bamboo/Timber-Light roof,Bamboo/Timber-Light roof
ground_floor_type,Mud,Mud,Mud,Mud,Mud


#### VARIABLES POPULATION

In [37]:
population=pd.read_csv("./Data/Selected Locations.csv")

In [38]:
population.head()

Unnamed: 0,Location,Population,Individuals living outside the country,Individuals with some form of formal education,Households,Households that use wood as a cooking fuel,"Households that earn less than Rs 10,000 per month",Households with no toilets,Buildings,Totally collapsed buildings,Buildings needing major repair or reconstruction
0,Okhaldhunga,190248,9948,119431,36114,35296,24812,4865,39352,5620,23646
1,Sindhuli,352965,15748,211749,64913,59745,43425,7866,68750,8191,41047
2,Ramechhap,284051,13539,178158,55262,52982,36086,11121,58623,11490,49627
3,Dolakha,279577,16878,177310,70496,64280,35108,1012,60639,35364,56117
4,Sindhupalchok,425175,26715,256561,90083,84119,48522,2080,88741,71432,85497


In [39]:
population=population.drop(['Individuals living outside the country',
                            'Individuals with some form of formal education',
                           'Households that use wood as a cooking fuel',
                           'Households that earn less than Rs 10,000 per month',
                           'Households with no toilets',
                           'Totally collapsed buildings',
                           'Buildings needing major repair or reconstruction'], axis=1)
population=population.rename(columns={"Location": "DISTRICT"})

In [40]:
population['DISTRICT'].unique()

array(['Okhaldhunga', 'Sindhuli', 'Ramechhap', 'Dolakha', 'Sindhupalchok',
       'Kavrepalanchok', 'Nuwakot', 'Rasuwa', 'Dhading', 'Makwanpur',
       'Gorkha'], dtype=object)

In [41]:
population=population.replace('Sindhupalchok','Sindhupalchowk')

In [42]:
population['DISTRICT'].unique()

array(['Okhaldhunga', 'Sindhuli', 'Ramechhap', 'Dolakha',
       'Sindhupalchowk', 'Kavrepalanchok', 'Nuwakot', 'Rasuwa', 'Dhading',
       'Makwanpur', 'Gorkha'], dtype=object)

In [43]:
total_data=pd.merge(total_data, population, how='inner',on='DISTRICT')

In [44]:
total_data.DISTRICT.unique().shape

(11,)

### PRUEBA DE CONCEPTO

En los pasos anteriores hemos reservado el 20% de los datos para explotación, usando el 80% para entrenar los distintos modelos. De esta forma vamos a hacer las predicciones comparandolas con la realidad para poder hacer una prueba de concepto.

## Cálculo de probabilidades

In [45]:
prob_explo=pd.read_csv("./Data/prob_explo_con2clases_building_id_RF_COLINEADAS.csv")
prob_explo=prob_explo.drop('Unnamed: 0',axis=1)

In [46]:
prob_explo.sort_values(by = 'damage_grade_prob', ascending=False).head(500)

Unnamed: 0,ward_id,building_id,damage_grade_prob,vdcmun_id,district_id,real_class
39360,360602,363401000631,1.000000,3606,36,1
13343,360602,363402001011,1.000000,3606,36,1
64456,360602,363402000221,0.999797,3606,36,1
57051,360602,363402000461,0.999797,3606,36,1
10614,220607,224709001451,0.999794,2206,22,1
...,...,...,...,...,...,...
49125,220605,223501001161,0.998186,2206,22,1
143196,220605,223501001191,0.998186,2206,22,1
99680,230105,231805001921,0.998185,2301,23,1
88530,220606,223505002231,0.998184,2206,22,1


In [47]:
prob_explo=prob_explo.rename(columns={'vdcmun_id':'municipality',
                                      'district_id':'district',
                                      'damage_grade_prob':'probabilidad colapso edificio'
    })

In [48]:
prob_explo[(prob_explo['probabilidad colapso edificio']>0.3) & (prob_explo['probabilidad colapso edificio']<0.4)]

Unnamed: 0,ward_id,building_id,probabilidad colapso edificio,municipality,district,real_class
2,310502,312705001011,0.390709,3105,31,1
3,360814,361809000811,0.306060,3608,36,0
5,200701,203303000361,0.325187,2007,20,0
11,310803,311803003531,0.363813,3108,31,1
34,210401,211602000501,0.367889,2104,21,0
...,...,...,...,...,...,...
163676,241303,248005000091,0.363579,2413,24,0
163695,360708,362708000631,0.396999,3607,36,0
163696,361006,362507000741,0.393847,3610,36,0
163710,200901,200709000111,0.329845,2009,20,0


In [49]:
prob_explo.head()

Unnamed: 0,ward_id,building_id,probabilidad colapso edificio,municipality,district,real_class
0,300408,301403000471,0.70217,3004,30,1
1,300302,301302010401,0.047216,3003,30,0
2,310502,312705001011,0.390709,3105,31,1
3,360814,361809000811,0.30606,3608,36,0
4,360108,360403000451,0.888182,3601,36,1


In [50]:
prob_explo['2_80'] = np.where(prob_explo['probabilidad colapso edificio']>0.8,prob_explo['probabilidad colapso edificio'] , 0)
prob_explo['2_80_70']=np.where((prob_explo['probabilidad colapso edificio']>0.7) & (prob_explo['probabilidad colapso edificio']<0.8),prob_explo['probabilidad colapso edificio'],0)
prob_explo['2_60_50']=np.where((prob_explo['probabilidad colapso edificio']>0.5) & (prob_explo['probabilidad colapso edificio']<0.6),prob_explo['probabilidad colapso edificio'],0)
prob_explo['2_50_40']=np.where((prob_explo['probabilidad colapso edificio']>0.4) & (prob_explo['probabilidad colapso edificio']<0.5),prob_explo['probabilidad colapso edificio'],0)
prob_explo['2_40_30']=np.where((prob_explo['probabilidad colapso edificio']>0.3) & (prob_explo['probabilidad colapso edificio']<0.4),prob_explo['probabilidad colapso edificio'],0)
prob_explo['2_30']=np.where((prob_explo['probabilidad colapso edificio']>0.0) & (prob_explo['probabilidad colapso edificio']<0.3),prob_explo['probabilidad colapso edificio'],0)

In [51]:
prob_explo.head()

Unnamed: 0,ward_id,building_id,probabilidad colapso edificio,municipality,district,real_class,2_80,2_80_70,2_60_50,2_50_40,2_40_30,2_30
0,300408,301403000471,0.70217,3004,30,1,0.0,0.70217,0.0,0.0,0.0,0.0
1,300302,301302010401,0.047216,3003,30,0,0.0,0.0,0.0,0.0,0.0,0.047216
2,310502,312705001011,0.390709,3105,31,1,0.0,0.0,0.0,0.0,0.390709,0.0
3,360814,361809000811,0.30606,3608,36,0,0.0,0.0,0.0,0.0,0.30606,0.0
4,360108,360403000451,0.888182,3601,36,1,0.888182,0.0,0.0,0.0,0.0,0.0


In [52]:
prob_explo.shape

(163740, 12)

In [53]:
prob_explo[prob_explo['probabilidad colapso edificio']>0.5].count()/prob_explo.shape[0]

ward_id                          0.627519
building_id                      0.627519
probabilidad colapso edificio    0.627519
municipality                     0.627519
district                         0.627519
real_class                       0.627519
2_80                             0.627519
2_80_70                          0.627519
2_60_50                          0.627519
2_50_40                          0.627519
2_40_30                          0.627519
2_30                             0.627519
dtype: float64

In [54]:
prob_explo[prob_explo['probabilidad colapso edificio']<0.5].count()/prob_explo.shape[0]

ward_id                          0.372475
building_id                      0.372475
probabilidad colapso edificio    0.372475
municipality                     0.372475
district                         0.372475
real_class                       0.372475
2_80                             0.372475
2_80_70                          0.372475
2_60_50                          0.372475
2_50_40                          0.372475
2_40_30                          0.372475
2_30                             0.372475
dtype: float64

In [55]:
prob_explo[prob_explo['probabilidad colapso edificio']>0.8].count()/prob_explo.shape[0]

ward_id                          0.398705
building_id                      0.398705
probabilidad colapso edificio    0.398705
municipality                     0.398705
district                         0.398705
real_class                       0.398705
2_80                             0.398705
2_80_70                          0.398705
2_60_50                          0.398705
2_50_40                          0.398705
2_40_30                          0.398705
2_30                             0.398705
dtype: float64

In [56]:
prob_explo[(prob_explo['probabilidad colapso edificio']>0.7) & (prob_explo['probabilidad colapso edificio']<0.8)].count()/prob_explo.shape[0]

ward_id                          0.084744
building_id                      0.084744
probabilidad colapso edificio    0.084744
municipality                     0.084744
district                         0.084744
real_class                       0.084744
2_80                             0.084744
2_80_70                          0.084744
2_60_50                          0.084744
2_50_40                          0.084744
2_40_30                          0.084744
2_30                             0.084744
dtype: float64

In [57]:
prob_explo[(prob_explo['probabilidad colapso edificio']>0.6) & (prob_explo['probabilidad colapso edificio']<0.7)].count()/prob_explo.shape[0]

ward_id                          0.072163
building_id                      0.072163
probabilidad colapso edificio    0.072163
municipality                     0.072163
district                         0.072163
real_class                       0.072163
2_80                             0.072163
2_80_70                          0.072163
2_60_50                          0.072163
2_50_40                          0.072163
2_40_30                          0.072163
2_30                             0.072163
dtype: float64

In [58]:
prob_explo[(prob_explo['probabilidad colapso edificio']>0.5) & (prob_explo['probabilidad colapso edificio']<0.6)].count()/prob_explo.shape[0]

ward_id                          0.071907
building_id                      0.071907
probabilidad colapso edificio    0.071907
municipality                     0.071907
district                         0.071907
real_class                       0.071907
2_80                             0.071907
2_80_70                          0.071907
2_60_50                          0.071907
2_50_40                          0.071907
2_40_30                          0.071907
2_30                             0.071907
dtype: float64

In [59]:
prob_explo[(prob_explo['probabilidad colapso edificio']>0.4) & (prob_explo['probabilidad colapso edificio']<0.5)].count()/prob_explo.shape[0]

ward_id                          0.068065
building_id                      0.068065
probabilidad colapso edificio    0.068065
municipality                     0.068065
district                         0.068065
real_class                       0.068065
2_80                             0.068065
2_80_70                          0.068065
2_60_50                          0.068065
2_50_40                          0.068065
2_40_30                          0.068065
2_30                             0.068065
dtype: float64

In [60]:
prob_explo[(prob_explo['probabilidad colapso edificio']>0.3) & (prob_explo['probabilidad colapso edificio']<0.4)].count()/prob_explo.shape[0]

ward_id                          0.070374
building_id                      0.070374
probabilidad colapso edificio    0.070374
municipality                     0.070374
district                         0.070374
real_class                       0.070374
2_80                             0.070374
2_80_70                          0.070374
2_60_50                          0.070374
2_50_40                          0.070374
2_40_30                          0.070374
2_30                             0.070374
dtype: float64

In [61]:
prob_explo[(prob_explo['probabilidad colapso edificio']>0.2) & (prob_explo['probabilidad colapso edificio']<0.3)].count()/prob_explo.shape[0]

ward_id                          0.059362
building_id                      0.059362
probabilidad colapso edificio    0.059362
municipality                     0.059362
district                         0.059362
real_class                       0.059362
2_80                             0.059362
2_80_70                          0.059362
2_60_50                          0.059362
2_50_40                          0.059362
2_40_30                          0.059362
2_30                             0.059362
dtype: float64

In [62]:
prob_explo[(prob_explo['probabilidad colapso edificio']>0.0) & (prob_explo['probabilidad colapso edificio']<0.2)].count()/prob_explo.shape[0]

ward_id                          0.174075
building_id                      0.174075
probabilidad colapso edificio    0.174075
municipality                     0.174075
district                         0.174075
real_class                       0.174075
2_80                             0.174075
2_80_70                          0.174075
2_60_50                          0.174075
2_50_40                          0.174075
2_40_30                          0.174075
2_30                             0.174075
dtype: float64

In [63]:
prob_explo.head()

Unnamed: 0,ward_id,building_id,probabilidad colapso edificio,municipality,district,real_class,2_80,2_80_70,2_60_50,2_50_40,2_40_30,2_30
0,300408,301403000471,0.70217,3004,30,1,0.0,0.70217,0.0,0.0,0.0,0.0
1,300302,301302010401,0.047216,3003,30,0,0.0,0.0,0.0,0.0,0.0,0.047216
2,310502,312705001011,0.390709,3105,31,1,0.0,0.0,0.0,0.0,0.390709,0.0
3,360814,361809000811,0.30606,3608,36,0,0.0,0.0,0.0,0.0,0.30606,0.0
4,360108,360403000451,0.888182,3601,36,1,0.888182,0.0,0.0,0.0,0.0,0.0


In [64]:
# prob_explo['% edificios con daño alto'] = np.where(prob_explo['probabilidad colapso edificio']>0.7,prob_explo['probabilidad colapso edificio'] , 0)
# prob_explo['% edificios con daño medio']=np.where((prob_explo['probabilidad colapso edificio']>0.2) & (prob_explo['probabilidad colapso edificio']<0.7),prob_explo['probabilidad colapso edificio'],0)
# prob_explo['% edificios con daño bajo']=np.where((prob_explo['probabilidad colapso edificio']>0.0) & (prob_explo['probabilidad colapso edificio']<0.2),prob_explo['probabilidad colapso edificio'],0)

In [65]:
# prob_explo.head().T

In [66]:
df_tot_district=pd.DataFrame(prob_explo.groupby("district")['building_id'].count())
df_tot_district=df_tot_district.rename(columns={"building_id": "total_buildings"})
# df_tot_district

In [67]:
df_prob_alta_district=prob_explo.loc[prob_explo['probabilidad colapso edificio']>0.7,['district','probabilidad colapso edificio']].groupby('district').count()
# df_prob_alta_district

In [68]:
porcentaje_probabilidad_alta_district=pd.DataFrame(df_prob_alta_district['probabilidad colapso edificio']/df_tot_district['total_buildings']*100)
porcentaje_probabilidad_alta_district=porcentaje_probabilidad_alta_district.rename(columns={0: '% edificios con daño alto_district'})
# porcentaje_probabilidad_alta_district
# prob_explo['% edificios con daño alto']

In [69]:
df_prob_media_district=prob_explo.loc[((prob_explo['probabilidad colapso edificio']<=0.7) & (prob_explo['probabilidad colapso edificio']>0.2)),
               ['district','probabilidad colapso edificio']].groupby('district').count()

In [70]:
porcentaje_probabilidad_media_district=pd.DataFrame(df_prob_media_district['probabilidad colapso edificio']/df_tot_district['total_buildings']*100)
porcentaje_probabilidad_media_district=porcentaje_probabilidad_media_district.rename(columns={0: '% edificios con daño medio_district'})
# porcentaje_probabilidad_media_district

In [71]:
df_prob_baja_district=prob_explo.loc[((prob_explo['probabilidad colapso edificio']<=0.2) ),
               ['district','probabilidad colapso edificio']].groupby('district').count()

In [72]:
porcentaje_probabilidad_baja_district=pd.DataFrame(df_prob_baja_district['probabilidad colapso edificio']/df_tot_district['total_buildings']*100)
porcentaje_probabilidad_baja_district=porcentaje_probabilidad_baja_district.rename(columns={0: '% edificios con daño bajo_district'})
# porcentaje_probabilidad_baja_district

In [73]:
PoC_probs=pd.merge(prob_explo, porcentaje_probabilidad_alta_district,on='district')
PoC_probs=pd.merge(PoC_probs, porcentaje_probabilidad_media_district,on='district')
PoC_probs=pd.merge(PoC_probs, porcentaje_probabilidad_baja_district,on='district')
PoC_probs.head()

Unnamed: 0,ward_id,building_id,probabilidad colapso edificio,municipality,district,real_class,2_80,2_80_70,2_60_50,2_50_40,2_40_30,2_30,% edificios con daño alto_district,% edificios con daño medio_district,% edificios con daño bajo_district
0,300408,301403000471,0.70217,3004,30,1,0.0,0.70217,0.0,0.0,0.0,0.0,58.532806,32.37337,9.093823
1,300302,301302010401,0.047216,3003,30,0,0.0,0.0,0.0,0.0,0.0,0.047216,58.532806,32.37337,9.093823
2,300602,302906001351,0.451091,3006,30,1,0.0,0.0,0.0,0.451091,0.0,0.0,58.532806,32.37337,9.093823
3,300507,300605001481,0.014825,3005,30,0,0.0,0.0,0.0,0.0,0.0,0.014825,58.532806,32.37337,9.093823
4,300506,300304001331,0.957502,3005,30,1,0.957502,0.0,0.0,0.0,0.0,0.0,58.532806,32.37337,9.093823


In [74]:
df_tot_vdcmun=pd.DataFrame(prob_explo.groupby("municipality")['building_id'].count())
df_tot_vdcmun=df_tot_vdcmun.rename(columns={"building_id": "total_buildings"})

In [75]:
df_prob_alta_vdcmun=prob_explo.loc[prob_explo['probabilidad colapso edificio']>0.7,['municipality','probabilidad colapso edificio']].groupby('municipality').count()

df_prob_media_vdcmun=prob_explo.loc[((prob_explo['probabilidad colapso edificio']<=0.7) & (prob_explo['probabilidad colapso edificio']>0.2)),
               ['municipality','probabilidad colapso edificio']].groupby('municipality').count()

df_prob_baja_vdcmun=prob_explo.loc[((prob_explo['probabilidad colapso edificio']<=0.2) ),
               ['municipality','probabilidad colapso edificio']].groupby('municipality').count()

In [76]:
porcentaje_probabilidad_alta_vdcmun=pd.DataFrame(df_prob_alta_vdcmun['probabilidad colapso edificio']/df_tot_vdcmun['total_buildings']*100)
porcentaje_probabilidad_alta_vdcmun=porcentaje_probabilidad_alta_vdcmun.rename(columns={0: '% edificios con daño alto_municipality'})

porcentaje_probabilidad_media_vdcmun=pd.DataFrame(df_prob_media_vdcmun['probabilidad colapso edificio']/df_tot_vdcmun['total_buildings']*100)
porcentaje_probabilidad_media_vdcmun=porcentaje_probabilidad_media_vdcmun.rename(columns={0: '% edificios con daño medio_municipality'})

porcentaje_probabilidad_baja_vdcmun=pd.DataFrame(df_prob_baja_vdcmun['probabilidad colapso edificio']/df_tot_vdcmun['total_buildings']*100)
porcentaje_probabilidad_baja_vdcmun=porcentaje_probabilidad_baja_vdcmun.rename(columns={0: '% edificios con daño bajo_municipality'})


In [77]:
PoC_probs=pd.merge(PoC_probs, porcentaje_probabilidad_alta_vdcmun,on='municipality')
PoC_probs=pd.merge(PoC_probs,  porcentaje_probabilidad_media_vdcmun,on='municipality')
PoC_probs=pd.merge(PoC_probs, porcentaje_probabilidad_baja_vdcmun,on='municipality')
PoC_probs.head().T

Unnamed: 0,0,1,2,3,4
ward_id,300408.0,300404.0,300404.0,300406.0,300406.0
building_id,301403000000.0,300203000000.0,300203000000.0,300209000000.0,300209000000.0
probabilidad colapso edificio,0.70217,0.5774653,0.5320566,0.006653532,0.5932945
municipality,3004.0,3004.0,3004.0,3004.0,3004.0
district,30.0,30.0,30.0,30.0,30.0
real_class,1.0,0.0,0.0,0.0,1.0
2_80,0.0,0.0,0.0,0.0,0.0
2_80_70,0.70217,0.0,0.0,0.0,0.0
2_60_50,0.0,0.5774653,0.5320566,0.0,0.5932945
2_50_40,0.0,0.0,0.0,0.0,0.0


In [78]:
PoC_probs.head().T

Unnamed: 0,0,1,2,3,4
ward_id,300408.0,300404.0,300404.0,300406.0,300406.0
building_id,301403000000.0,300203000000.0,300203000000.0,300209000000.0,300209000000.0
probabilidad colapso edificio,0.70217,0.5774653,0.5320566,0.006653532,0.5932945
municipality,3004.0,3004.0,3004.0,3004.0,3004.0
district,30.0,30.0,30.0,30.0,30.0
real_class,1.0,0.0,0.0,0.0,1.0
2_80,0.0,0.0,0.0,0.0,0.0
2_80_70,0.70217,0.0,0.0,0.0,0.0
2_60_50,0.0,0.5774653,0.5320566,0.0,0.5932945
2_50_40,0.0,0.0,0.0,0.0,0.0


In [79]:
# PoC_probs.groupby('district_id')['vdcmun_id'].counts()

In [80]:
PoC_probs=pd.merge(PoC_probs, total_data,how='inner')


In [81]:
PoC_probs.head().T

Unnamed: 0,0,1,2,3,4
ward_id,300408,300404,300404,300406,300406
building_id,301403000471,300203000082,300203000651,300209024882,300209021061
probabilidad colapso edificio,0.70217,0.577465,0.532057,0.00665353,0.593294
municipality,3004,3004,3004,3004,3004
district,30,30,30,30,30
real_class,1,0,0,0,1
2_80,0,0,0,0,0
2_80_70,0.70217,0,0,0,0
2_60_50,0,0.577465,0.532057,0,0.593294
2_50_40,0,0,0,0,0


In [82]:
# PoC_probs.groupby('vdcmun_id')['vdcmun_id'].counts()

In [83]:
df_tabla_PoC=PoC_probs.copy()

In [84]:
df_tabla_PoC.head()

Unnamed: 0,ward_id,building_id,probabilidad colapso edificio,municipality,district,real_class,2_80,2_80_70,2_60_50,2_50_40,...,plinth_area_sq_ft_district,percentage_ground_floor_type_municipality,percentage_ground_floor_type_district,percentage_education_level_household_head_municipality,percentage_education_level_household_head_district,percentage_income_level_household_municipality,percentage_income_level_household_district,Population,Households,Buildings
0,300408,301403000471,0.70217,3004,30,1,0.0,0.70217,0.0,0.0,...,5.772861,88.604685,76.585893,35.723523,34.775216,83.18724,78.135426,430851,86381,89122
1,300404,300203000082,0.577465,3004,30,0,0.0,0.0,0.577465,0.0,...,5.772861,88.604685,76.585893,35.723523,34.775216,83.18724,78.135426,430851,86381,89122
2,300404,300203000651,0.532057,3004,30,0,0.0,0.0,0.532057,0.0,...,5.772861,88.604685,76.585893,35.723523,34.775216,83.18724,78.135426,430851,86381,89122
3,300406,300209024882,0.006654,3004,30,0,0.0,0.0,0.0,0.0,...,5.772861,88.604685,76.585893,35.723523,34.775216,83.18724,78.135426,430851,86381,89122
4,300406,300209021061,0.593294,3004,30,1,0.0,0.0,0.593294,0.0,...,5.772861,88.604685,76.585893,35.723523,34.775216,83.18724,78.135426,430851,86381,89122


In [85]:
 list(df_tabla_PoC.columns)

['ward_id',
 'building_id',
 'probabilidad colapso edificio',
 'municipality',
 'district',
 'real_class',
 '2_80',
 '2_80_70',
 '2_60_50',
 '2_50_40',
 '2_40_30',
 '2_30',
 '% edificios con daño alto_district',
 '% edificios con daño medio_district',
 '% edificios con daño bajo_district',
 '% edificios con daño alto_municipality',
 '% edificios con daño medio_municipality',
 '% edificios con daño bajo_municipality',
 'N_ID',
 'LEVEL',
 'DISTRICT',
 'age_household_head_municipality',
 'age_household_head_district',
 'age_building_municipality',
 'age_building_district',
 'plinth_area_sq_ft_municipality',
 'plinth_area_sq_ft_district',
 'percentage_ground_floor_type_municipality',
 'percentage_ground_floor_type_district',
 'percentage_education_level_household_head_municipality',
 'percentage_education_level_household_head_district',
 'percentage_income_level_household_municipality',
 'percentage_income_level_household_district',
 'Population',
 'Households',
 'Buildings']

In [86]:
# df_tabla_PoC=df_tabla_PoC.drop(['2_80',
#  '2_80_70',
#  '2_70_60',
#  '2_60_50',
#  '2_50_40',
#  '2_40_30',
#  '2_30',
#  '2_80_vdcmun_id',
#  '2_80_70_vdcmun_id',
#  '2_70_60_vdcmun_id',
#  '2_60_50_vdcmun_id',
#  '2_50_40_vdcmun_id',
#  '2_40_30_vdcmun_id',
#  '2_30_vdcmun_id',
#  '2_80_district_id',
#  'N_ID',
#  'LEVEL',
#  'age_household_head_vdcmun_id',
#  'age_household_head_district_id',
#  'size_household_vdcmun_id',
#  'size_household_district_id',
#  'percentage_damage_grade_vdcmun_id',
#  'percentage_damage_grade_district_id',
#  'percentage_education_level_household_head_vdcmun_id',
#  'percentage_education_level_household_head_district_id',
#  'percentage_income_level_household_vdcmun_id',
#  'percentage_income_level_household_district_id','Population',
#  'Households',
#  'Buildings'],axis=1)

In [87]:
df_tabla_desc=total_data.copy()

In [88]:
df_tabla_desc.head().T

Unnamed: 0,0,1,2,3,4
N_ID,chumnuwri-gorkha,dharche-gorkha,ajirkot-gorkha,palungtar-gorkha,siranchok-gorkha
LEVEL,Gaunpalika,Gaunpalika,Gaunpalika,Nagarpalika,Gaunpalika
DISTRICT,Gorkha,Gorkha,Gorkha,Gorkha,Gorkha
municipality,3605,3606,3602,3609,3611
district,36,36,36,36,36
age_household_head_municipality,34.9533,45.1767,47.1767,44.4881,46.5328
age_household_head_district,44.3466,44.3466,44.3466,44.3466,44.3466
age_building_municipality,3.17252,2.51168,2.84823,2.74701,2.97407
age_building_district,2.84528,2.84528,2.84528,2.84528,2.84528
plinth_area_sq_ft_municipality,5.80375,6.0895,6.02618,5.82849,5.78008


In [89]:
df_tabla_desc=df_tabla_desc.drop(['Population','Households','Buildings'],axis=1)

In [90]:
 list(df_tabla_desc.columns)

['N_ID',
 'LEVEL',
 'DISTRICT',
 'municipality',
 'district',
 'age_household_head_municipality',
 'age_household_head_district',
 'age_building_municipality',
 'age_building_district',
 'plinth_area_sq_ft_municipality',
 'plinth_area_sq_ft_district',
 'percentage_ground_floor_type_municipality',
 'percentage_ground_floor_type_district',
 'percentage_education_level_household_head_municipality',
 'percentage_education_level_household_head_district',
 'percentage_income_level_household_municipality',
 'percentage_income_level_household_district']

In [91]:
df_tabla_desc_vdcmun_id=df_tabla_desc.drop([ 
 'LEVEL',
'municipality',
'district',
    'age_household_head_district',
    'age_building_district',
    'plinth_area_sq_ft_district',
    'percentage_ground_floor_type_district',
    'percentage_education_level_household_head_district',
    'percentage_income_level_household_district'
],axis=1)

In [92]:
df_tabla_desc_vdcmun_id.head().T

Unnamed: 0,0,1,2,3,4
N_ID,chumnuwri-gorkha,dharche-gorkha,ajirkot-gorkha,palungtar-gorkha,siranchok-gorkha
DISTRICT,Gorkha,Gorkha,Gorkha,Gorkha,Gorkha
age_household_head_municipality,34.9533,45.1767,47.1767,44.4881,46.5328
age_building_municipality,3.17252,2.51168,2.84823,2.74701,2.97407
plinth_area_sq_ft_municipality,5.80375,6.0895,6.02618,5.82849,5.78008
percentage_ground_floor_type_municipality,50.2283,84.4345,89.3364,84.6478,96.4711
percentage_education_level_household_head_municipality,48.0506,62.4103,32.9062,25.3603,28.5791
percentage_income_level_household_municipality,71.0573,86.3895,75.6522,79.7571,77.1555


In [93]:
df_tabla_desc_district_id=df_tabla_desc.drop([ 
 'LEVEL',
'municipality',
'district',
    'age_household_head_municipality',
    'age_building_municipality',
    'plinth_area_sq_ft_municipality',
    'percentage_ground_floor_type_municipality',
    'percentage_education_level_household_head_municipality',
    'percentage_income_level_household_municipality'
],axis=1)

In [94]:
df_tabla_desc_district_id.head().T

Unnamed: 0,0,1,2,3,4
N_ID,chumnuwri-gorkha,dharche-gorkha,ajirkot-gorkha,palungtar-gorkha,siranchok-gorkha
DISTRICT,Gorkha,Gorkha,Gorkha,Gorkha,Gorkha
age_household_head_district,44.3466,44.3466,44.3466,44.3466,44.3466
age_building_district,2.84528,2.84528,2.84528,2.84528,2.84528
plinth_area_sq_ft_district,5.92114,5.92114,5.92114,5.92114,5.92114
percentage_ground_floor_type_district,80.7787,80.7787,80.7787,80.7787,80.7787
percentage_education_level_household_head_district,30.9615,30.9615,30.9615,30.9615,30.9615
percentage_income_level_household_district,76.5111,76.5111,76.5111,76.5111,76.5111


#### DATOS PARA EL ANÁLISIS DESCRIPTIVO

In [95]:
# df_desc=pd.read_csv("./Data/paso_1.csv", sep=',')

In [96]:
# df_desc.head()

In [97]:
# # Agrupamos los daños en:
# # Low: G1 + G2
# # Medium: G3 + G4
# # High: G5 

# df_desc["damage_grade"].replace(["Grade 1", "Grade 2"], "Low", regex=True, inplace= True)
# df_desc["damage_grade"].replace(["Grade 5"], "High", regex=True, inplace= True)
# df_desc["damage_grade"].replace(["Grade 3", "Grade 4"], "Medium", regex=True, inplace= True)


# df_desc["damage_grade"].value_counts().plot(kind='bar', title="damage_grade")
# plt.show()
# print(100 * df_desc['damage_grade'].value_counts() / len(df_desc['damage_grade']))

In [98]:
# sns.boxplot(x="damage_grade", y="age_household_head", data=df_desc)

In [99]:
# df_desc.head().T

In [100]:
# df_desc2=pd.merge(total_data, df_desc, how='outer')

In [101]:
# df_desc2.head()

# DASH

Pasamos a representar las dos variables: age_household_head y percentage en el mapa. 
Como se puede observar la aplicación va a permitir que el usuario seleccione dos opciones
 - Select a district. si el usuario no selecciona ningún distrito se muestra el mapa entero y los datos serían los calculados a nivel de distrito, para al variable percentage elegiríamos la columna de datos 'percentage_district'. Si el usuario seleciona un distrito entonces los datos son los de los municipios, para el caso de percentage elegiríamos 'percentage_vdcmun'. Lo hemos hecho de esta forma es más clara ver la información a nivel de distrito cuando se ve todo el mapa y a nivel de municipio cuando se hace "zoom" seleccionando un distrito. 
 
 
 - Select a variable to represent: por ahora he incluido sólo esas dos variables pero debemos incluir las que queramos

## BOOSTRAP
https://dash-bootstrap-components

In [102]:
import dash
import dash_bootstrap_components as dbc
import dash_core_components as dcc

import dash_html_components as html
import pandas as pd
from dash.dependencies import Input, Output
import plotly.graph_objects as go

In [103]:
df=total_data

In [104]:
app = dash.Dash(__name__, external_stylesheets=[dbc.themes.CERULEAN])

In [105]:
app.config['suppress_callback_exceptions'] = True

In [106]:
# df = pd.read_csv('Data/total_map_con_media.csv', encoding = 'Latin-1')
# print(df)


navbar = dbc.NavbarSimple(
    children = [
        html.H2('Evaluación de daños por terremotos', style={"margin-top": 5,'fontSize':40})
    ],
    #brand = 'Earthquake Damage Assesment',
    dark = False,
    style = {'padding-left':'3', 'padding-right':'3'}
)

tab1_content = dbc.Card(
    dbc.CardBody(
        [
            html.P("Sobre el proyecto", 
                   className="card-text", 
                   style={'fontSize':30,'textAlign': 'left', 'color': '#7FDBFF'}
                  ),
            dbc.Row([
                dbc.Card(
                    dbc.CardBody(
                        [
#                             html.H4("Earthquake Damage Assessment", className="card-title"),
                            html.H6("Motivación", className="card-subtitle"),
                            html.P(
                                "La capacidad para evaluar de forma rápida el daño producido por terremotos es clave "
                                "para la respuesta y recuperación tras el desastre. ",
                                className="card-text",
                            ),
                            html.P(
                                "Las actividades de clasificación de los daños de edificios necesitan un gran esfuerzo, "
                                "en cuanto a tiempo y recursos, ya que requiere que una persona se desplace al lugar y "
                                "evalúe el daño in situ, edificio a edificio.",
                                className="card-text",
                            ),
                            html.Br(),
                            html.H6("Objetivo", className="card-subtitle"),
                            html.P(
                                "El objetivo de esta herramienta y de este proyecto es proporcionar a los usuarios de emergencia "
                                "información sobre los daños del terremoto de forma ágil y rápida utilizando Machine Learning",
                                className="card-text",
                            ),
                            html.Br(),
                            html.H6("Datos", className="card-subtitle"),
                            html.P(
                                "Tras el terremoto de 7.8 Mw en Gorkha el 25 de abril de 2015, Nepal llevó a cabo una "
                                "encuesta masiva de hogares para evaluar los daños a los edificios en los distritos afectados "
                                "por el terermoto. Estos son los datos utilizados para llevar a cabo este estudio",
                                className="card-text",
                            ),
                            dbc.CardLink("Nepal Open Data", href="https://eq2015.npc.gov.np/"),
                        ]
                    ),
                    style={"width": "72rem"},
                )
            ]),
        ]
    ),
    className="mt-3",
    style = {'padding-left':'4%', 'padding-right':'7%'}
)

tab2_content = dbc.Card(
    dbc.CardBody(
        [
            html.P("Análisis Global Nepal", className="card-text", style={'fontSize':30,'textAlign': 'left', 'color': '#7FDBFF'}),
            dbc.Row([
                dbc.Col(
                    dcc.Dropdown(
                    id='districts-dropdown-t1',
                    options=[
                        {'label': i, 'value': i} for i in df.DISTRICT.unique()
                    ],
                    placeholder="All districts",
                ),
                width = 4, align = 'center'
                ),
                dbc.Col(
                    dcc.Dropdown(
                        id='radioitems-variables-t1',
                        options = [
                            {'label': 'Edad media del cabeza de familia', 'value': 'age_household_head'},
                            {'label': 'Edad media del edificio', 'value': 'age_building'},
                            {'label': 'Tamaño medio de los hogares', 'value': 'plinth_area_sq_ft'},
                            {'label': 'Hogares cuyo tipo de suelo es barro(%)', 'value': 'percentage_ground_floor_type'},
                            {'label': 'Hogares con ingresos mensuales menores 20.000 Rupias (%)', 'value': 'percentage_income_level_household'},
                            {'label': 'Hogares cuyo cabeza de familia no tiene educación (%)', 'value': 'percentage_education_level_household_head'},

                        ],
                        value='age_household_head',
                        clearable=False
                    ),
                width = 4, align = 'center'                 
                )
            ]),
            html.Br(),
            dbc.Row([
                dbc.Col(
                    html.Div(
                        children=[                                          
                            dcc.Graph(
                                id='map-graph',
                                figure=px.choropleth_mapbox(
                                    df, 
                                    geojson=map_nepal_json, 
                                    color='age_household_head_district', 
                                    locations='N_ID',
                                    featureidkey="properties.N_ID",
                                    color_continuous_scale="Viridis",
                                    #range_color=(0, 50),
                                    mapbox_style="carto-positron",
                                    zoom=6,center = {"lat": 27.7033, "lon": 85.3181},
                                    opacity=0.5,
                                    #labels={'unemp':'unemployment rate'}
                                )
                             )   
                        ],
                    id='figura1',
                    ),
                width = 6, align = 'center'                     
                ),
                dbc.Col(
                    html.Div(
                        children=[                                          
                            dt.DataTable(
                                id='tabla_desc',
                                columns=[{'name':i,'id':i} for i in df_tabla_desc_district_id.columns],
                                style_cell={
                                    'textAlign':'left'
                                },
                                style_header={
                                    'fontWeight':'bold',
                                    'backgroundColor':'#CDCDCD'
                                },
                                data=df_tabla_desc_district_id.to_dict('records'),
#                                 filter_action='native'   
#                                 sort_mode='multi',
                                page_size=10,
                                page_current=0

                             )   
                        ],
                    id='tabla1',
                    ),
                width = 6, align = 'center'                 
                )              
            ]),
            html.Br(),
            dbc.Row([
               dbc.Col(
                   dbc.Card(
                        dbc.CardBody([
                            html.H4("Población", className="card-title"),
#                             html.H6("Card subtitle", className="card-subtitle"),
#                             html.H4('Population'),
                             html.H6(id='total_population',
                                children=[
                                    df['Population'].unique().sum()
                             ],className="card-subtitle"),
                        ])
                   )
               ),
               dbc.Col(
                    dbc.Card(
                        dbc.CardBody([
                            html.H4("Hogares", className="card-title"),
#                             html.H6("Card subtitle", className="card-subtitle"),
#                             html.H4('Population'),
                            html.H6(id='household_num',
                                children=[
                                    df['Households'].unique().sum()
                            ],className="card-subtitle"),
                        ])
                   )
               ),
#                dbc.Col(
#                     dbc.Card(
#                         dbc.CardBody([
#                             html.H4("Edificios totalmente colapsados", className="card-title"),
# #                             html.H6("Card subtitle", className="card-subtitle"),
# #                             html.H4('Population'),
#                             html.H6(id='damage_grade5',
#                                     children=[df['Buildings'].unique().sum()
#                             ],className="card-subtitle"),
#                         ])
#                    )
#                )
           ])

        ]
    ),
    className="mt-3",
    style = {'padding-left':'4%', 'padding-right':'7%'}
)

tab3_content = dbc.Card(
    dbc.CardBody(
       [
            html.P("Predicción Modelo", className="card-text", style={'fontSize':30,'textAlign': 'left', 'color': '#7FDBFF'}),
            dbc.Row([
                dbc.Col(
                    dcc.Dropdown(
                    id='districts-dropdown-t2',
                    options=[
                        {'label': i, 'value': i} for i in df.DISTRICT.unique()
                    ],
                    placeholder="All districts",
                ),
                width = 6, align = 'center'
                ),
                dbc.Col(
                    dcc.Dropdown(
                        id='radioitems-variables-t2',
                        options = [
                            {'label': 'Porcentaje de edificios con daño alto', 'value': '% edificios con daño alto'},
                            {'label': 'Porcentaje edificios con daño medio', 'value': '% edificios con daño medio'},
                            {'label': 'Porcentaje edificios con daño bajo', 'value': '% edificios con daño bajo'}
                            
                        ],
                        value='% edificios con daño alto',
                        clearable=False
                    ),
                width = 6, align = 'center'                 
                )
            ]),
            html.Br(),
            dbc.Row([
                dbc.Col(
                    html.Div(
                        children=[                                          
                            dcc.Graph(
                                id='map-graph',
                                figure=px.choropleth_mapbox(
                                    PoC_probs, 
                                    geojson=map_nepal_json, 
                                    color='% edificios con daño alto_district', 
                                    locations='N_ID',
                                    featureidkey="properties.N_ID",
                                    color_continuous_scale="Viridis",
                                    #range_color=(0, 50),
                                    mapbox_style="carto-positron",
                                    zoom=6,center = {"lat": 27.7033, "lon": 85.3181},
                                    opacity=0.5,
                                    #labels={'unemp':'unemployment rate'}
                                )
                             )   
                        ],
                    id='figura2',
                    ),   
                width = 6, align = 'center'                 
                ),
                dbc.Col(
                    html.Div(
                        children=[                                          
                            dt.DataTable(
                                id='tabla_PoC',
                                columns=[{'name':i,'id':i} for i in df_tabla_PoC.columns],
                                style_cell={
                                    'textAlign':'left'
                                },
                                style_header={
                                    'fontWeight':'bold',
                                    'backgroundColor':'#CDCDCD'
                                },
                                data=df_tabla_PoC.to_dict('records'),
#                                 filter_action='native'   

                                sort_action='native',
#                                 sort_mode='multi',
                                page_size=10,
                                page_current=0

                             )   
                        ],
                    id='tabla2',
                    ),
                width = 6, align = 'center'                 
                )
            ]),
#            dbc.Row([
#                dbc.Col(
                   
#                )
#            ]);
        ]
    ),
    className="mt-3",
    style = {'padding-left':'4%', 'padding-right':'7%'}
)


tabs = html.Div(
    [
        dbc.Tabs(
            [
                dbc.Tab(label="Sobre el proyecto", tab_id="tab-1"),
                dbc.Tab(label="Fase de preparación", tab_id="tab-2"),
                dbc.Tab(label="Fase de respuesta", tab_id="tab-3")
            ],
            id="tabs",
            active_tab="tab-1",
        ),
        html.Div(id="content"),
    ]
)

    

app.layout = html.Div([navbar, tabs])




In [107]:
@app.callback(
    dash.dependencies.Output('figura1','children'),
    [dash.dependencies.Input('districts-dropdown-t1','value'),
    dash.dependencies.Input('radioitems-variables-t1','value')
    ]
)
def update_graph(district,variable):
#     print(f'Estoy en el update')
    if district==None:
#         print(f'Estoy en el if del update')
        variable= variable+'_district'
        return dcc.Graph(
            id='map-graphic',
            figure=px.choropleth_mapbox(
                df, #data_json, 
                geojson=map_nepal_json, 
                color=variable, 
                locations='N_ID',
                featureidkey="properties.N_ID",
                color_continuous_scale="Viridis",
                #range_color=(0, 50),
                mapbox_style="carto-positron",
                zoom=6,center = {"lat": 27.7033, "lon": 85.3181},
                opacity=0.5,
                labels={'unemp':variable}
            )
        )
    else:
#         print(f'Estoy en el else del update')
        variable= variable+'_municipality'
        return dcc.Graph(
            id='map-graphic',
            figure = px.choropleth_mapbox(
                df.loc[df['DISTRICT']==district], 
                geojson=map_nepal_json, 
                color=variable, 
                locations='N_ID',
                featureidkey="properties.N_ID",
                color_continuous_scale="Viridis",
                #range_color=(0, 50),
                mapbox_style="carto-positron",
                zoom=6, 
                center = {"lat": 27.7033, "lon": 85.3181},
                opacity=0.5,
                labels={'unemp':variable}
                )
        )

In [108]:
@app.callback(
    dash.dependencies.Output('figura2','children'),
    [dash.dependencies.Input('districts-dropdown-t2','value'),
    dash.dependencies.Input('radioitems-variables-t2','value')
    ]
)
def update_graph(district,variable):
#     print(variable)
#     print(f'Estoy en el update')
    if district==None:
#         print(f'Estoy en el if del update')
        variable= variable+'_district'
#         print(variable)    
        return dcc.Graph(
            id='map-graphic',
            figure=px.choropleth_mapbox(
                PoC_probs, #data_json, 
                geojson=map_nepal_json, 
                color=variable, 
                locations='N_ID',
                featureidkey="properties.N_ID",
                color_continuous_scale="Viridis",
                #range_color=(0, 50),
                mapbox_style="carto-positron",
                zoom=6,center = {"lat": 27.7033, "lon": 85.3181},
                opacity=0.5,
                labels={'unemp':variable},
                title='Predicciones sobre el terremoto de Nepal'
            )
        )
    else:
#         print(f'Estoy en el else del update')
        variable= variable+'_municipality'
#         print(variable)
        return dcc.Graph(
            id='map-graphic',
            figure = px.choropleth_mapbox(
                PoC_probs.loc[PoC_probs['DISTRICT']==district],
                geojson=map_nepal_json, 
                color=variable, 
                locations='N_ID',
                featureidkey="properties.N_ID",
                color_continuous_scale="Viridis",
                #range_color=(0, 50),
                mapbox_style="carto-positron",
                zoom=6, 
                center = {"lat": 27.7033, "lon": 85.3181},
                opacity=0.5,
                labels={'unemp':variable},
                title='Predicciones terremoto de Nepal'
                )
        )

In [109]:
@app.callback(Output("content", "children"), [Input("tabs", "active_tab")])
def switch_tab(at):
    if at == "tab-1":
        return tab1_content
    elif at == "tab-2":
        return tab2_content
    elif at == "tab-3":
        return tab3_content
    return html.P("This shouldn't ever be displayed...")
    

In [110]:
@app.callback(
    [dash.dependencies.Output("total_population",'children'),
    dash.dependencies.Output("household_num",'children'),
    dash.dependencies.Output("damage_grade5",'children')],
    [dash.dependencies.Input('districts-dropdown-t1','value')
    ]

)
def update_info(district):
#     print(f'Estoy en el update')
    if district==None:
        total_population=df['Population'].unique().sum()
        household_num=df['Households'].unique().sum()
        damage_grade5=df['Buildings'].unique().sum()
#         print(f'Estoy en el if del update de las cajas')
 
    else:
        df_fil=df.loc[df['DISTRICT']==district]
        total_population=df_fil['Population'].mean()
        household_num=df_fil['Households'].mean()
        damage_grade5=df_fil['Buildings'].mean()

#         print(f'Estoy en el else del update de las cajas')

    return(total_population, household_num, damage_grade5)

In [111]:
@app.callback(
    dash.dependencies.Output('tabla1','children'),
    [dash.dependencies.Input('districts-dropdown-t1','value'),
    dash.dependencies.Input('radioitems-variables-t1','value')
    ]
)
def update_graph(district,variable):
#     print(f'Estoy en el update')
    if district==None:
#         print(f'Estoy en el if del update')
        df_tabla_aux_desc=df_tabla_desc_district_id
        variable= variable+'_district'
        df_tabla_aux_desc=df_tabla_aux_desc.loc[:,['N_ID','DISTRICT',variable]]
#         print(variable)    
        return dt.DataTable(
                    id='tabla_desc',
                    columns=[{'name':i,'id':i} for i in df_tabla_aux_desc.columns],
                    style_cell={
                        'textAlign':'left'
                    },
                    style_header={
                        'fontWeight':'bold',
                    'backgroundColor':'#CDCDCD'
                                    },
                    data=df_tabla_aux_desc.to_dict('records'),
#                                 filter_action='native'   
                    sort_mode='multi',
                    page_size=10,
                    page_current=0
                )   
    else:
#         print(f'Estoy en el else del update')
        variable= variable+'_municipality'
#         print(variable)
        df_tabla_aux_desc=df_tabla_desc_vdcmun_id
        df_tabla_aux_desc=df_tabla_aux_desc.loc[df_tabla_aux_desc['DISTRICT']==district]
        df_tabla_aux_desc=df_tabla_aux_desc.loc[:,['N_ID','DISTRICT',variable]]
        return dt.DataTable(
                    id='tabla_desc',
                    columns=[{'name':i,'id':i} for i in df_tabla_aux_desc.columns],
                    style_cell={
                        'textAlign':'left'
                    },
                    style_header={
                        'fontWeight':'bold',
                    'backgroundColor':'#CDCDCD'
                                    },
                    data=df_tabla_aux_desc.to_dict('records'),

#                                 filter_action='native'   
                    sort_mode='multi',
                    page_size=10,
                    page_current=0
                )  


In [112]:
@app.callback(
    dash.dependencies.Output('tabla2','children'),
    [dash.dependencies.Input('districts-dropdown-t2','value'),
    dash.dependencies.Input('radioitems-variables-t2','value')
    ]
)
def update_graph(district,variable):
#     print(f'Estoy en el update')
    if district==None:
#         print(f'Estoy en el if del update')
        variable= variable+'_district'
        df_tabla_aux_PoC=df_tabla_PoC.loc[:,['N_ID','building_id','DISTRICT','probabilidad colapso edificio',variable]]
        df_tabla_aux_PoC=df_tabla_aux_PoC.sort_values(by='probabilidad colapso edificio',ascending=False)
#         print(variable)    
        return dt.DataTable(
                    id='tabla_PoC',
                    columns=[{'name':i,'id':i} for i in df_tabla_aux_PoC.columns],
                    style_cell={
                        'textAlign':'left'
                    },
                    style_header={
                        'fontWeight':'bold',
                    'backgroundColor':'#CDCDCD'
                                    },
                    data=df_tabla_aux_PoC.to_dict('records'),
#                                 filter_action='native'
                    sort_action='native',
#                     sort_mode='multi',
                    page_size=10,
                    page_current=0
                )   
    else:
#         print(f'Estoy en el else del update')
        variable= variable+'_municipality'
#         print(variable)
        df_aux_tabla_PoC=df_tabla_PoC.loc[df_tabla_PoC['DISTRICT']==district]
        df_aux_tabla_PoC=df_aux_tabla_PoC.loc[:,['N_ID','building_id','DISTRICT','probabilidad colapso edificio',variable]]
        df_aux_tabla_PoC=df_aux_tabla_PoC.sort_values(by='probabilidad colapso edificio',ascending=False)
        return dt.DataTable(
                    id='tabla_PoC',
                    columns=[{'name':i,'id':i} for i in df_aux_tabla_PoC.columns],
                    style_cell={
                        'textAlign':'left'
                    },
                    style_header={
                        'fontWeight':'bold',
                    'backgroundColor':'#CDCDCD'
                                    },
                    data=df_aux_tabla_PoC.to_dict('records'),

#                                 filter_action='native'   
                    sort_action='native',   
#                     sort_mode='multi',
                    page_size=10,
                    page_current=0
                )  


In [113]:
# df_tabla_PoC.head().T

In [114]:
if __name__ == "__main__":
    app.run_server(debug=True, use_reloader=False)


Running on http://127.0.0.1:8050/
Debugger PIN: 000-030-151
 * Serving Flask app "__main__" (lazy loading)
 * Environment: production
   Use a production WSGI server instead.
 * Debug mode: on
