In [1]:
import pandas as pd # Manejo de bases de datos
import geopandas as gpd # Manejo de bases de datos geográficas
import numpy as np # Funciones numéricas
import matplotlib.pyplot as plt # Gráficas
import seaborn as sns # Gráficas
import datetime as dt
import folium
import unicodedata
import datetime as dt


from branca.colormap import linear
from plotly.graph_objs import *
from plotly.offline import download_plotlyjs, init_notebook_mode, plot, iplot

### Death's by locality

In [3]:
########################################################################################################
################################ Preparing Data for Bogotá city  #######################################
########################################################################################################

# Get death count from epidemiological report

import pandas as pd
import geopandas as gpd
import datetime as dt
import numpy as np
import json
import matplotlib.pyplot as plt

from unicodedata import normalize

#Excel file with Bogotá localities, id, latitude and longitude  
path = "/home/ubuntu/javeriana/MOTUS-PUJ/Step_1/Files/Localidades_Nombres.xlsx"
Bog = pd.read_excel(path, sheet_name='Hoja1')

# Bogota info DF provided by local Town Hall
path = "/home/ubuntu/javeriana/MOTUS-PUJ/Step_1/Files/georeferencia-puntual-por-localidad.csv"
geoBog = pd.read_csv(path, encoding = 'UTF-8',  sep='\t', delimiter=";")
geoBog = geoBog.sort_values(by='CODIGO', ascending=True)

# CSV containing confirmed Covid-19 cases in Bogotá sorted by localities 
# a locality is a big region in Bogotá city. Inside every locality there are several neighbourhoods
path = "/home/ubuntu/javeriana/MOTUS-PUJ/Step_1/Files/casos_confirmados_Bog.csv"
CovidBog = pd.read_csv(path, encoding = "UTF-8", sep='\t', delimiter=";", low_memory=False)

del path

In [4]:
# In CovidBog DF
# Drop Cols we don´t need
CovidBog = CovidBog.drop(columns=['EDAD', 'FUENTE_O_TIPO_DE_CONTAGIO', 'UBICACION', 'CIUDAD', 'UNI_MED', 'SEXO', 'CASO' ])

# add some cols in order to write in them needed data 
CovidBog['CODIGO_LOCALIDAD'] = 0 

# Re-order cols 
CovidBog = CovidBog.reindex(columns=['FECHA_DE_INICIO_DE_SINTOMAS', 'FECHA_DIAGNOSTICO', 'LOCALIDAD_ASIS', 'CODIGO_LOCALIDAD', 'ESTADO'])

# Delete rows with missing values 
CovidBog = CovidBog.dropna(axis=0, how='any', subset=['FECHA_DIAGNOSTICO'])

In [5]:
#changing all locality strings to lower_case and taking off accents
trans_tab = dict.fromkeys(map(ord, u'\u0301\u0308'), None)

#Bog DF
Bog['Localidad'] = Bog.apply(lambda row: row['Localidad'].lower(), axis=1)
Bog['Localidad'] = Bog.apply(lambda row: normalize('NFKC', normalize('NFKD', row['Localidad']).translate(trans_tab)), axis=1)

#geoBog DF
geoBog['LOCALIDAD'] = geoBog.apply(lambda row: row['LOCALIDAD'].lower(), axis=1)
geoBog['LOCALIDAD'] = geoBog.apply(lambda row: normalize('NFKC', normalize('NFKD', row['LOCALIDAD']).translate(trans_tab)), axis=1)

#CovidBog DF
CovidBog['LOCALIDAD_ASIS'] = CovidBog.apply(lambda row: row['LOCALIDAD_ASIS'].lower(), axis=1)
CovidBog['LOCALIDAD_ASIS'] = CovidBog.apply(lambda row: normalize('NFKC', normalize('NFKD', row['LOCALIDAD_ASIS']).translate(trans_tab)), axis=1)

#delete vars no longer used 
del trans_tab

In [6]:
#Equal FECHA_DE_INICIO_DE_SINTOMAS to FECHA_DIAGNOSTICO when FECHA_DE_INICIO_DE_SINTOMAS = NaN
#Detect NaN Sympthoms date
CovidBog['AUX'] = CovidBog['FECHA_DE_INICIO_DE_SINTOMAS'].isna() 

#Change NaN Dates by diagnosis date. 
#CovidBog['FECHA_DE_INICIO_DE_SINTOMAS'] = CovidBog.apply(lambda row: row['FECHA_DIAGNOSTICO'] if row['FECHA_DE_INICIO_DE_SINTOMAS'].isna() == True else row['FECHA_DE_INICIO_DE_SINTOMAS'], axis=1)
CovidBog['FECHA_DE_INICIO_DE_SINTOMAS'] = CovidBog.apply(lambda row: row['FECHA_DIAGNOSTICO'] if row['AUX'] == True else row['FECHA_DE_INICIO_DE_SINTOMAS'], axis=1)
CovidBog = CovidBog.drop(columns=['AUX'])

In [7]:
#############################################################################################################
########### Declare variables and lists used in code to asign/calculate localities, latitude, longitude #####
#############################################################################################################

#Localities name list 
Loc_hist_names = list(Bog['Localidad'])

#Local id code list
Loc_codes = list(range(len(Loc_hist_names)))

#############################################################################
################## method used for filling local ID code ####################
#############################################################################
def local_ID (localidad):
    for i in range(len(Loc_hist_names)):
        if localidad == Loc_hist_names[i]:
            return Loc_codes[i]
        
#Asign local ID code 
CovidBog['CODIGO_LOCALIDAD'] = CovidBog.apply(lambda row: local_ID(row['LOCALIDAD_ASIS']), axis=1)

In [8]:
# Change str Dates to Datetime objects
CovidBog['FECHA_DIAGNOSTICO'] = pd.to_datetime(CovidBog['FECHA_DIAGNOSTICO'], format='%Y/%m/%d')
CovidBog['FECHA_DE_INICIO_DE_SINTOMAS'] = pd.to_datetime(CovidBog['FECHA_DE_INICIO_DE_SINTOMAS'], format='%Y/%m/%d')

#Re organize rows by ascending datetime object
CovidBog = CovidBog.sort_values(by='FECHA_DE_INICIO_DE_SINTOMAS', ascending=True)
CovidBog = CovidBog.reset_index(drop=True)

# Filter for death cases
CovidDeath = CovidBog.copy()
CovidDeath = CovidDeath[ CovidDeath['ESTADO'] != 'Recuperado' ]
CovidDeath.reset_index(drop=True, inplace=True)

CovidDeath.head(3)

Unnamed: 0,FECHA_DE_INICIO_DE_SINTOMAS,FECHA_DIAGNOSTICO,LOCALIDAD_ASIS,CODIGO_LOCALIDAD,ESTADO
0,2020-03-06,2020-03-26,barrios unidos,12,Fallecido
1,2020-03-07,2020-03-29,kennedy,8,Fallecido
2,2020-03-07,2020-03-29,usaquen,1,Fallecido


In [9]:
# Group deaths so we can count them in the same form we count active cases

#Group historic reports by date and localities
deaths = CovidDeath.groupby(['FECHA_DIAGNOSTICO', 'CODIGO_LOCALIDAD'])['LOCALIDAD_ASIS'].count()
deaths = pd.DataFrame(deaths)
deaths.reset_index(drop=False, inplace=True)

deaths = deaths.rename(columns={'LOCALIDAD_ASIS': 'FALLECIDOS'})#Rename Column

#Change str to datetime objects
deaths['FECHA_DIAGNOSTICO'] = pd.to_datetime(deaths['FECHA_DIAGNOSTICO'], format='%Y-%m-%d')
deaths = deaths.sort_values(by='FECHA_DIAGNOSTICO', ascending=True)
deaths = deaths.reset_index(drop=True)

#Drop 0 and 21 id codes since they don´t belong to Bogotá geography 
index_0 = deaths[ deaths['CODIGO_LOCALIDAD'] == 0 ].index
index_21 = deaths[ deaths['CODIGO_LOCALIDAD'] == 21 ].index
deaths.drop(index_0, inplace = True)#drop fuera de bogotá and sin dato rows since we cannot calculate infection 
deaths.drop(index_21, inplace = True)# density for them (we don´t know their reference population)

deaths.reset_index(drop=True, inplace=True)
deaths.to_pickle('/home/ubuntu/javeriana/MOTUS-PUJ/Step_3/Outputs/death_count.pkl')
deaths.head(2)

Unnamed: 0,FECHA_DIAGNOSTICO,CODIGO_LOCALIDAD,FALLECIDOS
0,2020-03-20,1,1
1,2020-03-20,16,1


In [10]:
com_index = len(deaths['FECHA_DIAGNOSTICO']) #column length 
start_date = deaths.loc[0, 'FECHA_DIAGNOSTICO'] #start of the pandemic in Bogotá 
end_date = deaths.loc[com_index-1, 'FECHA_DIAGNOSTICO'] #Last reported date 

pan_days = end_date - start_date
pan_days = int(pan_days.days) #Days passed since pandemic start to last reported date grid x axis 
local_bog = len(Loc_hist_names) - 2 # number of rows y axis we are not considering 0 and 21 codes 

#create grid and fill it with reported cases y axis correspond to Bogota localities id code, x axis correspond to 
#number of days passed since pandemic started that way 0 index -> 2020-02-06, 1->2020-02-07 and so on

grid = np.ndarray([local_bog, pan_days+1]) #create grid
grid.fill(0) #fill grid with 0 

#Method used to fill grid with reported cases 
def fillGrid(date, code, cases):
    col = date - start_date
    col = int(col.days)
    grid[code-1][col] = cases
    

#Fill grid with cases 
deaths.apply(lambda row: fillGrid(row['FECHA_DIAGNOSTICO'], row['CODIGO_LOCALIDAD'], int(row['FALLECIDOS']) ), axis=1)

0       None
1       None
2       None
3       None
4       None
        ... 
7345    None
7346    None
7347    None
7348    None
7349    None
Length: 7350, dtype: object

In [12]:
## Estimated RT for each locality 
# Reading localities R_t
base_path = '/home/ubuntu/javeriana/MOTUS-PUJ/Step_1/RT_outputs/'

loc_R_list = ['usaquen.pkl', 'chapinero.pkl', 'santafe.pkl', 'sancristobal.pkl', 'usme.pkl',
             'tunjuelito.pkl', 'bosa.pkl', 'kennedy.pkl', 'fontibon.pkl', 'engativa.pkl',
             'suba.pkl', 'barriosunidos.pkl', 'teusaquillo.pkl', 'losmartires.pkl', 'antonionariño.pkl',
             'puentearanda.pkl', 'lacandelaria.pkl', 'rafaeluribeuribe.pkl', 'ciudadbolivar.pkl']

R_list = []
# Load data frames, reset index
for i in range(len(loc_R_list)):
    path_file = base_path+loc_R_list[i]
    R_list.append(pd.read_pickle(path_file))
    
for i in range(len(loc_R_list)):
    R_list[i].reset_index(drop=False, inplace=True)
    
# Create DF containing all RT scores for each locality 
R_df = pd.DataFrame(index = R_list[0]['Time Stamp'])
R_df.reset_index(drop=False, inplace=True)

for i in range(len(loc_R_list)):
    R_df[loc_R_list[i]] = 0
    R_df[loc_R_list[i]] = R_list[i]['R'].tolist()

In [13]:
#Function to count deaths past 15 days
def ActiveCases (date, code):
    col = date - start_date
    col = int(col.days)
    if col >= 15:
        Active = sum(grid[code-1][col-15:col+1])
    else: 
        Active = sum(grid[code-1][0:col+1])
    return int(Active)

Loc_codes = list(range(1,20))
# Make a map to compare localities risk evaluated by death and RT vs UTAMs risk

date_list = ['01/05/2020', '01/08/2020', '10/01/2021', '10/03/2021', '20/06/2021'] # d m Y
date_list2 = ['01-05-2020', '01-08-2020', '10-01-2021', '10-03-2021', '20-06-2021']

#date_list = ['01/05/2020' ,'01/08/2020', '01/11/2020', '10/01/2021', '10/03/2021', '01/05/2021',
#             '20/06/2021', '01/09/2021']

#date_list2 = ['01-05-2020' ,'01-08-2020', '01-11-2020', '10-01-2021', '10-03-2021', '01-05-2021',
#             '20-06-2021', '01-09-2021']

#Count deahts and RT for each locality at designed dates
#Count deaths
death_count = []
for i in range(len(date_list)):
    date = dt.datetime.strptime(date_list[i], "%d/%m/%Y")
    temp = []
    for j in range (len(Loc_codes)):
        temp.append(ActiveCases(date, Loc_codes[j]))
        
    death_count.append(temp)
    del temp
    

In [14]:
# Count RT at designed dates
RT_listD = []
for i in range(len(date_list)):
    date = dt.datetime.strptime(date_list[i], "%d/%m/%Y")
    rt_list = R_df[ R_df['Time Stamp'] == date ]
    rt_list = rt_list.iloc[:, 1:].values.tolist()[0]
    RT_listD.append(rt_list)

In [15]:
#Normalize values in each date 
RT_listDN = RT_listD.copy()
death_countN = death_count.copy()

for i in range(len(date_list)):
    maax = max(RT_listDN[i])
    dmax = max(death_count[i])
    for j in range(len(Loc_codes)):
        RT_listDN[i][j] = RT_listDN[i][j]/maax
        death_countN[i][j] = death_countN[i][j]/dmax
        

In [16]:
#Compute score similar to the one computed for deciding risk levels at clustering
score = []
for i in range(len(date_list)):
    temp = []
    for j in range(len(Loc_codes)):
        temp.append(RT_listDN[i][j]+death_countN[i][j])
    score.append(temp)
    del temp
    
# Finally execute labeling for this score
manual_label = []
for i in range(len(date_list)):
    temp = []
    for j in range(len(Loc_codes)):
        if score[i][j] <= 0.8:
            temp.append(0)
        if score[i][j] > 0.8 and score[i][j] <= 1.4:
            temp.append(1)
        if score[i][j] > 1.4:
            temp.append(2)
    manual_label.append(temp)
    del temp

In [18]:
# Read UTAM clustered DFs to compare them with localities
base_path = '/home/ubuntu/javeriana/MOTUS-PUJ/Step_3/Files/ClustDFs/'
ClustUtam = []
for i in range(len(date_list2)):
    name = base_path+'date_'+date_list2[i]+'.pkl'
    temp = pd.read_pickle(name)
    temp = gpd.GeoDataFrame(temp)
    temp.crs = 'EPSG:4326'
    temp.to_crs(epsg=4326, inplace=True)
    ClustUtam.append(temp)

In [19]:
# Create a map 
# Make a map
def Arreglar_tilde(Texto):
    Texto = unicodedata.normalize('NFD', Texto)
    Texto = Texto.encode('ascii', 'ignore')
    Texto = Texto.decode("utf-8")
    Texto = Texto.lower()
    return(Texto)

path = '/home/ubuntu/javeriana/MOTUS-PUJ/Step_2/1_spatial/spatial_features/Locality/Loca.shp'
Localities = gpd.read_file(path)
Localities = Localities.sort_values(by='LocCodigo', ascending=True)
Localities = Localities.reset_index(drop=True)
Localities['LocCodigo'] = pd.to_numeric(Localities['LocCodigo'], downcast='integer')
Localities.to_crs(epsg=4326, inplace=True)
Localities.drop(columns=['LocAAdmini', 'LocArea', 'SHAPE_Leng', 'SHAPE_Area'], inplace=True)
Localities['LocNombre'] = Localities.apply(lambda row: Arreglar_tilde(row['LocNombre']), axis=1)
Localities.drop(19, axis=0, inplace=True)
Localities.head(2)

Unnamed: 0,LocNombre,LocCodigo,geometry
0,usaquen,1,"POLYGON ((-74.01116 4.66459, -74.01117 4.66460..."
1,chapinero,2,"POLYGON ((-74.01116 4.66459, -74.01154 4.66461..."


In [20]:
#Init folium map object for Bogotá
Lat = 4.61
Long = -74.082
m = folium.Map(location=[Lat, Long], zoom_start=12, tiles='CartoDB positron')

feature_group1 = folium.FeatureGroup(name=date_list[0]+'Loc' ,overlay=True, show=False).add_to(m)
feature_group2 = folium.FeatureGroup(name=date_list[1]+'Loc' ,overlay=True, show=False).add_to(m)
feature_group3 = folium.FeatureGroup(name=date_list[2]+'Loc' ,overlay=True, show=False).add_to(m)
feature_group4 = folium.FeatureGroup(name=date_list[3]+'Loc' ,overlay=True, show=False).add_to(m)
feature_group5 = folium.FeatureGroup(name=date_list[4]+'Loc' ,overlay=True, show=False).add_to(m)

feature_group6 = folium.FeatureGroup(name=date_list[0]+'Utam' ,overlay=False).add_to(m)
feature_group7 = folium.FeatureGroup(name=date_list[1]+'Utam' ,overlay=False).add_to(m)
feature_group8 = folium.FeatureGroup(name=date_list[2]+'Utam' ,overlay=False).add_to(m)
feature_group9 = folium.FeatureGroup(name=date_list[3]+'Utam' ,overlay=False).add_to(m)
feature_group10 = folium.FeatureGroup(name=date_list[4]+'Utam' ,overlay=False).add_to(m)

fs = [feature_group1, feature_group2, feature_group3, feature_group4, 
      feature_group5, feature_group6, feature_group7, feature_group8, 
      feature_group9, feature_group10]

In [21]:
for i in range(len(date_list)):
    LocTemp = Localities.copy()
    LocTemp['Manual_label'] = manual_label[i]
    choropleth1 = folium.Choropleth(
    geo_data=LocTemp,
    name='choropleth',
    data=LocTemp,
    columns=['LocNombre', 'Manual_label'],
    key_on='feature.properties.LocNombre',
    fill_color='YlOrRd',
    nan_fill_color="white",
    fill_opacity=0.5,
    line_opacity=0.2,
    legend_name='Strat_risk',
    highlight=True,
    line_color='black').geojson.add_to(fs[i])
    
    geojson1 = folium.GeoJson(data=LocTemp, 
                              name='Localidades', 
                              smooth_factor=2, 
                              style_function=lambda x: {'color':'black','fillColor':'transparent','weight':0.5}, 
                              tooltip=folium.GeoJsonTooltip(fields=['Manual_label'],
                                                            aliases=['label Manual'],
                                                            labels=True, 
                                                            sticky=True), 
                              highlight_function=lambda x: {'weight':3,'fillColor':'grey'}, 
                             ).add_to(choropleth1)
    
    choropleth2 = folium.Choropleth(
    geo_data=ClustUtam[i],
    name='choropleth2',
    data=ClustUtam[i],
    columns=['UTAMNombre', 'Risk'],
    key_on='feature.properties.UTAMNombre',
    fill_color='YlOrRd',
    nan_fill_color="white",
    fill_opacity=1,
    line_opacity=0.2,
    legend_name='Strat_risk',
    highlight=True,
    line_color='black').geojson.add_to(fs[i+5])
    
    geojson1 = folium.GeoJson(data=ClustUtam[i], 
                              name='UTAM', 
                              smooth_factor=2, 
                              style_function=lambda x: {'color':'black','fillColor':'transparent','weight':0.5}, 
                              tooltip=folium.GeoJsonTooltip(fields=['Risk'],
                                                            aliases=['Strat Clust Risk'],
                                                            labels=True, 
                                                            sticky=True), 
                              highlight_function=lambda x: {'weight':3,'fillColor':'grey'}, 
                             ).add_to(choropleth2)

    
colormap = linear.YlOrRd_09.scale(0, 2).to_step(2)
colormap.caption = 'Strat_risk'
colormap.add_to(m)

folium.TileLayer('cartodbpositron', overlay=True, name="light mode").add_to(m)
folium.LayerControl(collapsed=False).add_to(m)

<folium.map.LayerControl at 0x7ff2c9c6d9d0>

In [None]:
m

In [22]:
#m.save('/home/ubuntu/javeriana/MOTUS-PUJ/Graficas/Implementacion/UtamVSloc.html')

In [23]:
del m

In [24]:
# Mean risk label by locality 
MeanClustLoc = []
for i in range(len(date_list)):
    temp = []
    for j in range(len(Loc_codes)):
        df = ClustUtam[i][ClustUtam[i]['LOCid'] == Loc_codes[j]]
        tot = sum(df['Risk'].values.tolist())
        mean = tot/len(df['Risk'])
        temp.append(mean)
    MeanClustLoc.append(temp)
    del temp

In [25]:
# Mean Square Error function
def ECM(R_Bog, R_Al):
    ecm = 0
    for i in range(len(R_Bog)):
        ecm += (R_Al[i] - R_Bog[i])**2
    ecm = ecm/len(R_Bog)
    return ecm 

In [26]:
# Compare UTAM risk with manual risk
UtamECM = []
for i in range(len(manual_label)):
    UtamECM.append(ECM(MeanClustLoc[i], manual_label[i]))

In [27]:
UtamECM

[0.18170305061595787,
 0.15986825529432047,
 0.1491812865497076,
 0.1328012905942104,
 0.22533801434803943]

### Route error (city block)

In [29]:
# Compare Loc manual labels with mean labels 
base_path = '/home/ubuntu/javeriana/MOTUS-PUJ/Step_3/Files/ClassDFs/'
cityBlockDF = []
for i in range(len(date_list2)):
    name = base_path+'date_'+date_list2[i]+'.pkl'
    temp = pd.read_pickle(name, compression='gzip')
    temp = gpd.GeoDataFrame(temp)
    temp.crs = 'EPSG:4326'
    temp.to_crs(epsg=4326, inplace=True)
    cityBlockDF.append(temp)

In [30]:
# Mean risk label city block to locality
MeanClustBlock = []
for i in range(len(date_list)):
    temp = []
    for j in range(len(Loc_codes)):
        df = cityBlockDF[i][cityBlockDF[i]['LOCid'] == Loc_codes[j]]
        tot = sum(df['RiskStrat'].values.tolist())
        mean = tot/len(df['RiskStrat'])
        temp.append(mean)
    MeanClustBlock.append(temp)
    del temp

In [31]:
# Mean Squared Error
CityBlockECM = []
for i in range(len(manual_label)):
    CityBlockECM.append(ECM(MeanClustBlock[i], manual_label[i]))

In [32]:
CityBlockECM

[0.15751484937638857,
 0.2630958959137962,
 0.3652503546495923,
 0.26192795636655736,
 0.5131539578419445]