# Dengue
Notebook for constructing dengue cases across a grid

In [1]:
import sys
sys.path.append('../src/')
import contextily as cx


In [2]:
# Useful imports
import seaborn as sns
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
from constants import *
from utils.publication_functions import beautify_data_column_name
from sklearn.cluster import KMeans
from flows.specific.custom_flow import CustomFlow

# Data sources
from data_sources.specific.coca import Coca
from data_sources.specific.gold_stock_price import GoldStockPrice
from data_sources.specific.malaria_cases import Malaria
from data_sources.specific.dengue_cases import Dengue
from data_sources.specific.malaria_cases_sivigila_4 import MalariaSivigila4
from data_sources.specific.mock_malaria_cases import MockMalaria
from data_sources.specific.temperature_average import TemperatureAverage
from data_sources.specific.temperature_max import TemperatureMax
from data_sources.specific.temperature_min import TemperatureMin
from data_sources.specific.temperature_average import TemperatureAverage
from data_sources.specific.precipitation_average import PrecipitationAverage
from data_sources.specific.precipitation_total import PrecipitationTotal
from data_sources.specific.deforestation_average import DeforestationAverage
from data_sources.specific.deforestation_total import DeforestationTotal

# Embedders
from embedders.specific.identity_embedder import IdentityEmbbeder
from embedders.specific.linear_regression_coefficient_embedder import LinearRegressionCoefficientEmbedder
from embedders.specific.mobility_to_distance_embedder import MobilityToDistanceEmbeder
from embedders.specific.mobility_to_similarity_embedder import MobilityToSimilarityEmbeder

# Clusteres
from clusterers.specific.identity_clusterer import IdentityClusterer
from clusterers.specific.sklearn_vector_clusterer import SklearnVectorClusterer
from clusterers.specific.two_tier_dbscan_clusterer import TwoTierDBSCANClusterer
from clusterers.specific.similarity_community_clusterer import SimilarityCommunityClusterer

# Geographies
from geography.specific.colombian_municipalities import ColombianMunicipalities
from geography.specific.colombian_frontiers import ColombianFrontiers
from geography.specific.colombia_main_rivers import ColombianMainRivers
from geography.general.geography_from_flow_output import GeographyFromFlowOutput
from geography.specific.colombia_grid import ColombianGrid


pd.set_option('display.max_rows', 100)



The Zen of Python, by Tim Peters

Beautiful is better than ugly.
Explicit is better than implicit.
Simple is better than complex.
Complex is better than complicated.
Flat is better than nested.
Sparse is better than dense.
Readability counts.
Special cases aren't special enough to break the rules.
Although practicality beats purity.
Errors should never pass silently.
Unless explicitly silenced.
In the face of ambiguity, refuse the temptation to guess.
There should be one-- and preferably only one --obvious way to do it.
Although that way may not be obvious at first unless you're Dutch.
Now is better than never.
Although never is often better than *right* now.
If the implementation is hard to explain, it's a bad idea.
If the implementation is easy to explain, it may be a good idea.
Namespaces are one honking great idea -- let's do more of those!




## 1. Compute Malaria

In [3]:

included_sources = [Dengue]
included_matrix_sources = []

flow = CustomFlow(ID = 'dengue_by_grid',
                    name = "Dengue By Grid",
                    time_resolution = MONTH,
                    geography = ColombianGrid(),
                    vector_data_sources=included_sources,
                    matrix_data_sources=included_matrix_sources)

flow.run()

Starts Flow: Dengue By Grid
   Loads Initial Geography
   Loads Data
      Loads Vector Data
      Found 1 elements
         Extracts 1 of 1: Dengue (dengue_sivigila_4) 
         Loads Data
         Builds Overlay
         By Dates. From 2006 to 2021
            2006-01-08T00:00:00.000000000
            2006-01-15T00:00:00.000000000
            2006-01-22T00:00:00.000000000
            2006-01-29T00:00:00.000000000
            2006-02-05T00:00:00.000000000
            2006-02-12T00:00:00.000000000
            2006-02-19T00:00:00.000000000
            2006-04-16T00:00:00.000000000
            2006-04-30T00:00:00.000000000
            2006-05-07T00:00:00.000000000
            2006-05-28T00:00:00.000000000
            2006-06-04T00:00:00.000000000
            2006-06-11T00:00:00.000000000
            2006-06-18T00:00:00.000000000
            2006-07-09T00:00:00.000000000
            2006-07-23T00:00:00.000000000
            2006-09-10T00:00:00.000000000
            2006-10-01T00:00:00.000

## Visualize

In [None]:
df = flow.df_embedded_vector
df = df[df.date >= pd.to_datetime('2018-01-01')]
df = df[['ID','malaria_sivigila_4_num_cases_sum']].groupby('ID').sum().reset_index()

df = flow.initial_geometry.merge(flow.df_embedded_vector)

df



Unnamed: 0,ID,left,top,right,bottom,geometry,date,malaria_sivigila_4_num_cases_average,malaria_sivigila_4_num_cases_sum
0,10.0,-9098767.66,1.414865e+06,-9088767.66,1.404865e+06,"MULTIPOLYGON (((-81.68913 12.55114, -81.68899 ...",2006-01-31,0.0,0.0
1,10.0,-9098767.66,1.414865e+06,-9088767.66,1.404865e+06,"MULTIPOLYGON (((-81.68913 12.55114, -81.68899 ...",2006-02-28,0.0,0.0
2,10.0,-9098767.66,1.414865e+06,-9088767.66,1.404865e+06,"MULTIPOLYGON (((-81.68913 12.55114, -81.68899 ...",2006-04-30,0.0,0.0
3,10.0,-9098767.66,1.414865e+06,-9088767.66,1.404865e+06,"MULTIPOLYGON (((-81.68913 12.55114, -81.68899 ...",2006-05-31,0.0,0.0
4,10.0,-9098767.66,1.414865e+06,-9088767.66,1.404865e+06,"MULTIPOLYGON (((-81.68913 12.55114, -81.68899 ...",2006-06-30,0.0,0.0
...,...,...,...,...,...,...,...,...,...
2160704,32807.0,-7448767.66,1.448653e+05,-7438767.66,1.348653e+05,"POLYGON ((-66.87650 1.30046, -66.87826 1.29667...",2020-09-30,0.0,0.0
2160705,32807.0,-7448767.66,1.448653e+05,-7438767.66,1.348653e+05,"POLYGON ((-66.87650 1.30046, -66.87826 1.29667...",2020-10-31,0.0,0.0
2160706,32807.0,-7448767.66,1.448653e+05,-7438767.66,1.348653e+05,"POLYGON ((-66.87650 1.30046, -66.87826 1.29667...",2020-11-30,0.0,0.0
2160707,32807.0,-7448767.66,1.448653e+05,-7438767.66,1.348653e+05,"POLYGON ((-66.87650 1.30046, -66.87826 1.29667...",2020-12-31,0.0,0.0


In [None]:
municipios = ColombianMunicipalities().get_geometry()
municipios = municipios.to_crs(epsg=3857)

In [None]:
col = 'malaria_sivigila_4_num_cases_sum'
display_df = df
display_df = display_df.to_crs(epsg=3857)


ax = municipios.plot(color = 'gray', alpha = 0.5, figsize =(10,10))
display_df.plot(column = col, ax = ax, legend = True, cmap='OrRd')
cx.add_basemap(ax, source=cx.providers.Esri.WorldImagery)
ax.set_title('Casos de Dengue entre 2018-2021', fontsize=25)