In [15]:
import pandas as pd
import geopandas

# Turn off pandas' chained-assignment (SettingWithCopyWarning) check for this session.
# NOTE(review): this silences the warning everywhere, including genuine
# write-to-a-temporary-copy mistakes — confirm the global switch is intended.
# https://stackoverflow.com/questions/20625582/how-to-deal-with-settingwithcopywarning-in-pandas
pd.options.mode.chained_assignment = None  # default='warn'

import matplotlib.pyplot as plt
import matplotlib.ticker as mtick
import numpy as np

from dbconf import get_engine

In [20]:
# Pull every row of the `shape` table whose type is 'region' into a
# GeoDataFrame, using the `geometry` column as the geometry source.
regions_gdf = geopandas.GeoDataFrame.from_postgis(
    "SELECT * FROM shape WHERE type='region'",
    con=get_engine(),
    geom_col='geometry',
)

In [23]:
# Distinct region names present in the database shapes, in first-seen order.
known_regions = [*regions_gdf['name'].unique()]

In [69]:
# Read the sheet at index 1 of the summary workbook; the literal strings
# 'None'/'none' are treated as missing and the three date columns are parsed.
df = pd.read_excel(
    '../input/data.local/lit_dengue_out_loc/WHOAB_summary_withSignalSheet.xlsx',
    sheet_name=1,
    parse_dates=['Report date', 'Start date', 'End date'],
    na_values=['None', 'none'],
)

In [70]:
# Inspect column dtypes and non-null counts of the freshly loaded sheet.
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 89 entries, 0 to 88
Data columns (total 8 columns):
 #   Column           Non-Null Count  Dtype         
---  ------           --------------  -----         
 0   ID               89 non-null     object        
 1   Regions          89 non-null     object        
 2   Report date      89 non-null     datetime64[ns]
 3   Cases            89 non-null     int64         
 4   Confirmed Cases  89 non-null     int64         
 5   Deaths           83 non-null     float64       
 6   Start date       85 non-null     datetime64[ns]
 7   End date         63 non-null     datetime64[ns]
dtypes: datetime64[ns](3), float64(1), int64(2), object(2)
memory usage: 5.7+ KB


In [71]:
# Preview the first rows of the loaded sheet.
df.head()

Unnamed: 0,ID,Regions,Report date,Cases,Confirmed Cases,Deaths,Start date,End date
0,WHOWB.14.A.1,Dar-es-salaam,2014-05-23,1050,620,3.0,NaT,NaT
1,WHOWB.14.A.1,Kigoma,2014-05-23,1050,620,3.0,NaT,NaT
2,WHOWB.14.A.1,Mwanza,2014-05-23,1050,620,3.0,NaT,NaT
3,WHOWB.14.A.1,Mbeya,2014-05-23,1050,620,3.0,NaT,NaT
4,WHOWB.18.A.1,Dar-es-salaam,2018-03-23,13,11,,2017-12-01,2018-03-21


In [78]:
# Work on an independent copy: a plain `df2 = df` only creates a second name
# for the same object, so any in-place change made through df2 (for example
# renaming its columns) would also change `df`, which other cells in this
# notebook still read via df['Regions'].
df2 = df.copy()

In [81]:
# Original column labels of the working frame, in column order.
col_from = df2.columns.tolist()

In [84]:
# Build the target column names: lower-case each original label, replace
# spaces with underscores, and prepend the dataset prefix.
prefix = 'lit_dengue_out_loc'
col_to = [f"{prefix}_{c.lower().replace(' ', '_')}" for c in col_from]

In [85]:
# Show the generated target column names.
col_to

['lit_dengue_out_loc_id',
 'lit_dengue_out_loc_regions',
 'lit_dengue_out_loc_report_date',
 'lit_dengue_out_loc_cases',
 'lit_dengue_out_loc_confirmed_cases',
 'lit_dengue_out_loc_deaths',
 'lit_dengue_out_loc_start_date',
 'lit_dengue_out_loc_end_date']

In [87]:
# Show the original -> prefixed column-name mapping (displayed only, not assigned).
dict(zip(col_from, col_to))

{'ID': 'lit_dengue_out_loc_id',
 'Regions': 'lit_dengue_out_loc_regions',
 'Report date': 'lit_dengue_out_loc_report_date',
 'Cases': 'lit_dengue_out_loc_cases',
 'Confirmed Cases': 'lit_dengue_out_loc_confirmed_cases',
 'Deaths': 'lit_dengue_out_loc_deaths',
 'Start date': 'lit_dengue_out_loc_start_date',
 'End date': 'lit_dengue_out_loc_end_date'}

In [48]:
given_regions = []

# Print the region of every row that has one, one per line, in row order.
for region_name in df['Regions'].dropna():
    print(region_name)


Dar-es-salaam
Kigoma
Mwanza
Mbeya
Dar-es-salaam
Dar-es-salaam
Dar-es-salaam
Dar-es-salaam
Tanga
Dar-es-salaam
Tanga
Dar-es-salaam
Tanga
Dar-es-salaam
Tanga
Dar-es-salaam
Tanga
Dar-es-salaam
Tanga
Dar-es-salaam
Tanga
Dar-es-salaam
Dar-es-salaam
Dar-es-salaam
Tanga
Pwani
Morogoro
Dar-es-salaam
Tanga
Pwani
Morogoro
Dar-es-salaam
Tanga
Pwani
Morogoro
Dar-es-salaam
Tanga
Pwani
Morogoro
Dar-es-salaam
Tanga
Pwani
Morogoro
Ruvuma
Dar-es-salaam
Tanga
Lindi
Arusha
Ruvuma
Dar-es-salaam
Tanga
Lindi
Arusha
Dar-es-salaam
Tanga
Pwani
Lindi
Dar-es-salaam
Tanga
Dar-es-salaam
Tanga
Dar-es-salaam
Tanga
Dar-es-salaam
Tanga
Dar-es-salaam
Tanga
Dar-es-salaam
Tanga
Arusha
Dar-es-salaam
Dodoma
Kagera
Kilimanjaro
Morogoro
Pwani
Ruvuma
Singida
Tanga
Arusha
Dar-es-salaam
Dodoma
Kagera
Kilimanjaro
Morogoro
Pwani
Ruvuma
Singida
Tanga


In [49]:
# Distinct region names appearing in the spreadsheet, in first-seen order.
given_regions = [*df['Regions'].unique()]

In [50]:
# Report every spreadsheet region that has no matching shape name;
# no output means all regions were recognised.
for region_name in given_regions:
    if region_name in known_regions:
        continue
    print(region_name)

In [77]:
# Region name -> database id lookup, built from the shape table columns.
dict(zip(regions_gdf['name'], regions_gdf['id']))

{'Dodoma': 1,
 'Arusha': 2,
 'Kilimanjaro': 3,
 'Tanga': 4,
 'Morogoro': 5,
 'Pwani': 6,
 'Dar-es-salaam': 7,
 'Lindi': 8,
 'Mtwara': 9,
 'Ruvuma': 10,
 'Iringa': 11,
 'Mbeya': 12,
 'Singida': 13,
 'Tabora': 14,
 'Rukwa': 15,
 'Kusini Unguja': 28,
 'Mjini Magharibi': 29,
 'Kaskazini Pemba': 30,
 'Kusini Pemba': 31,
 'Kigoma': 16,
 'Shinyanga': 17,
 'Kagera': 18,
 'Mwanza': 19,
 'Mara': 20,
 'Manyara': 21,
 'Njombe': 22,
 'Katavi': 23,
 'Simiyu': 24,
 'Geita': 25,
 'Songwe': 26,
 'Kaskazini Unguja': 27}

In [76]:
# Same name/id pairs, keyed by the frame's row index instead.
regions_gdf.loc[:, ['name', 'id']].to_dict(orient='index')

{0: {'name': 'Dodoma', 'id': 1},
 1: {'name': 'Arusha', 'id': 2},
 2: {'name': 'Kilimanjaro', 'id': 3},
 3: {'name': 'Tanga', 'id': 4},
 4: {'name': 'Morogoro', 'id': 5},
 5: {'name': 'Pwani', 'id': 6},
 6: {'name': 'Dar-es-salaam', 'id': 7},
 7: {'name': 'Lindi', 'id': 8},
 8: {'name': 'Mtwara', 'id': 9},
 9: {'name': 'Ruvuma', 'id': 10},
 10: {'name': 'Iringa', 'id': 11},
 11: {'name': 'Mbeya', 'id': 12},
 12: {'name': 'Singida', 'id': 13},
 13: {'name': 'Tabora', 'id': 14},
 14: {'name': 'Rukwa', 'id': 15},
 15: {'name': 'Kusini Unguja', 'id': 28},
 16: {'name': 'Mjini Magharibi', 'id': 29},
 17: {'name': 'Kaskazini Pemba', 'id': 30},
 18: {'name': 'Kusini Pemba', 'id': 31},
 19: {'name': 'Kigoma', 'id': 16},
 20: {'name': 'Shinyanga', 'id': 17},
 21: {'name': 'Kagera', 'id': 18},
 22: {'name': 'Mwanza', 'id': 19},
 23: {'name': 'Mara', 'id': 20},
 24: {'name': 'Manyara', 'id': 21},
 25: {'name': 'Njombe', 'id': 22},
 26: {'name': 'Katavi', 'id': 23},
 27: {'name': 'Simiyu', 'id': 2