In [72]:
%config Completer.use_jedi = False
import warnings

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
# Apply seaborn's default theme to matplotlib figures drawn in this notebook.
sns.set()

# Plotly
import plotly.express as px
import plotly
import plotly.graph_objects as go
import plotly.io as pio

# Set plotly's default renderer to 'notebook' for every later `.show()` call.
pio.renderers.default = 'notebook'

In [2]:
# Load the three county-level CSV extracts.
# `header=6` makes row 6 (0-based) the header row, so the lines above it are
# not read as data; the births file relies on pandas' default header inference.
pop_by_sex = pd.read_csv(
    'kenya-population-by-sex-and-county.csv',
    header=6,
)
births = pd.read_csv(
    'births-in-the-last-12-months-by-place-of-occurrence-and-county-2019-census-volume-iv.csv',
)
pop_household = pd.read_csv(
    'population-houseshold-data.csv',
    header=6,
)

In [3]:
# Give the county column the same label, 'County', in all three tables.
# Keys that are absent from a given table are ignored by `rename`, so one
# mapping can be shared by every frame.
name = {'name': 'County', 'COUNTY': 'County'}

pop_by_sex = pop_by_sex.rename(columns=name)
births = births.rename(columns=name)
pop_household = pop_household.rename(columns=name)

In [None]:
# Preview the first 10 rows of the population-by-sex table.
pop_by_sex.head(10)

In [None]:
# Preview the first 10 rows of the births table.
births.head(10)

In [None]:
# Preview the first 10 rows of the household table.
pop_household.head(10)

In [4]:
# Index every table by county so rows can be selected, dropped and merged by
# county name.
births = births.set_index('County')
pop_by_sex = pop_by_sex.set_index('County')
pop_household = pop_household.set_index('County')

In [5]:
# Keep the national / rural / urban summary rows of the births table aside,
# then remove every non-county row so each table holds counties only.
# Note the births table labels the national row 'KENYA', the others 'Kenya'.
rural_urban_births = births.loc[['KENYA', 'Rural', 'Urban']]

births = births.drop(index=['Rural', 'Urban', 'KENYA'])
pop_household = pop_household.drop(index=['Kenya'])
pop_by_sex = pop_by_sex.drop(index=['Kenya'])

In [6]:
# Sanity check: after dropping the aggregate rows the three tables should
# have the same number of rows (one per county).
shape_report = f"Pop Shape: {pop_by_sex.shape}, births: {births.shape}, Households: {pop_household.shape}"
print(shape_report)

Pop Shape: (47, 4), births: (47, 6), Households: (47, 3)


### Merging the Tables

In [7]:
# Sort every frame by county name so that rows line up position-by-position
# before merging.
births.sort_index(ascending=True, inplace=True)
pop_by_sex.sort_index(ascending=True, inplace=True)
pop_household.sort_index(ascending=True, inplace=True)

# `births` is relabelled below with the county names of `pop_by_sex` purely by
# row position. If the two tables sorted differently, birth figures would be
# attached to the wrong county without any error. Compare the names (ignoring
# case and punctuation) and warn about every position where they disagree.
births_keys = births.index.str.upper().str.replace(r'[^A-Z0-9]', '', regex=True)
pop_keys = pop_by_sex.index.str.upper().str.replace(r'[^A-Z0-9]', '', regex=True)
misaligned = births_keys != pop_keys
if misaligned.any():
    warnings.warn(
        "births and pop_by_sex rows do not line up by county: "
        f"{list(zip(births.index[misaligned], pop_by_sex.index[misaligned]))}"
    )

# Adopt the county spelling of `pop_by_sex` so the index-based merge matches.
births.index = pop_by_sex.index

In [84]:
# Combine the three county-indexed tables into one frame.
# Both `pop_by_sex` and `births` carry a 'Total' column, which the first merge
# suffixes as 'Total_x' / 'Total_y'; `cols` gives those (and the two
# "unknown" birth columns) descriptive names.
cols = {
    'Total_x': 'Total Sex',
    'Total_y': 'Total Births',
    'Don\'t Know': 'Unknown Births',
    'Not Stated': 'Unstated Births',
}

kenya_census = (
    pop_by_sex
    .merge(births, left_index=True, right_index=True)
    .merge(pop_household, left_index=True, right_index=True)
    .rename(columns=cols)
    # Drop the household table's 'Population' column before reusing the name
    # for the population total taken from the by-sex table.
    .drop(columns=['Population'])
    .rename(columns={'Total Sex': 'Population', 'Total Births': 'Births'})
)

kenya_census.head(10)

Unnamed: 0_level_0,Male,Female,Intersex,Population,Births,Health Facility,Non Health Facility,Unknown Births,Unstated Births,Per Cent in Health Facility,No.of Households,Avg Household size
County,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1
Baringo,336322,330428,13,666763,19697.0,14761.0,4879.0,57.0,-,74.9,142518,47
Bomet,434287,441379,23,875689,24647.0,21300.0,3326.0,21.0,-,86.4,187641,47
Bungoma,812146,858389,35,1670570,47722.0,42428.0,5238.0,56.0,-,88.9,358796,46
Busia,426252,467401,28,893681,25597.0,22974.0,2606.0,17.0,-,89.8,198152,45
Elgeyo-Marakwet,227317,227151,12,454480,13212.0,11894.0,1312.0,6.0,-,90.0,99861,45
Embu,304208,304367,24,608599,14556.0,13780.0,775.0,1.0,-,94.7,182743,33
Garissa,458975,382344,34,841353,16414.0,10434.0,5563.0,417.0,-,63.6,141394,59
Homa Bay,539560,592367,23,1131950,34833.0,31189.0,3621.0,23.0,-,89.5,262036,43
Isiolo,139510,128483,9,268002,8037.0,5680.0,2333.0,24.0,-,70.7,58072,46
Kajiado,557098,560704,38,1117840,36244.0,28478.0,7698.0,68.0,-,78.6,316179,35


In [None]:
# Summarise the merged table: column names, dtypes and non-null counts.
kenya_census.info()

## Stacked Chart showing gender

In [None]:
# Horizontal stacked bars: one bar per county, split into the 'Male' and
# 'Female' population columns.
gender_traces = [
    go.Bar(name=label, x=kenya_census[column], y=kenya_census.index, orientation='h')
    for label, column in (('Men', 'Male'), ('Women', 'Female'))
]

figure_1 = go.Figure(data=gender_traces)
figure_1.update_layout(title='Gender by County', barmode='stack', height=900)
figure_1.show()

### Top 10 Counties by Population

In [None]:
# The ten most populous counties, largest first, as a one-column frame.
top_10_counties = (
    kenya_census.groupby('County')['Population']
    .sum()
    .nlargest(10)
    .sort_values(ascending=False)
    .to_frame()
)

# Bubble chart: one bubble per county, sized by its population.
figure_2 = px.scatter(
    top_10_counties,
    x=top_10_counties.index,
    y=top_10_counties['Population'],
    size='Population',
    size_max=90,
    color=top_10_counties.index,
    title='Top 10 Counties by Population',
)

# Pin the legend inside the plot area, at the top-right corner.
figure_2.update_layout(
    legend={'yanchor': 'top', 'y': 0.99, 'xanchor': 'right', 'x': 0.99}
)

figure_2.show()

### Top 10 Counties by Births

In [None]:
# The ten counties with the most recorded births, largest first, as a
# one-column frame.
top_10_birth = (
    kenya_census.groupby('County')['Births']
    .sum()
    .nlargest(10)
    .sort_values(ascending=False)
    .to_frame()
)

# Bubble chart: one bubble per county, sized by its number of births.
# Colour by this frame's own index: colouring by the population top-10 index
# would label the bubbles with counties from a different ranking.
figure_3 = px.scatter(
    top_10_birth,
    x=top_10_birth.index,
    y=top_10_birth['Births'],
    size='Births',
    size_max=90,
    color=top_10_birth.index,
    title='Top 10 Counties by Births',
)

# Pin the legend inside the plot area, at the top-right corner.
figure_3.update_layout(legend=dict(
    yanchor="top",
    y=0.99,
    xanchor="right",
    x=0.99
))

figure_3.show()

In [None]:
# Rebuild the births top-10 frame and inspect its dtypes / non-null counts.
top_10_birth = (
    kenya_census.groupby('County')['Births']
    .sum()
    .nlargest(10)
    .sort_values(ascending=False)
    .to_frame()
)

top_10_birth.info()

## Drawing the Choropleths

In [34]:
# Read the GeoJSON boundaries used to draw the choropleth.
# NOTE(review): `geojson` is not used by any cell visible here; the import is
# kept in case a later cell depends on it.
import json, geojson

# Decode explicitly as UTF-8 instead of relying on the platform's default
# text encoding, so the file loads the same way on every machine.
with open('kenya.geojson', 'r', encoding='utf-8') as file:
    kenya = json.load(file)

In [35]:
# Inspect one feature's properties to see which keys hold the county name
# (COUNTY_NAM) and county code (COUNTY_COD).
kenya['features'][22]['properties']

{'OBJECTID': 23,
 'ID_': 1,
 'COUNTY_NAM': 'TURKANA',
 'CONST_CODE': 123,
 'CONSTITUEN': 'TURKANA NORTH',
 'COUNTY_COD': 23,
 'Shape_Leng': 14.1898741549,
 'Shape_Area': 5.70813539469}

In [85]:
# Re-spell the three county names that differ from the GeoJSON's COUNTY_NAM
# values, then turn the county index into an upper-case 'County' column so it
# can be looked up against those names.
indices = {
    'Elgeyo-Marakwet': 'Elegeyo-Marakwet',
    'Taita-Taveta': 'Taita Taveta',
    'Tharaka-Nithi': 'Tharaka - Nithi',
}
kenya_census = kenya_census.rename(index=indices).reset_index()
kenya_census['County'] = [county.upper() for county in kenya_census['County']]

# Log-scale the population for colouring (presumably so the largest counties
# do not dominate the colour range - confirm intent).
kenya_census['Scaled Population'] = np.log10(kenya_census['Population'])

In [86]:
# Lookup from county name (COUNTY_NAM) to county code (COUNTY_COD), used to
# attach a map id to every census row.
county_id_map = {}

for feature in kenya['features']:
    properties = feature['properties']
    # Every feature gets a top-level `id`; the choropleth's `locations`
    # column is matched against it.
    feature['id'] = properties['COUNTY_COD']
    county_name = properties['COUNTY_NAM']
    # Features without a county name cannot be looked up by name, so they are
    # left out of the map instead of being inserted and popped afterwards
    # (which failed with KeyError when no such feature existed).
    if county_name is not None:
        county_id_map[county_name] = feature['id']

# Report every unmatched county at once rather than failing on the first one
# with a bare KeyError from inside a per-row lookup.
unmatched = sorted(set(kenya_census['County']).difference(county_id_map))
if unmatched:
    raise KeyError(f"Counties with no matching GeoJSON feature: {unmatched}")

kenya_census['county code'] = kenya_census['County'].map(county_id_map)

In [51]:
# Display the county-name -> county-code lookup built from the GeoJSON features.
county_id_map

{'NAIROBI': 47,
 'MOMBASA': 1,
 'KWALE': 2,
 'KILIFI': 3,
 'TANA RIVER': 4,
 'LAMU': 5,
 'TAITA TAVETA': 6,
 'GARISSA': 7,
 'WAJIR': 8,
 'MANDERA': 9,
 'MARSABIT': 10,
 'ISIOLO': 11,
 'MERU': 12,
 'THARAKA - NITHI': 13,
 'EMBU': 14,
 'KITUI': 15,
 'MACHAKOS': 16,
 'NYANDARUA': 18,
 'NYERI': 19,
 'KIRINYAGA': 20,
 "MURANG'A": 21,
 'KIAMBU': 22,
 'TURKANA': 23,
 'WEST POKOT': 24,
 'SAMBURU': 25,
 'UASIN GISHU': 27,
 'ELEGEYO-MARAKWET': 28,
 'NANDI': 29,
 'BARINGO': 30,
 'LAIKIPIA': 31,
 'NAKURU': 32,
 'NAROK': 33,
 'KAJIADO': 34,
 'MAKUENI': 17,
 'KERICHO': 35,
 'KAKAMEGA': 37,
 'VIHIGA': 38,
 'BUNGOMA': 39,
 'BUSIA': 40,
 'SIAYA': 41,
 'HOMA BAY': 43,
 'MIGORI': 44,
 'KISII': 45,
 'BOMET': 36,
 'NYAMIRA': 46,
 'KISUMU': 42,
 'TRANS NZOIA': 26}

In [110]:
# Preview the first 10 rows of the census table.
kenya_census.head(10)

375.083380308012

### Drawing the Choropleth

In [111]:
# Centre the diverging colour scale on the mean of the log-scaled population.
population_midpoint = kenya_census['Scaled Population'].mean()

# County choropleth: each census row is matched to map features through its
# 'county code'; colour encodes the log-scaled population while the hover box
# shows the raw figure.
figure_4 = px.choropleth(
    kenya_census,
    geojson=kenya,
    locations='county code',
    color='Scaled Population',
    color_continuous_scale=px.colors.diverging.BrBG,
    color_continuous_midpoint=population_midpoint,
    hover_name='County',
    hover_data=['Population'],
)

figure_4.update_layout(title='Population in Counties')
# Zoom to the plotted counties and hide the base map.
figure_4.update_geos(fitbounds='locations', visible=False)

figure_4.show()