In [None]:
import pandas as pd
import geopandas as gpd
import matplotlib.pyplot as plt
import numpy as np

In [None]:
# Read the household CSV ("hh_anon.csv") and preview its first five rows.
df_hh = pd.read_csv(filepath_or_buffer="hh_anon.csv")
df_hh.head(n=5)

In [None]:
# Keep only the households whose "digital" flag equals 1.
# The explicit .copy() makes df_hh an independent frame rather than a slice of the
# frame read from disk, so the later `df_hh["district_clean"] = ...` assignment
# writes to a real DataFrame instead of triggering SettingWithCopyWarning.
df_hh = df_hh[df_hh["digital"] == 1].copy()

In [None]:
# Read the trimmed firm census CSV from the sibling ekichabi-server directory
# and preview its first five rows.
df_firm = pd.read_csv(filepath_or_buffer='../ekichabi-server/data/census_data_trimmed.csv')
df_firm.head(n=5)

In [None]:
# Load the Tanzania district shapefile into a GeoDataFrame, then show the distinct
# values of its "District_N" column (the names later used as the join key).
tanzania = gpd.read_file('districts/Districts.shp')
pd.unique(tanzania["District_N"])

In [None]:
# Preview the first five rows of the district GeoDataFrame.
tanzania.head(n=5)

In [None]:
# Show the shapefile row(s) whose district name is exactly "Bukoba".
tanzania.loc[tanzania["District_N"].eq("Bukoba")]

In [None]:
# Draw every district polygon in the shapefile, coloured categorically by its
# "District_N" value.
tanzania.plot(
    figsize=(10, 10),
    cmap='Set2',
    column='District_N',
)

In [None]:
# Normalise firm district names: any value containing "CBD" becomes "Bukoba Urban";
# every other value is passed through str.capitalize().
df_firm["district_clean"] = df_firm["district"].map(
    lambda raw: "Bukoba Urban" if "CBD" in raw else raw.capitalize()
)

In [None]:
# Count firm rows per cleaned district name and reshape the tally into a
# two-column frame ("district", "count"), ordered as value_counts() returns it.
d = dict(df_firm["district_clean"].value_counts())
df_firm_count = pd.DataFrame(list(d.items()), columns=["district", "count"])
df_firm_count

In [None]:
# Attach the firm counts to the district polygons.
# The left join keeps every shapefile district; firm districts whose name has no
# matching "District_N" value are not carried over.
merged_df = pd.merge(
    left=tanzania,
    right=df_firm_count,
    how='left',
    left_on='District_N',
    right_on='district',
)

# Districts without any firm rows come out of the join as NaN; treat them as 0.
# Assign the result back instead of calling fillna(inplace=True) on the column
# selection: that chained in-place form is deprecated and does not update
# merged_df when pandas Copy-on-Write is active.
merged_df["count"] = merged_df["count"].fillna(0)

# "district" duplicates the join key "District_N"; rebinding (rather than an
# in-place drop) keeps the cell safe to re-run.
merged_df = merged_df.drop(columns=['district'])
merged_df

In [None]:
# For each district polygon, store the (x, y, ...) tuple of a point guaranteed to
# lie inside it; the plots below use it as the anchor for the district label.
merged_df['coords'] = [
    geom.representative_point().coords[0] for geom in merged_df['geometry']
]

In [None]:
# Country-wide choropleth of firm counts with a colorbar, labelling every district
# at its stored anchor point.
ax = merged_df.plot(column='count', cmap='OrRd', figsize=(18, 18), legend=True)
ax.set_title("Number of Firm Responses per District")

label_box = {'facecolor': 'white', 'alpha':0.2, 'pad': 2, 'edgecolor':'none'}
for point, district in zip(merged_df['coords'], merged_df['District_N']):
    ax.text(
        point[0],
        point[1],
        s=district,
        horizontalalignment='center',
        bbox=label_box,
        fontsize=8,
    )

In [None]:
# Zoomed choropleth of firm counts, restricted to the lon/lat window set below.
from mpl_toolkits.axes_grid1 import make_axes_locatable

fig, ax = plt.subplots(figsize=(10, 10))

# Reserve a slim axes on the right of the map for the colorbar.
cax = make_axes_locatable(ax).append_axes("right", size="5%", pad=0.05)

# Let geopandas draw the colorbar into `cax` from the same normalisation it uses
# to colour the polygons, so the scale always matches the plotted counts. This
# replaces a colorbar built from a dummy image with a hard-coded 0..2999 range,
# and avoids changing matplotlib's global default colormap.
merged_df.plot(column='count', cmap='OrRd', ax=ax, legend=True, cax=cax)
ax.set_xlim(30.4, 32.25)
ax.set_ylim(-2.5, -0.8)
ax.set_title("Number of Firm Responses per District")

# Label only the districts that appear in the firm counts; build the lookup once
# rather than recomputing it for every row.
labelled_districts = set(df_firm_count["district"])
for _, row in merged_df.iterrows():
    if row["District_N"] not in labelled_districts:
        continue
    ax.text(
        row["coords"][0],
        row["coords"][1],
        s=row["District_N"],
        horizontalalignment='center',
        bbox={'facecolor': 'white', 'alpha':0.2, 'pad': 2, 'edgecolor':'none'},
        fontsize=8,
    )

In [None]:
# Derive the household district name: take the text before the first space of
# "district_label" and pass it through str.capitalize().
df_hh["district_clean"] = df_hh["district_label"].map(
    lambda label: label.split(" ")[0].capitalize()
)

In [None]:
# Count household rows per cleaned district name and reshape the tally into a
# two-column frame ("district", "count"), ordered as value_counts() returns it.
d = dict(df_hh["district_clean"].value_counts())
df_hh_count = pd.DataFrame(list(d.items()), columns=["district", "count"])
df_hh_count

In [None]:
# Attach the household counts to the district polygons.
# The left join keeps every shapefile district; household districts whose name has
# no matching "District_N" value are not carried over.
merged_df = pd.merge(
    left=tanzania,
    right=df_hh_count,
    how='left',
    left_on='District_N',
    right_on='district',
)

# Districts without any household rows come out of the join as NaN; treat them
# as 0. Assign the result back instead of calling fillna(inplace=True) on the
# column selection: that chained in-place form is deprecated and does not update
# merged_df when pandas Copy-on-Write is active.
merged_df["count"] = merged_df["count"].fillna(0)

# "district" duplicates the join key "District_N"; rebinding (rather than an
# in-place drop) keeps the cell safe to re-run.
merged_df = merged_df.drop(columns=['district'])
merged_df

In [None]:
# For each district polygon, store the (x, y, ...) tuple of a point guaranteed to
# lie inside it; the plots below use it as the anchor for the district label.
merged_df['coords'] = [
    geom.representative_point().coords[0] for geom in merged_df['geometry']
]

In [None]:
# Country-wide choropleth of household counts with a colorbar, labelling every
# district at its stored anchor point.
ax = merged_df.plot(column='count', cmap='OrRd', figsize=(18, 18), legend=True)
ax.set_title("Number of Household Responses per District")

label_box = {'facecolor': 'white', 'alpha':0.2, 'pad': 2, 'edgecolor':'none'}
for point, district in zip(merged_df['coords'], merged_df['District_N']):
    ax.text(
        point[0],
        point[1],
        s=district,
        horizontalalignment='center',
        bbox=label_box,
        fontsize=8,
    )

In [None]:
# Zoomed choropleth of household counts, restricted to the lon/lat window set
# below, with the map frame hidden and enlarged fonts.
from mpl_toolkits.axes_grid1 import make_axes_locatable

fig, ax = plt.subplots(figsize=(10, 10))

# Reserve a slim axes on the right of the map for the colorbar.
cax = make_axes_locatable(ax).append_axes("right", size="5%", pad=0.05)

# Let geopandas draw the colorbar into `cax` from the same normalisation it uses
# to colour the polygons, so the scale always matches the plotted counts. This
# replaces a colorbar built from a dummy image with a hard-coded 0..274 range,
# and avoids changing matplotlib's global default colormap.
merged_df.plot(column='count', cmap='OrRd', ax=ax, legend=True, cax=cax)
ax.set_xlim(30.4, 32.25)
ax.set_ylim(-2.5, -0.8)
ax.set_title("Number of Household Responses per District", fontsize=20)

# Label only the districts that appear in the household counts; build the lookup
# once rather than recomputing it for every row.
labelled_districts = set(df_hh_count["district"])
for _, row in merged_df.iterrows():
    if row["District_N"] not in labelled_districts:
        continue
    ax.text(
        row["coords"][0],
        row["coords"][1],
        s=row["District_N"],
        horizontalalignment='center',
        bbox={'facecolor': 'white', 'alpha':0.2, 'pad': 2, 'edgecolor':'none'},
        fontsize=18,
    )
ax.axis('off')

# Enlarge the colorbar tick labels; target the colorbar axes explicitly instead
# of relying on whichever axes happens to be current.
cax.tick_params(axis='both', which='major', labelsize=16)