# Imports

In [2]:
import pandas as pd
import numpy as np
import datetime
from helper_metrics import count_missing_district, count_missing_district_total

# Load data

In [3]:
# Read the semi-yearly extract and drop the first CSV column by position
# (presumably a saved row index -- confirm against the file).
# The read is kept as one chained expression so no reference to the full
# parent frame stays alive.
df = (
    pd.read_csv("data/semiyearly_chosen_columns.csv")
    .iloc[:, 1:]
)
df

Unnamed: 0,date,district,total population,Under-Five Population,GAM,MAM,SAM,GAM Prevalence,SAM Prevalence,phase3plus_perc_x,...,Total alarms,n_conflict_total,Average of centy,Average of centx,prevalence_6lag,next_prevalence,month,increase,increase_numeric,district_encoded
0,2017-07-01,Adan Yabaal,65262.96000,13052.59200,4819.01697,3733.04131,1085.97565,0.36920,0.08320,0.18000,...,2.16667,,3.54944,46.54467,,0.35100,7,False,-0.01820,0
1,2017-07-01,Lughaye,70268.22000,14053.64400,5334.76326,4220.30929,1114.45397,0.37960,0.07930,0.36000,...,2.66667,1.00000,10.64738,43.57812,,0.16900,7,False,-0.21060,58
2,2017-07-01,Buuhoodle,71317.71000,14263.54200,4858.16241,3652.89311,1205.26930,0.34060,0.08450,0.37000,...,2.33333,2.50000,8.46016,46.66129,,0.20280,7,False,-0.13780,23
3,2017-07-01,Luuq,100476.76500,20095.35300,8673.15435,7366.95641,1306.19795,0.43160,0.06500,0.21000,...,7.83333,1.50000,3.79293,42.69760,,0.39260,7,False,-0.03900,59
4,2017-07-01,Burtinle,112734.27000,22546.85400,10200.19675,8500.16396,1700.03279,0.45240,0.07540,0.22000,...,3.66667,,7.80220,48.39912,,0.37960,7,False,-0.07280,22
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
657,2021-07-01,Jariiban,,32671.60000,10890.00000,,1430.00000,0.33332,0.04377,0.19000,...,2.16667,,7.16378,48.99860,0.34857,,7,,,50
658,2021-07-01,Caluula,,16168.60000,5560.00000,,870.00000,0.34388,0.05381,0.16000,...,3.16667,1.00000,11.66822,50.79402,0.35925,,7,,,28
659,2021-07-01,Qoryooley,,25309.00087,11420.00000,,2160.00000,0.45122,0.08535,0.08000,...,6.16667,3.50000,1.93456,44.44943,0.45939,,7,,,65
660,2021-07-01,Baki,,11187.80000,3470.00000,,640.00000,0.31016,0.05721,0.37000,...,0.66667,1.00000,10.28566,43.73210,0.22769,,7,,,7


# Fix MAM

In [4]:
# Recompute MAM as the difference between GAM and SAM, overwriting the loaded column.
df["MAM"] = df["GAM"].sub(df["SAM"])

# Fix `total population`

In [5]:
# Convert the date column with pd.to_datetime so dates can be compared and ordered.
df["date"] = pd.to_datetime(df["date"])

# Columns used below to inspect the population value per district and date.
id_columns = [
    "date",
    "district",
    "total population",
]

In [6]:
# Show the identifying columns for the final 73 rows of the frame.
df.loc[:, id_columns].tail(73)

Unnamed: 0,date,district,total population
589,2021-07-01,Baidoa,
590,2021-07-01,Adan Yabaal,
591,2021-07-01,Ceel Waaq,
592,2021-07-01,Wanla Weyn,
593,2021-07-01,Zeylac,
...,...,...,...
657,2021-07-01,Jariiban,
658,2021-07-01,Caluula,
659,2021-07-01,Qoryooley,
660,2021-07-01,Baki,


### Show the population distribution for a randomly selected district over time

In [7]:
# Pick one district uniformly at random from the districts present in the data.
# The bound is derived from the data rather than hard-coded. No seed is set
# here, so the district shown changes between runs.
district_names = df["district"].unique()
district_number = np.random.randint(0, len(district_names))

# Show that district's recorded population, ordered by date.
df.loc[df["district"] == district_names[district_number], id_columns].sort_values("date")

Unnamed: 0,date,district,total population
0,2017-07-01,Adan Yabaal,65262.96
127,2018-01-01,Adan Yabaal,65262.96
182,2018-07-01,Adan Yabaal,37859.15495
234,2019-01-01,Adan Yabaal,37859.15495
360,2019-07-01,Adan Yabaal,37859.15495
430,2020-01-01,Adan Yabaal,35879.12501
448,2020-07-01,Adan Yabaal,35878.0
535,2021-01-01,Adan Yabaal,35879.0
590,2021-07-01,Adan Yabaal,


# Carry out a last-observation-carried-forward (LOCF) time series imputation for each district

In [9]:
# Last-observation-carried-forward (LOCF) imputation of the 2021-07-01
# population values, done per district.
population_column = "total population"

# Forward-fill within each district in chronological order, so the value
# carried forward is the most recent non-missing one by date rather than by
# row position. The stable sort keeps the original order for equal dates.
# reindex() restores the original row order (assumes a unique index, which
# the default index from read_csv is).
carried_forward = (
    df.sort_values("date", kind="stable")
    .groupby("district")[population_column]
    .ffill()
    .reindex(df.index)
)

# Only fill 2021-07-01 rows that are actually missing; populated values and
# all other dates are left as they are. A district with no earlier
# observation stays NaN instead of raising.
needs_fill = (df["date"] == "2021-07-01") & df[population_column].isna()
df.loc[needs_fill, population_column] = carried_forward[needs_fill]

### Check the 2021 `total population` values

In [10]:
# Display the final 73 rows to inspect their `total population` values after imputation.
df.tail(73)

Unnamed: 0,date,district,total population,Under-Five Population,GAM,MAM,SAM,GAM Prevalence,SAM Prevalence,phase3plus_perc_x,...,Total alarms,n_conflict_total,Average of centy,Average of centx,prevalence_6lag,next_prevalence,month,increase,increase_numeric,district_encoded
589,2021-07-01,Baidoa,515337.00000,124149.80000,56660.00000,45680.00000,10980.00000,0.45638,0.08844,0.17000,...,7.83333,12.00000,3.26636,43.63929,0.48097,,7,,,6
590,2021-07-01,Adan Yabaal,35879.00000,17190.00000,4930.00000,4220.00000,710.00000,0.28679,0.04130,0.10000,...,1.33333,1.50000,3.54944,46.54467,0.27735,,7,,,0
591,2021-07-01,Ceel Waaq,39782.00000,17519.20000,6210.00000,5290.00000,920.00000,0.35447,0.05251,0.15000,...,6.50000,1.25000,2.67898,41.44526,0.26650,,7,,,34
592,2021-07-01,Wanla Weyn,66459.00000,63909.40000,28650.00000,22910.00000,5740.00000,0.44829,0.08981,0.07000,...,4.66667,2.40000,2.79164,44.90647,0.47141,,7,,,72
593,2021-07-01,Zeylac,72825.00000,11246.40000,2690.00000,2350.00000,340.00000,0.23919,0.03023,0.15000,...,1.16667,1.00000,10.89293,43.16827,0.18538,,7,,,76
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
657,2021-07-01,Jariiban,74373.00000,32671.60000,10890.00000,9460.00000,1430.00000,0.33332,0.04377,0.19000,...,2.16667,,7.16378,48.99860,0.34857,,7,,,50
658,2021-07-01,Caluula,51766.00000,16168.60000,5560.00000,4690.00000,870.00000,0.34388,0.05381,0.16000,...,3.16667,1.00000,11.66822,50.79402,0.35925,,7,,,28
659,2021-07-01,Qoryooley,207773.00000,25309.00087,11420.00000,9260.00000,2160.00000,0.45122,0.08535,0.08000,...,6.16667,3.50000,1.93456,44.44943,0.45939,,7,,,65
660,2021-07-01,Baki,99157.00000,11187.80000,3470.00000,2830.00000,640.00000,0.31016,0.05721,0.37000,...,0.66667,1.00000,10.28566,43.73210,0.22769,,7,,,7


In [11]:
# Call the project helper from helper_metrics on the imputed frame.
# NOTE(review): presumably this returns the number of missing values per
# column for each district -- confirm against the helper's implementation.
count_missing_district(df)

Unnamed: 0_level_0,date,total population,Under-Five Population,GAM,MAM,SAM,GAM Prevalence,SAM Prevalence,phase3plus_perc_x,rainfall,...,Total alarms,n_conflict_total,Average of centy,Average of centx,prevalence_6lag,next_prevalence,month,increase,increase_numeric,district_encoded
district,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
Adan Yabaal,0,0,0,0,0,0,0,0,0,0,...,0,3,0,0,1,1,0,1,1,0
Afgooye,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,1,1,0,1,1,0
Afmadow,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,1,1,0,1,1,0
Baardheere,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,1,1,0,1,1,0
Badhaadhe,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,1,1,0,1,1,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
Wanla Weyn,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,1,1,0,1,1,0
Xarardheere,0,0,0,0,0,0,0,0,0,0,...,0,1,0,0,1,1,0,1,1,0
Xudun,0,0,0,0,0,0,0,0,0,0,...,0,7,0,0,1,1,0,1,1,0
Xudur,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,1,1,0,1,1,0
