# Model Evaluation Notebook

### Synthetic Migration Model Evaluation

In [1]:
import pandas as pd
import numpy as np
import statistics

# Evaluation inputs: the projected values produced by the model, plus the true
# age-sex values and true totals they are compared against.
projection_csv = "../Data/Projection/Iterative_predict_output_Step1_1000_Type2_neg.csv"
true_agesex_csv = "../Data/True/agesex_true_1000.csv"
true_total_csv = "../Data/True/total_true_1000.csv"

forecast_syn = pd.read_csv(projection_csv)
forecast_org = pd.read_csv(true_agesex_csv)
forecast_org_tot = pd.read_csv(true_total_csv)

### Age-Sex Cohort APE

In [2]:
# Row-level accuracy for 2006 and 2011: the absolute gap between the projected
# and the true value (APE_Diff_*), and that gap as a percentage of the true
# value (APE_*, the Absolute Percentage Error).
as_syn_06 = forecast_syn["2006"].tolist()
as_syn_11 = forecast_syn["2011"].tolist()
as_org_06 = forecast_org["2006"].tolist()
as_org_11 = forecast_org["2011"].tolist()

APE_06, APE_11 = [], []
APE_Diff_06, APE_Diff_11 = [], []
# Projected and true rows are paired by position; the projected table sets the
# number of rows visited.
for row in range(len(as_syn_06)):
    gap_06 = abs(as_syn_06[row] - as_org_06[row])
    gap_11 = abs(as_syn_11[row] - as_org_11[row])
    APE_Diff_06.append(gap_06)
    APE_Diff_11.append(gap_11)
    # tolist() yields plain Python numbers, so a true value of 0 raises
    # ZeroDivisionError here.
    # NOTE(review): confirm the true table never contains zero values.
    APE_06.append(gap_06 / as_org_06[row] * 100)
    APE_11.append(gap_11 / as_org_11[row] * 100)

# Mean, median and 90th percentile of the row-level APE, per year.
mean_06, median_06, percentile_90_06 = (
    statistics.mean(APE_06),
    statistics.median(APE_06),
    np.percentile(APE_06, 90),
)

mean_11, median_11, percentile_90_11 = (
    statistics.mean(APE_11),
    statistics.median(APE_11),
    np.percentile(APE_11, 90),
)

### Total Population APE

In [3]:
# Area-level error rate for 2006 and 2011.
#
# The row-level absolute errors (APE_Diff_*) are split into consecutive groups
# of `age_sex` rows, one group per area. Each group's errors are summed and
# expressed as a percentage of the matching true total from forecast_org_tot.
#
# NOTE(review): despite the names, tot_syn_* hold summed absolute errors, not
# summed projected values. The metric is therefore sum(|projected - true|) /
# true total, in which row errors never cancel out; it is not
# |projected total - true total| / true total. Confirm this is the intended
# definition of "total population APE".
age_sex = 36  # rows per area -- presumably the number of age-sex cohorts; confirm against the data
tot_org_06 = forecast_org_tot["2006"].tolist()
tot_org_11 = forecast_org_tot["2011"].tolist()

# The rows must split into whole areas. Without this check a trailing partial
# group would be scored as if it were a complete area, and an empty input
# would yield one spurious area with a total error of 0.
n_rows = len(APE_Diff_06)
if n_rows == 0 or n_rows % age_sex != 0:
    raise ValueError(
        "expected a non-empty multiple of {} rows, got {}".format(age_sex, n_rows)
    )

# Sum each consecutive run of `age_sex` rows. Both years are sliced at the same
# offsets so their area lists stay aligned.
area_starts = range(0, n_rows, age_sex)
tot_syn_06 = [sum(APE_Diff_06[start:start + age_sex]) for start in area_starts]
tot_syn_11 = [sum(APE_Diff_11[start:start + age_sex]) for start in area_starts]

# Areas are matched to the true totals by position, so the counts must agree;
# otherwise rows would be silently dropped or an IndexError raised mid-loop.
if len(tot_syn_06) != len(tot_org_06):
    raise ValueError(
        "{} areas derived from the row-level errors but {} true totals loaded".format(
            len(tot_syn_06), len(tot_org_06)
        )
    )

# Summed absolute error as a percentage of the true total, per area.
tot_APE_06 = [err / actual * 100 for err, actual in zip(tot_syn_06, tot_org_06)]
tot_APE_11 = [err / actual * 100 for err, actual in zip(tot_syn_11, tot_org_11)]

# Mean, median and 90th percentile of the area-level error rate, per year.
mean_tot_06 = statistics.mean(tot_APE_06)
median_tot_06 = statistics.median(tot_APE_06)
percentile_tot_90_06 = np.percentile(tot_APE_06, 90)

mean_tot_11 = statistics.mean(tot_APE_11)
median_tot_11 = statistics.median(tot_APE_11)
percentile_tot_90_11 = np.percentile(tot_APE_11, 90)

### Generate Evaluation Result CSV

In [4]:
# Summary table: one row per statistic and year, one column per evaluation
# level, with the row labels in a leading column whose header is empty.
row_name = [
    "mean_2006", "median_2006", "percentile_90_2006",
    "mean_2011", "median_2011", "percentile_90_2011",
]
as_result = [
    mean_06, median_06, percentile_90_06,
    mean_11, median_11, percentile_90_11,
]
tot_result = [
    mean_tot_06, median_tot_06, percentile_tot_90_06,
    mean_tot_11, median_tot_11, percentile_tot_90_11,
]

result_dict = {"Age-Sex Level": as_result, "Total Level": tot_result}
result = pd.DataFrame(result_dict)
# Put the labels ahead of the two metric columns.
result.insert(0, "", row_name)

# to_csv keeps its default index=True, so the file also carries the 0..5 row
# index in front of the label column.
result.to_csv("../Data/Evaluation/LSTM_Full_EvaResult_Step1_1000_Type2_neg.csv")