# Notebook for Generating Data for the Synthetic Migration Model

### Read in Remainder Area Name(s)

In [2]:
import pandas as pd
from openpyxl import load_workbook
from syntheticData import selectSynYear, remainderFunc, totPop, agesexPop, recordAgeSex
from syntheticData import growthRate, spec4NPT, popCeil, CSP, MEX, VSG, LINEXP, constrainLINEXP

# Full dataset of age-sex cohort, read from sheet 'Sheet1' of the history workbook.
all_area = pd.read_excel('/../Users/relax/Desktop/DSP/DS_Project/Data/historyData.xlsx', sheet_name = 'Sheet1')

# Identifier columns, pulled out as plain lists so they can be re-attached below.
all_area_year = all_area["Year"].tolist()
all_area_name = all_area["SA3 Name"].tolist()
all_area_cpde = all_area["SA3 Code"].tolist()

# Keep every row but only the columns from position 9 onwards
# (presumably the population counts -- confirm against the workbook layout).
# An explicit copy is taken because the next lines add columns: doing that on a
# bare .iloc slice of `all_area` is the chained-assignment pattern that pandas
# reports with SettingWithCopyWarning.
all_area_pop = all_area.iloc[:, 9:].copy()
all_area_pop["Year"] = all_area_year
all_area_pop["SA3 Name"] = all_area_name
all_area_pop["SA3 Code"] = all_area_cpde

# Find our remainder areas from the age-sex cohort dataframe.
# A row qualifies when its value in column position 45 is the ".." placeholder
# or a number below 1000; the value in column position 8 of every qualifying
# row is collected once, in order of first appearance.
outsider_list = ["Cocos (Keeling) Islands", "Jervis Bay", "Christmas Island"]

_threshold_values = all_area.iloc[:, 45]
# Coerce to numbers so a mixed text/number column can be compared in one pass;
# non-numeric cells become NaN, and NaN < 1000 is False.
_is_remainder_row = _threshold_values.eq("..") | (
    pd.to_numeric(_threshold_values, errors="coerce") < 1000
)
remainder_list = all_area.iloc[:, 8][_is_remainder_row].drop_duplicates().tolist()

# Generate one target-year dataframe per synthetic year for further usage.
(
    all_area_pop_1991,
    all_area_pop_1996,
    all_area_pop_2001,
    all_area_pop_2006,
    all_area_pop_2011,
) = (
    selectSynYear(all_area_pop, synthetic_year)
    for synthetic_year in (1991, 1996, 2001, 2006, 2011)
)

# Year-ordered handles (1991, 1996, 2001, 2006, 2011) reused by the steps below.
_pop_by_year = (
    all_area_pop_1991,
    all_area_pop_1996,
    all_area_pop_2001,
    all_area_pop_2006,
    all_area_pop_2011,
)

# Total population of the aggregated remainder area for each year.
(
    remainder_1991,
    remainder_1996,
    remainder_2001,
    remainder_2006,
    remainder_2011,
) = (
    remainderFunc(year_pop, remainder_list, outsider_list)
    for year_pop in _pop_by_year
)

_remainder_by_year = (
    remainder_1991,
    remainder_1996,
    remainder_2001,
    remainder_2006,
    remainder_2011,
)

# Names, codes and total population for all areas (including the aggregated region).
(
    (SA3_Name_list_1991, SA3_Code_list_1991, SA3_totPop_list_1991),
    (SA3_Name_list_1996, SA3_Code_list_1996, SA3_totPop_list_1996),
    (SA3_Name_list_2001, SA3_Code_list_2001, SA3_totPop_list_2001),
    (SA3_Name_list_2006, SA3_Code_list_2006, SA3_totPop_list_2006),
    (SA3_Name_list_2011, SA3_Code_list_2011, SA3_totPop_list_2011),
) = (
    totPop(year_remainder, year_pop, remainder_list)
    for year_remainder, year_pop in zip(_remainder_by_year, _pop_by_year)
)

# Age-sex ERP data for all areas (including the aggregated region).
# The trailing 18 is passed through unchanged to agesexPop.
(
    (male_list_1991, female_list_1991),
    (male_list_1996, female_list_1996),
    (male_list_2001, female_list_2001),
    (male_list_2006, female_list_2006),
    (male_list_2011, female_list_2011),
) = (
    agesexPop(remainder_list, year_pop, outsider_list, 18)
    for year_pop in _pop_by_year
)

### Record Age-Sex Cohort Data into the Synthetic Migration Model Workbook

In [11]:
# Open the target workbook and grab the worksheets that are filled in below.
wt_loc = '../VBA_9616/vba.xlsx'
wb_wt = load_workbook(wt_loc)
(
    wb_wt_Labels,
    wb_wt_AgeSexERPs,
    wb_wt_SmallAreaTotals,
    wb_wt_Fertility,
    wb_wt_Mortality,
    wb_wt_Migration,
) = (
    wb_wt[sheet_title]
    for sheet_title in (
        "Labels",
        "AgeSexERPs",
        "SmallAreaTotals",
        "Fertility",
        "Mortality",
        "Migration",
    )
)


def _write_area_labels(sheet, sheet_row, area_code, area_name):
    """Put the area code in column 2 and the area name in column 3 of `sheet_row`."""
    sheet.cell(sheet_row, 2).value = area_code
    sheet.cell(sheet_row, 3).value = area_name


# Write area name & code into every worksheet, one area per iteration.
for area_idx, area_name in enumerate(SA3_Name_list_1991):
    area_code = SA3_Code_list_1991[area_idx]
    totals_row = 5 + area_idx        # first area lands on row 5
    erp_row = 5 + 18 * area_idx      # AgeSexERPs rows advance by 18 per area

    _write_area_labels(wb_wt_Labels, totals_row, area_code, area_name)

    _write_area_labels(wb_wt_SmallAreaTotals, totals_row, area_code, area_name)
    # Columns 4 and 5 hold the 1996 and 2001 totals.
    wb_wt_SmallAreaTotals.cell(totals_row, 4).value = SA3_totPop_list_1996[area_idx]
    wb_wt_SmallAreaTotals.cell(totals_row, 5).value = SA3_totPop_list_2001[area_idx]

    # Fertility and Mortality sit two rows lower, Migration one row lower.
    _write_area_labels(wb_wt_Fertility, totals_row + 2, area_code, area_name)
    _write_area_labels(wb_wt_Mortality, totals_row + 2, area_code, area_name)
    _write_area_labels(wb_wt_Migration, totals_row + 1, area_code, area_name)

    _write_area_labels(wb_wt_AgeSexERPs, erp_row, area_code, area_name)

# Record the 1996 and 2001 age-sex ERP data into the AgeSexERPs worksheet.
wb_wt_AgeSexERPs = recordAgeSex(
    male_list_1996,
    male_list_2001,
    female_list_1996,
    female_list_2001,
    wb_wt_AgeSexERPs,
)

# Persist the changes back to the same workbook file.
wb_wt.save(wt_loc)

# Generate 4-Model SmallAreaTotal Population

### ERPs

In [12]:
# Assemble the ERP table: one "Area" column plus total population for 1991, 2001 and 2006.
_erp_labels = ("Area", "1991", "2001", "2006")
_erp_series = (
    SA3_Name_list_1991,
    SA3_totPop_list_1991,
    SA3_totPop_list_2001,
    SA3_totPop_list_2006,
)
ERPs = dict(zip(_erp_labels, _erp_series))
df_ERPs = pd.DataFrame(ERPs)

### Annual Average Growth & Growth Rate

In [13]:
# Annual average population growth and growth rate between the two reference years.
previous_year, jump_year = 1991, 2001
growth, growth_rate = growthRate(previous_year, jump_year, df_ERPs)

### CSP_Model

In [14]:
# National projection targets: 4 data points (2001, 2006, 2011, 2016) ...
target_4NPT = [19272117, 20192983, 21018687, 21786396]
column_4name = [str(projection_year) for projection_year in range(2001, 2017, 5)]

# ... and 16 data points (every year from 2001 to 2016).
target_16NPT = [
    19274701, 19495210, 19723404, 19944673,
    20158812, 20365611, 20569450, 20770415,
    20968573, 21164010, 21356610, 21546808,
    21735843, 21923830, 22110794, 22296754,
]
column_16name = [str(projection_year) for projection_year in range(2001, 2017)]

# CSP_model output for the 4-point national projection.
df_CSP_4NP = CSP(SA3_totPop_list_2001, target_4NPT, column_4name)

# Special national projection data derived from the 4-point list (only used in VSG_Model).
target_spec_4NPT = spec4NPT(target_4NPT, 5)

# CSP_model output for the 16-point national projection.
df_CSP_16NP = CSP(SA3_totPop_list_2001, target_16NPT, column_16name)

### MEX_Model

In [15]:
# Population ceiling K, derived from the jump-off year and the limit coefficient.
jumpoff_year, limit_coef = 2001, 5
pop_ceil = popCeil(jumpoff_year, limit_coef, growth_rate, df_ERPs)

# Total population from the MEX_Model, projected out to the final target year.
target_final_year = 2016
df_MEX = MEX(jumpoff_year, target_final_year, df_ERPs, pop_ceil, growth_rate)

### VSG_Model

In [16]:
# Total population from the VSG_Model under three national projection inputs.
# The leading arguments are identical for every run, so they are bundled once.
_vsg_common = (jumpoff_year, target_final_year, df_ERPs, growth, growth_rate)

# Based on Tom's 2001 SA2 NationalProjection data (16 points, step of 1).
df_VSG_year_16 = VSG(*_vsg_common, 1, target_16NPT)

# Based on Tom's 2001 SA3 NationalProjection data (4 points, step of 5).
df_VSG_year_4 = VSG(*_vsg_common, 5, target_4NPT)

# Based on the manipulated SA3 data, where the population difference between
# projection years is divided equally (step of 1).
df_VSG_year_4_spec = VSG(*_vsg_common, 1, target_spec_4NPT)

### LINEXP_Model

In [17]:
# Unconstrained total population from the LINEXP_Model.
jumpoff_year, target_year = 2001, 2016
df_LINEXP = LINEXP(jumpoff_year, target_year, growth, growth_rate, df_ERPs)

# Two constrained variants, one per CSP output (4-point with step 5, 16-point with step 1).
_linexp_common = (df_LINEXP, jumpoff_year, target_year)
df_constrain_4LINEXP = constrainLINEXP(*_linexp_common, df_CSP_4NP, 5)
df_constrain_16LINEXP = constrainLINEXP(*_linexp_common, df_CSP_16NP, 1)

# Record Average-4-Model Total Population Data

In [18]:
def _four_model_mean(year_label, csp_frame, vsg_frame):
    """Return the mean of the MEX, LINEXP, CSP and VSG totals in column `year_label`."""
    return (
        df_MEX[year_label]
        + df_LINEXP[year_label]
        + csp_frame[year_label]
        + vsg_frame[year_label]
    ) / 4


_projection_years = ("2006", "2011", "2016")

# Case 1: averages built on the 4-datapoint NationalProjection outputs.
TP4_2006, TP4_2011, TP4_2016 = (
    _four_model_mean(year_label, df_CSP_4NP, df_VSG_year_4)
    for year_label in _projection_years
)
TP4_dict = {
    "2006": TP4_2006.tolist(),
    "2011": TP4_2011.tolist(),
    "2016": TP4_2016.tolist(),
}
df_TP4 = pd.DataFrame(TP4_dict)

# Case 2: averages built on the 16-datapoint NationalProjection outputs.
TP16_2006, TP16_2011, TP16_2016 = (
    _four_model_mean(year_label, df_CSP_16NP, df_VSG_year_16)
    for year_label in _projection_years
)
TP16_dict = {
    "2006": TP16_2006.tolist(),
    "2011": TP16_2011.tolist(),
    "2016": TP16_2016.tolist(),
}
df_TP16 = pd.DataFrame(TP16_dict)

# Record the projected totals for 2006, 2011 and 2016 in columns 6-8 of
# SmallAreaTotals, starting at row 5. Only df_TP4 is written here; df_TP16 is
# computed above but not recorded by this cell.
for area_idx, _ in enumerate(SA3_Name_list_1991):
    sheet_row = 5 + area_idx
    for sheet_col, year_label in enumerate(_projection_years, start=6):
        wb_wt_SmallAreaTotals.cell(sheet_row, sheet_col).value = df_TP4[year_label][area_idx]

# Persist the changes back to the workbook file.
wb_wt.save(wt_loc)