In [2]:

import pandas as pd

# Load the complete projections extract into a single frame; the later cells
# in this notebook slice it by the "populationmeasure" column.
df = pd.read_csv("Experimental-National Population Projections.csv")

# Display the column labels of the loaded frame.
df.columns.values

array(['V4_0', 'time', 'time_codelist', 'geography', 'geography_codelist',
       'sex_codelist', 'sex', 'age_codelist', 'age',
       'projectiontype_codelist', 'projectiontype',
       'populationmeasure_codelist', 'populationmeasure'], dtype=object)

In [3]:
# List the distinct values of the "populationmeasure" column; the later cells
# filter on these labels.
df["populationmeasure"].unique()

array(['Births', 'Cross_border_rates', 'Deaths', 'Fertility_assumptions',
       'International_migration(In)', 'International_migration(Out)',
       'International_migration(Net)', 'Mortality_assumptions',
       'Population', 'Total_migration(In)', 'Total_migration(Out)',
       'Total_migration(Net)'], dtype=object)

In [4]:
# List the distinct values of the "projectiontype" column (inspection only).
df["projectiontype"].unique()

array(['Low life expectancy', 'High migration', 'Low migration',
       'Principal', 'Zero net migration (natural change only)',
       'High population', 'High fertility', 'Low population',
       'Low fertility', 'High life expectancy'], dtype=object)

In [5]:

# Migration
#
# Select every row whose "populationmeasure" is one of the six migration
# measures, write the subset to NPP_Migration.csv, and preview it.
# Both populationmeasure columns are kept, since the subset mixes several
# measures.

wanted = [
    'International_migration(In)', 'International_migration(Out)',
    'International_migration(Net)', 'Total_migration(In)',
    'Total_migration(Out)', 'Total_migration(Net)',
]

# One isin() mask selects the wanted measures in a single pass over the frame.
# Unlike excluding unwanted values one at a time with `!=`, it also leaves out
# rows whose populationmeasure is missing (a `!= NaN` comparison is True for
# every row, so it can never remove them).
migration = df[df["populationmeasure"].isin(wanted)]

migration.to_csv("NPP_Migration.csv", index=False)

migration.head(3)  # sanity check

Unnamed: 0,V4_0,time,time_codelist,geography,geography_codelist,sex_codelist,sex,age_codelist,age,projectiontype_codelist,projectiontype,populationmeasure_codelist,populationmeasure
30824,2604.0,2016 - 2017,Year,United Kingdom,K02000001,1.0,1.0,0,0,low-life-expectancy,Low life expectancy,international_migration(in),International_migration(In)
30825,4544.0,2016 - 2017,Year,United Kingdom,K02000001,1.0,1.0,1,1,low-life-expectancy,Low life expectancy,international_migration(in),International_migration(In)
30826,4069.0,2016 - 2017,Year,United Kingdom,K02000001,1.0,1.0,2,2,low-life-expectancy,Low life expectancy,international_migration(in),International_migration(In)


In [6]:

# Deaths
#
# Pull out the rows for the "Deaths" measure and write them to NPP_Deaths.csv.
# "populationmeasure" is constant after the filter (presumably its codelist
# is too), so both columns are removed from the output.

deaths = (
    df.loc[df["populationmeasure"] == "Deaths"]
    .drop(["populationmeasure", "populationmeasure_codelist"], axis=1)
)

deaths.to_csv("NPP_Deaths.csv", index=False)

deaths.head(3)  # sanity check

Unnamed: 0,V4_0,time,time_codelist,geography,geography_codelist,sex_codelist,sex,age_codelist,age,projectiontype_codelist,projectiontype
6224,1382.0,2016 - 2017,Year,United Kingdom,K02000001,1.0,1.0,birth,Birth,low-life-expectancy,Low life expectancy
6225,332.0,2016 - 2017,Year,United Kingdom,K02000001,1.0,1.0,0,0,low-life-expectancy,Low life expectancy
6226,108.0,2016 - 2017,Year,United Kingdom,K02000001,1.0,1.0,1,1,low-life-expectancy,Low life expectancy


In [7]:

# Mortality Assumptions
#
# Pull out the rows for the "Mortality_assumptions" measure and write them to
# NPP_MortalityAssumptions.csv. "populationmeasure" is constant after the
# filter (presumably its codelist is too), so both columns are removed.

mAssumptions = (
    df.loc[df["populationmeasure"] == "Mortality_assumptions"]
    .drop(["populationmeasure", "populationmeasure_codelist"], axis=1)
)

mAssumptions.to_csv("NPP_MortalityAssumptions.csv", index=False)

mAssumptions.head(3)  # sanity check

Unnamed: 0,V4_0,time,time_codelist,geography,geography_codelist,sex_codelist,sex,age_codelist,age,projectiontype_codelist,projectiontype
94424,354.172807,2016 - 2017,Year,United Kingdom,K02000001,1.0,1.0,birth,Birth,low-life-expectancy,Low life expectancy
94425,82.469208,2016 - 2017,Year,United Kingdom,K02000001,1.0,1.0,0,0,low-life-expectancy,Low life expectancy
94426,26.706628,2016 - 2017,Year,United Kingdom,K02000001,1.0,1.0,1,1,low-life-expectancy,Low life expectancy


In [8]:

# Population
#
# Pull out the rows for the "Population" measure and write them to
# NPP_PopulationNumbers.csv. "populationmeasure" is constant after the filter
# (presumably its codelist is too), so both columns are removed.

population = (
    df.loc[df["populationmeasure"] == "Population"]
    .drop(["populationmeasure", "populationmeasure_codelist"], axis=1)
)

population.to_csv("NPP_PopulationNumbers.csv", index=False)

population.head(3)  # sanity check

Unnamed: 0,V4_0,time,time_codelist,geography,geography_codelist,sex_codelist,sex,age_codelist,age,projectiontype_codelist,projectiontype
119824,401630.0,2016,Year,United Kingdom,K02000001,1.0,1.0,0,0,low-life-expectancy,Low life expectancy
119825,402554.0,2016,Year,United Kingdom,K02000001,1.0,1.0,1,1,low-life-expectancy,Low life expectancy
119826,406751.0,2016,Year,United Kingdom,K02000001,1.0,1.0,2,2,low-life-expectancy,Low life expectancy


In [10]:

# Births and Fertility Assumptions
#
# Select every row whose "populationmeasure" is either "Births" or
# "Fertility_assumptions", write the subset to NPP_BirthsAndFertility.csv,
# and preview it. Both populationmeasure columns are kept, since the subset
# mixes two measures.

wanted = ['Fertility_assumptions', 'Births']

# One isin() mask selects the wanted measures in a single pass over the frame.
# Unlike excluding unwanted values one at a time with `!=`, it also leaves out
# rows whose populationmeasure is missing (a `!= NaN` comparison is True for
# every row, so it can never remove them).
birthsAndFertility = df[df["populationmeasure"].isin(wanted)]

birthsAndFertility.to_csv("NPP_BirthsAndFertility.csv", index=False)

birthsAndFertility.head(3)  # sanity check


Unnamed: 0,V4_0,time,time_codelist,geography,geography_codelist,sex_codelist,sex,age_codelist,age,projectiontype_codelist,projectiontype,populationmeasure_codelist,populationmeasure
0,533.0,2016 - 2017,Year,United Kingdom,K02000001,2.0,2.0,15,15,low-life-expectancy,Low life expectancy,births,Births
1,1457.0,2016 - 2017,Year,United Kingdom,K02000001,2.0,2.0,16,16,low-life-expectancy,Low life expectancy,births,Births
2,3934.0,2016 - 2017,Year,United Kingdom,K02000001,2.0,2.0,17,17,low-life-expectancy,Low life expectancy,births,Births
