# Air Demo Notebook

## Project-level functions

### Extract

The project-level `extract` function returns a DataFrame for a given `theme` and `dataset` (for example, `extract(theme="air", dataset="one")`). It reads the data from an Excel or OpenDocument workbook hosted on https://data.gov.uk/, using the metadata held in the dictionary defined in [datasets.py](https://prose.io/#Defra-Data-Science-Centre-of-Excellence/oiflib/edit/initial/src/oiflib/datasets.py).

In [1]:
from oiflib.extract import extract

## Air One

In [None]:
# Air One pipeline stages used in the cells below: enrich, transform, and one
# validator per stage output (extracted, transformed, enriched).
from oiflib.air.one.enrich import enrich_air_one
from oiflib.air.one.transform import transform_air_one
from oiflib.air.one.validate import validate_air_one_extracted, validate_air_one_transformed, validate_air_one_enriched

In [2]:
# Load the raw Air One table with the project-level `extract` function, then
# preview the first rows (the output has one column per emission year).
air_one_extracted = extract(theme="air", dataset="one")
air_one_extracted.head()

Unnamed: 0,ShortPollName,NFRCode,SourceName,1990,1995,1998,1999,2000,2001,2002,...,2009,2010,2011,2012,2013,2014,2015,2016,2017,2018
0,CO,1A1a,Autogenerators,0.0,0.0,0.000861,0.000987,0.001225,0.001165,0.001197,...,0.072842,0.188518,0.38095,0.773427,1.108654,1.654888,2.648169,4.194255,4.936837,5.536873
1,CO,1A1a,Miscellaneous industrial/commercial combustion,0.37292,0.285545,0.104246,0.108912,0.118016,0.12327,0.126718,...,0.13508,0.12743,0.144871,0.148156,0.17232,0.125973,0.133799,0.127731,0.144326,0.136516
2,CO,1A1a,Power stations,91.485663,85.075695,48.035092,43.356297,52.875139,53.176906,52.253263,...,49.778303,54.550899,56.543332,67.445893,65.244769,55.655886,46.738127,30.714808,30.172769,38.88195
3,CO,1A1a,Public sector combustion,0.009175,0.015521,0.014372,0.01441,0.012844,0.013138,0.014227,...,0.013591,0.015413,0.017175,0.017028,0.018264,0.018127,0.019443,0.019173,0.022132,0.021411
4,CO,1A1b,Refineries - combustion,4.455526,4.998781,5.24891,4.862012,3.785558,2.391528,2.788338,...,4.460696,3.28726,2.8017,3.162767,3.266939,4.154463,5.523977,4.606405,4.050079,3.934826


In [3]:
# Run the extracted-stage validator on the raw frame and preview what it returns.
air_one_extracted_validated = validate_air_one_extracted(air_one_extracted)
air_one_extracted_validated.head()

Unnamed: 0,ShortPollName,NFRCode,SourceName,1990,1995,1998,1999,2000,2001,2002,...,2009,2010,2011,2012,2013,2014,2015,2016,2017,2018
0,CO,1A1a,Autogenerators,0.0,0.0,0.000861,0.000987,0.001225,0.001165,0.001197,...,0.072842,0.188518,0.38095,0.773427,1.108654,1.654888,2.648169,4.194255,4.936837,5.536873
1,CO,1A1a,Miscellaneous industrial/commercial combustion,0.37292,0.285545,0.104246,0.108912,0.118016,0.12327,0.126718,...,0.13508,0.12743,0.144871,0.148156,0.17232,0.125973,0.133799,0.127731,0.144326,0.136516
2,CO,1A1a,Power stations,91.485663,85.075695,48.035092,43.356297,52.875139,53.176906,52.253263,...,49.778303,54.550899,56.543332,67.445893,65.244769,55.655886,46.738127,30.714808,30.172769,38.88195
3,CO,1A1a,Public sector combustion,0.009175,0.015521,0.014372,0.01441,0.012844,0.013138,0.014227,...,0.013591,0.015413,0.017175,0.017028,0.018264,0.018127,0.019443,0.019173,0.022132,0.021411
4,CO,1A1b,Refineries - combustion,4.455526,4.998781,5.24891,4.862012,3.785558,2.391528,2.788338,...,4.460696,3.28726,2.8017,3.162767,3.266939,4.154463,5.523977,4.606405,4.050079,3.934826


In [4]:
# Transform the validated extract; the preview shows the columns
# ShortPollName, EmissionYear and Emission.
air_one_transformed = transform_air_one(air_one_extracted_validated)
air_one_transformed.head()

Unnamed: 0,ShortPollName,EmissionYear,Emission
0,NOx,1990,2397.847344
1,PM2.5,1990,174.144903
2,SO2,1990,3134.835121
3,NMVOC,1990,2109.138181
4,NH3,1990,232.760876


In [5]:
# Run the transformed-stage validator and preview what it returns.
air_one_transformed_validated = validate_air_one_transformed(air_one_transformed)
air_one_transformed_validated.head()

Unnamed: 0,ShortPollName,EmissionYear,Emission
0,NOx,1990,2397.847344
1,PM2.5,1990,174.144903
2,SO2,1990,3134.835121
3,NMVOC,1990,2109.138181
4,NH3,1990,232.760876


In [6]:
# Enrich the validated transformed frame; the preview gains an `Index` column
# (100.0 for the 1990 rows shown).
air_one_enriched = enrich_air_one(air_one_transformed_validated)
air_one_enriched.head()

Unnamed: 0,ShortPollName,EmissionYear,Emission,Index
0,NOx,1990,2397.847344,100.0
1,PM2.5,1990,174.144903,100.0
2,SO2,1990,3134.835121,100.0
3,NMVOC,1990,2109.138181,100.0
4,NH3,1990,232.760876,100.0


In [7]:
# Run the enriched-stage validator and display the whole result
# (no `.head()` here, so the notebook shows a truncated view of all rows).
air_one_enriched_validated = validate_air_one_enriched(air_one_enriched)
air_one_enriched_validated

Unnamed: 0,ShortPollName,EmissionYear,Emission,Index
0,NOx,1990,2397.847344,100.000000
1,PM2.5,1990,174.144903,100.000000
2,SO2,1990,3134.835121,100.000000
3,NMVOC,1990,2109.138181,100.000000
4,NH3,1990,232.760876,100.000000
...,...,...,...,...
110,NOx,2018,604.799111,25.222586
111,PM2.5,2018,83.142806,47.743462
112,SO2,2018,118.684955,3.786003
113,NMVOC,2018,526.173425,24.947319


## Air Two

In [None]:
from oiflib.air.two.enrich import enrich_air_two
from oiflib.air.two.transform import transform_air_two
from oiflib.air.two.validate import validate_air_two_extracted, validate_air_two_transformed, validate_air_two_enriched

In [8]:
# Load the raw Air Two table with the project-level `extract` function, then
# preview the first rows (NCFormat, IPCC, BaseYear, then one column per year).
air_two_extracted = extract(theme="air", dataset="two")
air_two_extracted.head()

Unnamed: 0,NCFormat,IPCC,BaseYear,1990,1995,1998,1999,2000,2001,2002,...,2009,2010,2011,2012,2013,2014,2015,2016,2017,2018
0,Agriculture,1A4ci_Agriculture/Forestry/Fishing:Stationary,448.236698,448.236698,549.552528,367.605127,438.324328,270.438785,376.522194,356.921967,...,196.404229,189.943964,197.062955,198.134368,191.095244,193.034749,191.176744,196.278836,199.425277,210.582748
1,,1A4cii_Agriculture/Forestry/Fishing:Off-road,2719.368825,2719.368825,2719.368825,2725.387975,2671.554418,2593.14938,2548.670148,2540.41398,...,2153.009015,2167.667714,2195.136386,2261.784209,2234.870631,2258.096455,2353.52913,2486.566932,2511.366672,2506.152075
2,,2D1_Lubricant_Use,7.767285,7.767285,9.314177,5.806529,5.89737,3.859179,3.64574,3.812217,...,0.654481,1.092524,0.948288,0.462465,0.531016,0.368654,0.285644,0.245859,0.24725,0.077642
3,,3A1a Enteric Fermentation - dairy cows,4937.54616,4937.54616,4586.433364,4373.198388,4449.788727,4224.340265,4092.551756,4064.182392,...,3440.086625,3480.483444,3431.86494,3362.599433,3415.878472,3581.989287,3629.92743,3549.68823,3597.600232,3575.281527
4,,3A1b Enteric Fermentation - other cattle,6909.494541,6909.494541,6736.106166,6539.617956,6516.940354,6368.233216,5899.738748,5650.652492,...,5834.378682,5901.872202,5784.199641,5714.541864,5710.529243,5695.926779,5654.749455,5702.018023,5649.271323,5595.92607


In [9]:
# Run the extracted-stage validator on the raw frame and preview what it returns.
air_two_extracted_validated = validate_air_two_extracted(air_two_extracted)
air_two_extracted_validated.head()

Unnamed: 0,NCFormat,IPCC,BaseYear,1990,1995,1998,1999,2000,2001,2002,...,2009,2010,2011,2012,2013,2014,2015,2016,2017,2018
0,Agriculture,1A4ci_Agriculture/Forestry/Fishing:Stationary,448.236698,448.236698,549.552528,367.605127,438.324328,270.438785,376.522194,356.921967,...,196.404229,189.943964,197.062955,198.134368,191.095244,193.034749,191.176744,196.278836,199.425277,210.582748
1,,1A4cii_Agriculture/Forestry/Fishing:Off-road,2719.368825,2719.368825,2719.368825,2725.387975,2671.554418,2593.14938,2548.670148,2540.41398,...,2153.009015,2167.667714,2195.136386,2261.784209,2234.870631,2258.096455,2353.52913,2486.566932,2511.366672,2506.152075
2,,2D1_Lubricant_Use,7.767285,7.767285,9.314177,5.806529,5.89737,3.859179,3.64574,3.812217,...,0.654481,1.092524,0.948288,0.462465,0.531016,0.368654,0.285644,0.245859,0.24725,0.077642
3,,3A1a Enteric Fermentation - dairy cows,4937.54616,4937.54616,4586.433364,4373.198388,4449.788727,4224.340265,4092.551756,4064.182392,...,3440.086625,3480.483444,3431.86494,3362.599433,3415.878472,3581.989287,3629.92743,3549.68823,3597.600232,3575.281527
4,,3A1b Enteric Fermentation - other cattle,6909.494541,6909.494541,6736.106166,6539.617956,6516.940354,6368.233216,5899.738748,5650.652492,...,5834.378682,5901.872202,5784.199641,5714.541864,5710.529243,5695.926779,5654.749455,5702.018023,5649.271323,5595.92607


In [10]:
# Transform the validated extract; the preview shows the columns
# NCFormat, IPCC, EmissionYear and CO2 Equiv, with NCFormat populated on every row.
air_two_transformed = transform_air_two(air_two_extracted_validated)
air_two_transformed.head()

Unnamed: 0,NCFormat,IPCC,EmissionYear,CO2 Equiv
0,Agriculture,1A4ci_Agriculture/Forestry/Fishing:Stationary,1990,448.236698
1,Agriculture,1A4cii_Agriculture/Forestry/Fishing:Off-road,1990,2719.368825
2,Agriculture,2D1_Lubricant_Use,1990,7.767285
3,Agriculture,3A1a Enteric Fermentation - dairy cows,1990,4937.54616
4,Agriculture,3A1b Enteric Fermentation - other cattle,1990,6909.494541


In [11]:
# Run the transformed-stage validator and preview what it returns.
air_two_transformed_validated = validate_air_two_transformed(air_two_transformed)
air_two_transformed_validated.head()

Unnamed: 0,NCFormat,IPCC,EmissionYear,CO2 Equiv
0,Agriculture,1A4ci_Agriculture/Forestry/Fishing:Stationary,1990,448.236698
1,Agriculture,1A4cii_Agriculture/Forestry/Fishing:Off-road,1990,2719.368825
2,Agriculture,2D1_Lubricant_Use,1990,7.767285
3,Agriculture,3A1a Enteric Fermentation - dairy cows,1990,4937.54616
4,Agriculture,3A1b Enteric Fermentation - other cattle,1990,6909.494541


In [12]:
# Enrich the validated transformed frame; the preview shows the columns
# OIF_category, EmissionYear and CO2 Equiv.
# NOTE(review): values look like per-category, per-year totals - confirm against enrich_air_two.
air_two_enriched = enrich_air_two(air_two_transformed_validated)
air_two_enriched.head()

Unnamed: 0,OIF_category,EmissionYear,CO2 Equiv
0,Agriculture,1990,33483.078737
1,Agriculture,1995,32023.748698
2,Agriculture,1998,31845.280086
3,Agriculture,1999,31873.301211
4,Agriculture,2000,30221.892227


In [13]:
# Run the enriched-stage validator and display the whole result
# (no `.head()` here, so the notebook shows a truncated view of all rows).
air_two_enriched_validated = validate_air_two_enriched(air_two_enriched)
air_two_enriched_validated

Unnamed: 0,OIF_category,EmissionYear,CO2 Equiv
0,Agriculture,1990,33483.078737
1,Agriculture,1995,32023.748698
2,Agriculture,1998,31845.280086
3,Agriculture,1999,31873.301211
4,Agriculture,2000,30221.892227
...,...,...,...
110,Waste,2014,17708.697346
111,Waste,2015,16940.514733
112,Waste,2016,16437.123344
113,Waste,2017,16805.735929
