In [21]:
# Enable IPython's autoreload extension so that edits to our packages are picked
# up without restarting the kernel (mode 2 reloads modules before code is run).
# Re-running this cell is harmless: IPython only reports that the extension is
# already loaded.
# NOTE(review): the __main__ guard presumably keeps the magics from running when
# this code is not executed as the top-level notebook (e.g. under pytest) -- confirm.
if __name__ == "__main__":
    %load_ext autoreload
    %autoreload 2

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


# 2016 GVA Publication

In [88]:
# The cwd is the directory where the notebook is located.
# This means we need to make sure that paths used in the notebook are not
# relative to the cwd, otherwise they will fail when the notebook is run by pytest.
# This seems a better approach than changing the cwd to gva/.
import os
os.getcwd()

'/Users/max.unsted/projects/gva/publications/Nov_2016'

#### Import packages and define paths to directories

In [1]:
# this will automatically reload the gva package when changes have been made
#%load_ext autoreload
#%autoreload 2

import pandas as pd
import numpy as np
import os
import sys

# Locate the project root: if a `src` entry is visible from the current working
# directory we are already at the root, otherwise the root is taken to be two
# levels above the cwd.
module_path = os.path.abspath('' if os.path.exists(os.path.abspath('src')) else '../..')

# Make the root importable so that our packages (e.g. `src`) can be found;
# skip the append when the entry is already present.
if not any(entry == module_path for entry in sys.path):
    sys.path.append(module_path)
        
# import package functions
from src.functions import read_abs, read_charities, read_tourism, read_gva, read_sic91, combine_gva, aggregate_data, make_table

# Directory that receives this publication's outputs (the aggregate CSV is
# written here and read back later in the notebook). Path components are passed
# separately so the platform's separator is used throughout.
output_dir = os.path.join(module_path, 'publications', 'Nov_2016')

#package_dir = os.path.dirname(os.path.abspath(__file__))
#output_dir = os.path.join(module_path, 'outputs')

# Path to the raw-data Excel file. The default is the original machine-specific
# absolute location; set the GVA_RAW_DATA_PATH environment variable to run the
# notebook against a copy stored elsewhere without editing this cell.
path = os.environ.get(
    'GVA_RAW_DATA_PATH',
    '/Volumes/Data/EAU/Statistics/Economic Estimates/2017 publications/November publication/GVA - current/Working_file_dcms_V11 2016 Data.xlsx',
)

## Part 1 - Read in, clean, and aggregate data
This section makes use of the source code in the GVA package's src folder

#### Read in and clean up raw data in excel file

In [2]:
# Read and clean each dataset from the raw Excel file, one reader per dataset,
# called in the order listed.
# NOTE: `abs` shadows the Python builtin of the same name from here on; the name
# is kept because the combine step further down refers to it.
abs, charities, tourism, gva, sic91 = [
    reader(path)
    for reader in (read_abs, read_charities, read_tourism, read_gva, read_sic91)
]

#### Combine sic level data read in above into a single dataset

In [3]:
# Combine the SIC-level datasets read in above into a single dataset.
# NOTE: `abs` here is the object returned by read_abs(), not the Python builtin.
combined_gva = combine_gva(abs, gva, sic91)

#### Aggregate data to sector level
We want all the data in a single dataset so that sector totals can be easily added to subsector breakdowns, and so that we do not have to store the values twice, which could be confusing.

In [139]:
# Aggregate to sector level; the tourism and charities data are passed in
# alongside the combined SIC-level data.
# NOTE(review): the captured output shows a pandas concat sorting warning raised
# inside this call -- passing an explicit `sort=` to that pd.concat would silence it.
agg = aggregate_data(combined_gva, gva, tourism, charities)

of pandas will change to not sort by default.

To accept the future behavior, pass 'sort=False'.


  df = pd.concat([subsector_agg, sector_agg], axis=0)


In [144]:
#pd.pivot_table(agg, values='gva', index=['sector', 'sub-sector'], columns=['year'], aggfunc=np.sum)

#### Save aggregated data to outputs directory

In [5]:
# Write the aggregated data to the publication's output directory, without the
# row index, so the later sections can be rebuilt from this file.
agg.to_csv(os.path.join(output_dir, 'gva_aggregate_data_2016.csv'), index=False)

## Part 2 - Produce written reports
This section makes use of the report_maker package

#### Read in aggregate data

This demonstrates that, once the CSV has been generated and published, all the publication outputs can be created from it, using the code below.

In [6]:
# Reload the aggregate data from the CSV in the output directory; this replaces
# the in-memory `agg` so everything below depends only on the saved file.
agg = pd.read_csv(os.path.join(output_dir, 'gva_aggregate_data_2016.csv'))

#### Create some summary tables
The `make_table()` function simply makes time series for different subsets of the data

## Create dictionary to populate the HTML template

In [312]:
# Start with an empty context; the full dictionary is rebuilt as a literal in
# the report-building cell further down.
context = dict()

#### Define tables

In [320]:
# Time-series tables built from the aggregate data: one covering all sectors,
# its indexed variant, and one each for three individual sectors.
gva_current = make_table(agg, 'All')
gva_current_indexed = make_table(agg, 'All', indexed=True)
creative = make_table(agg, 'Creative Industries')
digital = make_table(agg, 'Digital Sector')
culture = make_table(agg, 'Cultural Sector')

#### Define individual stats

In [383]:
# Percentage change of every row's 2016 value relative to 2010 and to 2015.
perc_change_2010 = 100 * (gva_current[2016] / gva_current[2010] - 1)
perc_change_last_year = 100 * (gva_current[2016] / gva_current[2015] - 1)

# 2016 value of the 'UK' row, and every row's 2016 value as a percentage of it.
uk_current_total = gva_current.loc['UK', 2016]
perc_of_uk = 100 * (gva_current[2016] / uk_current_total)

#### Extended tables

In [365]:
# Start from the all-sector table rounded to one decimal place, then append the
# derived percentage columns, each rounded to one decimal place as well.
gva_current_extended = make_table(agg, 'All').round(1)
gva_current_extended['% change 2015-2016'] = perc_change_last_year.round(1)
gva_current_extended['% change 2010-2016'] = perc_change_2010.round(1)
gva_current_extended['% of UK GVA 2016'] = perc_of_uk.round(1)

# Turn the index back into an ordinary column so it is serialised with the data.
gva_current_extended = gva_current_extended.reset_index()

# Kept for reference: converting column names to strings to ensure order is maintained.
#gva_current_extended.columns = [str(i) for i in list(gva_current_extended.columns)]

# Serialise in 'split' orientation without the row index; the bare name on the
# last line displays the resulting JSON string as the cell output.
gva_current_extended_json = gva_current_extended.to_json(orient='split', index=False)
gva_current_extended_json

'{"columns":["sector",2010,2011,2012,2013,2014,2015,2016,"% change 2015-2016","% change 2010-2016","% of UK GVA 2016"],"data":[["Civil Society (Non-market charities)",19.3,19.6,16.1,18.7,19.8,21.3,20.9,-1.7,8.7,1.2],["Creative Industries",63.4,67.2,70.7,74.9,80.0,85.3,91.8,7.6,44.8,5.3],["Cultural Sector",21.1,21.7,22.6,23.3,24.5,25.6,26.8,4.4,27.1,1.5],["Digital Sector",94.5,99.6,101.7,106.8,108.4,110.2,116.5,5.8,23.3,6.7],["Gambling",8.4,9.3,9.9,10.0,10.4,11.1,10.6,-3.7,26.7,0.6],["Sport",7.0,7.4,7.9,7.5,7.8,8.6,9.0,4.9,28.6,0.5],["Telecoms",24.7,25.5,26.0,28.1,30.0,30.4,30.0,-1.4,21.1,1.7],["Tourism",49.2,53.9,57.3,59.0,60.4,64.6,66.1,2.2,34.4,3.8],["All DCMS sectors",192.7,205.2,209.9,220.8,227.5,239.8,248.5,3.6,29.0,14.2],["UK",1422.0,1458.8,1505.7,1564.4,1638.7,1684.9,1744.4,3.5,22.7,100.0]]}'

#### Convert data for charts

In [351]:
# Chart data: take the indexed table, keep the 'All DCMS sectors' and 'UK' rows,
# round to one decimal place and reshape to one row per (row label, year) pair.
totals = (
    make_table(agg, 'All', indexed=True)
    .loc[['All DCMS sectors', 'UK']]
    .round(1)
    .stack()
    .rename('value')
    .reset_index()
)
# Parse the year labels as dates.
totals['year'] = pd.to_datetime(totals['year'], format='%Y')
# Relabel the three columns (presumably the field names the chart expects).
totals.columns = ["symbol", "date", "price"]
# From here on `totals` is a JSON string of records rather than a DataFrame.
totals = totals.to_json(orient='records')

### Build Written Report

Read the JSON template in as a Python dict, update it accordingly, then convert it back to JSON.

In [385]:
from report_maker import build
# TODO: from report_maker, import build (the function) / create_app.
# TODO: add inits to packages.
# Idea: consider just passing the global environment to build so we don't have
# to specify all of this here, or do all of the above within a new environment
# and convert that to a dict (a dict has no append(); context.update() would be
# the way to add entries incrementally).

# Values handed to build.all() for the written report.
context = {
    # publication info
    'release_date': '29 November 2017',
    
    # individual stats
    'uk_change_2010': round(perc_change_2010['UK'], 1),
    'uk_change_last_year': round(perc_change_last_year['UK'], 1),
    'uk_change_2010_cvm': 'NOT AVAILABLE',
    'uk_change_last_year_cvm': 'NOT AVAILABLE',
    # rounded to a whole number, unlike the one-decimal figures above
    'dcms_perc_uk': round(perc_of_uk['All DCMS sectors']),
    # NOTE(review): 'dcms_total' is filled with the UK total (uk_current_total),
    # not the 'All DCMS sectors' figure -- confirm this is what the template expects.
    'dcms_total': uk_current_total,
    
    # infographics (hard-coded text, not derived from the data in this notebook)
    'money_bag': {'text': '£994'},
    'donut': {'text': '19.2'},
    'up_arrow_1': {'text': '20.6%'},
    'up_arrow_2': {'text': '40.6%'},
    
    # json data (both values are already JSON strings)
    'totals_chart_data': totals,
    'gva_current_extended_json': gva_current_extended_json,
    
}
build.all(context)

In [46]:
from report_maker import testing

In [47]:
# Display the PATH value exposed by report_maker.testing (in the captured run
# it was the report_maker package directory).
testing.PATH

'/Users/max.unsted/projects/gva/report_maker'

In [None]:
from report_maker import app
# Only start the app when this code runs as the top-level notebook/script.
# NOTE(review): presumably the guard stops the app from being started when the
# notebook is executed by the test runner -- confirm.
if __name__ == "__main__":
    app.run()

In [85]:
# Scratch check: show how a sub-directory path is built from the current
# working directory.
import os
cwd = os.getcwd()
os.path.join(cwd, "deep")

'/Users/max.unsted/projects/gva/publications/Nov_2016/deep'

In [19]:
# Show the module name the notebook code runs under; this is the value the
# `if __name__ == "__main__":` guards elsewhere in the notebook test against.
print(__name__)

__main__


## Part 3 - Create Excel Tables
This section makes use of the spreadsheet_maker package. By default it will look for templates in publication_dir/spreadsheets/templates
https://github.com/pytest-dev/pytest/issues/2268

## Testing

#### Dictionary of summary tables for use by the test script

In [7]:
# Summary tables keyed by name, for use by the test script. Each table is built
# afresh from `agg` rather than reusing variables from other cells.
summary_tables = dict(
    gva_current=make_table(agg, 'All'),
    gva_current_indexed=make_table(agg, 'All', indexed=True),
    creative=make_table(agg, 'Creative Industries'),
    digital=make_table(agg, 'Digital Sector'),
    culture=make_table(agg, 'Cultural Sector'),
)
