In [1]:
# Load IPython's autoreload extension and switch it to mode 2, so that edits
# to imported packages are picked up without restarting the kernel.
# NOTE(review): the __main__ guard presumably keeps these magics from running
# when the notebook is imported rather than executed directly — confirm.
if __name__ == "__main__":
    %load_ext autoreload
    %autoreload 2

# 2016 GVA Publication

#### Import packages and define paths to directories

In [2]:
import pandas as pd
import numpy as np
import os
import sys


# Make the project packages importable by extending sys.path.
# Choose the entries according to where the packages live:
#   - in the root of this repo: use '.'
#   - at the same level as the repo: use '..'
#   - anywhere else: use the full file path
# Entries already present on sys.path are left alone, so re-running is harmless.
for candidate in ('../..', ''):
    if candidate in sys.path:
        continue
    sys.path.append(candidate)

# import package functions
from gva_data_processing import *

# set path to raw data excel file
# NOTE(review): this is an absolute, machine-specific location; the reader
# cells that take `path` will only work where this file exists at this path.
path = '/Volumes/Data/EAU/Statistics/Economic Estimates/2017 publications/November publication/GVA - current/Working_file_dcms_V11 2016 Data.xlsx'

## Part 1 - Read in, clean, and aggregate data
This section makes use of the source code in the GVA package's src folder

#### Read in and clean up raw data in excel file

In [None]:
# Read each input dataset from the raw Excel workbook at `path`.
# The read_* functions are not defined in this notebook; presumably they come
# from the `from gva_data_processing import *` star import — confirm.
# NOTE(review): binding the name `abs` shadows Python's built-in abs() for the
# rest of the notebook session.
abs = read_abs(path)
charities = read_charities(path)
tourism = read_tourism(path)
gva = read_gva(path)
sic91 = read_sic91(path)

#### Combine sic level data read in above into a single dataset

In [None]:
# Pass the abs, gva and sic91 inputs to combine_gva; charities and tourism are
# not used here and are only supplied to aggregate_data later on.
combined_gva = combine_gva(abs, gva, sic91)

#### Aggregate data to sector level
We want all the data in a single dataset so that sector totals can easily be added to subsector breakdowns, and so that we do not have to store the values twice, which could be confusing.

In [None]:
# Build the aggregate dataset from the combined data together with the gva,
# tourism and charities inputs.
agg = aggregate_data(combined_gva, gva, tourism, charities)
# Bare expression so the notebook displays the result of the cell.
agg

In [None]:
#pd.pivot_table(agg, values='gva', index=['sector', 'sub-sector'], columns=['year'], aggfunc=np.sum)

#### Save aggregated data to outputs directory

In [None]:
# Write the aggregate data to CSV without the index column. The file name is
# relative, so the file is created in the current working directory.
agg.to_csv('gva_aggregate_data_2016.csv', index=False)

## Part 2 - Produce written reports
This section makes use of the report_maker package

#### Read in aggregate data

This demonstrates that, once the CSV has been generated and published, all the publication outputs can be created from it using the code below.

In [3]:
# Load the aggregate data from the CSV file name that Part 1 writes (relative
# to the current working directory). This rebinds `agg`, so Part 2 onwards
# works from the saved file rather than the in-memory result of Part 1.
agg = pd.read_csv('gva_aggregate_data_2016.csv')

#### Create some summary tables
The `make_table()` function simply makes time series for different subsets of the data.

## Create dictionary to populate the HTML template

In [4]:
# Start with an empty template context.
# NOTE(review): `context` is rebound to a complete dict literal in the
# "Build Written Report" section, so this initialisation looks redundant.
context = {}

#### Define tables

In [5]:
# Summary tables built from the aggregate data with make_table():
# the 'All' table in plain and indexed=True form, plus one table for each of
# the Creative Industries, Digital Sector and Cultural Sector.
gva_current = make_table(agg, 'All')
gva_current_indexed = make_table(agg, 'All', indexed=True)
creative = make_table(agg, 'Creative Industries')
digital = make_table(agg, 'Digital Sector')
culture = make_table(agg, 'Cultural Sector')
# Spot check: display the value in the 'Sport' row for 2016.
gva_current.loc['Sport', 2016]

9.03549

#### Define individual stats

In [6]:
# Headline statistics derived from the `gva_current` summary table.

# 2016 column, shared by every calculation below.
gva_2016 = gva_current.loc[:, 2016]

# Percentage change of each row's 2016 value relative to 2010 and to 2015.
perc_change_2010 = (gva_2016 / gva_current.loc[:, 2010] - 1) * 100
perc_change_last_year = (gva_2016 / gva_current.loc[:, 2015] - 1) * 100

# 2016 value of the 'UK' row, and each row's 2016 value as a percentage of it.
uk_current_total = gva_current.loc['UK', 2016]
perc_of_uk = (gva_2016 / uk_current_total) * 100

#### Extended tables

In [7]:
# Extended version of the 'All' table: values rounded to 1 decimal place, with
# the three derived percentage columns appended in a fixed order.
gva_current_extended = make_table(agg, 'All').round(1)
for column_label, stat in [
    ('% change 2015-2016', perc_change_last_year),
    ('% change 2010-2016', perc_change_2010),
    ('% of UK GVA 2016', perc_of_uk),
]:
    gva_current_extended[column_label] = stat.round(1)

# Turn the row index into an ordinary column before serialising.
gva_current_extended = gva_current_extended.reset_index()

# Column labels are a mix of ints (years) and strings at this point; the
# conversion below was left disabled by the author.
# convert column names to strings to ensure order is maintained
#gva_current_extended.columns = [str(i) for i in list(gva_current_extended.columns)]

# 'split' orientation, with the index left out of the JSON.
gva_current_extended_json = gva_current_extended.to_json(orient='split', index=False)

#### Convert data for charts

In [8]:
# Chart data for the indexed totals: keep only the 'All DCMS sectors' and 'UK'
# rows of the indexed table, rounded to 1 decimal place.
totals_wide = (
    make_table(agg, 'All', indexed=True)
    .loc[['All DCMS sectors', 'UK']]
    .round(1)
)

# Reshape to long form: one row per (row label, year) with the value in a
# column named 'value'.
totals_long = totals_wide.stack().rename('value').reset_index()

# Parse the year as a date, then rename the columns to the
# symbol/date/price names used in the serialised records.
totals_long['year'] = pd.to_datetime(totals_long['year'], format='%Y')
totals_long.columns = ["symbol", "date", "price"]

# Final value of `totals` is a JSON string with one record per row.
totals = totals_long.to_json(orient='records')

### Build Written Report

Read the JSON template in as a Python dict, update it accordingly, then convert it back to JSON.

In [9]:
# considering just passing the global environment to build so we don't have to specify this, or do all of the
# above within a new environment to convert to dict. use context.append().
#
# Template context handed to build(): publication info, headline statistics,
# infographic text, and the JSON payloads prepared in the earlier cells.
context = {
    # publication info
    'release_date': '29 November 2017',
    
    # individual stats
    'uk_change_2010': round(perc_change_2010['UK'], 1),
    'uk_change_last_year': round(perc_change_last_year['UK'], 1),
    'uk_change_2010_cvm': 'NOT AVAILABLE',
    'uk_change_last_year_cvm': 'NOT AVAILABLE',
    # NOTE(review): round() is called without a decimals argument here, unlike
    # the 1-decimal rounding used for the other stats — confirm this is intended.
    'dcms_perc_uk': round(perc_of_uk['All DCMS sectors']),
    # NOTE(review): the key says "dcms" but the value is the 2016 figure for
    # the 'UK' row (uk_current_total), not 'All DCMS sectors' — confirm which
    # figure the template expects.
    'dcms_total': uk_current_total,
    
    # infographics
    # NOTE(review): these texts are hard-coded strings rather than values
    # derived from the tables above; they will not follow changes in the data.
    'money_bag': {'text': '£694'},
    'donut': {'text': '19.2'},
    'up_arrow_1': {'text': '20.6%'},
    'up_arrow_2': {'text': '40.6%'},
    
    # json data
    'totals_chart_data': totals,
    'gva_current_extended_json': gva_current_extended_json,
    
}

In [10]:
# Hand the populated context to report_maker's build().
# NOTE(review): presumably this renders the written report from the HTML
# template; build() is defined outside this notebook — confirm.
from report_maker import build
build(context)

# TODO (in development): build a markdown version that ignores the HTML
# template, and save images as PNG.

/Users/max.unsted/projects/gva_publication/publications/nov_2016


In [11]:
# from report_maker import create_app
# if __name__ == "__main__":
#     app = create_app()
#     app.run()

## Part 3 - Create Excel Tables
This section makes use of the spreadsheet_maker package. By default it will look for templates in publication_dir/spreadsheets/templates
https://github.com/pytest-dev/pytest/issues/2268

#### Generate templates
This saves a little manual work, helps make Excel files more predictable for programmatically accessing sheets, and keeps them more consistent across publications. Rerunning with the same filenames will abort instead of overwriting.

After generating templates, make any manual adjustments and save. Try to keep these simple and avoid more complex things like images, cell merging, complicated formatting, etc., since openpyxl can only read and write the basic features of an Excel file.

In [13]:
from spreadsheet_maker import make_template, populate_template

# Workbook templates to generate, as (template file name, sheet names in order).
# NOTE(review): overwrite=True is passed for every template below, whereas the
# notebook text says re-running with the same file names aborts instead of
# overwriting — confirm which behaviour is wanted, since manual adjustments to
# a template could be lost.
template_specs = [
    (
        'GVA_sector_tables_template.xlsx',
        [
            "1.1 - GVA current (£bn)",
            "1.1a - GVA current (2010=100)",
            "2.1 - GVA CVM (£bn)",
            "2.1a - GVA CVM (2010=100)",
        ],
    ),
    (
        'GVA_subsector_tables_template.xlsx',
        [
            "1 - Creative Industries-current",
            "2 - Digital Sector-current",
            "3 - Cultural Sector-current",
            "4 - Computer Games-current",
            "5 - Creative Industries-CVM",
            "6 - Digital Sector-CVM",
            "7 - Cultural Sector-CVM",
        ],
    ),
]
for template_fn, sheet_names in template_specs:
    make_template(fn=template_fn, sheets=sheet_names, overwrite=True)

# Content for the sector workbook, keyed by sheet name.
# NOTE(review): the values are short strings ('ho', 'hi', 'lo', 'sho') that
# look like placeholders rather than real tables — confirm before publishing.
sector_tables = {
    "1.1 - GVA current (£bn)": 'ho',
    "1.1a - GVA current (2010=100)": 'hi',
    "2.1 - GVA CVM (£bn)": 'lo',
    "2.1a - GVA CVM (2010=100)": 'sho',
}
populate_template(fn='GVA_sector_tables.xlsx', tables=sector_tables)

## Part 4 - Testing

#### Dictionary of summary tables for use by the test script

In [None]:
# Summary tables keyed by name, for use by the test script. Every entry is a
# fresh make_table() call on `agg`, described as
# (dictionary key, sector argument, extra keyword arguments).
table_requests = [
    ('gva_current', 'All', {}),
    ('gva_current_indexed', 'All', {'indexed': True}),
    ('creative', 'Creative Industries', {}),
    ('digital', 'Digital Sector', {}),
    ('culture', 'Cultural Sector', {}),
]
summary_tables = {
    key: make_table(agg, sector, **options)
    for key, sector, options in table_requests
}
