# 2016 GVA Publication

#### Import packages and define paths to directories

In [1]:
# this will automatically reload the gva package when changes have been made
#%load_ext autoreload
#%autoreload 2

import pandas as pd
import numpy as np
import os
import sys

# Locate the repository root: if a 'src' entry is visible from the current
# working directory we are already at the root, otherwise assume the notebook
# is being run from a directory one level below it.
if os.path.exists('src'):
    module_path = os.path.abspath('')
else:
    module_path = os.path.abspath('..')

# add root directory to sys.path so that the `src` package can be imported
if module_path not in sys.path:
    sys.path.append(module_path)

# import package functions (must come after the sys.path update above)
from src.functions import (
    read_abs,
    read_charities,
    read_tourism,
    read_gva,
    read_sic91,
    combine_gva,
    aggregate_data,
    make_table,
)

# directory the aggregated CSV for this publication is written to / read from
output_dir = os.path.join(module_path, 'publications', 'Nov_2016')

# Path to the raw data excel file. The default points at a mounted network
# volume, so it only resolves on machines with that mount; set the
# GVA_RAW_DATA_PATH environment variable to use a different copy of the workbook.
DEFAULT_RAW_DATA_PATH = '/Volumes/Data/EAU/Statistics/Economic Estimates/2017 publications/November publication/GVA - current/Working_file_dcms_V11 2016 Data.xlsx'
path = os.environ.get('GVA_RAW_DATA_PATH', DEFAULT_RAW_DATA_PATH)

## Part 1 - Read in, clean, and aggregate data

#### Read in and clean up raw data in excel file

In [2]:
abs = read_abs(path)
charities = read_charities(path)
tourism = read_tourism(path)
gva = read_gva(path)
sic91 = read_sic91(path)

#### Combine sic level data read in above into a single dataset

In [3]:
combined_gva = combine_gva(abs, gva, sic91)

#### Aggregate data to sector level

In [5]:
# Aggregate the combined SIC-level data to sector level; the gva, tourism and
# charities tables are passed alongside it as separate inputs.
agg = aggregate_data(combined_gva, gva, tourism, charities)
# Preview only the first rows instead of rendering the whole frame.
agg.head(10)

Unnamed: 0,year,sector,sub-sector,gva
0,2010,Creative Industries,Crafts,2.919926e+02
1,2011,Creative Industries,Crafts,3.076828e+02
2,2012,Creative Industries,Crafts,2.843576e+02
3,2013,Creative Industries,Crafts,2.158877e+02
4,2014,Creative Industries,Crafts,3.955191e+02
5,2015,Creative Industries,Crafts,3.677077e+02
6,2016,Creative Industries,Crafts,4.213056e+02
7,2010,Creative Industries,Publishing,2.448452e+03
8,2011,Creative Industries,Publishing,2.066002e+03
9,2012,Creative Industries,Publishing,2.472863e+03


#### Save aggregated data to output directory

In [None]:
# write the aggregated data to the publication's output directory,
# leaving the DataFrame index out of the file
agg_csv_path = os.path.join(output_dir, 'gva_aggregate_data_2016.csv')
agg.to_csv(agg_csv_path, index=False)

## Part 2 - Produce outputs

#### Read in aggregate data (This is so Part 1 doesn't need to be rerun)

In [4]:
# reload the saved aggregate CSV; this rebinds `agg`, so Part 2 can be run
# on its own once the file exists
agg_csv_path = os.path.join(output_dir, 'gva_aggregate_data_2016.csv')
agg = pd.read_csv(agg_csv_path)

#### Create some summary tables

In [5]:
# Summary table for the 'All' selection, shown as the cell output: per the
# output below, one row per sector plus 'All DCMS sectors' and 'UK' rows,
# with one column per year.
make_table(agg, 'All')

year,2010,2011,2012,2013,2014,2015,2016
sector,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
Civil Society (Non-market charities),19.25028,19.64664,16.1064,18.66608,19.78424,21.28685,20.93322
Creative Industries,63.4246,67.24801,70.72261,74.86772,79.95282,85.30785,91.8283
Cultural Sector,21.06353,21.74142,22.62486,23.30614,24.52089,25.6374,26.76584
Digital Sector,94.50272,99.55116,101.73033,106.80038,108.39366,110.15545,116.49352
Gambling,8.401,9.29,9.883,9.97,10.389,11.051,10.647
Sport,7.02548,7.39687,7.88085,7.49058,7.75243,8.61018,9.03549
Telecoms,24.745,25.454,26.013,28.081,30.012,30.372,29.955
Tourism,49.15,53.947,57.343,58.997,60.43757,64.64154,66.06489
All DCMS sectors,192.67034,205.18862,209.87861,220.82625,227.49766,239.83604,248.51526
UK,1422.028,1458.82,1505.718,1564.43,1638.722,1684.937,1744.435


#### Save summary tables to a dictionary so they can be easily used by the test script

In [6]:
# (dictionary key, sector argument, extra keyword arguments) for each table
table_specs = [
    ('gva_current', 'All', {}),
    ('gva_current_indexed', 'All', {'indexed': True}),
    ('creative', 'Creative Industries', {}),
    ('digital', 'Digital Sector', {}),
    ('culture', 'Cultural Sector', {}),
]

# build every summary table from the aggregate data, keyed by its short name
summary_tables = {
    table_name: make_table(agg, sector_name, **extra_kwargs)
    for table_name, sector_name, extra_kwargs in table_specs
}
