In [1]:
from gssutils import *

# Build the scraper for the ONS "International Trade in Services reference tables"
# landing page. The URL is split over two adjacent literals purely for line length.
scraper = Scraper(
    'https://www.ons.gov.uk/businessindustryandtrade/internationaltrade/datasets/'
    'internationaltradeinservicesreferencetables'
)
# Leave the scraper as the last expression so the notebook renders its summary.
scraper

## International Trade in Services

The tables show International Trade in Services through a variety of formats. Some tables compare figures over several years but the majority provide the most recent geographic information by industry or product. The tables provide information in as much detail as possible without disclosing the details of any individual companies.

### Distributions

1. International Trade in Services ([MS Excel Spreadsheet](https://www.ons.gov.uk/file?uri=/businessindustryandtrade/internationaltrade/datasets/internationaltradeinservicesreferencetables/alltables2016/internationaltradeinservices2016.xls))


In [2]:
# Index every databaker tab of the scraped distribution by its sheet name so that
# individual tables can be looked up by name.
tabs = dict(
    (sheet.name, sheet)
    for sheet in scraper.distribution().as_databaker()
)
# Show the available sheet names as the cell output.
tabs.keys()

dict_keys(['Contents', 'Table A0', 'Table B1', 'Table B2', 'Table B3', 'Table C0', 'Table C1 2009-2012', 'Table C1 2013-2016', 'Table C2 2009-2012', 'Table C2 2013-2016', 'Table C3 2009-2012', 'Table C3 2013-2016', 'Table C4 2009-2012', 'Table C4 2013-2016', 'Table C5 2009-2012', 'Table C5 2013-2016', 'Table C6 2009-2012', 'Table C6 2013-2016', 'Table C7 2009-2012', 'Table C7 2013-2016', 'Table D1', 'Table D2'])

In [3]:
# Bind `next_table` to an empty DataFrame.
# NOTE(review): no other cell visible in this notebook reads `next_table` (later cells
# use `new_table`) — confirm whether the notebooks executed via %run depend on it, or
# whether this is a leftover / misspelling.
next_table = pd.DataFrame()

In [4]:
%%capture

def process_tab(script_name):
    """Execute one per-table notebook and return the table it leaves behind.

    Parameters
    ----------
    script_name : str
        Path of the notebook to execute; callers in this notebook pass
        '<name>.ipynb'.

    Returns
    -------
    Whatever object is bound to the global name ``new_table`` once the run has
    finished.
    NOTE(review): presumably each executed notebook assigns its tidied DataFrame
    to ``new_table`` in the shared kernel namespace — confirm against those
    notebooks.
    """
    # IPython substitutes $script_name with the value of the argument before
    # handing the line to the %run magic.
    %run "$script_name"
    # `new_table` is not assigned anywhere in this function, so this is a global lookup.
    return new_table

# Run each per-table notebook in the order listed and stack the returned tables
# into a single frame. Entries are notebook file names without the '.ipynb' suffix.
tidy = pd.concat([
    process_tab(notebook + '.ipynb')
    for notebook in (
        "TabA0", "TabB1", "TabB2", "TabB3", "TabC0",
        "Table C1 2009-2012", "Table C1 2013-2016",
        "Table C2 2009-2012", "Table C2 2013-2016",
        "Table C3 2009-2012", "Table C3 2013-2016",
        "Table C4 2009-2012", "Table C4 2013-2016",
        "Table C5 2009-2012", "Table C5 2013-2016",
        "Table C6 2009-2012", "Table C6 2013-2016",
        "Table C7 2009-2012", "Table C7 2013-2016",
        "Table D1", "Table D2",
    )
])

In [5]:
# Normalise the partner label 'Whole world' to 'World'.
# The vectorised .str accessor is used (matching the asterisk clean-up cell that follows)
# so a missing label stays NaN instead of raising AttributeError inside a lambda;
# regex=False makes the match a plain literal substring replacement.
tidy['ONS Partner Geography'] = tidy['ONS Partner Geography'].str.replace(
    'Whole world', 'World', regex=False
)

In [6]:
# Remove any run of trailing '*' characters from the partner geography labels
# (presumably footnote markers carried over from the spreadsheet — TODO confirm).
partner_labels = tidy['ONS Partner Geography']
tidy['ONS Partner Geography'] = partner_labels.str.rstrip('*')

In [7]:
import urllib.request as request
import csv
import io
import requests

# Download the ITIS geography codelist once. (The earlier version fetched the same URL
# twice and never used the first copy.)
url = 'https://raw.githubusercontent.com/ONS-OpenData/ref_trade/master/codelists/itis-geographies.csv'
response = requests.get(url)
# Fail loudly on an HTTP error rather than trying to parse an error page as CSV.
response.raise_for_status()
geography_codelist = pd.read_csv(io.StringIO(response.content.decode('utf-8')))

# Attach the codelist columns to the combined table by matching the partner label.
# The merge must start from `tidy` (all tables concatenated, labels already cleaned);
# merging `new_table` would keep only the last table loaded by %run and throw away
# the label clean-up done in the preceding cells.
# how='left' keeps every observation; labels absent from the codelist get NaN codes.
tidy = pd.merge(
    tidy,
    geography_codelist,
    how='left',
    left_on='ONS Partner Geography',
    right_on='Label',
)

# Rename to the output column names: the codelist 'Notation' becomes the trade-area
# code column and 'BOP Service' becomes 'ITIS Service'. Other columns are untouched.
tidy = tidy.rename(columns={
    'Notation': 'ONS Trade Areas ITIS',
    'BOP Service': 'ITIS Service',
})

In [8]:
# Remove the codelist helper columns together with the original partner label column.
tidy = tidy.drop(columns=[
    'Label',
    'Parent Notation',
    'Sort Priority',
    'Description',
    'ONS Partner Geography',
])

In [9]:
# Keep exactly these columns, in this order; a missing column raises KeyError.
output_columns = [
    'ONS Trade Areas ITIS',
    'Year',
    'Flow',
    'ITIS Service',
    'International Trade Basis',
    'Measure Type',
    'Value',
    'Unit',
]
tidy = tidy[output_columns]

In [10]:
from pathlib import Path
# Make sure the 'out' directory exists (no error if it is already there), then
# write the tidy table into it as CSV without the DataFrame index.
out = Path('out')
out.mkdir(exist_ok=True)
tidy_csv = out.joinpath('tidy.csv')
tidy.to_csv(tidy_csv, index=False)

In [11]:
from gssutils.metadata import THEME
# Set the dataset family and theme on the scraped dataset before serialising it.
scraper.dataset.family = 'Trade'
scraper.dataset.theme = THEME['business-industry-trade-energy']

# Write the output of generate_trig() next to the CSV; the file is opened in
# binary mode, so the result is written as-is with no text encoding step.
with (out / 'dataset.trig').open('wb') as trig_file:
    trig_file.write(scraper.generate_trig())