In [1]:
from gssutils import *

# Point the scraper at the ONS landing page for this dataset; the URL is
# split across two adjacent string literals purely to keep the lines short.
scraper = Scraper(
    'https://www.ons.gov.uk/businessindustryandtrade/internationaltrade/datasets/'
    'internationaltradeinservicesreferencetables'
)
# Leaving the scraper as the last expression makes the notebook render it.
scraper



## International trade in services

Detailed breakdown of annual trade in UK services estimates, analysing data by country, product and industry.

### Distributions

1. International trade in services ([MS Excel Spreadsheet](https://www.ons.gov.uk/file?uri=/businessindustryandtrade/internationaltrade/datasets/internationaltradeinservicesreferencetables/alltables2017/internationaltradeinservices2017.xls))


In [2]:
# Index the spreadsheet tabs of the scraped distribution by their name.
tabs = dict(
    (tab.name, tab)
    for tab in scraper.distribution().as_databaker()
)
# Show the available tab names as the cell output.
tabs.keys()

dict_keys(['Contents', 'Table A0', 'Table B1', 'Table B2', 'Table B3', 'Table C0', 'Table C1 2009-2012', 'Table C1 2013-2017', 'Table C2 2009-2012', 'Table C2 2013-2017', 'Table C3 2009-2012', 'Table C3 2013-2017', 'Table C4 2009-2012', 'Table C4 2013-2017', 'Table C5 2009-2012', 'Table C5 2013-2017', 'Table C6 2009-2012', ' Table C6 2013-2017', 'Table C7 2009-2012', 'Table C7 2013-2017', 'Table D1', 'Table D2'])

In [3]:
# Start with an empty DataFrame bound to `next_table`.
# NOTE(review): `next_table` is not referenced again in the visible cells, and
# process_tab returns `new_table` rather than this name -- confirm whether
# this cell is still needed or whether the name is a typo.
next_table = pd.DataFrame()

In [4]:
%%capture

# Execute one per-table notebook and hand back the table it produced.
#
# script_name: file name of the notebook to execute; it is interpolated into
#              the IPython run magic below.
# Returns:     whatever the global name `new_table` is bound to after the run.
def process_tab(script_name):
    %run "$script_name"
    # NOTE(review): `new_table` is never assigned in this notebook; presumably
    # each executed notebook defines it and the run magic copies it into this
    # namespace -- confirm against the per-table notebooks.
    return new_table

# Notebooks to execute, one per spreadsheet table, in the order their
# results are stacked. The '.ipynb' extension is appended when each is run.
tab_notebooks = [
    "TabA0",
    "TabB1",
    "TabB2",
    "TabB3",
    "Table C1 2009-2012",
    "Table C1 2013-2017",
    "Table C2 2009-2012",
    "Table C2 2013-2017",
    "Table C3 2009-2012",
    "Table C3 2013-2017",
    "Table C4 2009-2012",
    "Table C4 2013-2017",
    "Table C5 2009-2012",
    "Table C5 2013-2017",
    "Table C6 2009-2012",
    "Table C6 2013-2017",
    "Table C7 2009-2012",
    "Table C7 2013-2017",
    "Table D1",
    "Table D2",
]

# Run every notebook in turn and concatenate the resulting tables.
tidy = pd.concat([process_tab(name + '.ipynb') for name in tab_notebooks])

In [5]:
# Raw geography label -> label to use instead. Values that are not listed
# here are left exactly as they are.
geography_relabels = {
    'Whole world': 'World',
    'Total Unallocated ': 'Total Unallocated',
    'Total Unallocated': 'Total Unallocated',
    'International Organisations': 'International Organisations',
    'Total Australasia and Oceania and Total Unallocated': 'Total Australasia and Oceania and Total Unallocated',
    'Total Australasia, Oceania and Others*': 'Total Australasia, Oceania and Others',
}

# Look each value up in the table, falling back to the value itself.
tidy['ONS Partner Geography'] = tidy['ONS Partner Geography'].map(
    lambda label: geography_relabels.get(label, label)
)


In [6]:
# Raw 'BOP Service' label -> label to use instead. Values that are not listed
# here are left exactly as they are. The table is built once rather than
# being re-created for every row inside the lambda.
bop_service_relabels = {
    # Collapse the doubled space after "Construction".
    'Construction  Services-Construction in the UK': 'Construction Services-Construction in the UK',
    'Construction  Services-Construction outside the UK': 'Construction Services-Construction outside the UK',
    'Construction  Services-Total': 'Construction Services-Total',
    # Correct the "techincal" misspelling in the source label.
    'Technical services-Other techincal services': 'Technical services-Other technical services',
    'Merchanting and Other Trade related Services-Other trade - related services': 'Merchanting and Other Trade related Services-Other trade related services',
    # NOTE(review): the replacement below is spelled "techinical"; presumably
    # this matches the label used in the services codelist -- confirm.
    'Technical and Scientific Services-Scientific and other technical services inc surveying': 'Technical and Scientific Services-Scientific and other techinical services inc surveying',
    'Telecommunication, Computer and Information Services-Postal and courier': 'Telecommunication, Computer and Information Services-Postal and courier',
    'Telecommunication, Computer and Information Services-Telecommunications': 'Telecommunication, Computer and Information Services-Telecommunications',
    'Telecommunication, Computer and Information Services-Computer Services': 'Telecommunication, Computer and Information Services-Computer Services',
    'Telecommunication, Computer and Information Services-Publishing Services': 'Telecommunication, Computer and Information Services-Publishing Services',
    'Telecommunication, Computer and Information Services-News agency Services': 'Telecommunication, Computer and Information Services-News agency Services',
    'Telecommunication, Computer and Information Services-Information Services': 'Telecommunication, Computer and Information Services-Information Services',
    # Fixed: the total used to be relabelled as "Information Services", which
    # filed the category total under one of its own components.
    'Telecommunication, Computer and Information Services-Total': 'Telecommunication, Computer and Information Services-Total',
}

# Look each value up in the table, falling back to the value itself.
tidy['BOP Service'] = tidy['BOP Service'].map(
    lambda label: bop_service_relabels.get(label, label)
)

In [7]:
def remove_whitespace(x):
    """Return ``x`` with every whitespace character removed.

    A string is split on runs of whitespace and the pieces are joined back
    together with nothing between them, so leading, trailing and interior
    whitespace all disappear. Any non-string value (``None``, ``NaN``,
    numbers, ...) is returned unchanged, which lets the function be applied
    to a column that contains missing values.
    """
    # An explicit type check replaces the former bare ``except``, which also
    # swallowed unrelated errors such as KeyboardInterrupt.
    if isinstance(x, str):
        return "".join(x.split())
    return x

# Strip all whitespace from every Flow label, then store the cleaned column.
flow_without_whitespace = tidy['Flow'].apply(remove_whitespace)
tidy['Flow'] = flow_without_whitespace


In [8]:
import urllib.request as request
import csv
import io
import requests

def _load_codelist(url):
    """Download the CSV codelist at ``url`` and return it as a DataFrame.

    Raises ``requests.HTTPError`` when the server answers with an error
    status, so an error page is never parsed as if it were the codelist.
    """
    # Each codelist is fetched exactly once; the previous code downloaded
    # every file twice and never used the first copy. The timeout stops the
    # notebook hanging forever on a stalled connection.
    response = requests.get(url, timeout=60)
    response.raise_for_status()
    return pd.read_csv(io.StringIO(response.content.decode('utf-8')))


# Geography codes: match each partner geography label against the codelist's
# 'Label' column and keep the codelist's 'Notation' as the area code. The
# left join keeps rows whose label has no match.
geography_codelist = _load_codelist(
    'https://raw.githubusercontent.com/ONS-OpenData/ref_trade/master/codelists/itis-geographies.csv'
)
tidy = pd.merge(tidy, geography_codelist, how='left',
                left_on='ONS Partner Geography', right_on='Label')
tidy = tidy.rename(columns={'Notation': 'ONS Trade Areas ITIS'})

# Service codes: same approach for the service labels. The codelist columns
# that collide with those added by the first merge receive pandas' default
# '_x' / '_y' suffixes, which the later drop step relies on.
service_codelist = _load_codelist(
    'https://raw.githubusercontent.com/ONS-OpenData/ref_trade/master/codelists/itis-services.csv'
)
tidy = pd.merge(tidy, service_codelist, how='left',
                left_on='BOP Service', right_on='Label')
tidy = tidy.rename(columns={'Notation': 'ITIS Service'})

In [9]:
# The original label columns and the codelist helper columns (suffixed _x/_y
# by the two merges) are no longer needed once the codes are attached.
columns_to_discard = [
    'ONS Partner Geography',
    'BOP Service',
    'Parent Notation_y',
    'Sort Priority_y',
    'Description_y',
    'Label_y',
    'Parent Notation_x',
    'Sort Priority_x',
    'Description_x',
    'Label_x',
]
tidy = tidy.drop(columns=columns_to_discard)

In [10]:
# Keep only the first occurrence of each fully identical row.
tidy = tidy.drop_duplicates()

In [11]:
# Restrict the table to the output columns, in their final order.
tidy = tidy.loc[:, [
    'ONS Trade Areas ITIS',
    'Year',
    'Flow',
    'ITIS Service',
    'ITIS Industry',
    'International Trade Basis',
    'Measure Type',
    'Value',
    'Unit',
]]

In [13]:
from pathlib import Path
# Output directory, relative to the working directory; created on first run
# and reused afterwards.
out = Path('out')
out.mkdir(exist_ok=True)

# Write the tidy table as CSV without the DataFrame index.
tidy_csv_path = out / 'tidy.csv'
tidy.to_csv(tidy_csv_path, index=False)

In [14]:
from gssutils.metadata import THEME
# Fill in the dataset metadata fields before generating the metadata file.
scraper.dataset.theme = THEME['business-industry-trade-energy']
scraper.dataset.family = 'Trade'

# generate_trig() output is written in binary mode, next to the CSV.
trig_path = out / 'dataset.trig'
with trig_path.open('wb') as trig_file:
    trig_file.write(scraper.generate_trig())