### Balance of Payments 2017 Q3, Table A

In [1]:
from databaker.framework import *
import pandas as pd
import requests
from pathlib import Path
from datetime import datetime
import json
from pytz import timezone


Record some metadata about this transformation as we do it.

In [2]:
# Provenance record for this transformation run. The start time is captured
# now; 'endedAtTime' is added later, just before the metadata is written out.
startedAt = datetime.now(timezone('Europe/London')).isoformat()
provActivity = {
    '@id': '../ONS_BoP_transformation',
    '@type': 'activity',
    'startedAtTime': startedAt,
    'label': 'ONS BoP transform to CSV',
}

The ONS Balance of Payments spreadsheet is available directly from the ONS website.

Fetch and cache locally, while recording where things come from for the provenance metadata.

In [3]:
# Download each source spreadsheet once (cached under `in/`) and record a
# PROV entity for it so the output can be traced back to where it came from.
provSources = []

sourceDir = Path('in')
sourceDir.mkdir(exist_ok=True)

urlPrefix = 'https://www.ons.gov.uk/file?uri=/economy/nationalaccounts/balanceofpayments/datasets/balanceofpaymentsstatisticalbulletintables/current/'
sources = ['balanceofpayments2017q3.xls']

for filename in sources:
    sourceFile = sourceDir / filename
    sourceUrl = f'{urlPrefix}{filename}'

    # Only hit the network when there is no cached copy on disk.
    if not sourceFile.is_file():
        response = requests.get(sourceUrl, timeout=60)
        # Fail loudly on an HTTP error: otherwise the error page would be
        # cached as the .xls file and silently reused by every later run.
        response.raise_for_status()
        sourceFile.write_bytes(response.content)

    provSources.append({
        '@id': sourceUrl,
        '@type': 'entity',
        'label': filename,
        'wasUsedBy': provActivity['@id']
    })

Just load the first (and currently only) source spreadsheet for now.

In [4]:
# Load every sheet of the first source workbook. At this point `tab` holds the
# full collection of sheets (see the table names printed below); the next cell
# indexes into it to pick one.
inputFile = sourceDir / sources[0]
tab = loadxlstabs(inputFile)

Loading in/balanceofpayments2017q3.xls which has size 309248 bytes
Table names: ['Index', 'Records', 'Table A', 'Table B', 'Table C', 'Table D', 'Table E', 'Table F', 'Table G', 'Table H', 'Table I', 'Table J', 'Table K', 'Table R1', 'Table R2', 'Table R3']


In [5]:
# Keep only the sheet at index 2, which the load output lists as 'Table A'.
# NOTE(review): this rebinds `tab` from the collection of sheets to a single
# sheet, so the load cell must be re-run before this cell is run again.
tab = tab[2]

In [6]:
# Preview the whole sheet to check its layout before selecting cells.
savepreviewhtml(tab)

0,1,2,3,4,5,6,7,8,9,10,11,12,13
A,Summary of balance of payments,,,,,,,,,,,,
,Balances (net transactions),,,,,,,,,,,,
,,,,,,,,,,,,,£ million
,,,2015.0,2016.0,2015.0,2015.0,2016.0,2016.0,2016.0,2016.0,2017.0,2017.0,2017.0
,,,,,Q3,Q4,Q1,Q2,Q3,Q4,Q1,Q2,Q3
,,,,,,,,,,,,,
,Seasonally adjusted,,,,,,,,,,,,
,,,,,,,,,,,,,
,Current account,,,,,,,,,,,,
,Trade in goods and services,,,,,,,,,,,,


In [7]:
# Observation cells: start at D11, extend down and to the right, and keep only
# the cells that are not blank.
observations = tab.excel_ref('D11').expand(DOWN).expand(RIGHT).is_not_blank()

In [8]:
# Preview the observation selection against the sheet.
savepreviewhtml(observations)

0
item 0

0,1,2,3,4,5,6,7,8,9,10,11,12,13
A,Summary of balance of payments,,,,,,,,,,,,
,Balances (net transactions),,,,,,,,,,,,
,,,,,,,,,,,,,£ million
,,,2015.0,2016.0,2015.0,2015.0,2016.0,2016.0,2016.0,2016.0,2017.0,2017.0,2017.0
,,,,,Q3,Q4,Q1,Q2,Q3,Q4,Q1,Q2,Q3
,,,,,,,,,,,,,
,Seasonally adjusted,,,,,,,,,,,,
,,,,,,,,,,,,,
,Current account,,,,,,,,,,,,
,Trade in goods and services,,,,,,,,,,,,


In [9]:
# Series identifier cells: non-blank cells in column C from row 11 downwards
# (values such as 'BOKI' in the resulting table).
Code = tab.excel_ref('C11').expand(DOWN).is_not_blank()

In [10]:
# Preview the identifier selection against the sheet.
savepreviewhtml(Code)

0
item 0

0,1,2,3,4,5,6,7,8,9,10,11,12,13
A,Summary of balance of payments,,,,,,,,,,,,
,Balances (net transactions),,,,,,,,,,,,
,,,,,,,,,,,,,£ million
,,,2015.0,2016.0,2015.0,2015.0,2016.0,2016.0,2016.0,2016.0,2017.0,2017.0,2017.0
,,,,,Q3,Q4,Q1,Q2,Q3,Q4,Q1,Q2,Q3
,,,,,,,,,,,,,
,Seasonally adjusted,,,,,,,,,,,,
,,,,,,,,,,,,,
,Current account,,,,,,,,,,,,
,Trade in goods and services,,,,,,,,,,,,


In [11]:
# Time headers, from column D rightwards: years are on row 4 and quarter labels
# on row 5. In the sheet preview the first two year columns have no quarter
# label beneath them, so those columns end up with a year only.
Year = tab.excel_ref('D4').expand(RIGHT).is_not_blank().is_not_whitespace()
Quarter = tab.excel_ref('D5').expand(RIGHT).is_not_whitespace()

In [12]:
# Preview the year header selection against the sheet.
savepreviewhtml(Year)

0
item 0

0,1,2,3,4,5,6,7,8,9,10,11,12,13
A,Summary of balance of payments,,,,,,,,,,,,
,Balances (net transactions),,,,,,,,,,,,
,,,,,,,,,,,,,£ million
,,,2015.0,2016.0,2015.0,2015.0,2016.0,2016.0,2016.0,2016.0,2017.0,2017.0,2017.0
,,,,,Q3,Q4,Q1,Q2,Q3,Q4,Q1,Q2,Q3
,,,,,,,,,,,,,
,Seasonally adjusted,,,,,,,,,,,,
,,,,,,,,,,,,,
,Current account,,,,,,,,,,,,
,Trade in goods and services,,,,,,,,,,,,


In [13]:
# Preview the quarter header selection against the sheet.
savepreviewhtml(Quarter)

0
item 0

0,1,2,3,4,5,6,7,8,9,10,11,12,13
A,Summary of balance of payments,,,,,,,,,,,,
,Balances (net transactions),,,,,,,,,,,,
,,,,,,,,,,,,,£ million
,,,2015.0,2016.0,2015.0,2015.0,2016.0,2016.0,2016.0,2016.0,2017.0,2017.0,2017.0
,,,,,Q3,Q4,Q1,Q2,Q3,Q4,Q1,Q2,Q3
,,,,,,,,,,,,,
,Seasonally adjusted,,,,,,,,,,,,
,,,,,,,,,,,,,
,Current account,,,,,,,,,,,,
,Trade in goods and services,,,,,,,,,,,,


In [14]:

# Seasonal-adjustment headings: column B cells whose text contains 'adjusted'
# (for example 'Seasonally adjusted' in the sheet preview).
Season = tab.excel_ref('B').filter(contains_string('adjusted')).is_not_whitespace()


In [15]:
# Preview the seasonal-adjustment heading selection against the sheet.
savepreviewhtml(Season)

0
item 0

0,1,2,3,4,5,6,7,8,9,10,11,12,13
A,Summary of balance of payments,,,,,,,,,,,,
,Balances (net transactions),,,,,,,,,,,,
,,,,,,,,,,,,,£ million
,,,2015.0,2016.0,2015.0,2015.0,2016.0,2016.0,2016.0,2016.0,2017.0,2017.0,2017.0
,,,,,Q3,Q4,Q1,Q2,Q3,Q4,Q1,Q2,Q3
,,,,,,,,,,,,,
,Seasonally adjusted,,,,,,,,,,,,
,,,,,,,,,,,,,
,Current account,,,,,,,,,,,,
,Trade in goods and services,,,,,,,,,,,,


In [16]:
# Account headings, picked out of column B by position; anything from B66
# downwards is then subtracted from the selection.
# NOTE(review): the by_index positions are hard-coded for this edition of the
# spreadsheet — re-check them against the preview if the layout changes.
Account = tab.excel_ref('B').expand(DOWN).by_index([9,27,29,33,51,53,55,64]) - tab.excel_ref('B66').expand(DOWN)

In [17]:
# Preview the account heading selection against the sheet.
savepreviewhtml(Account)

0
item 0

0,1,2,3,4,5,6,7,8,9,10,11,12,13
A,Summary of balance of payments,,,,,,,,,,,,
,Balances (net transactions),,,,,,,,,,,,
,,,,,,,,,,,,,£ million
,,,2015.0,2016.0,2015.0,2015.0,2016.0,2016.0,2016.0,2016.0,2017.0,2017.0,2017.0
,,,,,Q3,Q4,Q1,Q2,Q3,Q4,Q1,Q2,Q3
,,,,,,,,,,,,,
,Seasonally adjusted,,,,,,,,,,,,
,,,,,,,,,,,,,
,Current account,,,,,,,,,,,,
,Trade in goods and services,,,,,,,,,,,,


In [18]:
# Balance-of-payments sub-headings, picked out of column B by position.
# Positions 27, 29, 51, 53, 55 and 64 are also selected for Account, so those
# cells carry the same text in both dimensions.
# NOTE(review): the positions are hard-coded for this edition of the
# spreadsheet — re-check them against the preview if the layout changes.
Balanceofpayments = tab.excel_ref('B').expand(DOWN).by_index([10,15,21,34,39,45,27,29,51,53,55,64])

In [19]:
# Preview the balance-of-payments heading selection against the sheet.
savepreviewhtml(Balanceofpayments)

0
item 0

0,1,2,3,4,5,6,7,8,9,10,11,12,13
A,Summary of balance of payments,,,,,,,,,,,,
,Balances (net transactions),,,,,,,,,,,,
,,,,,,,,,,,,,£ million
,,,2015.0,2016.0,2015.0,2015.0,2016.0,2016.0,2016.0,2016.0,2017.0,2017.0,2017.0
,,,,,Q3,Q4,Q1,Q2,Q3,Q4,Q1,Q2,Q3
,,,,,,,,,,,,,
,Seasonally adjusted,,,,,,,,,,,,
,,,,,,,,,,,,,
,Current account,,,,,,,,,,,,
,Trade in goods and services,,,,,,,,,,,,


In [20]:
# Row labels for individual series: every non-blank, non-whitespace cell in
# column B that has not already been claimed as a Season, Account or
# Balanceofpayments heading.
Nettransactions = tab.excel_ref('B').is_not_blank().is_not_whitespace() - Season - Account - Balanceofpayments

In [21]:
# Trim the row labels further: drop everything from B66 downwards and from B2
# upwards (B1/B2 hold the table title lines in the sheet preview).
# NOTE(review): presumably B66 onwards is footnote text — confirm.
Nettransactions = Nettransactions - tab.excel_ref('B66').expand(DOWN) - tab.excel_ref('B2').expand(UP)

In [22]:
# Preview the remaining row-label selection against the sheet.
savepreviewhtml(Nettransactions)

0
item 0

0,1,2,3,4,5,6,7,8,9,10,11,12,13
A,Summary of balance of payments,,,,,,,,,,,,
,Balances (net transactions),,,,,,,,,,,,
,,,,,,,,,,,,,£ million
,,,2015.0,2016.0,2015.0,2015.0,2016.0,2016.0,2016.0,2016.0,2017.0,2017.0,2017.0
,,,,,Q3,Q4,Q1,Q2,Q3,Q4,Q1,Q2,Q3
,,,,,,,,,,,,,
,Seasonally adjusted,,,,,,,,,,,,
,,,,,,,,,,,,,
,Current account,,,,,,,,,,,,
,Trade in goods and services,,,,,,,,,,,,


In [23]:
# The units cell N3 ('£ million' in the sheet preview).
# NOTE(review): within this notebook the selection is only previewed; the
# Dimensions list uses a constant 'Currency' value instead of this cell.
Currency = tab.excel_ref('N3')

In [24]:
# Preview the units cell selection against the sheet.
savepreviewhtml(Currency)

0
item 0

0,1,2,3,4,5,6,7,8,9,10,11,12,13
A,Summary of balance of payments,,,,,,,,,,,,
,Balances (net transactions),,,,,,,,,,,,
,,,,,,,,,,,,,£ million
,,,2015.0,2016.0,2015.0,2015.0,2016.0,2016.0,2016.0,2016.0,2017.0,2017.0,2017.0
,,,,,Q3,Q4,Q1,Q2,Q3,Q4,Q1,Q2,Q3
,,,,,,,,,,,,,
,Seasonally adjusted,,,,,,,,,,,,
,,,,,,,,,,,,,
,Current account,,,,,,,,,,,,
,Trade in goods and services,,,,,,,,,,,,


In [25]:
# Dimension definitions, listed in the order they are passed to the
# conversion segment. Constants apply to every observation; the HDim entries
# look up a header cell relative to each observation.
Dimensions = [
    HDimConst('GEOG', 'K02000001'),
    # Time headers sit directly above each observation column.
    HDim(Year, 'Year', DIRECTLY, ABOVE),
    HDim(Quarter, 'Quarter', DIRECTLY, ABOVE),
    # Identifier and row label sit directly to the left on the same row.
    HDim(Code, 'Code Identifier', DIRECTLY, LEFT),
    HDimConst('Currency', 'GBP(millions)'),
    HDim(Nettransactions, 'Nettransactions', DIRECTLY, LEFT),
    # Section headings apply to the rows beneath them, so use the closest
    # heading above.
    HDim(Balanceofpayments, 'Balance of Payments', CLOSEST, ABOVE),
    HDim(Account, 'Account', CLOSEST, ABOVE),
    HDim(Season, 'Seasonal Adjustments', CLOSEST, ABOVE),
]

In [26]:
# Tie the observation cells to their dimensions.
# NOTE(review): processTIMEUNIT=True is passed, but the TIME column is only
# assembled by hand in later cells — confirm this flag is still wanted.
c1 = ConversionSegment(observations, Dimensions, processTIMEUNIT=True)
# Optional visual check of the segment (disabled):
#savepreviewhtml(c1)

In [27]:
# Convert the segment to a pandas DataFrame (one row per observation) and
# display it.
new_table = c1.topandas()
new_table




Unnamed: 0,OBS,GEOG,Year,Quarter,Code,Currency,Nettransactions,Balance of Payments,Account,Seasonal Adjustments
0,-118626.0,K02000001,2015.0,,BOKI,GBP(millions),Trade in goods,Trade in goods and services,Current account,Seasonally adjusted
1,-135495.0,K02000001,2016.0,,BOKI,GBP(millions),Trade in goods,Trade in goods and services,Current account,Seasonally adjusted
2,-28790.0,K02000001,2015.0,Q3,BOKI,GBP(millions),Trade in goods,Trade in goods and services,Current account,Seasonally adjusted
3,-29802.0,K02000001,2015.0,Q4,BOKI,GBP(millions),Trade in goods,Trade in goods and services,Current account,Seasonally adjusted
4,-32019.0,K02000001,2016.0,Q1,BOKI,GBP(millions),Trade in goods,Trade in goods and services,Current account,Seasonally adjusted
5,-30635.0,K02000001,2016.0,Q2,BOKI,GBP(millions),Trade in goods,Trade in goods and services,Current account,Seasonally adjusted
6,-39769.0,K02000001,2016.0,Q3,BOKI,GBP(millions),Trade in goods,Trade in goods and services,Current account,Seasonally adjusted
7,-33072.0,K02000001,2016.0,Q4,BOKI,GBP(millions),Trade in goods,Trade in goods and services,Current account,Seasonally adjusted
8,-34345.0,K02000001,2017.0,Q1,BOKI,GBP(millions),Trade in goods,Trade in goods and services,Current account,Seasonally adjusted
9,-32327.0,K02000001,2017.0,Q2,BOKI,GBP(millions),Trade in goods,Trade in goods and services,Current account,Seasonally adjusted


In [28]:
# Start building TIME from the year: the Year values are text such as '2015.0',
# so remove the '.0' to leave '2015'.
new_table['TIME'] = new_table['Year'].map(lambda cell:cell.replace('.0', ''))
new_table

Unnamed: 0,OBS,GEOG,Year,Quarter,Code,Currency,Nettransactions,Balance of Payments,Account,Seasonal Adjustments,TIME
0,-118626.0,K02000001,2015.0,,BOKI,GBP(millions),Trade in goods,Trade in goods and services,Current account,Seasonally adjusted,2015
1,-135495.0,K02000001,2016.0,,BOKI,GBP(millions),Trade in goods,Trade in goods and services,Current account,Seasonally adjusted,2016
2,-28790.0,K02000001,2015.0,Q3,BOKI,GBP(millions),Trade in goods,Trade in goods and services,Current account,Seasonally adjusted,2015
3,-29802.0,K02000001,2015.0,Q4,BOKI,GBP(millions),Trade in goods,Trade in goods and services,Current account,Seasonally adjusted,2015
4,-32019.0,K02000001,2016.0,Q1,BOKI,GBP(millions),Trade in goods,Trade in goods and services,Current account,Seasonally adjusted,2016
5,-30635.0,K02000001,2016.0,Q2,BOKI,GBP(millions),Trade in goods,Trade in goods and services,Current account,Seasonally adjusted,2016
6,-39769.0,K02000001,2016.0,Q3,BOKI,GBP(millions),Trade in goods,Trade in goods and services,Current account,Seasonally adjusted,2016
7,-33072.0,K02000001,2016.0,Q4,BOKI,GBP(millions),Trade in goods,Trade in goods and services,Current account,Seasonally adjusted,2016
8,-34345.0,K02000001,2017.0,Q1,BOKI,GBP(millions),Trade in goods,Trade in goods and services,Current account,Seasonally adjusted,2017
9,-32327.0,K02000001,2017.0,Q2,BOKI,GBP(millions),Trade in goods,Trade in goods and services,Current account,Seasonally adjusted,2017


In [29]:
# Report the row count, then append the quarter label to TIME. Rows with an
# empty Quarter keep a year-only TIME value.
# NOTE(review): no separator is added here, yet the displayed TIME values read
# '2015 Q3' — presumably the space comes from the Quarter text itself; confirm.
print(len(new_table))
new_table['TIME'] = new_table['TIME'].map(str) + new_table['Quarter'] 

341


In [30]:
# Sanity check: the row count should be unchanged after building TIME.
print(len(new_table))
new_table

341


Unnamed: 0,OBS,GEOG,Year,Quarter,Code,Currency,Nettransactions,Balance of Payments,Account,Seasonal Adjustments,TIME
0,-118626.0,K02000001,2015.0,,BOKI,GBP(millions),Trade in goods,Trade in goods and services,Current account,Seasonally adjusted,2015
1,-135495.0,K02000001,2016.0,,BOKI,GBP(millions),Trade in goods,Trade in goods and services,Current account,Seasonally adjusted,2016
2,-28790.0,K02000001,2015.0,Q3,BOKI,GBP(millions),Trade in goods,Trade in goods and services,Current account,Seasonally adjusted,2015 Q3
3,-29802.0,K02000001,2015.0,Q4,BOKI,GBP(millions),Trade in goods,Trade in goods and services,Current account,Seasonally adjusted,2015 Q4
4,-32019.0,K02000001,2016.0,Q1,BOKI,GBP(millions),Trade in goods,Trade in goods and services,Current account,Seasonally adjusted,2016 Q1
5,-30635.0,K02000001,2016.0,Q2,BOKI,GBP(millions),Trade in goods,Trade in goods and services,Current account,Seasonally adjusted,2016 Q2
6,-39769.0,K02000001,2016.0,Q3,BOKI,GBP(millions),Trade in goods,Trade in goods and services,Current account,Seasonally adjusted,2016 Q3
7,-33072.0,K02000001,2016.0,Q4,BOKI,GBP(millions),Trade in goods,Trade in goods and services,Current account,Seasonally adjusted,2016 Q4
8,-34345.0,K02000001,2017.0,Q1,BOKI,GBP(millions),Trade in goods,Trade in goods and services,Current account,Seasonally adjusted,2017 Q1
9,-32327.0,K02000001,2017.0,Q2,BOKI,GBP(millions),Trade in goods,Trade in goods and services,Current account,Seasonally adjusted,2017 Q2


In [31]:
# Year and Quarter are now folded into TIME, so remove both source columns.
new_table = new_table.drop(['Year', 'Quarter'], axis=1)


In [32]:
# Sanity check: dropping columns must not change the row count.
print(len(new_table))
new_table

341


Unnamed: 0,OBS,GEOG,Code,Currency,Nettransactions,Balance of Payments,Account,Seasonal Adjustments,TIME
0,-118626.0,K02000001,BOKI,GBP(millions),Trade in goods,Trade in goods and services,Current account,Seasonally adjusted,2015
1,-135495.0,K02000001,BOKI,GBP(millions),Trade in goods,Trade in goods and services,Current account,Seasonally adjusted,2016
2,-28790.0,K02000001,BOKI,GBP(millions),Trade in goods,Trade in goods and services,Current account,Seasonally adjusted,2015 Q3
3,-29802.0,K02000001,BOKI,GBP(millions),Trade in goods,Trade in goods and services,Current account,Seasonally adjusted,2015 Q4
4,-32019.0,K02000001,BOKI,GBP(millions),Trade in goods,Trade in goods and services,Current account,Seasonally adjusted,2016 Q1
5,-30635.0,K02000001,BOKI,GBP(millions),Trade in goods,Trade in goods and services,Current account,Seasonally adjusted,2016 Q2
6,-39769.0,K02000001,BOKI,GBP(millions),Trade in goods,Trade in goods and services,Current account,Seasonally adjusted,2016 Q3
7,-33072.0,K02000001,BOKI,GBP(millions),Trade in goods,Trade in goods and services,Current account,Seasonally adjusted,2016 Q4
8,-34345.0,K02000001,BOKI,GBP(millions),Trade in goods,Trade in goods and services,Current account,Seasonally adjusted,2017 Q1
9,-32327.0,K02000001,BOKI,GBP(millions),Trade in goods,Trade in goods and services,Current account,Seasonally adjusted,2017 Q2


In [None]:
# Remove trailing '1'/'2' characters from the labels. Note that rstrip('21')
# strips any run of those two characters from the end, not the literal suffix
# '21'. NOTE(review): presumably these are footnote markers — confirm that no
# label legitimately ends in 1 or 2.
new_table['Balance of Payments'] = new_table['Balance of Payments'].map(lambda x: x.rstrip('21')) 

In [None]:
# Remove trailing '1'/'2' characters from the labels. Note that rstrip('21')
# strips any run of those two characters from the end, not the literal suffix
# '21'. NOTE(review): presumably these are footnote markers — confirm that no
# label legitimately ends in 1 or 2.
new_table['Account'] = new_table['Account'].map(lambda x: x.rstrip('21'))

In [None]:
# Inspect the last rows to check the label clean-up.
new_table.tail(10)

In [None]:
def user_perc(x):
    """Blank out the 'Net errors and omissions' label.

    Returns an empty string when ``x``, ignoring surrounding whitespace, is
    exactly 'Net errors and omissions'; any other value is returned unchanged.
    """
    is_errors_label = x.strip() == 'Net errors and omissions'
    return '' if is_errors_label else x
    
# Apply the clean-up to the one column it concerns. Series.map visits each
# value directly, avoiding a row-wise DataFrame.apply(axis=1), which builds a
# Series per row and returns a DataFrame rather than a Series on an empty frame.
new_table['Balance of Payments'] = new_table['Balance of Payments'].map(user_perc)
    

In [33]:

# Replace missing values with empty strings before writing.
new_table = new_table.fillna('')

outputDir = Path('out')
outputDir.mkdir(exist_ok=True)

# Name the CSV after the source workbook: same relative path, '.csv' suffix.
csvName = inputFile.relative_to(sourceDir).with_suffix('.csv')
outputFile = outputDir / csvName

writetechnicalCSV(outputFile, new_table)

writing 1 conversion segments into /home/alex/Development/ONS/trade/ONS_BoP/out/balanceofpayments2017q3.csv
pdconversionwrite segment size 341


Output the PROV metadata as JSON-LD.

In [34]:
# Load the shared JSON-LD context that gives the PROV keys their meaning.
metadataDir = Path('metadata')
with (metadataDir / 'prov_context.json').open() as contextFile:
    context = json.load(contextFile)

# Close off the activity record now that the CSV has been written.
provActivity['endedAtTime'] = datetime.now(timezone('Europe/London')).isoformat()

# Entity describing the generated CSV, linked back to the activity.
outputEntity = {
    '@id': '../' + str(outputFile),
    '@type': 'entity',
    'wasGeneratedBy': provActivity['@id'],
    'label': str(outputFile.relative_to(outputDir)),
}

# Graph order: the activity, then every source entity, then the output.
prov = {
    '@context': context,
    '@graph': [provActivity, *provSources, outputEntity],
}

with (outputDir / 'prov.jsonld').open('w') as provFile:
    json.dump(prov, provFile, indent=2)