Extract tables from the November 2017 Overseas Trade Statistics (imports) release.

Download the Excel workbook if no local copy exists yet (the copy is kept under `in/`), then load its sheets.

In [1]:
from databaker.framework import *
import requests
import json
from pathlib import Path
import datetime
import pandas

# Local cache path for the workbook and the upstream location it is fetched from.
stats_file = Path('in/OTS_IMP_1711.xls')

url = 'https://www.uktradeinfo.com/Statistics/OTS%20Releases/OTS_IMP_1711.xls'

# Only download when there is no cached copy, so re-running the notebook does
# not hit the network again. is_file() is already False for a missing path.
if not stats_file.is_file():
    # A timeout stops the cell hanging indefinitely on an unresponsive server.
    response = requests.get(url, timeout=60)
    # Fail loudly on an HTTP error; otherwise the error page would be written
    # to disk and then treated as the cached workbook on every later run.
    response.raise_for_status()
    # open() does not create missing directories, so make sure 'in/' exists.
    stats_file.parent.mkdir(parents=True, exist_ok=True)
    with open(stats_file, 'wb') as f:
        f.write(response.content)

sheets = loadxlstabs(stats_file)

Loading in/OTS_IMP_1711.xls which has size 105984 bytes
Table names: ['EU Imports', 'Non-EU Imports', 'Metadata']


In [2]:
# Check the workbook still has the layout this cell scrapes, so a changed
# release format fails here with a clear message instead of producing bad data.
assert len(sheets) >= 3, 'expected at least 3 tabs, found %d' % len(sheets)
eu_imports = sheets[0]
assert eu_imports.name == 'EU Imports', 'unexpected first tab: %r' % eu_imports.name

assert eu_imports.excel_ref('A3').value == 'Chap', 'cell A3 is not the "Chap" header'

# Cell C3 is parsed as "<full month name> <4-digit year>"; strptime raises
# ValueError if the text does not match that format.
period = datetime.datetime.strptime(eu_imports.excel_ref('C3').value, '%B %Y')

# Cells below the A3 header whose value matches the two-digit pattern.
chapters = eu_imports.excel_ref('A3').fill(DOWN).regex('[0-9]{2}')

# The value for each chapter is read from two columns to its right (column C).
# NOTE(review): pairing the two lists assumes both bags iterate in the same
# cell order — confirm against databaker/xypath iteration semantics.
data = pandas.DataFrame({
    'cn-chapter': [cell.value for cell in chapters],
    'year-month': period.strftime('%Y-%m'),
    'gbp-thousands': [int(cell.value) for cell in chapters.shift(RIGHT).shift(RIGHT)]
})

# to_csv does not create missing directories, so make sure 'out/' exists
# before writing; this keeps the cell runnable on a fresh checkout.
out_file = Path('out/eu_imports.csv')
out_file.parent.mkdir(parents=True, exist_ok=True)
data.to_csv(str(out_file), index=False)