Country by commodity imports

In [1]:
from gssutils import *

# Build the scraper for the ONS dataset landing page; the bare name on the
# last line displays it as the cell output.
scraper = Scraper(
    'https://www.ons.gov.uk/economy/nationalaccounts/balanceofpayments/datasets/'
    'uktradecountrybycommodityimports'
)
scraper

## Trade in goods: country-by-commodity imports

Monthly import country-by-commodity data on the UK's trade in goods, including trade by all countries and selected commodities, non-seasonally adjusted.

### Distributions

1. Trade in goods: country-by-commodity imports ([MS Excel Spreadsheet](https://www.ons.gov.uk/file?uri=/economy/nationalaccounts/balanceofpayments/datasets/uktradecountrybycommodityimports/current/countrybycommodityimportsfinal.xlsx))


In [2]:
# Fetch the dataset's distribution, load it as a pandas table and display it.
distribution = scraper.distribution()
tab = distribution.as_pandas()
tab



Unnamed: 0,COMMODITY,COUNTRY,DIRECTION,1998JAN,1998FEB,1998MAR,1998APR,1998MAY,1998JUN,1998JUL,...,2018JAN,2018FEB,2018MAR,2018APR,2018MAY,2018JUN,2018JUL,2018AUG,2018SEP,2018OCT
0,0 Food & live animals,AD Andorra,IM Imports,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1,0 Food & live animals,AE United Arab Emirates,IM Imports,1,0,12,29,11,2,0,...,0,0,8,1,0,0,0,0,0,0
2,0 Food & live animals,AF Afghanistan,IM Imports,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
3,0 Food & live animals,AG Antigua & Barbuda,IM Imports,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
4,0 Food & live animals,AI Anguilla,IM Imports,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
5,0 Food & live animals,AL Albania,IM Imports,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
6,0 Food & live animals,AM Armenia,IM Imports,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
7,0 Food & live animals,AO Angola,IM Imports,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,1
8,0 Food & live animals,AQ Antarctica,IM Imports,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
9,0 Food & live animals,AR Argentina,IM Imports,3,11,3,9,15,10,13,...,32,59,58,13,35,43,65,18,33,38


In [3]:
# Remove the DIRECTION column (every row in the preview above reads "IM Imports").
tab = tab.drop(columns=['DIRECTION'])

In [4]:
# Relabel the first two columns by position. The new labels are assigned
# through the `columns` setter rather than by writing into
# `tab.columns.values`: an Index is meant to be immutable, and modifying its
# backing array in place can leave pandas' cached label lookups out of date
# (or fail outright when that array is read-only).
tab.columns = ['CORD SITC', 'ONS Partner Geography'] + list(tab.columns[2:])

In [5]:
# Reshape from wide to long: the two identifier columns are kept, every other
# column becomes a row with its label in 'Period' and its cell in 'Value'.
new_table = tab.melt(
    id_vars=['CORD SITC', 'ONS Partner Geography'],
    var_name='Period',
    value_name='Value',
)

In [6]:
import re
# Accepted period label shapes: 'YYYY', 'YYYYMON' and 'YYYY Qn'.
YEAR_RE = re.compile(r'[0-9]{4}')
YEAR_MONTH_RE = re.compile(r'([0-9]{4})(JAN|FEB|MAR|APR|MAY|JUN|JUL|AUG|SEP|OCT|NOV|DEC)')
YEAR_QUARTER_RE = re.compile(r'([0-9]{4})\s+(Q[1-4])')

# Three-letter month abbreviation -> zero-padded month number. Built once
# here rather than on every time2period() call.
MONTH_NUMBERS = {'JAN': '01', 'FEB': '02', 'MAR': '03', 'APR': '04', 'MAY': '05', 'JUN': '06',
                 'JUL': '07', 'AUG': '08', 'SEP': '09', 'OCT': '10', 'NOV': '11', 'DEC': '12'}

# from https://stackoverflow.com/questions/597476/how-to-concisely-cascade-through-multiple-regex-statements-in-python
class Re(object):
    """Remember the latest match so several regexes can be tried in an if/elif chain."""

    def __init__(self):
        self.last_match = None

    def fullmatch(self, pattern, text):
        """Full-match `text` against `pattern`; store and return the match (None if no match)."""
        self.last_match = re.fullmatch(pattern, text)
        return self.last_match


def time2period(t):
    """Convert a period label into a 'year/…', 'month/…' or 'quarter/…' string.

    Parameters
    ----------
    t : str or any value whose ``str()`` is a period label
        For example '1998', '1998JAN' or '1998 Q1'. Non-string values (such
        as an integer year) are converted with ``str`` before matching
        instead of raising ``TypeError``.

    Returns
    -------
    str or None
        'year/YYYY', 'month/YYYY-MM' or 'quarter/YYYY-Qn'. When the label
        matches none of the patterns a message is printed and None is
        returned.
    """
    label = t if isinstance(t, str) else str(t)
    gre = Re()
    if gre.fullmatch(YEAR_RE, label):
        return f"year/{label}"
    elif gre.fullmatch(YEAR_MONTH_RE, label):
        year, month = gre.last_match.groups()
        return f"month/{year}-{MONTH_NUMBERS[month]}"
    elif gre.fullmatch(YEAR_QUARTER_RE, label):
        year, quarter = gre.last_match.groups()
        return f"quarter/{year}-{quarter}"
    else:
        # Unrecognised label: report it and fall through with an explicit None.
        print(f"no match for {t}")
        return None

# Rewrite every raw period label using time2period.
converted_periods = new_table['Period'].apply(time2period)
new_table['Period'] = converted_periods

In [7]:
# Columns that hold the same constant on every row.
for column_name, constant in [
    ('Seasonal Adjustment', 'NSA'),
    ('Measure Type', 'GBP Total'),
    ('Unit', 'gbp-million'),
    ('Flow', 'imports'),
]:
    new_table[column_name] = constant

TODO: check the logic below. Any non-numeric cell (such as a data marker) is coerced to NaN and then set to zero. Is that intended?

In [8]:
# Anything that does not parse as a number becomes NaN (errors='coerce');
# those NaNs are then replaced with 0.
numeric_values = pd.to_numeric(new_table['Value'], errors='coerce')
new_table['Value'] = numeric_values.fillna(0)

In [9]:
# The preceding cell replaced every unparseable value with 0, so no NaNs remain
# and the column can be cast to an integer dtype.
new_table['Value'] = new_table['Value'].astype(int)

In [10]:
# Keep only the first two characters of each partner label
# (e.g. 'AE United Arab Emirates' -> 'AE').
partner_labels = new_table['ONS Partner Geography'].astype(str)
new_table['ONS Partner Geography'] = partner_labels.str[:2]

In [11]:
# Keep only the text before the first space (e.g. '0 Food & live animals' -> '0').
# str.partition(' ') yields three columns (before, separator, after); column 0
# is selected explicitly because a multi-column frame cannot be assigned to a
# single column.
new_table['CORD SITC'] = new_table['CORD SITC'].str.partition(' ')[0]

TODO: check the following logic. Why are zero-valued measurements filtered out?

In [12]:
# Keep only the rows whose value is not zero.
has_nonzero_value = new_table['Value'] != 0
new_table = new_table[has_nonzero_value]

In [13]:
# Select the output columns in their final order.
output_columns = [
    'ONS Partner Geography', 'Period', 'Flow', 'CORD SITC',
    'Seasonal Adjustment', 'Measure Type', 'Value', 'Unit',
]
new_table = new_table[output_columns]

In [14]:
# Display the finished table as the cell output.
new_table

Unnamed: 0,ONS Partner Geography,Period,Flow,CORD SITC,Seasonal Adjustment,Measure Type,Value,Unit
1,AE,month/1998-01,imports,0,NSA,GBP Total,1,gbp-million
9,AR,month/1998-01,imports,0,NSA,GBP Total,3,gbp-million
11,AT,month/1998-01,imports,0,NSA,GBP Total,1,gbp-million
12,AU,month/1998-01,imports,0,NSA,GBP Total,17,gbp-million
15,B5,month/1998-01,imports,0,NSA,GBP Total,659,gbp-million
19,BE,month/1998-01,imports,0,NSA,GBP Total,47,gbp-million
29,BR,month/1998-01,imports,0,NSA,GBP Total,14,gbp-million
35,BZ,month/1998-01,imports,0,NSA,GBP Total,1,gbp-million
36,CA,month/1998-01,imports,0,NSA,GBP Total,13,gbp-million
41,CH,month/1998-01,imports,0,NSA,GBP Total,3,gbp-million
