# Country by commodity imports

In [2]:
from gssutils import *

# Point a gssutils Scraper at the ONS dataset landing page. The URL is split
# over two adjacent string literals purely to keep the line length down.
scraper = Scraper(
    'https://www.ons.gov.uk/economy/nationalaccounts/balanceofpayments/datasets/'
    'uktradecountrybycommodityimports'
)
# Bare expression: the scraper object is rendered as this cell's output.
scraper

## Trade in goods: country-by-commodity imports

Monthly import country-by-commodity data on the UK's trade in goods, including trade by all countries and selected commodities, non-seasonally adjusted.

### Distributions

1. Trade in goods: country-by-commodity imports ([MS Excel Spreadsheet](https://www.ons.gov.uk/file?uri=/economy/nationalaccounts/balanceofpayments/datasets/uktradecountrybycommodityimports/current/countrybycommodityimportsfinal.xlsx))


In [9]:
# Load sheet index 1 of the scraped distribution as a DataFrame, storing the
# three label columns as pandas categoricals.
# Presumably these keyword arguments are forwarded to pandas' Excel reader
# (TODO confirm against gssutils): with keep_default_na=False and
# na_values=[''], only the empty string would be treated as missing.
data = scraper.distribution().as_pandas(
    sheet_name=1,
    dtype=dict.fromkeys(['COMMODITY', 'COUNTRY', 'DIRECTION'], 'category'),
    na_values=[''],
    keep_default_na=False,
)
data

Unnamed: 0,COMMODITY,COUNTRY,DIRECTION,1998JAN,1998FEB,1998MAR,1998APR,1998MAY,1998JUN,1998JUL,...,2018NOV,2018DEC,2019JAN,2019FEB,2019MAR,2019APR,2019MAY,2019JUN,2019JUL,2019AUG
0,0 Food & live animals,AD Andorra,IM Imports,0,0,0,0,0,0,0,...,0,0,0,0,0,1647,0,0,0,0
1,0 Food & live animals,AE United Arab Emirates,IM Imports,899075,380883,11820533,29235722,11438351,1628558,342104,...,635597,570601,808013,1042926,10339473,2152939,1780878,675000,560763,891423
2,0 Food & live animals,AF Afghanistan,IM Imports,25217,76800,12294,110214,0,55531,104659,...,27228,4752,47404,74654,14587,46498,72706,33636,1110,4916
3,0 Food & live animals,AG Antigua & Barbuda,IM Imports,2569,0,0,56358,0,0,0,...,0,0,0,0,0,0,1043,1129,63699,32203
4,0 Food & live animals,AI Anguilla,IM Imports,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
5,0 Food & live animals,AL Albania,IM Imports,0,0,0,0,0,0,0,...,41657,10500,46229,20753,51771,23863,21061,31716,60367,39665
6,0 Food & live animals,AM Armenia,IM Imports,0,0,0,0,10155,0,1336,...,14583,0,0,0,0,3982,0,0,0,0
7,0 Food & live animals,AO Angola,IM Imports,62735,12602,0,0,0,37458,0,...,0,714386,0,843710,0,0,35103,0,0,0
8,0 Food & live animals,AQ Antarctica,IM Imports,0,0,0,0,0,0,0,...,0,0,0,0,11385,0,0,0,0,0
9,0 Food & live animals,AR Argentina,IM Imports,3996860,10886680,4731677,7384214,13882494,9794335,13672416,...,81253505,5830700,45869772,9255878,35879371,22761412,56044946,31004690,40925877,71358341


In [10]:
# Reshape from wide (one column per period) to long (one row per commodity,
# partner and period). Built as a single chain from `data`, so re-running this
# cell always starts from the loaded frame and never mutates it.
table = (
    data
    .drop(columns='DIRECTION')
    .rename(columns={
        'COMMODITY': 'CORD SITC',
        'COUNTRY': 'ONS Partner Geography',
    })
    .melt(
        id_vars=['CORD SITC', 'ONS Partner Geography'],
        var_name='Period',
        value_name='Value',
    )
    # The melted column headers repeat for every row; store them as a categorical.
    .astype({'Period': 'category'})
)
#table['Value'] = table['Value'].astype(int)

In [11]:
#display(table['CORD SITC'].cat.categories)
#display(table['ONS Partner Geography'].cat.categories)

Index(['0 Food & live animals', '00 Live animals',
       '01 Meat & meat preparations', '02 Dairy products & eggs',
       '03 Fish & shellfish', '04 Cereals', '05 Vegetables & fruit',
       '06 Sugar', '07 Coffee, tea, cocoa etc', '08 Animal feeding stuffs',
       ...
       '897C Jewellery', '89I Other manufactures (intermediate)',
       '89K Other manufactures (capital)',
       '89OC Other manufactures (consumer)', '8O Other manufactures',
       '8OC Other miscellaneous manufactures (consumer)',
       '8OI Other miscellaneous manufactures (intermediate)',
       '8OK Other miscellaneous manufactures (capital)', '9 Unspecified goods',
       'T Total'],
      dtype='object', length=125)

Index(['AD Andorra', 'AE United Arab Emirates', 'AF Afghanistan',
       'AG Antigua & Barbuda', 'AI Anguilla', 'AL Albania', 'AM Armenia',
       'AO Angola', 'AQ Antarctica', 'AR Argentina',
       ...
       'VN Vietnam', 'VU Vanuatu', 'W1 Whole world', 'WF Wallis & Futuna',
       'WS Samoa', 'XK Kosovo', 'YE Yemen', 'ZA South Africa', 'ZM Zambia',
       'ZW Zimbabwe'],
      dtype='object', length=237)

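Fix up the category strings: keep only the leading code of each commodity and partner label (for example, `0 Food & live animals` becomes `0` and `AD Andorra` becomes `AD`).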

In [12]:
# Reduce every category label to its leading code: the text before the first
# space for commodities, and the first two characters for partner countries.
# rename_categories relabels the categories themselves rather than touching
# each row. It replaces direct assignment to `.cat.categories`, which was
# deprecated in pandas 1.3 and removed in pandas 2.0.
table['CORD SITC'] = table['CORD SITC'].cat.rename_categories(
    lambda label: label.split(' ')[0])
table['ONS Partner Geography'] = table['ONS Partner Geography'].cat.rename_categories(
    lambda label: label[:2])
#display(table['CORD SITC'].cat.categories)
#display(table['ONS Partner Geography'].cat.categories)

Index(['0', '00', '01', '02', '03', '04', '05', '06', '07', '08',
       ...
       '897C', '89I', '89K', '89OC', '8O', '8OC', '8OI', '8OK', '9', 'T'],
      dtype='object', length=125)

Index(['AD', 'AE', 'AF', 'AG', 'AI', 'AL', 'AM', 'AO', 'AQ', 'AR',
       ...
       'VN', 'VU', 'W1', 'WF', 'WS', 'XK', 'YE', 'ZA', 'ZM', 'ZW'],
      dtype='object', length=237)

In [13]:
import re
# Recognised period label shapes: a bare year, a year followed by a three-letter
# month abbreviation, and a year followed by whitespace and a quarter.
YEAR_RE = re.compile(r'[0-9]{4}')
YEAR_MONTH_RE = re.compile(r'([0-9]{4})(JAN|FEB|MAR|APR|MAY|JUN|JUL|AUG|SEP|OCT|NOV|DEC)')
YEAR_QUARTER_RE = re.compile(r'([0-9]{4})\s+(Q[1-4])')

# Month abbreviation -> zero-padded month number ('JAN' -> '01', ..., 'DEC' -> '12').
_MONTH_NUMBERS = {
    abbreviation: f"{number:02d}"
    for number, abbreviation in enumerate(
        ['JAN', 'FEB', 'MAR', 'APR', 'MAY', 'JUN',
         'JUL', 'AUG', 'SEP', 'OCT', 'NOV', 'DEC'],
        start=1)
}


# from https://stackoverflow.com/questions/597476/how-to-concisely-cascade-through-multiple-regex-statements-in-python
class Re(object):
    """Regex helper that remembers the result of its most recent match attempt."""

    def __init__(self):
        # Result of the latest fullmatch() call; None before any call or after a miss.
        self.last_match = None

    def fullmatch(self, pattern, text):
        """Full-match ``text`` against ``pattern``, store the result and return it."""
        outcome = re.fullmatch(pattern, text)
        self.last_match = outcome
        return outcome


def time2period(t):
    """Convert a period label into a period path string.

    '1998'    -> 'year/1998'
    '1998JAN' -> 'month/1998-01'
    '1998 Q1' -> 'quarter/1998-Q1'

    The whole label has to match one of the three patterns. If none matches,
    a message is printed and None is returned.
    """
    matcher = Re()

    if matcher.fullmatch(YEAR_RE, t):
        return f"year/{t}"

    if matcher.fullmatch(YEAR_MONTH_RE, t):
        year, month = matcher.last_match.groups()
        # The pattern only admits the twelve abbreviations held in the table.
        return f"month/{year}-{_MONTH_NUMBERS[month]}"

    if matcher.fullmatch(YEAR_QUARTER_RE, t):
        year, quarter = matcher.last_match.groups()
        return f"quarter/{year}-{quarter}"

    print(f"no match for {t}")
    return None

# Relabel each distinct period header (e.g. '1998JAN') through time2period.
# rename_categories converts one label per category rather than one per row.
# It replaces direct assignment to `.cat.categories`, which was deprecated in
# pandas 1.3 and removed in pandas 2.0.
table['Period'] = table['Period'].cat.rename_categories(time2period)

In [14]:
# Add four columns that hold the same value on every row. Each is stored as a
# categorical, so the repeated label is kept once rather than once per row.
# NOTE(review): the sample values in this notebook's output look like whole
# pounds rather than millions - confirm that 'gbp-million' is the intended unit.
for constant_column, constant_value in [
    ('Seasonal Adjustment', 'NSA'),
    ('Measure Type', 'GBP Total'),
    ('Unit', 'gbp-million'),
    ('Flow', 'imports'),
]:
    table[constant_column] = pd.Series(constant_value, index=table.index, dtype='category')

In [15]:
#table.memory_usage()

Index                          80
CORD SITC                 7708620
ONS Partner Geography    15417136
Period                   15417320
Value                    61620000
Seasonal Adjustment       7702588
Measure Type              7702588
Unit                      7702588
Flow                      7702588
dtype: int64

In [16]:
# Select the eight columns in this order; a missing column raises a KeyError.
table = table.loc[:, [
    'ONS Partner Geography',
    'Period',
    'Flow',
    'CORD SITC',
    'Seasonal Adjustment',
    'Measure Type',
    'Value',
    'Unit',
]]
#table

Unnamed: 0,ONS Partner Geography,Period,Flow,CORD SITC,Seasonal Adjustment,Measure Type,Value,Unit
0,AD,month/1998-01,imports,0,NSA,GBP Total,0,gbp-million
1,AE,month/1998-01,imports,0,NSA,GBP Total,899075,gbp-million
2,AF,month/1998-01,imports,0,NSA,GBP Total,25217,gbp-million
3,AG,month/1998-01,imports,0,NSA,GBP Total,2569,gbp-million
4,AI,month/1998-01,imports,0,NSA,GBP Total,0,gbp-million
5,AL,month/1998-01,imports,0,NSA,GBP Total,0,gbp-million
6,AM,month/1998-01,imports,0,NSA,GBP Total,0,gbp-million
7,AO,month/1998-01,imports,0,NSA,GBP Total,62735,gbp-million
8,AQ,month/1998-01,imports,0,NSA,GBP Total,0,gbp-million
9,AR,month/1998-01,imports,0,NSA,GBP Total,3996860,gbp-million


In [17]:
#table.count()

ONS Partner Geography    7702500
Period                   7702500
Flow                     7702500
CORD SITC                7702500
Seasonal Adjustment      7702500
Measure Type             7702500
Value                    7702500
Unit                     7702500
dtype: int64

In [11]:
#table.dtypes

ONS Partner Geography    category
Period                   category
Flow                     category
CORD SITC                category
Seasonal Adjustment      category
Measure Type             category
Value                       int64
Unit                     category
dtype: object