HM Revenue and Customs Alcohol Bulletin - July 2018

In [1]:
from gssutils import *

# Landing page that lists the HMRC tax and duty bulletins; the scraper is
# pointed at it and then displayed as the cell's output.
BULLETINS_URL = 'https://www.uktradeinfo.com/Statistics/Pages/TaxAndDutyBulletins.aspx'

scraper = Scraper(BULLETINS_URL)
scraper

### Distributions

1. Value Added Tax September 2018 ([MS Excel Spreadsheet](https://www.uktradeinfo.com/Statistics/Tax%20and%20Duty%20Bulletins/VAT0918.xls))
1. Hydrocarbon Oils Duties August 2018 ([MS Excel Spreadsheet](https://www.uktradeinfo.com/Statistics/Tax%20and%20Duty%20Bulletins/Oils0818.xls))
1. Alcohol Duty July 2018 ([MS Excel Spreadsheet](https://www.uktradeinfo.com/Statistics/Tax%20and%20Duty%20Bulletins/Alcohol0718.xls))
1. Tobacco Duties July 2018 ([MS Excel Spreadsheet](https://www.uktradeinfo.com/Statistics/Tax%20and%20Duty%20Bulletins/Tobacco0718.xls))
1. Insurance Premium Tax June 2018 ([MS Excel Spreadsheet](https://www.uktradeinfo.com/Statistics/Tax%20and%20Duty%20Bulletins/ipt0618.xls))
1. Climate Change Levy May 2018 ([MS Excel Spreadsheet](https://www.uktradeinfo.com/Statistics/Tax%20and%20Duty%20Bulletins/ccl0518.xls))
1. Aggregates Levy May 2018 ([MS Excel Spreadsheet](https://www.uktradeinfo.com/Statistics/Tax%20and%20Duty%20Bulletins/agl0518.xls))
1. Landfill Tax April 2018 ([MS Excel Spreadsheet](https://www.uktradeinfo.com/Statistics/Tax%20and%20Duty%20Bulletins/lft0418.xls))
1. Betting, Gaming and Lottery Duties March 2018 ([MS Excel Spreadsheet](https://www.uktradeinfo.com/Statistics/Tax%20and%20Duty%20Bulletins/betting0318.xls))
1. Air Passenger Duty March 2018 ([MS Excel Spreadsheet](https://www.uktradeinfo.com/Statistics/Tax%20and%20Duty%20Bulletins/apd0318.xls))


In [2]:
# Select the distribution whose title begins with "Alcohol Duty" from the
# bulletins found by the scraper.
alcohol = scraper.distribution(
    title=lambda title: title.startswith('Alcohol Duty')
)

# Load the workbook with sheet_name=None; the recorded output shows the result
# is keyed by sheet name ('Graph-Data', 'Cover', '0' ... '13').
tabs = alcohol.as_pandas(sheet_name=None)
tabs.keys()

odict_keys(['Graph-Data', 'Cover', '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', '10', '11', '12', '13'])

In [3]:
frames = []

for tab_name, script in [
    ('2', 'Alcohol HMRC(2).ipynb'),
    ('3', 'Alcohol HMRC(3).ipynb'),
    ('4', 'Alcohol HMRC(4).ipynb'),
    ('5', 'Alcohol HMRC(5).ipynb'),
    ('7', 'Alcohol HMRC(7).ipynb'),
    ('8', 'Alcohol HMRC(8).ipynb'),
    ('9', 'Alcohol HMRC(9).ipynb'),
    ('10', 'Alcohol HMRC(10).ipynb'),
    ('12', 'Alcohol HMRC(12).ipynb')]:
    tab = tabs[tab_name]
    %run "$script"
    frames.append(new_table)

tidy = pd.concat(frames, ignore_index=True)
tidy.dropna(how='any',axis=0, inplace =True)
tidy

Unnamed: 0,Year,Category,Alcohol Content,Measure Type,Value,Unit,Revision
0,2013/14,Still,Not exceeding 15%,Net Volume,11434771,hectolitres,
1,2014/15,Still,Not exceeding 15%,Net Volume,10922458,hectolitres,
2,2015/16,Still,Not exceeding 15%,Net Volume,11236512,hectolitres,
3,2016/17,Still,Not exceeding 15%,Net Volume,11226356,hectolitres,
4,2017/18,Still,Not exceeding 15%,Net Volume,11192015,hectolitres,
5,2013,Still,Not exceeding 15%,Net Volume,11585699,hectolitres,
6,2014,Still,Not exceeding 15%,Net Volume,11243599,hectolitres,
7,2015,Still,Not exceeding 15%,Net Volume,11209744,hectolitres,
8,2016,Still,Not exceeding 15%,Net Volume,11181927,hectolitres,
9,2017,Still,Not exceeding 15%,Net Volume,11322703,hectolitres,


In [4]:
from pathlib import Path

# Output directory for generated files; an already existing directory is
# accepted (exist_ok=True).
out = Path('out')
out.mkdir(exist_ok=True)

# Write the tidy observations as CSV, leaving out the DataFrame index column.
observations_csv = out / 'observations.csv'
tidy.to_csv(observations_csv, index=False)

Try to grab the metadata from the spreadsheet's 'Cover' tab.

The title and the comment are already known, so they are not read from the sheet; the comment is set explicitly in the code below.

In [5]:
import numpy as np
from dateutil.parser import parse

# Walk down the 'Unnamed: 2' column of the Cover sheet. A cell whose stripped
# text is one of the known labels starts a new section; the string cells that
# follow are that section's values. A non-string cell (e.g. a blank/NaN cell)
# ends the section, except under 'Statistical contacts:', where it separates
# one contact from the next.
heading = None
stats_contacts = []
contact_info = []
for v in tabs['Cover']['Unnamed: 2']:
    # Normalise once: cells may carry stray leading/trailing whitespace.
    text = v.strip() if type(v) == str else None
    if text in ['Coverage:', 'Theme:', 'Released:',
                'Next release:', 'Frequency of release:',
                'Media contact:', 'Statistical contacts:', 'Website:']:
        # Store the *stripped* label. Storing the raw cell meant a label such
        # as 'Theme: ' (trailing space) never equalled 'Theme:' below, so the
        # corresponding metadata was silently skipped.
        heading = text
    elif heading:
        if text is not None:
            if heading == 'Coverage:':
                assert text == 'United Kingdom', 'Expected spatial coverage to be UK'
                scraper.dataset.spatial = 'http://statistics.data.gov.uk/id/statistical-geography/K02000001'
            elif heading == 'Theme:':
                assert text == 'The Economy', 'Expected theme to be "The Economy"'
                scraper.dataset.theme = 'https://www.statisticsauthority.gov.uk/themes/economy/'
            elif heading == 'Released:':
                scraper.dataset.issued = parse(text)
            elif heading == 'Next release:':
                scraper.dataset.nextUpdateDue = parse(text)
            elif heading == 'Frequency of release:':
                # Recognised so that it is printed below, but not recorded.
                pass
            elif heading == 'Website:':
                # The label includes the trailing colon; comparing against
                # 'Website' (no colon) meant this branch could never run.
                scraper.dataset.landingPage = text
            if heading in ['Statistical contacts:', 'Media contact:']:
                # NOTE(review): media-contact lines are collected here too and
                # are not cleared when that section ends, so they appear to end
                # up in the first statistical contact. The last contact group
                # is also only flushed if a non-string cell follows it.
                # `stats_contacts` is not used further in this notebook --
                # confirm the intended grouping before relying on it.
                contact_info.append(text)
            print(f'{heading} {text}')
        elif heading == 'Statistical contacts:':
            # Non-string cell between contacts: close the current group.
            stats_contacts.append(contact_info)
            contact_info = []
        else:
            heading = None

scraper.dataset.family = 'health'
# Adjacent string literals are concatenated with nothing in between, so the
# first literal must end with a space ("clearances of beer", not "ofbeer").
scraper.dataset.comment = 'The Alcohol Bulletin provides monthly statistics on clearances of ' \
    'beer, wine, spirits and cider and duty receipts for the UK.'

# Write the dataset metadata next to the observations file.
with open(out / 'dataset.trig', 'wb') as metadata:
    metadata.write(scraper.generate_trig())


Coverage: United Kingdom
Theme:  The Economy
Released: 31 August 2018
Next release: 30 November 2018
Frequency of release: Quarterly
Media contact: HMRC Press Office
Media contact: 03000 585 024
Statistical contacts: Mark Armstrong-Wood
Statistical contacts: 03000 574 564
Statistical contacts: revenuemonitoring@hmrc.gsi.gov.uk
Statistical contacts: Mark Dickson
Statistical contacts: 03000 515 305
Statistical contacts: revenuemonitoring@hmrc.gsi.gov.uk
Statistical contacts: KAI Indirect Taxes, Customs & Coordination
Statistical contacts: Revenue Monitoring Team
Statistical contacts: HM Revenue and Customs
Statistical contacts: 100 Parliament Street
Statistical contacts: London
Statistical contacts: SW1A 2BQ
Website: https://www.uktradeinfo.com/Statistics/Pages/TaxAndDutyBulletins.aspx
