HM Revenue and Customs Alcohol Bulletin - July 2018

In [1]:
from gssutils import *

# Catalogue page that the scraper is pointed at.
BULLETINS_URL = 'https://www.uktradeinfo.com/Statistics/Pages/TaxAndDutyBulletins.aspx'

scraper = Scraper(BULLETINS_URL)
# Leave the scraper as the last expression so the notebook renders it.
scraper

## Tax & Duty Bulletins

This is a catalog of datasets; choose one from the following:

* Hydrocarbon Oils Duties
* Alcohol Duty
* Insurance Premium Tax
* Value Added Tax
* Climate Change Levy
* Aggregates Levy
* Landfill Tax
* Betting, Gaming and Lottery Duties
* Air Passenger Duty
* Spirits

In [2]:
# Narrow the scraper down to the dataset titled 'Alcohol Duty'.
scraper.select_dataset(title='Alcohol Duty')
# Display the scraper so the selected dataset is rendered as the cell output.
scraper

## Alcohol Duty

### Distributions

1. Alcohol Duty January 2019 ([MS Excel Spreadsheet](https://www.uktradeinfo.com/Statistics/Tax%20and%20Duty%20Bulletins/Alcohol0119.xls))


In [3]:
def _is_alcohol_duty(title):
    """Return True when a distribution title starts with 'Alcohol Duty'."""
    return title.startswith('Alcohol Duty')


# Pick the distribution whose title matches the predicate above.
alcohol = scraper.distribution(title=_is_alcohol_duty)
# Load the distribution with sheet_name=None; the result is keyed by sheet
# name (see the keys displayed by this cell).
tabs = alcohol.as_pandas(sheet_name=None)
tabs.keys()

odict_keys(['Graph-Data', 'Cover', '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', '10', '11', '12', '13'])

In [4]:
frames = []

tidy = pd.DataFrame()
for tab_name, script in [
    ('2', 'Alcohol HMRC(2).ipynb'),
    ('3', 'Alcohol HMRC(3).ipynb'),
    ('4', 'Alcohol HMRC(4).ipynb'),
    ('5', 'Alcohol HMRC(5).ipynb'),
    ('7', 'Alcohol HMRC(7).ipynb'),
    ('8', 'Alcohol HMRC(8).ipynb'),
    ('9', 'Alcohol HMRC(9).ipynb'),
    ('10', 'Alcohol HMRC(10).ipynb'),
    ('12', 'Alcohol HMRC(12).ipynb')]:
    tab = tabs[tab_name]
    %run "$script"
    frames.append(Final_table)
#     tidy = pd.concat([tidy, Final_table])

tidy = pd.concat(frames, ignore_index=True)
tidy.dropna(how='any',axis=0, inplace =True)
tidy

of pandas will change to not sort by default.

To accept the future behavior, pass 'sort=False'.




Unnamed: 0,Alcohol Content,Alcohol Duty,Category,Measure Type,Period,Revision,Unit,Value
0,Not exceeding 15%,wine-of-fresh-grape,Still,quantities-consumption,2013/14,,hectolitres,1.14348e+07
1,Not exceeding 15%,wine-of-fresh-grape,Still,quantities-consumption,2014/15,,hectolitres,1.09225e+07
2,Not exceeding 15%,wine-of-fresh-grape,Still,quantities-consumption,2015/16,,hectolitres,1.12365e+07
3,Not exceeding 15%,wine-of-fresh-grape,Still,quantities-consumption,2016/17,,hectolitres,1.12264e+07
4,Not exceeding 15%,wine-of-fresh-grape,Still,quantities-consumption,2017/18,,hectolitres,1.1192e+07
5,Not exceeding 15%,wine-of-fresh-grape,Still,quantities-consumption,2014,,hectolitres,1.12436e+07
6,Not exceeding 15%,wine-of-fresh-grape,Still,quantities-consumption,2015,,hectolitres,1.12097e+07
7,Not exceeding 15%,wine-of-fresh-grape,Still,quantities-consumption,2016,,hectolitres,1.11819e+07
8,Not exceeding 15%,wine-of-fresh-grape,Still,quantities-consumption,2017,,hectolitres,1.13227e+07
9,Not exceeding 15%,wine-of-fresh-grape,Still,quantities-consumption,2018,,hectolitres,1.11102e+07


In [5]:
# List the distinct 'Alcohol Content' labels present in the combined data.
tidy['Alcohol Content'].unique()

array(['Not exceeding 15%', 'Over 15% ABV',
       'Composition by Origin above 5.5% ABV', 'Total',
       'Above 1.2% but not exceeding 5.5% ABV',
       'Above 5.5% ABV but not exceeding 15%', 'pure alcohol', '',
       'Various', 'all', 'Over 1.2%, up to and including 4.0%',
       'Over 4.0%, up to and including 5.5%',
       'Over 5.5%, up to and including 15.0%',
       'From 8.5%, up to and including 15.0%',
       'Over 1.2%, up to and including 7.5%',
       'Over 7.5% but less than 8.5% ', 'Over 5.5% but less than 8.5%',
       'ABV 22%', '1.2% to 2.8%'], dtype=object)

In [6]:
# Lookup from the 'Alcohol Content' labels to the slugs used in the output.
# Defined once (not per row) and with every source label listed exactly once.
alcohol_content_codes = {
    'Not exceeding 15%': 'not-exc-15',
    'Over 15% ABV': 'over-15',
    'Composition by Origin above 5.5% ABV': 'comp-by-origin-above-5-5',
    'Total': 'all',
    'Above 1.2% but not exceeding 5.5% ABV': 'above-1-2-not-exc-5-5',
    'pure alcohol': 'pure-alcohol',
    'Various': 'all',
    # An empty label is treated as covering everything.
    '': 'all',
    'Above 5.5% ABV but not exceeding 15%': 'above-5-5-not-exc-15',
    'ABV 7.5%': 'abv-7-5',
    '1.2% to 2.8%': '1-2-to-2-8',
    'Over 1.2%, up to and including 4.0%': 'over-1-2-up-to-and-incl-4-0',
    'Over 4.0%, up to and including 5.5%': 'over-4-0-up-to-and-incl-5-5',
    'Over 15.0%, up to and including 22.0%': 'over-15-0-up-to-and-incl-22',
    # Two spellings of the same band share one slug.
    'Over 5.5%, less than 8.5%': 'over-5-5-less-than-8-5',
    'Over 5.5% but less than 8.5%': 'over-5-5-less-than-8-5',
    'From 8.5%, up to and including 15.0%': 'from-8-5-up-to-and-incl-15-0',
    'ABV 22%': 'abv-22',
    'Over 1.2%, up to and including 7.5%': 'over-1-2-up-to-and-incl-7-5',
    # The trailing space is intentional: the label carries it in the data.
    'Over 7.5% but less than 8.5% ': 'over-7-5-less-than-8-5',
    'Over 5.5%, up to and including 15.0%': 'over-5-5-up-to-and-incl-15-0',
}

# Labels with no entry in the lookup are passed through unchanged.
tidy['Alcohol Content'] = tidy['Alcohol Content'].map(
    lambda label: alcohol_content_codes.get(label, label))

In [7]:
# Show the distinct 'Alcohol Content' values again, now that they have been recoded.
tidy['Alcohol Content'].unique()

array(['not-exc-15', 'over-15', 'comp-by-origin-above-5-5', 'all',
       'above-1-2-not-exc-5-5', 'above-5-5-not-exc-15', 'pure-alcohol',
       'over-1-2-up-to-and-incl-4-0', 'over-4-0-up-to-and-incl-5-5',
       'over-5-5-up-to-and-incl-15-0', 'from-8-5-up-to-and-incl-15-0',
       'over-1-2-up-to-and-incl-7-5', 'over-7-5-less-than-8-5',
       'over-5-5-less-than-8-5', 'abv-22', '1-2-to-2-8'], dtype=object)

In [8]:
import datetime

In [9]:
def user_perc(x):
    """Convert a raw period label into a period identifier string.

    The label is converted with ``str`` and classified by its
    third-from-last character:

    * ``'/'`` (e.g. ``'2013/14'``): ``'gregorian-interval/'`` + first four
      characters + ``'-04-01T00:00:00/P1Y'``.
    * ``':'`` (e.g. ``'2017-11-01 00:00:00'``): ``'month/'`` + first seven
      characters.
    * ``'.'`` (``dd.mm.yy``, e.g. ``'01.02.19'``): ``'day/YYYY-MM-DD'``.
    * anything else: ``'year/'`` + the label.

    Labels shorter than three characters have no third-from-last character
    and are handled by the final ``'year/'`` case.

    Raises:
        ValueError: if a label classified as a day does not parse as
            ``%d.%m.%y``.
    """
    label = str(x)
    # Guard the index so short labels do not raise IndexError.
    marker = label[-3] if len(label) >= 3 else ''
    if marker == '/':
        return 'gregorian-interval/' + label[:4] + '-04-01T00:00:00/P1Y'
    if marker == ':':
        return 'month/' + label[:7]
    if marker == '.':
        return 'day/' + datetime.datetime.strptime(label, '%d.%m.%y').strftime('%Y-%m-%d')
    return 'year/' + label
    
# Recode every 'Period' label with user_perc. Mapping the column directly
# avoids building a row object per record just to read one field.
tidy['Period'] = tidy['Period'].map(user_perc)

In [10]:
# Lookup from the raw 'Category' labels to the slugs stored in the new
# 'Alcohol Category' column. Several keys carry trailing spaces or footnote
# digits; they are kept exactly as written.
alcohol_category_codes = {
    'Still': 'still',
    'Sparkling': 'sparkling',
    'Over 15% ABV': 'total',
    'Imported ex-ship': 'imported-ex-ship',
    'Ex-warehouse': 'ex-warehouse',
    'UK registered premises': 'uk-registered-premises',
    'Total wine of fresh grape': 'total-wine-of-fresh-grape',
    'Total Wine': 'total-wine',
    'Total Alcohol': 'total-alcohol',
    'Above 1.2% but not exceeding 5.5% ABV': 'total',
    'Still2 ': 'still',
    'Sparkling ': 'sparkling',
    'Total made wine': 'total-made-wine',
    'Total wine3 ': 'total-wine',
    'Total alcohol': 'total-alcohol',
    'Production of Potable Spirits': 'spirits',
    'Malt': 'hpw-malt',
    'Grain and Blended': 'hpw-grain-blended',
    'Total Home Produced Whisky': 'hpw-total',
    'Spirit Based RTDs': 'spirit-based-rtds',
    'Imported and Other Spirits': 'imported-and-other-spirits',
    'Net Quantities of Spirits Charged with Duty': 'total-spirits',
    'Total Spirits': 'total-spirits',
    'UK Beer Production': 'total-beer',
    'UK Alcohol Production': 'total-alcohol-production',
    'Ex-warehouse and imports': 'ex-warehouse-and-imports',
    'Total beer clearances': 'total-beer-clearances',
    'Alcohol Clearances': 'total-alcohol-clearances',
    'Cider Clearances': 'total-cider-clearances',
    'Total Beer': 'total-beer',
    'Total Cider': 'total-cider',
    'Above 1.2% but not exceeding 5.5% ABV 1': 'total',
    'Still Wine': 'still',
    'Sparkling Wine': 'sparkling',
    'Ready-to-Drink ': 'rtd',
    'Still Cider': 'still',
    'Sparkling Cider': 'sparkling',
    'Spirits-Based RTDs': 'spirit-based-rtds',
    'Spirits': 'spirits',
    'Beer': 'beer',
    'Breweries Producing 5000 Hls Or Less': 'breweries-5000-less',
    'Breweries Producing 5000 to 30000 Hls': 'breweries-5000-30000',
    'Breweries Producing 30000 to 60000 Hls': 'breweries-30000-60000',
    'High Strength Beers': 'high-strength-beers',
    'Low Strength Beers': 'low-strength-beers',
}

# Derive the new column from 'Category'; labels without an entry in the
# lookup are carried over unchanged.
tidy['Alcohol Category'] = tidy['Category'].map(
    lambda label: alcohol_category_codes.get(label, label))

In [11]:
# List the distinct 'Measure Type' codes present in the combined data.
tidy['Measure Type'].unique()

array(['quantities-consumption', 'revenue', 'potable-spirits',
       'net-quantities-spirits', 'uk-beer', 'alcohol-clearences',
       'beer-clearences', 'cider-clearences', 'rates-of-duty'],
      dtype=object)

In [12]:
# Lookup from the 'Measure Type' codes to their display labels. The
# '-clearences' spelling of the keys is kept as written so they keep matching.
measure_type_labels = {
    'quantities-consumption': 'Quantities Released for Consumption',
    'revenue': 'Revenue',
    'potable-spirits': 'Production of Potable Spirits',
    'net-quantities-spirits': 'Net Quantities of Spirits Charged with Duty',
    'uk-beer': 'UK Beer Production',
    'alcohol-clearences': 'Alcohol Clearances',
    'beer-clearences': 'Beer Clearances',
    'cider-clearences': 'Cider Clearances',
    'rates-of-duty': 'Rates of Duty',
}

# Codes without an entry in the lookup are left as they are.
tidy['Measure Type'] = tidy['Measure Type'].map(
    lambda code: measure_type_labels.get(code, code))

In [13]:
# Show the distinct 'Measure Type' values again, now that they have been relabelled.
tidy['Measure Type'].unique()

array(['Quantities Released for Consumption', 'Revenue',
       'Production of Potable Spirits',
       'Net Quantities of Spirits Charged with Duty',
       'UK Beer Production', 'Alcohol Clearances', 'Beer Clearances',
       'Cider Clearances', 'Rates of Duty'], dtype=object)

In [14]:
# List the distinct 'Unit' values present in the combined data.
tidy['Unit'].unique()

array(['hectolitres', 'gbp-million', 'hectolitres-thousands',
       'gbp-per-hl-product', 'gbp-per-l-pure-alcohol',
       'gbp-per-1-abv-per-hl'], dtype=object)

In [15]:
# List the distinct 'Revision' markers present in the combined data.
tidy['Revision'].unique()

array(['', 'provisional', 'revised'], dtype=object)

In [16]:
# Lookup for the 'Revision' markers: both "estimated ..." phrasings collapse
# to 'estimated' and an empty marker becomes 'original-value'.
revision_codes = {
    'estimated based on previous Periods': 'estimated',
    'estimated based on previous years': 'estimated',
    '': 'original-value',
}

# Markers without an entry in the lookup are kept unchanged.
tidy['Revision'] = tidy['Revision'].map(
    lambda marker: revision_codes.get(marker, marker))


In [17]:
# Preview the first rows of the recoded table.
tidy.head()

Unnamed: 0,Alcohol Content,Alcohol Duty,Category,Measure Type,Period,Revision,Unit,Value,Alcohol Category
0,not-exc-15,wine-of-fresh-grape,Still,Quantities Released for Consumption,gregorian-interval/2013-04-01T00:00:00/P1Y,original-value,hectolitres,11434800.0,still
1,not-exc-15,wine-of-fresh-grape,Still,Quantities Released for Consumption,gregorian-interval/2014-04-01T00:00:00/P1Y,original-value,hectolitres,10922500.0,still
2,not-exc-15,wine-of-fresh-grape,Still,Quantities Released for Consumption,gregorian-interval/2015-04-01T00:00:00/P1Y,original-value,hectolitres,11236500.0,still
3,not-exc-15,wine-of-fresh-grape,Still,Quantities Released for Consumption,gregorian-interval/2016-04-01T00:00:00/P1Y,original-value,hectolitres,11226400.0,still
4,not-exc-15,wine-of-fresh-grape,Still,Quantities Released for Consumption,gregorian-interval/2017-04-01T00:00:00/P1Y,original-value,hectolitres,11192000.0,still


In [18]:
# Keep only the rows whose 'Value' is present.
tidy = tidy[tidy['Value'].notnull()]

In [19]:
# Remove fully duplicated rows, keeping the first occurrence of each
# (these are the drop_duplicates defaults).
tidy = tidy.drop_duplicates()

In [20]:
# Store 'Value' as text.
# NOTE(review): float values keep their trailing '.0' when converted this way
# (e.g. '11434771.0') -- confirm that is the intended output format.
tidy = tidy.assign(Value=tidy['Value'].astype(str))

In [21]:
# Columns written to the output, in order. The raw 'Category' column is not
# included; 'Alcohol Category' takes its place.
output_columns = [
    'Period',
    'Alcohol Category',
    'Alcohol Duty',
    'Alcohol Content',
    'Measure Type',
    'Value',
    'Unit',
    'Revision',
]
tidy = tidy[output_columns]

In [22]:
from pathlib import Path

# Output directory, relative to the working directory; created if missing.
out = Path('out')
out.mkdir(exist_ok=True)

# Write the observations without the DataFrame index.
observations_csv = out / 'observations.csv'
tidy.to_csv(observations_csv, index=False)

Try to grab the metadata from the spreadsheet's 'Cover' tab.

We already know the title and the comment.

In [23]:
import numpy as np
from dateutil.parser import parse

# Cells of the cover sheet that introduce a metadata section.
cover_headings = ['Coverage:', 'Theme:', 'Released:',
                  'Next release:', 'Frequency of release:',
                  'Media contact:', 'Statistical contacts:', 'Website:']

heading = None
stats_contacts = []   # one list of lines per statistical contact group
contact_info = []     # lines of the contact group currently being read
for v in tabs['Cover']['Unnamed: 2']:
    if isinstance(v, str) and v.strip() in cover_headings:
        # A new section starts: close any statistical contact group still open
        # and start collecting afresh, so lines from one section never leak
        # into another.
        if heading == 'Statistical contacts:' and contact_info:
            stats_contacts.append(contact_info)
        contact_info = []
        # Store the stripped heading; the comparisons below are exact, so a
        # heading cell with stray whitespace would otherwise never match.
        heading = v.strip()
    elif heading:
        if isinstance(v, str):
            value = v.strip()
            if heading == 'Coverage:':
                assert value == 'United Kingdom', 'Expected spatial coverage to be UK'
                scraper.dataset.spatial = 'http://statistics.data.gov.uk/id/statistical-geography/K02000001'
            elif heading == 'Theme:':
                assert value == 'The Economy', 'Expected theme to be "The Economy"'
                scraper.dataset.theme = 'https://www.statisticsauthority.gov.uk/themes/economy/'
            elif heading == 'Released:':
                scraper.dataset.issued = parse(value)
            elif heading == 'Next release:':
                scraper.dataset.nextUpdateDue = parse(value)
            elif heading == 'Frequency of release:':
                # Recognised but not recorded.
                pass
            elif heading == 'Website:':
                scraper.dataset.landingPage = value
            if heading in ['Statistical contacts:', 'Media contact:']:
                contact_info.append(v)
            print(f'{heading} {value}')
        elif heading == 'Statistical contacts:':
            # A non-text cell separates one statistical contact from the next;
            # the heading stays active so following lines start a new group.
            if contact_info:
                stats_contacts.append(contact_info)
            contact_info = []
        else:
            # A non-text cell ends any other section.
            heading = None

# Close a statistical contact group that runs to the end of the column.
if heading == 'Statistical contacts:' and contact_info:
    stats_contacts.append(contact_info)

scraper.dataset.family = 'health'
# The two literals are joined, so the first must end with a space.
scraper.dataset.comment = 'The Alcohol Bulletin provides monthly statistics on clearances of ' \
    'beer, wine, spirits and cider and duty receipts for the UK.'

# Write the metadata generated by the scraper next to the observations file.
with open(out / 'dataset.trig', 'wb') as metadata:
    metadata.write(scraper.generate_trig())


Coverage: United Kingdom
Theme:  The Economy
Released: 28 February 2019
Next release: 31 May 2019
Frequency of release: Quarterly
Media contact: HMRC Press Office
Media contact: 03000 585 024
Statistical contacts: Mark Armstrong-Wood
Statistical contacts: 03000 574 564
Statistical contacts: revenuemonitoring@hmrc.gsi.gov.uk
Statistical contacts: Mark Dickson
Statistical contacts: 03000 515 305
Statistical contacts: revenuemonitoring@hmrc.gsi.gov.uk
Statistical contacts: KAI Indirect Taxes, Customs & Coordination
Statistical contacts: Revenue Monitoring Team
Statistical contacts: HM Revenue and Customs
Statistical contacts: 100 Parliament Street
Statistical contacts: London
Statistical contacts: SW1A 2BQ
Website: https://www.uktradeinfo.com/Statistics/Pages/TaxAndDutyBulletins.aspx


In [24]:
# List the distinct 'Period' identifiers produced by the recode.
tidy['Period'].unique()

array(['gregorian-interval/2013-04-01T00:00:00/P1Y',
       'gregorian-interval/2014-04-01T00:00:00/P1Y',
       'gregorian-interval/2015-04-01T00:00:00/P1Y',
       'gregorian-interval/2016-04-01T00:00:00/P1Y',
       'gregorian-interval/2017-04-01T00:00:00/P1Y', 'year/2014',
       'year/2015', 'year/2016', 'year/2017', 'year/2018',
       'month/2017-11', 'month/2017-12', 'month/2018-01', 'month/2018-02',
       'month/2018-03', 'month/2018-04', 'month/2018-05', 'month/2018-06',
       'month/2018-07', 'month/2018-08', 'month/2018-09', 'month/2018-10',
       'month/2018-11', 'month/2018-12', 'month/2019-01',
       'gregorian-interval/1999-04-01T00:00:00/P1Y',
       'gregorian-interval/2000-04-01T00:00:00/P1Y',
       'gregorian-interval/2001-04-01T00:00:00/P1Y',
       'gregorian-interval/2002-04-01T00:00:00/P1Y',
       'gregorian-interval/2003-04-01T00:00:00/P1Y',
       'gregorian-interval/2004-04-01T00:00:00/P1Y',
       'gregorian-interval/2005-04-01T00:00:00/P1Y',
       'g

In [25]:
# Display the final table as the cell output.
tidy

Unnamed: 0,Period,Alcohol Category,Alcohol Duty,Alcohol Content,Measure Type,Value,Unit,Revision
0,gregorian-interval/2013-04-01T00:00:00/P1Y,still,wine-of-fresh-grape,not-exc-15,Quantities Released for Consumption,11434771.0,hectolitres,original-value
1,gregorian-interval/2014-04-01T00:00:00/P1Y,still,wine-of-fresh-grape,not-exc-15,Quantities Released for Consumption,10922458.0,hectolitres,original-value
2,gregorian-interval/2015-04-01T00:00:00/P1Y,still,wine-of-fresh-grape,not-exc-15,Quantities Released for Consumption,11236512.0,hectolitres,original-value
3,gregorian-interval/2016-04-01T00:00:00/P1Y,still,wine-of-fresh-grape,not-exc-15,Quantities Released for Consumption,11226356.0,hectolitres,original-value
4,gregorian-interval/2017-04-01T00:00:00/P1Y,still,wine-of-fresh-grape,not-exc-15,Quantities Released for Consumption,11192015.0,hectolitres,original-value
5,year/2014,still,wine-of-fresh-grape,not-exc-15,Quantities Released for Consumption,11243599.0,hectolitres,original-value
6,year/2015,still,wine-of-fresh-grape,not-exc-15,Quantities Released for Consumption,11209744.0,hectolitres,original-value
7,year/2016,still,wine-of-fresh-grape,not-exc-15,Quantities Released for Consumption,11181927.0,hectolitres,original-value
8,year/2017,still,wine-of-fresh-grape,not-exc-15,Quantities Released for Consumption,11322703.0,hectolitres,original-value
9,year/2018,still,wine-of-fresh-grape,not-exc-15,Quantities Released for Consumption,11110242.0,hectolitres,original-value


In [26]:
# List the distinct 'Measure Type' labels in the final table.
tidy['Measure Type'].unique()

array(['Quantities Released for Consumption', 'Revenue',
       'Production of Potable Spirits',
       'Net Quantities of Spirits Charged with Duty',
       'UK Beer Production', 'Alcohol Clearances', 'Beer Clearances',
       'Cider Clearances', 'Rates of Duty'], dtype=object)