Business count by Employee Size

In [1]:
from databaker.framework import *
import pandas as pd

In [2]:
import requests
from pathlib import Path

# Local cache folder for the source workbook.
sourceFolder = Path('in')
sourceFolder.mkdir(exist_ok=True)

inputURL = 'https://www.uktradeinfo.com/statistics/documents/idbr_ots_tables_2015.xls'
inputFile = sourceFolder / 'idbr_ots_tables_2015.xls'

# Download only when there is no cached copy (is_file() is already False for a
# path that does not exist, so a separate exists() check is unnecessary).
if not inputFile.is_file():
    # A timeout stops a stalled connection from hanging the notebook forever.
    response = requests.get(inputURL, timeout=60)
    # Fail loudly on an HTTP error. Without this, an error page would be saved
    # under the workbook's name and, because of the cache check above, silently
    # reused on every later run.
    response.raise_for_status()
    inputFile.write_bytes(response.content)

In [3]:
# Load every worksheet of the downloaded workbook; the call prints the file
# size and the sheet names in order. At this point `tab` holds all sheets.
tab = loadxlstabs(inputFile)

Loading in\idbr_ots_tables_2015.xls which has size 174080 bytes
Table names: ['Notes and Contents', 'Industry Group', 'Age Group', 'Employee Size', 'Industry_Age', 'Industry_EmployeeSize', 'EmployeeSize_Age', 'Metadata']


In [4]:
# Narrow `tab` from the list of all sheets to the 'Employee Size' sheet.
# The sheet is chosen by name rather than by position (it was index 3), so a
# reordered workbook cannot silently select the wrong table. The isinstance
# guard makes the cell safe to re-run once `tab` is already a single sheet.
if isinstance(tab, list):
    matching_sheets = [sheet for sheet in tab if sheet.name == 'Employee Size']
    if not matching_sheets:
        raise ValueError(
            "Sheet 'Employee Size' not found; available sheets: %s"
            % [sheet.name for sheet in tab]
        )
    tab = matching_sheets[0]

In [5]:
# Render a preview of the selected sheet so the cell references used in the
# following cells can be checked by eye.
savepreviewhtml(tab)

0,1,2,3,4,5,6,7,8
TRADE IN GOODS STATISTICS MATCHED WITH REGISTERED BUSINESSES FROM THE INTER-DEPARTMENTAL BUSINESS REGISTER,,,,,,,,
,,,,,,,,
Release Date 24/11/2016,,,,,,,,
,,,,,,,,
Total value of UK trade by Employee Size,,,,£ millions,,,,
,,Exports,,Imports,,,,
Employee Size,,2015.0,,2015.0,,,,
,,,,,,,,
0.0,,26919.0,,44923.0,,,,
1 to 9,,16369.0,,26778.0,,,,


In [6]:
# Candidate observations: every non-blank cell from C23 downwards and to the
# right.
# NOTE(review): the subtraction of C22-and-above looks redundant, since that
# region is not part of the C23 expansion. Kept as-is; confirm before removing.
observations = (
    tab.excel_ref('C23')
    .expand(DOWN)
    .expand(RIGHT)
    .is_not_blank()
    - tab.excel_ref('C22').expand(UP)
)

In [7]:
# Drop everything from C31 downwards and rightwards from the selection, so
# only the rows above row 31 remain as observations.
cells_from_row_31 = tab.excel_ref('C31').expand(DOWN).expand(RIGHT)
observations = observations - cells_from_row_31

In [8]:
# Row labels: column A. Used below as the 'Total Employees' dimension, which
# is looked up directly to the left of each observation.
Employeesize = tab.excel_ref('A').expand(DOWN)

In [9]:
# Flow headers: row 6 from column C rightwards (the sheet preview shows
# 'Exports' and 'Imports' in that row).
Flow = tab.excel_ref('C6').expand(RIGHT)

In [10]:
# Dimensions attached to every observation, in output order.
Dimensions = [
    # Constant values shared by the whole table.
    HDimConst('Geography', 'K02000001'),
    HDimConst('Year', '2015'),
    HDimConst('Unit', 'Count'),
    HDimConst('Measure Type', 'Business'),
    # Values looked up from the sheet for each observation.
    HDim(Employeesize, 'Total Employees', DIRECTLY, LEFT),
    HDim(Flow, 'Flow', CLOSEST, LEFT),
]

In [11]:
# Bind the selected observations to their dimensions.
c1 = ConversionSegment(
    observations,
    Dimensions,
    processTIMEUNIT=True,
)

In [12]:
# Preview the conversion segment to check the observation and dimension
# selections against the sheet.
savepreviewhtml(c1)

0,1,2
OBS,Total Employees,Flow

0,1,2,3,4,5,6,7,8
TRADE IN GOODS STATISTICS MATCHED WITH REGISTERED BUSINESSES FROM THE INTER-DEPARTMENTAL BUSINESS REGISTER,,,,,,,,
,,,,,,,,
Release Date 24/11/2016,,,,,,,,
,,,,,,,,
Total value of UK trade by Employee Size,,,,£ millions,,,,
,,Exports,,Imports,,,,
Employee Size,,2015.0,,2015.0,,,,
,,,,,,,,
0.0,,26919.0,,44923.0,,,,
1 to 9,,16369.0,,26778.0,,,,


In [13]:
# Flatten the conversion segment into a pandas DataFrame; the observation
# values arrive in a column named 'OBS' (renamed in the next cell).
new_table = c1.topandas()




In [14]:
# Rename the 'OBS' column to 'Value' and convert the index labels to strings.
# Rebinding the result (instead of inplace=True) keeps the cell chain-friendly.
new_table = new_table.rename(index=str, columns={'OBS': 'Value'})

In [15]:
# Business counts are whole numbers: store 'Value' as an integer column.
new_table = new_table.assign(
    Value=new_table['Value'].astype('int', copy=False)
)

In [16]:
# Trim surrounding whitespace from every text column, in the same order as
# the original one-line-per-column version.
for text_column in ('Total Employees', 'Year', 'Flow', 'Unit', 'Measure Type', 'Geography'):
    new_table[text_column] = new_table[text_column].str.strip()

In [17]:
# Keep only the output columns, in the order they should appear in the CSV.
output_columns = [
    'Geography',
    'Year',
    'Total Employees',
    'Flow',
    'Measure Type',
    'Value',
    'Unit',
]
new_table = new_table[output_columns]

In [18]:
# Spot-check the first four rows of the tidied table.
new_table.head(4)

Unnamed: 0,Geography,Year,Total Employees,Flow,Measure Type,Value,Unit
0,K02000001,2015,0.0,Exports,Business,9156,Count
1,K02000001,2015,0.0,Imports,Business,14983,Count
2,K02000001,2015,1 to 9,Exports,Business,77281,Count
3,K02000001,2015,1 to 9,Imports,Business,117876,Count


In [19]:
# Spot-check the last four rows of the tidied table.
new_table.tail(4)

Unnamed: 0,Geography,Year,Total Employees,Flow,Measure Type,Value,Unit
10,K02000001,2015,Unknown,Exports,Business,9746,Count
11,K02000001,2015,Unknown,Imports,Business,23337,Count
12,K02000001,2015,Total,Exports,Business,143358,Count
13,K02000001,2015,Total,Imports,Business,219302,Count


In [20]:
# Write the tidied table to out/BusinesscountbyEmployeeSize.csv, creating the
# output folder if needed. The DataFrame index is not written.
out = Path('out')
out.mkdir(exist_ok=True)
output_csv = out / 'BusinesscountbyEmployeeSize.csv'
new_table.to_csv(output_csv, index=False)