# CHP register

Takes data from the BEIS CHP register
https://chptools.decc.gov.uk/chp/public

* Locations are postcodes, which have to be converted to lat/long
* This is not pinpoint accurate
* Data does not contain dates of opening or closing

* Data has been manually cross-checked against the map
* Large sites already appearing in the map data are excluded
* 99 sites below 1 MW are excluded, totalling 25 MW
* 6 sites with no postcode match are excluded (another 25 MW)



In [44]:
import pandas as pd
import numpy as np
import pprint as pprint
import matplotlib.pyplot as plt
import pyproj
import re
import os

# work from the project folder so the relative CSV paths used below resolve
os.chdir('/Users/simon/Desktop/uk-map/')
# IPython magic: render matplotlib figures inline in the notebook
%matplotlib inline


In [45]:
# read the full dataset

df = pd.read_csv('CHP Public Data.csv')

# add a capacity column in MW (the source kWe column is dropped further down)

df['Capacity (MW)'] = df['CHPTPC (kWe)'] / 1000

# drop sites identified as already in the map
# first reset the index so each row is uniquely numbered

df = df.reset_index(drop=True)

# na=False treats a missing company name as "no match" instead of putting a
# NaN into the boolean mask, which pandas refuses to index with
already_mapped = (
    (df['POSTCODE'] == 'DN40 3DZ')
    | (df['POSTCODE'] == 'KY7 6GU')
    | (df['POSTCODE'] == 'DL17 0SD')
    | df['COMPANYNAME'].str.contains('sund', regex=False, na=False)
    | (df['COMPANYNAME'] == 'IGGESUND PAPERBOARD (WORKINGTON) LTD')
)

# .copy() gives an independent frame, so the column assignments below do not
# act on a slice of the raw data (avoids SettingWithCopyWarning)
df = df[~already_mapped].copy()

# create fuel column and assume either gas, bio or waste

prime_mover = df['PRIMEMOVER']

# "gas"/"Gas" followed by a space, hyphen or full stop, or a reciprocating
# engine, is taken to be gas-fired. Inside the character class the full stop
# is literal; as a bare regex 'gas.' would match "gas" plus any character.
is_gas = prime_mover.str.contains(r'[Gg]as[ .-]|Reciprocating engine',
                                  na=False)

# steam turbines in the printing sector are assumed to burn biomass
is_biomass = (
    prime_mover.str.contains('steam turbine', regex=False, na=False)
    & df['SECTOR'].str.contains('printing', regex=False, na=False)
)

# biomass takes precedence over gas; anything matching neither rule is waste
fuel = np.where(is_gas, 'Gas', 'Waste')
df['Fuel'] = np.where(is_biomass, 'Biomass', fuel)

# assign types
# later rules override earlier ones; unmatched rows keep an empty string

plant_type = np.where(
    prime_mover.str.contains('Combined cycle gas', regex=False, na=False),
    'Combined cycle', '')
plant_type = np.where(
    prime_mover.str.contains('Simple', regex=False, na=False),
    'Gas turbine', plant_type)
df['Type'] = np.where(
    prime_mover.str.contains('Reciprocating', regex=False, na=False),
    'Reciprocating engine', plant_type)

# label as CHP and, for bio, low-carbon

df['CHP'] = 'Yes'
df['Low-Carbon'] = np.where(df['Fuel'] == 'Biomass', 'Yes', 'No')

# assume all sites opened before 2008 and won't close

df['yearStart'] = 2008
df['yearEnd'] = 2019

# tidy up: friendlier column names and title-cased text

df = df.rename(columns={'COMPANYNAME': 'Site', 'CHPREGION': 'Region'})

df['Region'] = df['Region'].str.title()
df['Site'] = df['Site'].str.title()

df = df.drop(columns=['CHPTPC (kWe)', 'TOWN', 'COUNTY', 'SECTOR'])

# drop 99 sites <1MW, totalling 25MW of capacity
# .copy() because POSTCODE is reassigned on this frame later in the notebook

df = df[df['Capacity (MW)'] >= 1].copy()

In [46]:
# read the postcode data

# this is from the ONS
# I've used the Feb 2019 data as a number of boundary changes happened in April 2019
# https://geoportal.statistics.gov.uk/datasets/ons-postcode-directory-february-2019
# need to download and unzip the data then copy the ONSPD csv file to where you want it

# only the four columns used by the rest of the notebook are loaded; the
# remaining columns of this large file are never read into memory

pc = pd.read_csv(r'/Users/simon/ONSPD_FEB_2019_UK.csv',
                 usecols=['pcd', 'oslaua', 'lat', 'long'])


In [47]:
# select postcode, LA code, lat and long

pc = pc[['pcd', 'oslaua', 'lat', 'long']]

# drop lines whose lat is the 99.999999 sentinel (these rows have long 0.0)
# .copy() so the assignment below acts on an independent frame rather than a
# filtered slice (avoids SettingWithCopyWarning)

pc = pc[pc['lat'] != 99.999999].copy()

# remove spaces so the postcodes line up with the register's postcodes

pc['pcd'] = pc['pcd'].str.replace(' ', '', regex=False)


In [48]:
# preview the first rows of the trimmed postcode lookup
pc.head()

Unnamed: 0,pcd,oslaua,lat,long
0,AB10AA,S12000033,57.101474,-2.242851
1,AB10AB,S12000033,57.102554,-2.246308
2,AB10AD,S12000033,57.100556,-2.248342
3,AB10AE,S12000034,57.084444,-2.255708
4,AB10AF,S12000033,57.096656,-2.258102


In [49]:
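# earlier approach, kept for reference only: import the lat/long of each UK
# postcode from ukpostcodes.csv (disabled; the ONS postcode lookup is used instead)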


#post_codes = pd.read_csv('ukpostcodes.csv')

# drop lines that have lat 99.999 and long 0.000

#post_codes = post_codes[post_codes.latitude!=99.999999]


In [51]:
# normalise the register postcodes (trim surrounding whitespace, then remove
# spaces) so they match the format of the lookup's pcd column

df['POSTCODE'] = df['POSTCODE'].str.strip().str.replace(' ','')


# left-join the lookup onto the register so every site is kept, matched or not

df2 = df.merge(pc, how='left', left_on='POSTCODE', right_on='pcd')
df2.head()

Unnamed: 0,Site,PRIMEMOVER,POSTCODE,Region,Capacity (MW),Fuel,Type,CHP,Low-Carbon,yearStart,yearEnd,pcd,oslaua,lat,long
0,Aberdeen Heat And Power,Reciprocating engine,AB241ZZ,Scotland,2.126,Gas,Reciprocating engine,Yes,No,2008,2019,,,,
1,Agrivert Ltd,Reciprocating engine,OX294FL,South East,2.126,Gas,Reciprocating engine,Yes,No,2008,2019,OX294FL,E07000177,51.798231,-1.317532
2,Balcas Limited,Back pressure steam turbine,BT942ES,Northern Ireland,2.71,Waste,,Yes,No,2008,2019,BT942ES,N09000006,54.402625,-7.645222
3,Balcas Timber Ltd,Pass-out condensing steam turbine,IV180LE,Scotland,8.7,Waste,,Yes,No,2008,2019,IV180LE,S12000017,57.705719,-4.148079
4,Basf Bradford,Combined cycle gas turbine,BD120JZ,Yorkshire And Humberside,16.453,Gas,Combined cycle,Yes,No,2008,2019,BD120JZ,E08000032,53.751432,-1.755988


In [52]:
# list the sites whose postcode found no match in the lookup (lat is null)
df2[df2.lat.isnull()]

Unnamed: 0,Site,PRIMEMOVER,POSTCODE,Region,Capacity (MW),Fuel,Type,CHP,Low-Carbon,yearStart,yearEnd,pcd,oslaua,lat,long
0,Aberdeen Heat And Power,Reciprocating engine,AB241ZZ,Scotland,2.126,Gas,Reciprocating engine,Yes,No,2008,2019,,,,
50,Imerys Minerals Ltd,Simple Cycle Gas turbine,PL241HV,South West,3.605,Gas,Gas turbine,Yes,No,2008,2019,,,,
101,Thames Water Utilities Limited,Reciprocating engine,RN138QS,South East,6.0,Gas,Reciprocating engine,Yes,No,2008,2019,,,,


In [53]:
# fill the blanks: hand-entered (lat, long) for postcodes the lookup missed
# NOTE: AB241ZZ is not listed here, so that site keeps empty coordinates

manual_coords = {
    'PL241HV': (50.345964, -4.703515),
    'RN138QS': (51.518198, 0.184272),
}

for postcode, (lat, lon) in manual_coords.items():
    # exact comparison: a missing postcode compares False, so rows without a
    # postcode are never overwritten, and no other postcode can match by
    # containing this one as a substring
    is_site = df2['POSTCODE'] == postcode
    df2.loc[is_site, 'lat'] = lat
    df2.loc[is_site, 'long'] = lon




In [55]:
# tidy up and save: remove the columns only needed for classification and
# for the postcode join

df2 = df2.drop(columns=['PRIMEMOVER', 'POSTCODE', 'pcd', 'oslaua'])

# write the table as a UTF-8 CSV, without the index column, ready for mapping

df2.to_csv('chp.csv', encoding='utf-8', index=False)

# analysis and checks

In [33]:
# preview the first rows of the cleaned register table
df.head()

Unnamed: 0,Site,PRIMEMOVER,POSTCODE,Region,Capacity (MW),Fuel,Type,CHP,Low-Carbon,yearStart,yearEnd
6,Aberdeen Heat And Power,Reciprocating engine,AB241ZZ,Scotland,2.126,Gas,Reciprocating engine,Yes,No,2008,2019
7,Agrivert Ltd,Reciprocating engine,OX294FL,South East,2.126,Gas,Reciprocating engine,Yes,No,2008,2019
22,Balcas Limited,Back pressure steam turbine,BT942ES,Northern Ireland,2.71,Waste,,Yes,No,2008,2019
23,Balcas Timber Ltd,Pass-out condensing steam turbine,IV180LE,Scotland,8.7,Waste,,Yes,No,2008,2019
24,Basf Bradford,Combined cycle gas turbine,BD120JZ,Yorkshire And Humberside,16.453,Gas,Combined cycle,Yes,No,2008,2019


In [43]:
# look for any lookup postcode in the RN13 8 sector; pcd has had its spaces
# removed earlier in the notebook, so the search text must not contain one
pc[pc.pcd.str.contains('RN138')]

Unnamed: 0,pcd,oslaua,lat,long


In [35]:
# list the rows of the merged table that have no latitude
df2[df2.lat.isnull()]
#df2.head()

Unnamed: 0,Site,PRIMEMOVER,POSTCODE,Region,Capacity (MW),Fuel,Type,CHP,Low-Carbon,yearStart,yearEnd,pcd,oslaua,lat,long
0,Aberdeen Heat And Power,Reciprocating engine,AB241ZZ,Scotland,2.126,Gas,Reciprocating engine,Yes,No,2008,2019,,,,
50,Imerys Minerals Ltd,Simple Cycle Gas turbine,PL241HV,South West,3.605,Gas,Gas turbine,Yes,No,2008,2019,,,,
101,Thames Water Utilities Limited,Reciprocating engine,RN138QS,South East,6.0,Gas,Reciprocating engine,Yes,No,2008,2019,,,,
