# CHP register

Takes data from the BEIS CHP register
https://chptools.decc.gov.uk/chp/public

* Locations are given as postcodes, which have to be converted to lat/long
* The resulting positions are therefore not pinpoint accurate
* Data does not contain dates of opening or closing

* Data has been manually cross-checked against the map
* Large sites already appearing in the map are excluded
* 99 sites below 1MW are excluded, totalling 25MW
* 6 sites with no postcode match are excluded (another 25MW)



In [225]:
import pandas as pd
import numpy as np
import pprint as pprint
import matplotlib.pyplot as plt
import pyproj
import re
import os

# work from the project folder so the relative CSV paths used below resolve
os.chdir('/Users/simon/Desktop/uk-map/')
# IPython magic: draw matplotlib figures inline in the notebook output
%matplotlib inline


In [226]:
# load the complete CHP register

df = pd.read_csv('CHP Public Data.csv')

# add a capacity column in MW, derived from the kWe column

df['Capacity (MW)'] = df['CHPTPC (kWe)'].div(1000)

# remove sites identified as already in the map
# renumber the rows first so every index label is unique

df = df.reset_index(drop=True)

# postcodes of sites already mapped; the 'sund' company-name match is
# case-sensitive
mapped_postcodes = ['DN40 3DZ', 'KY7 6GU', 'DL17 0SD']
already_mapped = (df['POSTCODE'].isin(mapped_postcodes)
                  | df['COMPANYNAME'].str.contains('sund'))
df = df.drop(df[already_mapped].index)

# this site is removed by its exact (upper-case) company name
is_iggesund = df['COMPANYNAME'] == 'IGGESUND PAPERBOARD (WORKINGTON) LTD'
df = df.drop(df[is_iggesund].index)

# create fuel column and assume either gas, bio or waste
#
# The labels are built from boolean masks with string-only outputs, so the
# result never depends on NumPy turning a float NaN placeholder into the
# text 'nan'.

prime_mover = df['PRIMEMOVER']

# prime movers treated as gas-fired; each entry is a regular expression, so
# the '.' in 'gas.' matches any character following 'gas'
gas_patterns = ('Gas ', 'gas ', 'Gas-', 'gas-', 'gas.', 'Reciprocating engine')
is_gas = prime_mover.str.contains('|'.join(gas_patterns), na=False)

# steam turbines whose sector mentions 'printing' are assumed to burn biomass
is_biomass = (prime_mover.str.contains('steam turbine', na=False)
              & df['SECTOR'].str.contains('printing', na=False))

# biomass takes precedence over gas; anything matching neither rule
# (including rows with no prime mover recorded) is assumed to be waste
df['Fuel'] = np.where(is_biomass, 'Biomass',
                      np.where(is_gas, 'Gas', 'Waste'))

# assign types
#
# Rules are applied in order, so a later match overrides an earlier one.
# na=False leaves rows with no prime mover recorded unlabelled; without it
# the missing values would count as true in np.where and pick up every label.

type_rules = (
    ('Combined cycle gas', 'Combined cycle'),
    ('Simple', 'Gas turbine'),
    ('Reciprocating', 'Reciprocating engine'),
)

df['Type'] = ''
for pattern, label in type_rules:
    matches = df['PRIMEMOVER'].str.contains(pattern, na=False)
    df['Type'] = np.where(matches, label, df['Type'])

# flag every site as CHP; only biomass-fuelled sites count as low-carbon

df['CHP'] = 'Yes'
df['Low-Carbon'] = df['Fuel'].eq('Biomass').map({True: 'Yes', False: 'No'})

# assume every site was open by 2008 and does not close (end year 2019)

df['yearStart'], df['yearEnd'] = 2008, 2019

# tidy up: friendlier column names, title-cased text

df = df.rename(columns={'COMPANYNAME': 'Site', 'CHPREGION': 'Region'})

for text_column in ('Region', 'Site'):
    df[text_column] = df[text_column].str.title()

# remove columns that are no longer needed

df = df.drop(columns=['CHPTPC (kWe)', 'TOWN', 'COUNTY', 'SECTOR'])

# keep sites of 1MW and above (drops 99 sites <1MW, totalling 25MW of capacity)

df = df.loc[df['Capacity (MW)'] >= 1]

In [227]:
# load the lat/long lookup for each UK postcode

post_codes = pd.read_csv('ukpostcodes.csv')

# discard placeholder rows, i.e. those with latitude 99.999999 (and long 0.000)

has_real_latitude = post_codes['latitude'].ne(99.999999)
post_codes = post_codes.loc[has_real_latitude]


In [228]:
# strip leading/trailing whitespace (such as tabs) from the postcodes,
# otherwise they won't line up with the lookup table

df = df.assign(POSTCODE=df['POSTCODE'].str.strip())

# left join: every site is kept, whether or not its postcode is found

df2 = df.merge(post_codes, how='left', left_on='POSTCODE', right_on='postcode')

In [229]:
# fill the blanks: every row whose postcode contains 'CA14' is given the
# fixed coordinates below (this overwrites any value already present)

ca14_rows = df2['POSTCODE'].str.contains('CA14')
for coord_column, coord_value in (('latitude', 54.665164),
                                  ('longitude', -3.546286)):
    df2[coord_column] = np.where(ca14_rows, coord_value, df2[coord_column])



In [230]:
# remove the helper columns that the map does not need

df2 = df2.drop(columns=['PRIMEMOVER', 'POSTCODE', 'postcode', 'id'])

# write the result to a CSV ready for mapping

df2.to_csv('chp.csv', index=False, encoding='utf-8')