# Conventional plant

Takes data from BEIS DUKES table 5.11 and wrangles for the CB timeline map.

* NB: the DUKES data covers "major power producers" only.
* This means, for example, that recip sites bought by UK Power Reserve appear to "close",
* but the CM register shows they are still there.



In [36]:
import pandas as pd
import numpy as np
import pprint as pprint
import matplotlib.pyplot as plt
import pyproj
import re
import os

# NOTE: hard-coded, machine-specific working directory; the relative file
# names used for reading and writing data below are resolved against it
os.chdir('/Users/simon/Desktop/uk-map/')
# IPython/Jupyter magic (not plain Python): show matplotlib figures inline
%matplotlib inline


In [37]:
# create a dataframe listing the capacity for each plant, in each year it is operating
# this is pulling data from the spreadsheet with one sheet per year for 2004-2018

# columns common to every yearly sheet; later sheets add 'Region' (and, from
# 2016, one extra column that is discarded)
base_columns = ['Company Name', 'Station Name', 'Fuel',
                'Installed Capacity (MW)', 'Start Year']

# 'Company Name' cells matching these patterns are not companies; they are
# blanked to a single space so the leading-whitespace test below turns them
# into NaN
junk_patterns = [r'For foot.*', r'5\..*', r'Company Name.*', r'Station type.*']

# one cleaned frame per year, concatenated once after the loop
# (avoids re-copying the growing frame on every iteration)
yearly_frames = []

for year in range(2004, 2019):
    # NOTE: `sheetname` is the keyword spelling of older pandas releases
    df = pd.read_excel('DUKES_5.11_2018.xlsx', sheetname='DUKES ' + str(year))

    # rename columns
    if year >= 2016:
        df.columns = base_columns + ['Region', 'blah']
        df = df.drop('blah', axis=1)
    elif year in (2004, 2005):
        df.columns = list(base_columns)
    else:
        df.columns = base_columns + ['Region']

    company = df['Company Name']

    # get rid of subheads with no leading whitespace
    if year < 2016:
        company = company.str.replace(r'.*\:', ' ')

    # strip out the bracketed footnote markers from 'Company Name'
    company = company.str.replace(r' \(.*\)', '')
    # strip out the bracketed footnote markers from 'Station Name'
    df['Station Name'] = df['Station Name'].str.replace(r' \(.*\)', '')
    # strip out bracketed subheads from 'Company Name'
    company = company.str.replace(r'\(.*\)', '')
    # strip out info after colon in 'Company Name'
    company = company.str.replace(r'\:.*', '')

    for pattern in junk_patterns:
        company = company.str.replace(pattern, ' ')

    # get rid of the Company Name subheads eg "Thermal"
    # these all have leading whitespace
    # code splits the name on first whitespace
    # if there's leading whitespace (or the name is empty) the first piece is
    # '' and the name is replaced with NaN
    first_piece = company.str.split(' ', n=1).str[0]
    company = company.where(first_piece != '')

    # fill the blank company names with the last entry
    # (the original spreadsheet just has the company name once for each firm)
    df['Company Name'] = company.ffill()

    # remove empty rows (based on no data for installed capacity)
    df = df.dropna(subset=['Installed Capacity (MW)'])

    # remove rows without a start year
    # (includes extra header rows)
    # (includes subtotal rows for not listed plant eg CHP, small renewables)
    # .copy() so the new column below is added to an independent frame
    df = df[pd.to_numeric(df['Start Year'], errors='coerce').notnull()].copy()

    # add a data year label
    df['Data Year'] = year
    yearly_frames.append(df)

# the 2004/2005 sheets have no 'Region' column, so it is NaN for those years
conv1 = pd.concat(yearly_frames)

In [38]:
# couple of name corrections

# exact-match replacements; applied before whitespace is stripped, so a name
# with stray leading/trailing spaces is not matched here.
# Assigned back to the column instead of using inplace=True on a column
# selection, which may act on a temporary object and leave conv1 unchanged.
station_name_fixes = {
    'Heysham1': 'Heysham 1',
    'Oldbury': 'Oldbury A',
    'Uskmouth power': 'Uskmouth Power',
    'Bridgewater District Energy': 'Bridgwater District Energy',
    # both Drax entries collapse to one name
    'Drax - biomass units': 'Drax',
    'Drax - coal units': 'Drax',
}
conv1['Station Name'] = conv1['Station Name'].replace(station_name_fixes)

conv1['Station Name'] = conv1['Station Name'].str.strip() # lose leading/trailing spaces


In [39]:
# manually correct biomass conversion dates

# rows added by hand; columns not given here (eg company, region) are NaN
manual_rows = pd.DataFrame([
    # Lynemouth
    {'Data Year': 2018, 'Fuel': 'Biomass', 'Installed Capacity (MW)': 420,
     'Start Year': 2018, 'Station Name': 'Lynemouth biomass'},
    # Drax: first unit conversion in 2013
    {'Data Year': 2013, 'Fuel': 'Biomass', 'Installed Capacity (MW)': 645,
     'Start Year': 1974, 'Station Name': 'Drax biomass'},
    # Drax: second unit conversion in 2014
    {'Data Year': 2014, 'Fuel': 'Biomass', 'Installed Capacity (MW)': 1290,
     'Start Year': 1974, 'Station Name': 'Drax biomass'},
    {'Data Year': 2015, 'Fuel': 'Biomass', 'Installed Capacity (MW)': 1290,
     'Start Year': 1974, 'Station Name': 'Drax biomass'},
])
# ignore_index gives conv1 a fresh, unique index
conv1 = pd.concat([conv1, manual_rows], ignore_index=True)

# Drax
# capacities below are multiples of one sixth of 3870 MW
drax_unit_mw = 3870 / 6

# Masks are taken once, BEFORE any row is renamed. The previous version
# renamed the coal rows to 'Drax coal' and only then tried to set their start
# year with a filter on Station Name == 'Drax', which matched nothing, so the
# 1986 start year was never written.
is_drax = conv1['Station Name'] == 'Drax'
is_coal = conv1['Fuel'] == 'Coal'
is_biomass = conv1['Fuel'] == 'Biomass'
data_year = conv1['Data Year']

# first unit conversion in 2013
in_2013 = is_drax & (data_year == 2013)
conv1.loc[in_2013, 'Installed Capacity (MW)'] = drax_unit_mw * 5
conv1.loc[in_2013, 'Fuel'] = 'Coal'
conv1.loc[in_2013, 'Station Name'] = 'Drax coal'

# second unit conversion in 2014
in_2014_15 = is_drax & ((data_year == 2014) | (data_year == 2015))
conv1.loc[in_2014_15, 'Installed Capacity (MW)'] = drax_unit_mw * 4
conv1.loc[in_2014_15, 'Fuel'] = 'Coal'
conv1.loc[in_2014_15, 'Station Name'] = 'Drax coal'

# third unit conversion in 2016
in_2016_17 = is_drax & ((data_year == 2016) | (data_year == 2017))
# same capacity for every Drax row of these years, whatever its fuel
conv1.loc[in_2016_17, 'Installed Capacity (MW)'] = drax_unit_mw * 3
conv1.loc[in_2016_17 & is_coal, 'Start Year'] = 1986
conv1.loc[in_2016_17 & is_coal, 'Station Name'] = 'Drax coal'
conv1.loc[in_2016_17 & is_biomass, 'Station Name'] = 'Drax biomass'

# fourth unit conversion in 2018
in_2018 = is_drax & (data_year == 2018)
conv1.loc[in_2018 & is_coal, 'Installed Capacity (MW)'] = drax_unit_mw * 2
conv1.loc[in_2018 & is_biomass, 'Installed Capacity (MW)'] = drax_unit_mw * 4
conv1.loc[in_2018 & is_coal, 'Start Year'] = 1986
conv1.loc[in_2018 & is_coal, 'Station Name'] = 'Drax coal'
conv1.loc[in_2018 & is_biomass, 'Station Name'] = 'Drax biomass'


In [40]:
# manually correct Fiddler's Ferry change in capacity

# rows above 100 MW whose name contains 'Fiddl', for data years 2009 and 2010
fiddlers_2009_10 = (
    (conv1['Station Name'].str.contains('Fiddl')) &
    (conv1['Installed Capacity (MW)'] > 100) &
    ((conv1['Data Year'] == 2009) | (conv1['Data Year'] == 2010)))

# a single .loc[rows, column] assignment writes into conv1 itself; assigning
# through conv1[column].loc[rows] can end up writing to a temporary copy
conv1.loc[fiddlers_2009_10, 'Installed Capacity (MW)'] = 1961

In [41]:
# correct Killingholme A/B naming

# rows named plain 'Killingholme' are told apart by capacity:
# below 800 MW -> 'Killingholme A', above 800 MW -> 'Killingholme B'
# (a row of exactly 800 MW would keep the plain name)
plain_killingholme = conv1['Station Name'] == 'Killingholme'
killingholme_mw = conv1['Installed Capacity (MW)']

conv1.loc[plain_killingholme & (killingholme_mw < 800), 'Station Name'] = 'Killingholme A'
conv1.loc[plain_killingholme & (killingholme_mw > 800), 'Station Name'] = 'Killingholme B'

In [42]:
# merge Seabank 1 and 2

# 'Seabank 2' rows are given 1234 MW (presumably the combined capacity)
is_seabank_2 = conv1['Station Name'] == 'Seabank 2'
conv1.loc[is_seabank_2, 'Installed Capacity (MW)'] = 1234

# 'Seabank 2' and plain 'Seabank' rows both take the combined name
takes_combined_name = conv1['Station Name'].isin(['Seabank 2', 'Seabank'])
conv1.loc[takes_combined_name, 'Station Name'] = 'Seabank 1 & 2'

#conv1[conv1['Station Name'].str.contains('Seab')]

In [43]:
#conv1.loc[(conv1['Station Name'].str.contains('Aberth'))&(conv1['Installed Capacity (MW)']>100)].sort_values(by=['Data Year'])

In [44]:
# read in the spreadsheet with DUKES location data
# (note this re-uses the name `df`)

df = pd.read_excel('Plant location data longitude and latitude.xlsx',
                   sheetname='DUKES2017')

# strip out info after colon in the company name column
df = df.assign(Company_Name=df['Company_Name'].str.replace(r'\:.*', ''))

# rename columns (by position) to match the other data
df.columns = ['Company Name', 'Station Name', 'Fuel', 'CHP', 'Region',
              'Category', 'Easting', 'Northing', 'Latitude', 'Longitude']
#df.head()

In [45]:
# merge in the location data

# columns present in both frames come back with _x (yearly data) and
# _y (location sheet) suffixes
conv2 = pd.merge(conv1, df, on='Station Name', how='left')

# use the region from the sheet with location data if it exists
conv2['Region'] = np.where(conv2['Region_y'].isnull(), conv2['Region_x'], conv2['Region_y'])

# the fuel is always taken from the yearly data
conv2['Fuel'] = conv2['Fuel_x']

# use the company name from full data
conv2['Company Name'] = conv2['Company Name_x']

# label CHP plants: a '*' in the station name, or 'CHP' in the fuel
# (raw string: '\*' in a normal string literal is an invalid escape sequence)
conv2['CHP'] = np.where(conv2['Station Name'].str.contains(r'\*'), 'Yes', conv2['CHP'])
conv2['CHP'] = np.where(conv2['Fuel'].str.contains('CHP'), 'Yes', conv2['CHP'])
conv2['Station Name'] = conv2['Station Name'].str.replace(r' \*', '')

# locate Fiddler's Ferry
# NB weirdly this tags Mucomir, a Scottish hydro plant, with the same location
# don't know why but presume we will use the REPD data to locate it anyways
is_ferry = conv2['Station Name'].str.contains(' Ferry')
conv2['Latitude'] = np.where(is_ferry, 53.37195, conv2['Latitude'])
conv2['Longitude'] = np.where(is_ferry, -2.686922, conv2['Longitude'])

# locate Lynemouth biomass
is_lynemouth_bio = conv2['Station Name'] == 'Lynemouth biomass'
conv2['Latitude'] = np.where(is_lynemouth_bio, 55.20475, conv2['Latitude'])
conv2['Longitude'] = np.where(is_lynemouth_bio, -1.520356, conv2['Longitude'])

# tidy formatting: lose leading/trailing spaces
for text_column in ['Fuel', 'Station Name', 'Region', 'Company Name']:
    conv2[text_column] = conv2[text_column].str.strip()
conv2['Company Name'] = conv2['Company Name'].str.replace(r' \*', '')
# assigned back rather than filled in place on a column selection, which may
# act on a temporary object and leave conv2 unchanged
conv2['Region'] = conv2['Region'].fillna('')

# drop the extra columns
conv2.drop(['Company Name_x', 'Company Name_y', 'Fuel_x', 'Fuel_y',
            'Region_x', 'Region_y', 'Easting', 'Northing'], axis=1, inplace=True)


In [46]:
# map fuels and types based on CSV dictionary files
# the CSVs were built manually from the list of unique fuel types

di1 = pd.read_csv('fueldict.csv', header=None, index_col=0, squeeze=True).to_dict()
di2 = pd.read_csv('typedict.csv', header=None, index_col=0, squeeze=True).to_dict()

# both mappings are looked up from the original (detailed) fuel label
detailed_fuel = conv2['Fuel']

# create a new column "type" based on di2
conv2['Type'] = detailed_fuel.map(di2)

# simplify "fuel" based on di1 or retain existing value if there's no match
conv2['Fuel'] = detailed_fuel.map(di1).fillna(detailed_fuel)

# correct gas oil
conv2.loc[conv2['Type'] == 'Gas oil', 'Type'] = 'Gas turbine'

# tag sites as renewable and/or low-carbon based on classifications below

# renewable: everything except coal, oil, gas and nuclear
di3 = dict(
    [(fuel, 'No') for fuel in ('Coal', 'Oil', 'Gas', 'Nuclear')] +
    [(fuel, 'Yes') for fuel in ('Hydro', 'Biomass', 'Waste', 'Wind', 'Solar')])

conv2['Renewable'] = conv2['Fuel'].map(di3)

# low-carbon: same as renewable, plus nuclear
di4 = dict(di3, Nuclear='Yes')

conv2['Low-Carbon'] = conv2['Fuel'].map(di4)

In [47]:
# load other maps to cross-reference locations

# old UK map seems to have data issues so taking this out
# old = pd.read_csv('old_map_locations.csv')
# old = old[['site_name','latitude','longitude']]
# conv3 = pd.merge(conv2,old,left_on = 'Station Name',right_on = 'site_name', how = 'left')

#conv3['Latitude'] = np.where(conv3['Latitude'].isnull(),conv3['latitude'],conv3['Latitude'])
#conv3['Longitude'] = np.where(conv3['Longitude'].isnull(),conv3['longitude'],conv3['Longitude'])

# with the old map disabled, conv3 is just another name for conv2
conv3 = conv2

# world coal map
coal = pd.read_csv('coal_map.csv')[['Plant', 'Latitude', 'Longitude']]
coal.columns = ['Plant', 'Lat_coal', 'Long_coal']

# make the plant names comparable with the DUKES station names
coal['Plant'] = coal['Plant'].str.replace(r' power station', '').str.replace(r'-', ' ')

conv4 = conv3.merge(coal, left_on='Station Name', right_on='Plant', how='left')

# coal-map coordinates only fill gaps
for target, fallback in (('Latitude', 'Lat_coal'), ('Longitude', 'Long_coal')):
    conv4[target] = np.where(conv4[target].isnull(), conv4[fallback], conv4[target])


# nuclear map
nuclear = pd.read_csv('nuclear_map.csv')
nuclear = nuclear.loc[nuclear['Country'] == 'UK', ['Unit', 'Latitude', 'Longitude']]
nuclear.columns = ['Plant', 'Lat_nuc', 'Long_nuc']
# drop everything from the first hyphen, title-case, lose surrounding spaces
nuclear['Plant'] = nuclear['Plant'].str.replace(r'-.*', ' ').str.title().str.strip()

conv5 = conv4.merge(nuclear, left_on='Station Name', right_on='Plant', how='left')

# for nuclear plants the nuclear-map coordinates replace whatever was there
# (including when the nuclear map has no match, which leaves NaN)
is_nuclear = conv5['Fuel'] == 'Nuclear'
conv5['Latitude'] = np.where(is_nuclear, conv5['Lat_nuc'], conv5['Latitude'])
conv5['Longitude'] = np.where(is_nuclear, conv5['Long_nuc'], conv5['Longitude'])

# Power Watch

watch = pd.read_csv('global_power_plant_database.csv')
watch = watch.loc[watch['country'] == 'GBR',
                  ['name', 'capacity_mw', 'latitude', 'longitude', 'estimated_generation_gwh']]
watch = watch.rename(columns={'latitude': 'latitude_watch', 'longitude': 'longitude_watch'})

conv6 = conv5.merge(watch, left_on='Station Name', right_on='name', how='left')
#conv6.loc[(conv6['Latitude'].isnull())&(conv6['latitude_y'].notnull())]

# Power Watch coordinates only fill gaps
for target, fallback in (('Latitude', 'latitude_watch'), ('Longitude', 'longitude_watch')):
    conv6[target] = np.where(conv6[target].isnull(), conv6[fallback], conv6[target])

#conv6.drop(['site_name','latitude','longitude','latitude_watch','longitude_watch','Plant_x','Plant_y',
     #       'Lat_coal','Long_coal','Lat_nuc','Long_nuc','name','capacity_mw'], axis=1, inplace=True)

In [48]:
# pull the manually found locations for missing sites

# locations.csv is read without a header row; column 0 (the index) is matched
# against the station name and columns 1 and 2 are used as latitude and
# longitude, so to_dict() gives {1: {name: lat}, 2: {name: lon}}.
# squeeze=True was dropped: with two data columns it had no effect.
di = pd.read_csv('locations.csv', header=None, index_col=0).to_dict()

# map locations to data or retain existing value if there's no match
conv6['Latitude'] = conv6['Station Name'].map(di[1]).fillna(conv6['Latitude'])
conv6['Longitude'] = conv6['Station Name'].map(di[2]).fillna(conv6['Longitude'])

# non-renewable stations that still have no latitude
unlocated = conv6.loc[(conv6['Latitude'].isnull()) & (conv6['Renewable'] == 'No'), 'Station Name']

# print(...) with a single argument runs under both Python 2 and Python 3
print('Missing locations: ' + str(unlocated.nunique()))
unlocated.unique()

Missing locations: 8


array([u'Wheldale', u'Seabank 1', u'St Marys', u'Princetown', u'Lynton',
       u'Roseland', u'Ravenhead', 'Drax coal'], dtype=object)

In [49]:
# fix MW changes at Staythorpe C and Pembroke

# every row of each station gets one fixed capacity, whatever the data year
for station, fixed_mw in (('Staythorpe C', 1772), ('Pembroke', 2199)):
    conv6.loc[conv6['Station Name'] == station, 'Installed Capacity (MW)'] = fixed_mw

In [50]:
# group the plants

# first need to make some data numeric
for numeric_column in ('Latitude', 'Longitude', 'Installed Capacity (MW)', 'Start Year'):
    conv6[numeric_column] = pd.to_numeric(conv6[numeric_column], errors='coerce')

# do the grouping by station Name and capacity
# gives a new row for each change in capacity and for each plant

conv7 = conv6.groupby(
    ['Station Name', 'Installed Capacity (MW)']).agg(
    {'Data Year': ['min', 'max'],
     'CHP': 'last',
     'Start Year': 'min',
     'Category': 'last',
     'Latitude': 'mean',
     'Longitude': 'mean',
     'Region': 'last',
     'Fuel': 'last',
     'Company Name': 'last',
     'Type': 'last',
     'Renewable': 'last',
     'Low-Carbon': 'last',
     'estimated_generation_gwh': 'first',
     }).reset_index()

# Rename the columns by what they contain, not by where they happen to sit.
# The order in which a dict-based agg() returns its columns follows the dict's
# iteration order, which is not the same on every Python version, so the
# previous positional list of names could attach the wrong label to a column.
# After reset_index() the columns are (source column, aggregation) pairs;
# only 'Data Year' has more than one aggregation.
output_names = {
    'Station Name': 'Site',
    'Installed Capacity (MW)': 'Capacity (MW)',
    ('Data Year', 'min'): 'yearStart',
    ('Data Year', 'max'): 'yearEnd',
    'CHP': 'CHP',
    'Start Year': 'Year Open',
    'Category': 'Category',
    'Latitude': 'Latitude',
    'Longitude': 'Longitude',
    'Region': 'Region',
    'Fuel': 'Fuel',
    'Company Name': 'Company',
    'Type': 'Type',
    'Renewable': 'Renewable',
    'Low-Carbon': 'Low-Carbon',
    'estimated_generation_gwh': 'GWh (est)',
}
conv7.columns = [
    output_names[(column, func)] if column == 'Data Year' else output_names[column]
    for column, func in conv7.columns.tolist()
]

#conv7.head()
#conv7.columns.tolist()

# sort the results
conv7 = conv7.sort_values(by=['Site', 'yearStart'], ascending=True)

# final column order
conv7 = conv7[['Site','Capacity (MW)','yearStart','yearEnd','Region','Year Open','Fuel','Type',
               'Latitude','Longitude','GWh (est)','Company','Low-Carbon','Renewable','CHP','Category']]

In [51]:
# manually locate and separate the Drax units
# use ± in latitude as the plant lies roughly N-S
# ±0.00025 is roughly ±50m
# (the offset actually applied below is +0.00035, to the coal rows only)
# https://en.wikipedia.org/wiki/Decimal_degrees

# .loc[rows, column] writes into conv7 itself; conv7[column].loc[rows] = ...
# can end up writing to a temporary copy
is_drax_site = conv7['Site'].str.contains('Drax')

conv7.loc[is_drax_site & (conv7['Capacity (MW)'] > 100), 'Longitude'] = -0.999021
conv7.loc[is_drax_site & (conv7['Fuel'] == 'Coal'), 'Latitude'] = 53.737196 + 0.00035
conv7.loc[is_drax_site & (conv7['Fuel'] == 'Biomass'), 'Latitude'] = 53.737196

In [52]:
# correct Deeside gas location

# one .loc[rows, column] assignment per coordinate; the chained form
# conv7[column][rows] = ... can end up writing to a temporary copy
is_deeside = conv7['Site'] == 'Deeside'
conv7.loc[is_deeside, 'Latitude'] = 53.233444
conv7.loc[is_deeside, 'Longitude'] = -3.054583


In [53]:
# manually correct Ballylumford sub-sites
# NB also that B station is to close
# http://www.irishnews.com/business/2018/11/10/news/reprieve-for-kilroot-power-station-but-85-jobs-set-to-go-at-ballylumford-1481772/

#conv7.Site[conv7.Site=='Ballylumford']

# All assignments use .loc[rows, column]; the chained form
# conv7[column][rows] = ... can end up writing to a temporary copy.

# make sure the index is unique
conv7 = conv7.reset_index(drop=True)

# drop the rows for the plain 'Ballylumford' site name
conv7 = conv7.drop(conv7[conv7['Site'] == 'Ballylumford'].index)

is_b_ocgt = conv7['Site'] == 'Ballylumford B OCGT'
conv7.loc[is_b_ocgt, 'yearStart'] = 2004
conv7.loc[is_b_ocgt, 'yearEnd'] = 2018
conv7.loc[is_b_ocgt, 'Year Open'] = 1976

is_c = conv7['Site'] == 'Ballylumford C'
conv7.loc[is_c, 'yearStart'] = 2004
conv7.loc[is_c, 'yearEnd'] = 2018
conv7.loc[is_c, 'Year Open'] = 2003

# drop the 'Ballylumford B' rows below 500 MW
conv7 = conv7.drop(conv7[(conv7['Site'] == 'Ballylumford B') & (conv7['Capacity (MW)'] < 500)].index)

is_b = conv7['Site'] == 'Ballylumford B'
conv7.loc[is_b, 'Year Open'] = 1974

# the remaining B rows are split on the capacity they have now, before any of
# them is overwritten (a row of exactly 600 MW would be left untouched)
b_below_600 = is_b & (conv7['Capacity (MW)'] < 600)
b_above_600 = is_b & (conv7['Capacity (MW)'] > 600)

conv7.loc[b_below_600, 'yearStart'] = 2004
conv7.loc[b_below_600, 'yearEnd'] = 2015
conv7.loc[b_below_600, 'Capacity (MW)'] = 540

conv7.loc[b_above_600, 'yearStart'] = 2016
conv7.loc[b_above_600, 'yearEnd'] = 2018
conv7.loc[b_above_600, 'Capacity (MW)'] = 250


#conv7[conv7.Site.str.contains('Ballylumford')]

In [54]:
# pull the manually found locations for missing sites again

# locations.csv is read without a header row; column 0 (the index) is matched
# against the site name and columns 1 and 2 are used as latitude and longitude.
# squeeze=True was dropped: with two data columns it had no effect.
di = pd.read_csv('locations.csv', header=None, index_col=0).to_dict()

# map locations to data or retain existing value if there's no match

conv7['Latitude'] = conv7['Site'].map(di[1]).fillna(conv7['Latitude'])
conv7['Longitude'] = conv7['Site'].map(di[2]).fillna(conv7['Longitude'])

In [55]:
# fix Kilroot changes in capacity / incorrect split between coal and OCGT

# All assignments use .loc[rows, column]; the chained form
# conv7[column].loc[rows] = ... can end up writing to a temporary copy.
# The statement order matters: later filters read values set by earlier lines.

is_kilroot = conv7['Site'] == 'Kilroot'

# the row listed from 2004: 560 MW coal/oil, running to 2018
listed_from_2004 = is_kilroot & (conv7['yearStart'] == 2004)
conv7.loc[listed_from_2004, 'Capacity (MW)'] = 560
conv7.loc[listed_from_2004, 'yearEnd'] = 2018
conv7.loc[listed_from_2004, 'Category'] = 'Coal/oil'

conv7.loc[is_kilroot & (conv7['yearStart'] == 2009), 'Capacity (MW)'] = 140

# the row that ends in 2009 (evaluated after yearEnd was changed above)
# becomes the gas turbine, starting in 2004
ends_in_2009 = is_kilroot & (conv7['yearEnd'] == 2009)
conv7.loc[ends_in_2009, 'yearStart'] = 2004
conv7.loc[ends_in_2009, 'Type'] = 'Gas turbine'
conv7.loc[ends_in_2009, 'Fuel'] = 'Gas'

# ...and, now that it is tagged as gas, runs to 2018
conv7.loc[is_kilroot & (conv7['Fuel'] == 'Gas'), 'yearEnd'] = 2018

# make sure the index is unique
conv7 = conv7.reset_index(drop=True)

# drop the Kilroot rows that still start after 2005
conv7 = conv7.drop(
    conv7[(conv7['Site'] == 'Kilroot') & (conv7['yearStart'] > 2005)].index)

In [56]:
# add data for plants operating in future

# select plants still operating in 2018 and not coal
# (renewables are NOT excluded by this filter)
# .copy() makes `future` an independent frame, so the edits made to it
# further down are not assignments on a slice of conv7
future = conv7.loc[(conv7['yearEnd'] == 2018) &
                   (conv7['Fuel'] != 'Coal')].copy()

In [57]:
# suppress errors in the "for" loops (I didn't understand them and they didn't seem to matter)
# This switches off pandas' chained-assignment check for the whole session,
# ie the warning raised when a value is assigned through a selection that may
# be a copy of the frame rather than the frame itself.
pd.options.mode.chained_assignment = None  # default='warn'

In [58]:
# nuclear plants

# add Hinkley C by re-labelling the Hinkley Point B row(s) and overwriting
# their details
# (.loc[rows, column] writes into `future` itself; the chained form
# future[column].loc[rows] = ... can end up writing to a temporary copy)
future.loc[future['Site'] == 'Hinkley Point B', 'Site'] = 'Hinkley Point C'

is_hinkley_c = future['Site'] == 'Hinkley Point C'
future.loc[is_hinkley_c, 'Latitude'] = 51.206
future.loc[is_hinkley_c, 'Longitude'] = -3.144
future.loc[is_hinkley_c, 'Capacity (MW)'] = 3200
future.loc[is_hinkley_c, 'Year Open'] = 2026
# MW x hours in a year x 0.9, converted from MWh to GWh
future.loc[is_hinkley_c, 'GWh (est)'] = 3200*24*365*0.9/1000

# delete all except Sizewell B and Hinkley C

# make sure the index is unique
future = future.reset_index(drop=True)


future = future.drop(future[(future['Fuel'] == 'Nuclear') & (
    (future['Site'] != 'Sizewell B') & (future['Site'] != 'Hinkley Point C'))].index)


In [59]:
# gas plants

# all the old ones built before 2000
# https://docs.google.com/spreadsheets/d/1Ma1RMisQSegfKc8rNFzG4Clo-WpCOsPe0W0vbLWfo1g/edit#gid=0

# Ballylumford B and B OCGT are due to close
# www.irishnews.com/business/2018/11/10/news/reprieve-for-kilroot-power-station-but-85-jobs-set-to-go-at-ballylumford-1481772/
# we are reusing these entries for new plants, see below

# All edits use .loc[rows, column]; the chained form
# future.column[rows] = ... can end up writing to a temporary copy.


# delete Corby, required to retire by 2023 under IED
# https://sandbag.org.uk/wp-content/uploads/2018/05/Coal-To-Clean-May-2018.pdf

# make sure the index is unique before dropping rows
future = future.reset_index(drop=True)

future = future.drop(future[future['Site'] == 'Corby'].index)

# add Keadby 2 currently under construction
# https://sse.com/whatwedo/ourprojectsandassets/thermal/keadby2/
# (re-labels the Ballylumford B OCGT row, then overwrites its details)

future.loc[future['Site'] == 'Ballylumford B OCGT', 'Site'] = 'Keadby 2'
is_keadby_2 = future['Site'] == 'Keadby 2'
for column, value in [
        ('Latitude', 53.595855),
        ('Longitude', -0.756291),
        ('Capacity (MW)', 840),
        ('yearStart', 2019),
        ('yearEnd', 2019),
        ('Year Open', 2022),  # "early 2020s"
        ('Company', 'SSE'),
        ('Fuel', 'Gas'),
        ('Type', 'Combined cycle')]:
    future.loc[is_keadby_2, column] = value


# other new gas plants
#future = future.drop(.index)

# Spalding OCGT under construction
# (re-labels the Ballylumford B row, then overwrites its details)

future.loc[future['Site'] == 'Ballylumford B', 'Site'] = 'Spalding expansion'
is_spalding = future['Site'] == 'Spalding expansion'
for column, value in [
        ('Latitude', 52.804313),
        ('Longitude', -0.132970),
        ('Capacity (MW)', 300),
        ('yearStart', 2019),
        ('yearEnd', 2019),
        ('Year Open', 2019),
        ('Company', 'Intergen'),
        ('Fuel', 'Gas'),
        # was 'Gas turbine ' with a trailing space, which would not match the
        # 'Gas turbine' label used for other plants
        ('Type', 'Gas turbine')]:
    future.loc[is_spalding, column] = value


# Kings Lynn to reopen after refurb

# ignore_index gives the combined frame a fresh, unique index, so rows can be
# dropped by label straight away
future = pd.concat([future, conv7[conv7['Site'] == 'Kings Lynn']], ignore_index=True)

# drop the Kings Lynn row whose yearStart is 2012
future = future.drop(future[(future['Site'] == 'Kings Lynn') & (future['yearStart'] == 2012)].index)

is_kings_lynn = future['Site'] == 'Kings Lynn'
for column, value in [
        ('Capacity (MW)', 370),
        ('yearStart', 2019),
        ('yearEnd', 2019),
        ('Year Open', 2019)]:
    future.loc[is_kings_lynn, column] = value

In [60]:
#future[future.Site.str.contains('Kings Lynn')]

In [61]:
# renewable retirements
# print amount that is likely to have retired by 2030 as over 25
# drop wind/solar built before 2005 (the drop itself is commented out below)

# renewable wind or solar rows that opened before 2005
# (the printed text says "wind" but the total includes solar too)
old_wind_solar = ((future['Renewable'] == 'Yes') & (future['Year Open'] < 2005) &
                  ((future['Fuel'] == 'Wind') | (future['Fuel'] == 'Solar')))

# print(...) with a single argument runs under both Python 2 and Python 3
print(str(future.loc[old_wind_solar, 'Capacity (MW)'].sum()) + 'MW of wind that will be over 25 years old in 2030')
#future = future.drop(future['Capacity (MW)'][(future.Renewable =='Yes')&(future['Year Open']<2005)&((future.Fuel == 'Wind')|(future.Fuel == 'Solar'))].index)

1133.33MW of wind that will be over 25 years old in 2030


In [62]:
# make the future entries start and end in 2019

# every row of `future` gets the same value in both columns
for year_column in ('yearStart', 'yearEnd'):
    future[year_column] = 2019


In [63]:
# append the future rows to the existing rows

# pd.concat is used because DataFrame.append is deprecated and has been
# removed from recent pandas releases. The index is not reset here, so labels
# from `future` can repeat labels already in conv7.
conv7 = pd.concat([conv7, future])

In [64]:
# drop the last few tiny sites without a location

conv7.dropna(subset=['Longitude'], inplace=True)

# drop all the renewable stuff as we will get it back from the REPD
# apart from waste (not sure on coverage of REPD)
# excl Drax bio, Tilbury B bio and Ironbridge bio

# first reset the index

conv8 = conv7.reset_index(drop=True)
from_repd = conv8['Fuel'].isin(['Wind', 'Solar', 'Hydro'])
conv8 = conv8.drop(conv8.index[from_repd])

# fix opening years not matching yearStart for recent gas plants
# (gas rows over 100 MW that opened 2008-2018 and are first listed exactly
# one year after opening get yearStart moved back to the opening year)

listed_year_after_opening = (
    (conv8['Fuel'] == 'Gas') & (conv8['Capacity (MW)'] > 100) &
    (conv8['Year Open'] < conv8['yearStart']) & (conv8['Year Open'] > 2007) &
    (conv8['Year Open'] < 2019) & (conv8['yearStart'] == (conv8['Year Open'] + 1)))
conv8['yearStart'] = np.where(listed_year_after_opening, conv8['Year Open'], conv8['yearStart'])


# export the data to a CSV ready for mapping
conv8.to_csv('conventional-plant.csv', index=False, encoding='utf-8')

# checks and analysis

In [29]:
#print str(conv1['Station Name'][(conv1.Fuel.str.contains('oal'))&(conv1['Data Year']==2008)].count())
# check: 2008 rows over 100 MW whose fuel label contains 'gas'
conv1.loc[conv1['Fuel'].str.contains('gas')
          & (conv1['Data Year'] == 2008)
          & (conv1['Installed Capacity (MW)'] > 100)]

Unnamed: 0,Company Name,Data Year,Fuel,Installed Capacity (MW),Region,Start Year,Station Name
952,Baglan Generation Ltd,2008,gas turbine,575,Wales,2002,Baglan Bay
999,Derwent Cogeneration,2008,gas CHP,236,East Midlands,1994,Derwent
1017,E.On UK,2008,gas oil,132,London,1979,Taylor's Lane GT
1050,Gaz de France,2008,gas CHP,180,Wales,2001,Shotton
1052,Immingham CHP LLP,2008,gas CHP,741,Yorkshire and,2004,Immingham CHP
1053,International Power / Mitsui,2008,gas oil/kerosene,140,South West,1996,Indian Queens
1064,Magnox Electric Ltd,2008,gas CHP,180,North West,1995,Fellside CHP
1066,Premier Power Ltd,2008,gas/oil,360,Northern Ireland,1968,Ballylumford B
1081,RWE Npower Plc,2008,coal/gas,1958,South East,1972,Didcot A
1083,RWE Npower Plc,2008,gas oil,140,South East,1982,Cowes


In [65]:
# check: Killingholme rows in the yearly data
conv1.loc[conv1['Station Name'].str.contains('Killing')]

Unnamed: 0,Company Name,Data Year,Fuel,Installed Capacity (MW),Region,Start Year,Station Name
61,Killingholme Power Ltd,2004,CCGT,650,,1994,Killingholme A
228,Centrica,2005,CCGT,650,,1994,Killingholme A
256,E.On UK,2005,CCGT,900,,1993,Killingholme B
468,Centrica,2006,CCGT,660,the Humber,1994,Killingholme A
499,E.On UK,2006,CCGT,900,Yorkshire and,1993,Killingholme B
723,Centrica,2007,CCGT,665,the Humber,1994,Killingholme A
755,E.On UK,2007,CCGT,900,Yorkshire and,1993,Killingholme B
987,Centrica,2008,CCGT,665,the Humber,1994,Killingholme A
1021,E.On UK,2008,CCGT,900,Yorkshire and,1993,Killingholme B
1277,Centrica,2009,CCGT,665,the Humber,1994,Killingholme A


In [31]:
# check: Killingholme rows after the location merge
conv2.loc[conv2['Station Name'].str.contains('Killing')]

Unnamed: 0,Data Year,Installed Capacity (MW),Start Year,Station Name,CHP,Category,Latitude,Longitude,Region,Fuel,Company Name,Type,Renewable,Low-Carbon
61,2004,650,1994,Killingholme A,,,,,,Gas,Killingholme Power Ltd,Combined cycle,No,No
228,2005,650,1994,Killingholme A,,,,,,Gas,Centrica,Combined cycle,No,No
256,2005,900,1993,Killingholme B,,,,,,Gas,E.On UK,Combined cycle,No,No
468,2006,660,1994,Killingholme A,,,,,the Humber,Gas,Centrica,Combined cycle,No,No
499,2006,900,1993,Killingholme B,,,,,Yorkshire and,Gas,E.On UK,Combined cycle,No,No
723,2007,665,1994,Killingholme A,,,,,the Humber,Gas,Centrica,Combined cycle,No,No
755,2007,900,1993,Killingholme B,,,,,Yorkshire and,Gas,E.On UK,Combined cycle,No,No
987,2008,665,1994,Killingholme A,,,,,the Humber,Gas,Centrica,Combined cycle,No,No
1021,2008,900,1993,Killingholme B,,,,,Yorkshire and,Gas,E.On UK,Combined cycle,No,No
1277,2009,665,1994,Killingholme A,,,,,the Humber,Gas,Centrica,Combined cycle,No,No


In [66]:
# check: Killingholme rows in conv3
conv3.loc[conv3['Station Name'].str.contains('Killing')]

Unnamed: 0,Data Year,Installed Capacity (MW),Start Year,Station Name,CHP,Category,Latitude,Longitude,Region,Fuel,Company Name,Type,Renewable,Low-Carbon
61,2004,650,1994,Killingholme A,,,,,,Gas,Killingholme Power Ltd,Combined cycle,No,No
228,2005,650,1994,Killingholme A,,,,,,Gas,Centrica,Combined cycle,No,No
256,2005,900,1993,Killingholme B,,,,,,Gas,E.On UK,Combined cycle,No,No
468,2006,660,1994,Killingholme A,,,,,the Humber,Gas,Centrica,Combined cycle,No,No
499,2006,900,1993,Killingholme B,,,,,Yorkshire and,Gas,E.On UK,Combined cycle,No,No
723,2007,665,1994,Killingholme A,,,,,the Humber,Gas,Centrica,Combined cycle,No,No
755,2007,900,1993,Killingholme B,,,,,Yorkshire and,Gas,E.On UK,Combined cycle,No,No
987,2008,665,1994,Killingholme A,,,,,the Humber,Gas,Centrica,Combined cycle,No,No
1021,2008,900,1993,Killingholme B,,,,,Yorkshire and,Gas,E.On UK,Combined cycle,No,No
1277,2009,665,1994,Killingholme A,,,,,the Humber,Gas,Centrica,Combined cycle,No,No


In [67]:
# check: Killingholme rows after all the location cross-references
conv6.loc[conv6['Station Name'].str.contains('Killing')]

Unnamed: 0,Data Year,Installed Capacity (MW),Start Year,Station Name,CHP,Category,Latitude,Longitude,Region,Fuel,...,Lat_coal,Long_coal,Plant_y,Lat_nuc,Long_nuc,name,capacity_mw,latitude_watch,longitude_watch,estimated_generation_gwh
74,2004,650.0,1994,Killingholme A,,,53.659153,-0.255604,,Gas,...,,,,,,,,,,
252,2005,650.0,1994,Killingholme A,,,53.659153,-0.255604,,Gas,...,,,,,,,,,,
282,2005,900.0,1993,Killingholme B,,,53.654356,-0.255611,,Gas,...,,,,,,,,,,
506,2006,660.0,1994,Killingholme A,,,53.659153,-0.255604,the Humber,Gas,...,,,,,,,,,,
539,2006,900.0,1993,Killingholme B,,,53.654356,-0.255611,Yorkshire and,Gas,...,,,,,,,,,,
773,2007,665.0,1994,Killingholme A,,,53.659153,-0.255604,the Humber,Gas,...,,,,,,,,,,
807,2007,900.0,1993,Killingholme B,,,53.654356,-0.255611,Yorkshire and,Gas,...,,,,,,,,,,
1047,2008,665.0,1994,Killingholme A,,,53.659153,-0.255604,the Humber,Gas,...,,,,,,,,,,
1083,2008,900.0,1993,Killingholme B,,,53.654356,-0.255611,Yorkshire and,Gas,...,,,,,,,,,,
1349,2009,665.0,1994,Killingholme A,,,53.659153,-0.255604,the Humber,Gas,...,,,,,,,,,,


In [68]:
# check: Killingholme rows in the grouped data
conv7.loc[conv7['Site'].str.contains('Killing')]

Unnamed: 0,Site,Capacity (MW),yearStart,yearEnd,Region,Year Open,Fuel,Type,Latitude,Longitude,GWh (est),Company,Low-Carbon,Renewable,CHP,Category
925,Killingholme A,650.0,2004,2005,,1994,Gas,Combined cycle,53.659153,-0.255604,,Centrica,No,No,,
926,Killingholme A,660.0,2006,2006,the Humber,1994,Gas,Combined cycle,53.659153,-0.255604,,Centrica,No,No,,
927,Killingholme A,665.0,2007,2015,Yorkshire and the Humber,1994,Gas,Combined cycle,53.659153,-0.255604,,Centrica,No,No,,
928,Killingholme A,0.0,2016,2016,Yorkshire and the Humber,1994,Gas,Combined cycle,53.659153,-0.255604,,Centrica,No,No,,
929,Killingholme A,600.0,2017,2018,East Midlands,1993,Gas,Gas turbine,53.659153,-0.255604,,Uniper UK Limited,No,No,,
930,Killingholme B,900.0,2005,2016,Yorkshire and the Humber,1993,Gas,Combined cycle,53.654356,-0.255611,,E.On UK,No,No,,
535,Killingholme A,600.0,2019,2019,East Midlands,1993,Gas,Gas turbine,53.659153,-0.255604,,Uniper UK Limited,No,No,,


In [33]:
# load the old map locations in full (every column) for inspection
old = pd.read_csv('old_map_locations.csv')

Unnamed: 0,site_name,latitude,longitude
987,Baglan Bay CCGT,52.7997,-1.3642
1193,Baglan Energy Park Solar Farm,52.13279,-0.68654
1697,Baglan Bay OCGT,,


In [91]:
# old map locations, reduced to the site name and its coordinates
old2 = pd.read_csv('old_map_locations.csv')[['site_name', 'latitude', 'longitude']]

In [92]:
# check: Killingholme rows in the old map locations
# (the filter is now built from old2 itself; it used to be built from the
# separate frame `old`, which only works while both frames share an index)
old2[old2['site_name'].str.contains('Killing')]

Unnamed: 0,site_name,latitude,longitude
1630,Killingholme A,,
1633,Killingholme B,,


In [170]:
# check: Seabank rows in the location sheet
df.loc[df['Station Name'].str.contains('Seab')]

Unnamed: 0,Company Name,Station Name,Fuel,CHP,Region,Category,Easting,Northing,Latitude,Longitude
98,Seabank Power Limited,Seabank 1 & 2,CCGT,,South West,CCGT,343425,161749,51.5393,-2.67024


In [265]:
# check: Staythorpe rows in the export, oldest listing first
conv8.loc[conv8['Site'].str.contains('Staythorpe')].sort_values(by=['yearStart'])

Unnamed: 0,Site,Capacity (MW),yearStart,yearEnd,Region,Year Open,Fuel,Type,Latitude,Longitude,GWh (est),Company,Low-Carbon,Renewable,CHP,Category
584,Staythorpe C,1772.0,2011,2018,East Midlands,2010,Gas,Combined cycle,53.0747,-0.855731,7905.378612,RWE Generation SE,No,No,,CCGT
934,Staythorpe C,1772.0,2019,2019,East Midlands,2010,Gas,Combined cycle,53.0747,-0.855731,7905.378612,RWE Generation SE,No,No,,CCGT


In [35]:
# check: the Power Watch entry named exactly 'Killingholme'
watch.loc[watch['name'] == 'Killingholme']

Unnamed: 0,name,capacity_mw,latitude_watch,longitude_watch,estimated_generation_gwh
18836,Killingholme,600.0,53.6535,-0.2556,2676.764767


In [289]:
# re-apply the yearStart fix for gas rows over 100 MW that are first listed
# exactly one year after opening (opening years 2008-2018)
is_gas = conv8['Fuel'] == 'Gas'
opened_2008_to_2018 = (conv8['Year Open'] > 2007) & (conv8['Year Open'] < 2019)

conv8['yearStart'] = np.where(
    is_gas & (conv8['Capacity (MW)'] > 100) &
    (conv8['Year Open'] < conv8['yearStart']) & opened_2008_to_2018 &
    (conv8['yearStart'] == (conv8['Year Open'] + 1)),
    conv8['Year Open'], conv8['yearStart'])

# check: gas rows under 100 MW still listed later than they opened
# (the yearStart comparison is rebuilt because yearStart has just changed)
conv8.loc[is_gas & (conv8['Capacity (MW)'] < 100) &
          (conv8['Year Open'] < conv8['yearStart']) &
          opened_2008_to_2018].sort_values(by=['Site', 'yearStart'])

Unnamed: 0,Site,Capacity (MW),yearStart,yearEnd,Region,Year Open,Fuel,Type,Latitude,Longitude,GWh (est),Company,Low-Carbon,Renewable,CHP,Category
63,Blackburn,59.0,2015,2018,North West,2011,Gas,Combined cycle,53.7182,-2.537466,263.215202,Scottish power,No,No,,CCGT
711,Blackburn,59.0,2019,2019,North West,2011,Gas,Combined cycle,53.7182,-2.537466,263.215202,Scottish power,No,No,,CCGT


In [290]:
# check: every site in the export whose name contains 'Blackburn'
conv8.loc[conv8['Site'].str.contains('Blackburn')]

Unnamed: 0,Site,Capacity (MW),yearStart,yearEnd,Region,Year Open,Fuel,Type,Latitude,Longitude,GWh (est),Company,Low-Carbon,Renewable,CHP,Category
63,Blackburn,59.0,2015,2018,North West,2011,Gas,Combined cycle,53.7182,-2.537466,263.215202,Scottish power,No,No,,CCGT
64,Blackburn Meadows,33.0,2015,2017,Yorkshire and the Humber,2015,Biomass,,53.4212,-1.401916,,E.On UK,Yes,Yes,,Biomass
711,Blackburn,59.0,2019,2019,North West,2011,Gas,Combined cycle,53.7182,-2.537466,263.215202,Scottish power,No,No,,CCGT
