In [None]:
import pandas as pd
import numpy as np
import os
import clock_plot.clock as cp
import plotly.express as px
import clock_plot
import warnings
import datetime
import os
import urllib.request as urllib2
import json

In [None]:
# Suppress warnings as there is a warning coming from plotly using pd.append rather than pd.concat
warnings.simplefilter(action='ignore', category=FutureWarning)

In [None]:
# Read the example household readings shipped in the data directory next to the
# clock_plot package, then parse the "datetime" column into real timestamps.
readings_csv_path = os.path.join(
    clock_plot.CLOCK_PLOT_DIR, "..", "data", "eden_2_houseid_324_combined_data.csv"
)
readings_raw = pd.read_csv(readings_csv_path).assign(
    datetime=lambda frame: pd.to_datetime(frame["datetime"])
)

In [None]:
# Reshape from one column per fuel to long format: one row per (datetime, fuel)
# with the meter value in "reading" and the fuel shortened to "elec" / "gas".
fuel_labels = {"reading_elec": "elec", "reading_gas": "gas"}
readings = (
    readings_raw
    .melt(id_vars=["datetime"], value_vars=["reading_elec", "reading_gas"])
    .rename(columns={"variable": "fuel", "value": "reading"})
    .assign(fuel=lambda frame: frame["fuel"].map(fuel_labels))
)
readings.head()


In [None]:

# Tag every reading with the period it falls in. Everything starts as
# 'Pre-intervention'; each later threshold then overwrites the rows at or after it,
# so the thresholds must be applied in chronological order.
period_col = 'pre/post intervention'
period_starts = [
    (datetime.datetime(2019, 3, 16, 9), 'Post-intervention'),
    (datetime.datetime(2020, 3, 1, 0), 'HP Only'),
]

readings[period_col] = 'Pre-intervention'
for period_start, period_label in period_starts:
    readings.loc[readings['datetime'] >= period_start, period_col] = period_label

In [None]:
# Plot pre-intervention household gas usage by time of day, colored by season
seasonal_gas_filters = {"fuel": "gas", "pre/post intervention": "Pre-intervention"}
fig = cp.clock_plot(
    readings,
    datetime_col="datetime",
    value_col="reading",
    filters=seasonal_gas_filters,
    color="season",
    title_start="Seasonal usage",
)

In [None]:
# Plot pre-intervention household gas usage by time of day, colored by month,
# restricted to the Winter and Summer seasons.
# From this we can see that months of the same season have similar usage,
# and months of different seasons have quite different usage.
monthly_gas_filters = {
    "fuel": "gas",
    "pre/post intervention": "Pre-intervention",
    "season": ["Winter", "Summer"],
}
# The Prism palette with its third color (index 2) appended once more at the end.
month_palette = px.colors.qualitative.Prism + [px.colors.qualitative.Prism[2]]
fig = cp.clock_plot(
    readings,
    datetime_col="datetime",
    value_col="reading",
    filters=monthly_gas_filters,
    color="month",
    color_discrete_sequence=month_palette,
    title_start="Monthly usage",
)

In [None]:
# Plot pre-intervention household electricity usage by time of day, colored by
# weekend/weekday, with one line per week plus an aggregated mean per weekend/weekday group.
# We can see the patterns in usage on weekdays compared to weekend days.
weekend_elec_filters = {"fuel": "elec", "pre/post intervention": "Pre-intervention"}
fig = cp.clock_plot(
    readings,
    datetime_col="datetime",
    value_col="reading",
    filters=weekend_elec_filters,
    color="weekend",
    line_group="week",
    aggregate={"weekend": "mean"},
    color_discrete_sequence=["red", "blue"],
    title_start="Weekday/Weekend usage",
)

In [None]:
# Plot household energy usage by time of day in the summer, for the periods before
# and after an intervention (color = fuel, dash style = period).
# Here we see usage of both fuels is broadly similar but the schedule seems to have
# shifted 1 hour earlier.
intervention_orders = {
    "pre/post intervention": ["Pre-intervention", "Post-intervention"],
    "fuel": ["gas", "elec"],
}
fig = cp.clock_plot(
    readings,
    datetime_col="datetime",
    value_col="reading",
    filters={"season": "Summer"},
    color="fuel",
    line_dash="pre/post intervention",
    title_start="Pre/Post intervention gas and electric usage",
    category_orders=intervention_orders,
)

In [None]:
# Plot pre-intervention household electricity usage by hour of day, colored by
# season (Summer or Winter), with a line for each week of usage plus an aggregated
# mean for each season.
# We can see average electricity usage doesn't change greatly from season to season,
# and while weekly usage is fairly predictable there are some outliers (week 48 for example).
seasonal_elec_filters = {
    "fuel": "elec",
    "pre/post intervention": "Pre-intervention",
    "season": ["Summer", "Winter"],
}
fig = cp.clock_plot(
    readings,
    datetime_col="datetime",
    value_col="reading",
    filters=seasonal_elec_filters,
    aggregate={"season": "mean"},
    color="season",
    line_dash="pre/post intervention",
    line_group="week",
    title_start="Seasonal usage",
    category_orders={"pre/post intervention": ["Pre-intervention", "Post-intervention"]},
)

In [None]:
# Load in Energy Mix Data from the National Grid ESO datastore_search API.
# The window is expressed in rows: skip `n_years_offset` years' worth of rows, then
# fetch `n_years` years' worth. A year is taken as 48 * 365 rows
# (presumably half-hourly records; leap days are not accounted for, so the window
# is approximate - TODO confirm against the dataset).
rows_per_year = 48 * 365
n_years_offset = 10
n_offset = rows_per_year * n_years_offset
n_years = 2
n_rows = rows_per_year * n_years
url = f"https://data.nationalgrideso.com/api/3/action/datastore_search?resource_id=f93d1835-75bc-43e5-84ad-12472b180a98&limit={n_rows}&offset={n_offset}"

# Open the response in a context manager so the underlying connection is closed
# as soon as the payload has been read, even if reading raises.
with urllib2.urlopen(url) as fileobj:
    data = fileobj.read()

# The response body is JSON; the rows live under result -> records.
datadict = json.loads(data.decode('utf-8'))
gen_mix_raw = pd.DataFrame(datadict["result"]["records"])
gen_mix_raw.head()

In [None]:
# Reshape to long format: one row per (DATETIME, SOURCE) pair with the
# generation figure for that source in 'VALUE'.
generation_sources = [
    "GAS", "COAL", "NUCLEAR", "WIND", "HYDRO", "IMPORTS", "BIOMASS", "OTHER", "SOLAR",
]
gen_mix = (
    gen_mix_raw
    .melt(id_vars=["DATETIME"], value_vars=generation_sources)
    .rename(columns={"variable": "SOURCE", "value": "VALUE"})
)
gen_mix.head()

In [None]:
# Plot the energy mix over the day, with lines colored by SOURCE and grouped such
# that each month gets a single line.
#   filters                 - selects rows where the given column contains one of the given values
#   color_discrete_sequence - the sequence of colors to be used for the lines of the chart
#   category_orders         - the order in which to plot the data; together with
#                             color_discrete_sequence this decides which color each category gets
plotted_sources = ["IMPORTS", "WIND", "SOLAR"]
fig = cp.clock_plot(
    gen_mix,
    datetime_col="DATETIME",
    value_col="VALUE",
    filters={"SOURCE": plotted_sources},
    color="SOURCE",
    line_group="month",
    title_start="Energy generation",
    color_discrete_sequence=["red", "blue", "green"],
    category_orders={"SOURCE": list(plotted_sources)},
)

In [None]:
# Plot 2019 solar generation over the day, with lines colored by season and grouped
# by day, plus an aggregated mean line per season.
#   line_shape   - whether to use spline interpolation or not (either 'spline' or 'linear')
#   aggregate    - the parameters for additional lines of aggregated data to be plotted
#   bins_per_day - allows datetimes to be binned; useful when datetimes are
#                  inconsistent or use minutes
solar_2019_filters = {"SOURCE": "SOLAR", "year": 2019}
fig = cp.clock_plot(
    gen_mix,
    datetime_col="DATETIME",
    value_col="VALUE",
    filters=solar_2019_filters,
    color="season",
    title_start="Energy generation",
    line_group="day",
    line_shape="linear",
    aggregate={"season": "mean"},
    bins_per_day=48,
)