# Pip install ipywidgets to environment when running for first time

In [1]:
!pip install ipywidgets==8.1.1
!jupyter nbextension enable --py widgetsnbextension

Collecting ipywidgets==8.1.1
  Obtaining dependency information for ipywidgets==8.1.1 from https://files.pythonhosted.org/packages/4a/0e/57ed498fafbc60419a9332d872e929879ceba2d73cb11d284d7112472b3e/ipywidgets-8.1.1-py3-none-any.whl.metadata
  Downloading ipywidgets-8.1.1-py3-none-any.whl.metadata (2.4 kB)
Collecting widgetsnbextension~=4.0.9 (from ipywidgets==8.1.1)
  Obtaining dependency information for widgetsnbextension~=4.0.9 from https://files.pythonhosted.org/packages/29/03/107d96077c4befed191f7ad1a12c7b52a8f9d2778a5836d59f9855c105f6/widgetsnbextension-4.0.9-py3-none-any.whl.metadata
  Downloading widgetsnbextension-4.0.9-py3-none-any.whl.metadata (1.6 kB)
Collecting jupyterlab-widgets~=3.0.9 (from ipywidgets==8.1.1)
  Obtaining dependency information for jupyterlab-widgets~=3.0.9 from https://files.pythonhosted.org/packages/e8/05/0ebab152288693b5ec7b339aab857362947031143b282853b4c2dd4b5b40/jupyterlab_widgets-3.0.9-py3-none-any.whl.metadata
  Downloading jupyterlab_widgets-3.0.9-

# Importing modules and setting up classes

In [1]:
from IPython.display import clear_output, display
import plotly.graph_objects as go
import matplotlib.pyplot as plt
from datetime import datetime
import ipywidgets as widgets
import plotly.express as px
import pandas as pd
import pandas as pd
import numpy as np
import requests
import os

In [2]:
# Setting up timing mappings to translate MATLAB syntax
TIMING_MAPPING = {
    "hourly": "H",
    "daily": "D",
    "minutely": "T"
}

REVERSE_TIMING_MAPPING = {v: k for k, v in TIMING_MAPPING.items()}

class CTDataProcessor:
    
    def __init__(self, timing="hourly"):
        self.timing = TIMING_MAPPING.get(timing, timing)

    def get_human_readable_timing(self):
        return REVERSE_TIMING_MAPPING.get(self.timing, self.timing)

    def process(self, filename="Results.csv"):

        # Read CT Data
        df = self.read_CT_data(filename)
        
        # get the list of deviceName
        device_names = self.list_machines(df)
        
        # Remove Power Failure detected rows
        df = df[df['powerFailureDetected'] != 1]

        # Export data for each machine
        for device in device_names:
            # Get current data
            temp_df = df[df['deviceName'] == device][["A", "channel1", "channel2", "channel3"]].dropna()
            
            # Aggregate data
            temp_df = temp_df.resample(self.timing).mean().fillna(0)

            # Placeholder columns 
            # ARE ANY OF THESE NEEDED? CAN ADD COLUMNS AS REQURED
            temp_df["V"] = 0
            temp_df["kW"] = 0
            temp_df["cost"] = 0
            
            directory_name = str(device)
            if not os.path.exists(directory_name):
                os.makedirs(directory_name)
                
            file_name = os.path.join(directory_name, f"{self.get_human_readable_timing()}_{device}.csv")
            temp_df.to_csv(file_name)

    def read_CT_data(self, filename):
        df = pd.read_csv(filename)
        df['timestamp'] = pd.to_datetime(df['timestamp'])
        df.set_index('timestamp', inplace=True)
        return df.dropna()

    def list_machines(self, df):
        devices = df['deviceName'].unique()
        return devices
    
class CTDataAnalyser:
    
    def __init__(self, MACHINE_NAMES, TIMING, PHASE, VOLTAGE, DAY_UNIT_COST, NIGHT_UNIT_COST, NIGHT_TARIFF_START_TIME, NIGHT_TARIFF_END_TIME, REGION):
        self.MACHINE_NAMES = MACHINE_NAMES
        self.TIMING = TIMING
        self.PHASE = PHASE
        self.VOLTAGE = VOLTAGE
        self.DAY_UNIT_COST = DAY_UNIT_COST
        self.NIGHT_UNIT_COST = NIGHT_UNIT_COST
        self.NIGHT_TARIFF_START_TIME = NIGHT_TARIFF_START_TIME
        self.NIGHT_TARIFF_END_TIME = NIGHT_TARIFF_END_TIME
        self.REGION = REGION

    def machine_calculations(self):
        
        # Getting adjusted timescale
        self.tscale = self.adjust_timescale()

        for device_name in self.MACHINE_NAMES:
            
            # Loading dataframe of individual machine
            device_df = pd.read_csv(f'{device_name}/{self.TIMING}_{device_name}.csv')

            device_df = self.compute_kW_utilization_and_cost(device_df, device_name)

            device_df = self.estimate_load_imbalance(device_df, device_name)

            device_df = self.estimate_carbon_emissions(device_df)

            device_df.to_csv(f'{device_name}/{self.TIMING}_{device_name}.csv', index=False)

    # Calculates deviation from average between current channels for each individual current channel
    def unbalanced(self, channels):

        # Convert to numpy array for easier calculations
        channels = np.array(channels).T # Transpose to get channels as columns

        Iav = np.mean(channels, axis=1)

        u = np.max(np.abs(channels - Iav[:, np.newaxis]), axis=1) / Iav * 100
        u[np.isnan(u)] = 0
        
        return u
    
    def adjust_timescale(self):
        if self.TIMING == 'hourly':
            tscale = 1
        elif self.TIMING == 'minutely':
            tscale = 60
        elif self.TIMING == 'daily':
            tscale = 1/24
        elif self.TIMING == 'weekly':
            tscale = 1/(24*7)
        else:
            raise ValueError("Invalid timing value")
        return tscale
    
    def compute_kW_utilization_and_cost(self, device_df, device_name):
        # Only 3-phase Machines' Current should be multiplied by sqrt(3)
        device_df['A'] = device_df['A'] * (3 ** 0.5 if self.PHASE[self.MACHINE_NAMES.index(device_name)] == 3 else 1) / self.tscale

        # Insert the voltages of individual machines
        device_df['V'] = self.VOLTAGE[self.MACHINE_NAMES.index(device_name)]

        # Estimate kW
        device_df['kW'] = device_df['V'] * device_df['A'] / 1000

        # Estimate Utilization
        device_df['utilization'] = (device_df['A'] > 1).astype(int)

        # Estimate unit cost of electricity
        device_df['p'] = self.DAY_UNIT_COST[self.MACHINE_NAMES.index(device_name)]
        device_df['timestamp'] = pd.to_datetime(device_df['timestamp'])
        device_df.loc[device_df['timestamp'].dt.hour >= self.NIGHT_TARIFF_START_TIME, 'p'] = self.NIGHT_UNIT_COST[self.MACHINE_NAMES.index(device_name)]
        device_df.loc[device_df['timestamp'].dt.hour < self.NIGHT_TARIFF_END_TIME, 'p'] = self.NIGHT_UNIT_COST[self.MACHINE_NAMES.index(device_name)]
        
        device_df['cost'] = device_df['kW'] * device_df['p']
        return device_df
    
    def estimate_load_imbalance(self, device_df, device_name):
        if self.PHASE[self.MACHINE_NAMES.index(device_name)] == 1:
            device_df['unbalanced'] = self.unbalanced([device_df['channel1']])
        elif self.PHASE[self.MACHINE_NAMES.index(device_name)] == 3:
            device_df['unbalanced'] = self.unbalanced([device_df['channel1'], device_df['channel2'], device_df['channel3']])
        return device_df
    
    def estimate_carbon_emissions(self, device_df):
        st = str(device_df['timestamp'].iloc[0])
        en = str(device_df['timestamp'].iloc[-1])

        timestamp, co2 = self.fetch_carbon_emission_data(st, en)
        
        # Creating a DataFrame from the retrieved data
        tco2 = pd.DataFrame({'timestamp': timestamp, 'co2': co2})
        tco2['timestamp'] = pd.to_datetime(tco2['timestamp'])

        # Re-sampling operations
        # The MATLAB code was using 'retime' to handle the time series data.
        # `resample` and `interpolate` achieve a similar result.
        tco2.set_index('timestamp', inplace=True)
        tco2 = tco2.resample(TIMING_MAPPING.get(self.TIMING, self.TIMING)).mean().interpolate() #.reset_index()

        # With the test data tried so far this data lags an hour behind the MATLAB data, so may need to add an hour, but might an an issue with daylight savings
        # COME BACK TO THIS MUST CHECK POTENTIAL ISSUES WITH DAYLIGHT SAVINGS
        tco2.index = tco2.index + pd.Timedelta(hours=1)

        # Merging with the original DataFrame M
        device_df = pd.merge(device_df, tco2, on='timestamp', how='left')
        device_df['co2'].fillna(0, inplace=True)
        device_df['co2'] = device_df['kW'] * device_df['co2']
        return device_df

    def fetch_carbon_emission_data(self, st, en):
        # Convert to ISO8601 format and then replace the last characters to fit the required 'Z' format.
        st_iso = datetime.fromisoformat(st).isoformat().replace('+00:00', 'Z')
        en_iso = datetime.fromisoformat(en).isoformat().replace('+00:00', 'Z')

        url = f'https://api.carbonintensity.org.uk/intensity/{st_iso}/{en_iso}/'

         # Make a HTTP request to the URL with retries
        for _ in range(5):
            data = requests.get(url, timeout=10)
            # check if the response was successful 
            if(data.status_code == 200):
                data = data.json()
            else:
                print("HTTP request failed. Response code: " + str(data.status_code))

        # Extract useful data from the response
        timestamp = [datetime.fromisoformat(item['from'].replace('Z', '+00:00')).strftime('%Y-%m-%d %H:%M') for item in data['data']]
        co2 = [item['intensity']['actual'] / 1000 / self.tscale for item in data['data']]
        return timestamp, co2
    
class DataPresentation:
    def calculate_metrics_between_datetimes(from_datetime=None, to_datetime=None, MACHINE_NAMES=None, TIMING=None):
        for device_name in MACHINE_NAMES:
            device_df = pd.read_csv(f'{device_name}/{TIMING}_{device_name}.csv')
            device_df['timestamp'] = pd.to_datetime(device_df['timestamp'])

            # If from_datetime or to_datetime are not provided, set them to the min and max timestamp in the csv.
            if from_datetime is None:
                from_datetime = device_df['timestamp'].min()
            if to_datetime is None:
                to_datetime = device_df['timestamp'].max()

            # Extracting data within the datetime range
            device_data_range = device_df[(device_df['timestamp'] >= from_datetime) & (device_df['timestamp'] <= to_datetime)]

            print("\n----------------------{}--------------------".format(device_name))
            
            total_kW = device_data_range['kW'].sum()
            print("\ntotal kW between {} and {} is = {:.2f} kW".format(from_datetime, to_datetime, total_kW))
            
            if TIMING.lower() != "minutely":
                total_co2 = device_data_range['co2'].sum()
                print("\ntotal CO2 emissions between {} and {} is = {:.2f} kg".format(from_datetime, to_datetime, total_co2))
            else:
                print('\nCannot estimate CO2 if the timing = "minutely"')

            total_cost = device_data_range['cost'].sum()
            print("\ntotal cost between {} and {} is = £{:.2f}".format(from_datetime, to_datetime, total_cost))
            
            avg_channel1 = device_data_range['channel1'][device_data_range['channel1'] != 0].mean()
            avg_channel2 = device_data_range['channel2'][device_data_range['channel2'] != 0].mean()
            avg_channel3 = device_data_range['channel3'][device_data_range['channel3'] != 0].mean()
            print("\nAverage (non-zero) Current (A) in each channels between {} and {} are = [{:.2f} A, {:.2f} A, {:.2f} A]".format(from_datetime, to_datetime, avg_channel1, avg_channel2, avg_channel3))
            
            total_unbalanced = device_data_range['unbalanced'][device_data_range['unbalanced'] != 0].mean()
            print("\nAverage (non-zero) load imbalance between {} and {} is = {:.2f}%".format(from_datetime, to_datetime, total_unbalanced))

    


# Run cell below to check what machines can be analysed

In [3]:
# Read the CSV file into a DataFrame
df = pd.read_csv("Results.csv")

# Get unique values from the 'Device Name' column
unique_names = df['deviceName'].unique()

print(', '.join([f'"{name}"' for name in unique_names]))

"NewIM2", "MediumIM", "SmallIM", "NewIM1", "Compessor"


# Define parameters for analysis

In [4]:
# Define timing: minutely, hourly or daily
TIMING = 'hourly'

# Machines
MACHINE_NAMES = ["Chiller", "BlowMoulder", "Desran"]

# Voltage
VOLTAGE = [240, 240, 240]

# Phase
PHASE = [3, 3, 3]

# Unit cost
DAY_UNIT_COST = [0.45, 0.45, 0.45]
NIGHT_UNIT_COST = [0.09, 0.09, 0.09]

# Night Tariff times - Ignore if both night & day tariffs are the same
# Assumed 10pm - 8am as night rate
NIGHT_TARIFF_START_TIME = 22
NIGHT_TARIFF_END_TIME = 8

# CO2 estimation region
REGION = 'national'

In [5]:
# Checking parameters lists are all the same length
def validate_list_lengths(*lists):
    reference_length = len(lists[0])
    for lst in lists[1:]:
        if len(lst) != reference_length:
            raise ValueError("All lists must have the same length!")
            
validate_list_lengths(MACHINE_NAMES, VOLTAGE, PHASE, DAY_UNIT_COST, NIGHT_UNIT_COST)

# Produce .csv files with analysed data

These will be saved in a directory coresponding with the machine name and timing

In [6]:
processor = CTDataProcessor(TIMING)
processor.process()


to_remove = []  # List to keep track of devices with insufficient data

for device_name in MACHINE_NAMES[:]:  # Create a shallow copy for iteration
    device_df = pd.read_csv(f'{device_name}/{TIMING}_{device_name}.csv')

    # Skipping machines with insufficient data for analysis
    if len(device_df) < 2:
        print(f"Skipping {device_name} as only one line of data.\n")
        to_remove.append(device_name)

# Update MACHINE_NAMES to exclude devices with insufficient data
MACHINE_NAMES = [device for device in MACHINE_NAMES if device not in to_remove]

analyser = CTDataAnalyser(MACHINE_NAMES, TIMING, PHASE, VOLTAGE, DAY_UNIT_COST, NIGHT_UNIT_COST, NIGHT_TARIFF_START_TIME, NIGHT_TARIFF_END_TIME, REGION)
analyser.machine_calculations()

FileNotFoundError: [Errno 2] No such file or directory: 'Chiller/hourly_Chiller.csv'

# Plot figures
Run code below to generate figures then use the dropdowns to interact with them. Note that two identical graphs will be generated.

ctrl+click to select multiple columns for display

In [32]:
machine_data_paths = []

for device_name in MACHINE_NAMES:
    machine_data_paths.append(f'{device_name}/{TIMING}_{device_name}.csv')

# Dropdown for selecting CSV file
file_selector = widgets.Dropdown(
    options=machine_data_paths,
    description='CSV File:',
    disabled=False
)

# Multi-selection widget for columns (initialize with an empty list)
column_selector = widgets.SelectMultiple(
    options=[],
    description='Columns',
    disabled=False
)

out = widgets.Output()

# Getting options for column dropdown
def update_columns(change):
    df = pd.read_csv(file_selector.value, parse_dates=["timestamp"])
    column_selector.options = df.columns[1:]

file_selector.observe(update_columns, names='value')
update_columns(None)

# Define a plotting function with a secondary Y-axis
def plot_data(file, columns):
    with out:
        out.clear_output(wait=True)  # Clear the previous graph

        df = pd.read_csv(file, parse_dates=["timestamp"])

        # Determine which columns to put on the secondary axis
        secondary_y_columns = [col for col in columns if df[col].std() > 10]

        # Initialize figure
        fig = go.Figure()

        # Add traces
        for col in columns:
            fig.add_trace(go.Scatter(x=df['timestamp'], y=df[col], name=col, yaxis='y2' if col in secondary_y_columns else 'y'))

        # Update layout for secondary axis
        fig.update_layout(
            yaxis2=dict(
                overlaying='y',
                side='right'
            )
        )

        fig.show()

display(out)

# Use interactive widget to show the graph
widgets.interactive(plot_data, file=file_selector, columns=column_selector)

Output()

interactive(children=(Dropdown(description='CSV File:', options=('Chiller/hourly_Chiller.csv', 'BlowMoulder/ho…

# Calculate total kW between datetime
Modify the datetimes below and then run code to calculate results (range is inclusive of both entered datetimes). To get metrics for entire timeperiod set "to_datetime" and "from_datetime" equal to None

In [45]:
from_datetime = pd.Timestamp('2023-12-18 00:00:00')
to_datetime = pd.Timestamp('2023-12-19 00:00:00')

DataPresentation.calculate_metrics_between_datetimes(None, None, MACHINE_NAMES, TIMING)


----------------------Chiller--------------------

total kW between 2023-09-18 10:00:00 and 2023-09-27 17:00:00 is = 1918.34 kW

total CO2 emissions between 2023-09-18 10:00:00 and 2023-09-27 17:00:00 is = 211.62 kg

total cost between 2023-09-18 10:00:00 and 2023-09-27 17:00:00 is = £578.76

Average (non-zero) Current (A) in each channels between 2023-09-18 10:00:00 and 2023-09-27 17:00:00 are = [27.44 A, 27.75 A, 26.25 A]

Average (non-zero) load imbalance between 2023-09-18 10:00:00 and 2023-09-27 17:00:00 is = 3.33%

----------------------BlowMoulder--------------------

total kW between 2023-09-18 10:00:00 and 2023-09-27 17:00:00 is = 6830.55 kW

total CO2 emissions between 2023-09-18 10:00:00 and 2023-09-27 17:00:00 is = 753.66 kg

total cost between 2023-09-18 10:00:00 and 2023-09-27 17:00:00 is = £2057.30

Average (non-zero) Current (A) in each channels between 2023-09-18 10:00:00 and 2023-09-27 17:00:00 are = [76.87 A, 76.96 A, 66.24 A]

Average (non-zero) load imbalance betw