### Notebook Guide
- todo

### Prerequisites

In [5]:
# run if you're unsure if your packages are up to date.
import sys
!{sys.executable} -m pip install -r requirements.txt

Defaulting to user installation because normal site-packages is not writeable


In [1]:
import pandas as pd
import os
import glob
import numpy as np
import plotly.graph_objects as go
import plotly.express as px
from plotly.subplots import make_subplots

### Import Files
Files are standardised as per /Data/README.txt

/Data/Cycling/(Cell)/AX\_(Cell Type)(Cell ID)\_(Day)(Month)\_CAX

Cell Type = 2 chars.

Cell ID  = 3 chars

In [2]:
data_directory = 'Data/Cycling/'
files = glob.iglob(data_directory +'**/*.txt', recursive=True)
# this will work provided you keep the data format standardised...
Cell_Tests = dict()

def retrieve_files():
    dataset = dict()
    for f in files:
        columns = ['time/s', 'Ecell/V', 'I/mA', 'Temperature/°C', 'cycle number']
        temp_df = pd.read_csv(f, sep='\t', usecols=columns, encoding='ISO-8859-1')
        dataset[os.path.basename(f)[3:17]] = temp_df
        print(f'Successfully imported test {os.path.basename(f)[3:17]} with shape {temp_df.shape}')
    return dataset

Cell_Tests = retrieve_files()
print("Done")

Successfully imported test NX001_21-08-23 with shape (269049, 5)
Successfully imported test NX001_31-07-23 with shape (120655, 5)
Successfully imported test NX002_21-08-23 with shape (268652, 5)
Successfully imported test NX002_31-07-23 with shape (120808, 5)
Successfully imported test NX006_21-08-23 with shape (207146, 5)
Successfully imported test NX006_31-07-23 with shape (120806, 5)
Successfully imported test RS001_21-08-23 with shape (228545, 5)
Successfully imported test RS001_31-07-23 with shape (120465, 5)
Successfully imported test RS006_21-08-23 with shape (229161, 5)
Successfully imported test RS006_31-07-23 with shape (120383, 5)
Successfully imported test SG003_07-02-23 with shape (693003, 5)
Successfully imported test SG004_07-02-23 with shape (227678, 5)
Successfully imported test SG007_07-02-24 with shape (645535, 5)
Successfully imported test SG007_21-08-23 with shape (460741, 5)
Successfully imported test SG007_31-07-23 with shape (115995, 5)
Successfully imported tes

### Utility Functions

In [3]:
def add_dates(dataset):
    for key, df in dataset.items():
        df['date'] = pd.to_datetime(key[6:])
    return dataset

def filter_singlecell(dataset, cell_partial_key):
    """
    Returns a subset of the dataset containing only the cell specified.
    Args:
    dataset (dict): The dataset to filter.
    cell_partial_key (str): The partial key (cell name) to match.
    """
    filtered_dataset = dict()
    for key, v in dataset.items():
        if cell_partial_key in key:
            filtered_dataset[key] = v
    return filtered_dataset

def combine_tests(dataset):
    """
    correct cycle number
    """
    sorted_keys = sorted(dataset.keys(), key=lambda x: pd.to_datetime(x[6:])) # there's also probably a better way of doing this, but since the codes contain the date might as well do it this way.
    dataframes_temp = [] # there's a better way of doing this that avoids creating an extra list, but this is easy..
    cumulative_time = 0
    cumulative_cycles = 0
    for key in sorted_keys:
        df = dataset[key]
        df['time/s'] = df['time/s'] + cumulative_time
        df['cycle number'] = df['cycle number'] + cumulative_cycles
        cumulative_time = df['time/s'].max()
        cumulative_cycles = df['cycle number'].max()
        dataframes_temp.append(df)
    combined_df = pd.concat(dataframes_temp, ignore_index=True)
    return combined_df

def add_capacity(df):
    """
    Requires some work on the filtering side.
    """
    grouped = df.groupby('cycle number')
    capacity = []
    for cycle, group in grouped: # I'm kind of amazed that this works....
        # How does Python know that by specifying (group 1), (group 2) that you want to group by the unique value in the table?
        # Doing something like this in another framework would require a whole search index with .unique() or iterating by cycle number values.
        # Having to debug this to know how/why/what it is doing is why I hate implicit programming.
        group = group[group['I/mA'] < -500] # filter down the group to just the times which are discharging
        times = group['time/s'].values
        currents = group['I/mA'].values
        voltage = group['Ecell/V'].values
        # need to filter to just the charge stage
        capacity_mAh = -np.trapz(currents, times)/3600.0 # this uses the composite trapezoidal rule, which is apparently more accurate than a simple trapezoidal.
        capacity_mWh = -np.trapz(currents*voltage, times)/3600.0 # check if this is bugged possibly what was causing the massive datasets
        capacity.append((cycle, capacity_mAh, capacity_mWh))
    capacity_df = pd.DataFrame(capacity, columns=['cycle number', 'capacity/mAh', 'capacity/mWh'])
    df = df.merge(capacity_df, on='cycle number', how='left')
    return df

def add_capacity_dataset(dataset):
    for key, df in dataset.items():
        df = add_capacity(df)
        dataset[key] = df
    return dataset


### Plotting Functions

In [4]:
# this could be made less redundant and more efficient.
def plot_capacity_allcycles_tohtml(cycle_data, showfig=False):
    fig = plot_capacity_allcycles(cycle_data, showfig)
    fig.write_html(data_directory+"/Output/"+cycle_data.name+"_capacity.html")

def plot_capacity_allcycles_tosvg(cycle_data, showfig=False):
    # DO NOT USE ON LARGE DATASETS
    fig = plot_capacity_allcycles(cycle_data, showfig)
    fig.write_image(data_directory+"/Output/"+cycle_data.name+"_capacity.svg")

def plot_capacity_allcycles_topdf(cycle_data, showfig=False):
    # DO NOT USE ON LARGE DATASETS
    fig = plot_capacity_allcycles(cycle_data, showfig)
    fig.write_image(data_directory+"/Output/"+cycle_data.name+"_capacity.pdf")

def plot_capacity_allcycles_topng(cycle_data, _width, _height, showfig=False):
    fig = plot_capacity_allcycles(cycle_data, showfig)
    fig.write_image(data_directory+"/Output/"+cycle_data.name+"_capacity.png", width=_width, height=_height)

def plot_capacity_allcycles(cycle_data, showfig=True):
    fig = px.line(cycle_data, x=cycle_data['cycle number'], y=cycle_data['capacity/mAh'], color='date')
    if(showfig): fig.show()
    return fig

def plot_cell_voltage(cycle_data):
    fig = px.line(cycle_data, x=cycle_data['time/s'], y=cycle_data['Ecell/V'], color='cycle number')
    fig.update_xaxes(rangeslider_visible=True)
    fig.show()

def plot_cell_voltage_current(cycle_data, showfig=True):
    fig_voltage = px.line(cycle_data, x=cycle_data['time/s'], y=cycle_data['Ecell/V'], color='cycle number')
    fig = make_subplots(specs=[[{"secondary_y": True}]])
    for trace in fig_voltage.data:
        fig.add_trace(trace, secondary_y=False)

    fig.add_trace(go.Line(x=cycle_data['time/s'], y=cycle_data['I/mA']*0.001, mode='lines', name='I/A'), secondary_y=True)
    fig.update_xaxes(rangeslider_visible=True)
    if(showfig): fig.show()
    return fig

def plot_cell_voltage_current_tohtml(cycle_data, showfig=False):
    fig = plot_cell_voltage_current(cycle_data, showfig)
    fig.write_html(data_directory+"/Output/"+cycle_data.name+"_cycling.html")

In [5]:
# ONLY RUN ONCE!! CURRENTLY BUGGED WHEN RUNNING MULTIPLE TIMES
Cell_Tests = add_dates(Cell_Tests)
Cell_Tests = add_capacity_dataset(Cell_Tests)

In [6]:
# Add calculated values if not already present in the dataset.
# Have a think about whether this should be output to a separate processed folder or read from and sent back to the database.

#print(Cell_Tests)
SG009 = filter_singlecell(Cell_Tests, "SG009")
SG009 = combine_tests(SG009)
SG009.name = "SG009"
#print(NX001)
# fix bug where capcity column can duplicate
# check this influences the original array

In [11]:
plot_capacity_allcycles_tohtml(SG009)
plot_cell_voltage_current_tohtml(SG009)
#plot_cell_voltage_current(SG009)

# need to split the data to only plot 1/10th of points



plotly.graph_objs.Line is deprecated.
Please replace it with one of the following more specific types
  - plotly.graph_objs.scatter.Line
  - plotly.graph_objs.layout.shape.Line
  - etc.




In [8]:
SG008 = filter_singlecell(Cell_Tests, "SG008")
SG008 = combine_tests(SG008)
SG008.name = "SG008"
plot_capacity_allcycles_tohtml(SG008)

## Plotting capacity against cycles

# save this for later when plotting.

In [9]:
#Don't use these fns... too slow

def resample(df, time_column, rule):
    df[time_column] = pd.to_datetime(df[time_column], unit='s')
    return df.resample(rule, on=time_column).mean().reset_index()

def plot_all_cycles(df, battery_name):
    #sample the data as otherwise it takes forever to plot

    df_sampled = resample(df, 'time/s', '30s')
    fig = go.Figure()

    fig.add_trace(go.Scatter(x=df['time/s'], y=df_sampled['Ecell/V'], mode='lines', name='Ecell/V'))
    fig.add_trace(go.Scatter(x=df['time/s'], y=df_sampled['I/mA'], mode='lines', name='I/mA'))
    fig.add_trace(go.Scatter(x=df['time/s'], y=df_sampled['Temperature/°C'], mode='lines', name='Temperature/°C'))

    cycles = df['cycle number'].unique()

    # Add shaded areas for each cycle
    for cycle in cycles:
        cycle_data = df[df['cycle number'] == cycle]
        cycle_start = cycle_data['time/s'].min()
        cycle_end = cycle_data['time/s'].max()

        fig.add_shape(
            type='rect',
            x0=cycle_start,
            x1=cycle_end,
            y0=0,
            y1=1, 
            yref='paper',
            fillcolor='LightSlateGrey' if cycle % 2 == 0 else 'LightSkyBlue',
            opacity=0.3,
            line_width=0,
        )

    # Customize layout
    fig.update_layout(
        title=f'Battery {battery_name}',
        xaxis_title='Time (s)',
        yaxis_title='Value',
        template='plotly_white',
        shapes=[],
    )

    # Show the plot
    fig.show()

# Archive

In [10]:
#Function to plot specified cycle
def plot_cycle(df, battery_name, cycle_number, show_temp):
    
    cycle_data = df[(df['cycle number'] == cycle_number)]

    fig = make_subplots(specs=[[{"secondary_y": True}]])
    fig.add_trace(go.Scatter(x=cycle_data['time/s'], y=cycle_data['Ecell/V'], mode='lines', name='Ecell/V'), secondary_y=False)
    fig.add_trace(go.Scatter(x=cycle_data['time/s'], y=cycle_data['I/mA']*0.001, mode='lines', name='I/A'), secondary_y=True)
    if show_temp == True:
        fig.add_trace(go.Scatter(x=cycle_data['time/s'], y=cycle_data['Temperature/°C'], mode='lines', name='Temperature/°C'))

    fig.update_layout(title=f'Cycle {cycle_number} for Battery {battery_name}', xaxis_title='Time (s)', yaxis_title='Value')
    fig.update_yaxes(title_text="Cell Voltage (V)", secondary_y=False)
    fig.update_yaxes(title_text="Current (A)", secondary_y=True)
    fig.show()

#Plot the specified cycle and the two cycles that come after it
#todo rewrite this, i don't like it.
#todo use the define y function to allow it to display temp, voltage and current on the same chart or alternatively use subplots if it works better
def plot_3_cycles_cyclenum(df, battery_name, cycle_number, show_temp):
    
    cycle_data = df[(df['cycle number'] == cycle_number) | (df['cycle number'] == cycle_number + 1) | (df['cycle number'] == cycle_number + 2)]
    fig = px.line(cycle_data, x=cycle_data['time/s'], y=cycle_data['Ecell/V'], color='cycle number')
    #fig = make_subplots(specs=[[{"secondary_y": True}]])
    #fig.add_trace(go.Scatter(x=cycle_data['time/s'], y=cycle_data['Ecell/V'], mode='lines', name='Ecell/V'), secondary_y=False)
    fig.add_trace(go.Line(x=cycle_data['time/s'], y=cycle_data['I/mA']*0.001, name='I/A'))
    #if show_temp == True:
    #    fig.add_trace(go.Scatter(x=cycle_data['time/s'], y=cycle_data['Temperature/°C'], mode='lines', name='Temperature/°C'))
    fig.update_layout(title=f'Cycle {cycle_number}, {cycle_number+1} and {cycle_number+2} for Battery {battery_name}', xaxis_title='Time (s)')
    fig.update_yaxes(title_text="Cell Voltage (V)", secondary_y=False)
    fig.update_yaxes(title_text="Current (A)", secondary_y=True)
    # I spent a fair few hours here attempting to use an iterative or other method to highlight at what point the cycle number changed.
    # This is important, as I need to know exactly how the cycle number was determined to be able to make accurate capacity calculations.
    # Then I realised there's a far simpler solution. Just change the colour based on cycle number.
    # For a small number of cycles, this is sufficient, but won't work for plotting large cycle counts as you soon run out of distinguishable colours.
    # ISSUE: I can't get this working without using plotly express.
    # haven't tried the below method but it may work.
    #for cycle in df['cycle number'].unique():
        #cycle_df = df[df['cycle number'] == cycle]
        #fig.add_trace(go.Scatter(
        #   x=species_df['[insert plotted value]'],
        #   y=species_df['[insert value]'],
        #   mode='markers',
        #   name=cycle,
        #   marker=dict(size=10, symbol='circle'),
    #))
    # alternatively, https://community.plotly.com/t/automatically-pick-colors-when-using-add-trace/59075
    fig.show()

def plot_3_cycles(df, battery_name, cycle_number, show_temp):
    
    cycle_data = df[(df['cycle number'] == cycle_number) | (df['cycle number'] == cycle_number + 1) | (df['cycle number'] == cycle_number + 2)]

    fig = make_subplots(specs=[[{"secondary_y": True}]])
    fig.add_trace(go.Scatter(x=cycle_data['time/s'], y=cycle_data['Ecell/V'], name='Ecell/V'), secondary_y=False)
    fig.add_trace(go.Scatter(x=cycle_data['time/s'], y=cycle_data['I/mA']*0.001, mode='lines', name='I/A'), secondary_y=True)
    if show_temp == True:
        fig.add_trace(go.Scatter(x=cycle_data['cycle number'], y=cycle_data['Temperature/°C'], mode='lines', name='Temperature/°C'))
    fig.update_layout(title=f'Cycle {cycle_number}, {cycle_number+1} and {cycle_number+2} for Battery {battery_name}', xaxis_title='Time (s)')
    fig.update_yaxes(title_text="Cell Voltage (V)", secondary_y=False)
    fig.update_yaxes(title_text="Current (A)", secondary_y=True)

    fig.show()

stuff that'll come in handy later
https://towardsdatascience.com/resample-function-of-pandas-79b17ec82a78
https://pandas.pydata.org/docs/reference/api/pandas.DataFrame.resample.html
blog post on decimating data
https://www.geeksforgeeks.org/python-pandas-dataframe-resample/