In [None]:
from utils import utils_models, utils_gn, utils_dgrd, utils_sig
from config.definitions import ROOT_DIR
import matplotlib.pyplot as plt
from mpl_toolkits.axes_grid1.inset_locator import inset_axes
from scipy.interpolate import interp1d
import importlib
import numpy as np
import pickle
import os
from scipy import stats
import seaborn as sns
# Reload the project helper modules so that edits made to them after the
# initial import take effect without restarting the kernel.
importlib.reload(utils_models)
importlib.reload(utils_gn)
importlib.reload(utils_dgrd)
importlib.reload(utils_sig)

# The geometrical interpretation of the first two levels of signature

We define a two-dimensional path $P = \{t, V(t)\}$, where $t$ is the time and $V(t)$ is the constant-current voltage at discharge. We are interested in the first two levels of the signature of $P$ because of their geometrical interpretation; see section 1.2.4 of the article *A Primer on the Signature Method in Machine Learning* by Ilya Chevyrev and Andrey Kormilitzin.

The first two levels of the signature of $P$ are $$\left(S(P)^1, S(P)^2, S(P)^{1,1}, S(P)^{1,2}, S(P)^{2,1}, S(P)^{2,2}\right).$$

Here, $S(P)^1, S(P)^2$ are the increments of the path, i.e., $$S(P)^1 = t_{final} - t_{initial}; \quad S(P)^2 = V_{final} - V_{initial}.$$ On the other hand, $$S(P)^{1,1} = (t_{final} - t_{initial})^2/2 ; \quad S(P)^{2,2} = (V_{final} - V_{initial})^2/2.$$

The remaining terms are more interesting. Up to sign, $S(P)^{1,2}$ is the area bounded by the lines $V=V_{final}$, $t=t_{initial}$ and the curve $V(t)$, while $S(P)^{2,1}$ is the area bounded by the lines $V=V_{initial}$, $t=t_{final}$ and the curve $V(t)$. We demonstrate these two terms in the following figures.

In [None]:
# Read the training data from 'train_1238.pkl' using the project reader.
# NOTE(review): presumably a dict keyed by cell id (it is later passed as
# `data_dict` and queried by cell name) — confirm against utils_gn.read_data.
train_raw_data = utils_gn.read_data('train_1238.pkl')

In [None]:
# Cell identifier and cycle key (both strings) used for the illustrative figures
sample_cell, sample_cycle = 'b2c12', '4'

In [None]:
# Discharge ('di') traces of current, voltage and time for the sample cell and cycle
i_values, v_values, t_values = (
    utils_gn.get_charge_discharge_values(train_raw_data, quantity, sample_cell, sample_cycle, 'di')
    for quantity in ('I', 'V', 't')
)

# First and last index of the constant-current (CC) segment
start_I, end_I = utils_gn.get_constant_indices(i_values, 'di')
cc_segment = slice(start_I, end_I + 1)  # end_I itself is included

# CC voltage, and CC time shifted so that the segment starts at zero
ccv = v_values[cc_segment]
cct = t_values[cc_segment]
cct = cct - min(cct)

In [None]:
# Show the two level-2 cross terms as shaded areas next to the CC voltage curve
fig, ax = plt.subplots(1, 2, figsize=(16, 6))
left_panel, right_panel = ax

# Extremes of the path, reused for the dashed guide lines and the fill baselines
t_lo, t_hi = min(cct), max(cct)
v_lo, v_hi = min(ccv), max(ccv)
guide_style = dict(linestyles='dashed', color='brown', linewidth=2.0)

# Left panel: area between the curve and the lowest voltage, with a guide line at the first time stamp
left_panel.plot(cct, ccv, color='brown')
left_panel.hlines(y=v_lo, xmin=t_lo, xmax=t_hi, **guide_style)
left_panel.vlines(x=t_lo, ymin=v_lo, ymax=v_hi, **guide_style)
left_panel.fill_between(cct, y1=v_lo, y2=ccv, alpha=0.2, color='brown')
left_panel.text(x=6.5, y=2.6, s=r"$-S^{1,2}$", fontsize=16)
left_panel.set_ylabel(r'CC Voltage at discharge, $V(t)$', fontsize=16)
left_panel.set_xlabel(r"time, $t$", fontsize=16)
left_panel.text(x=6.0, y=2.05, s=r"$V=V_{final}$", fontsize=16)
left_panel.text(x=0.02, y=2.6, s=r"$t=t_{initial}$", fontsize=16, rotation=90)
left_panel.text(x=6, y=3.15, s=r"$V=V(t)$", fontsize=16)

# Right panel: area between the curve and the highest voltage, with a guide line at the last time stamp.
# No y-label is set on this panel.
right_panel.plot(cct, ccv, color='brown')
right_panel.hlines(y=v_hi, xmin=t_lo, xmax=t_hi, **guide_style)
right_panel.vlines(x=t_hi, ymin=v_lo, ymax=v_hi, **guide_style)
right_panel.fill_between(cct, y1=v_hi, y2=ccv, alpha=0.2, color='brown')
right_panel.text(x=7.5, y=3.2, s=r"$-S^{2,1}$", fontsize=16)
right_panel.set_xlabel(r"time, $t$", fontsize=16)
right_panel.text(x=6.0, y=3.4, s=r"$V=V_{initial}$", fontsize=16)
right_panel.text(x=13.5, y=3.0, s=r"$t=t_{final}$", fontsize=16, rotation=90)
right_panel.text(x=6, y=3.0, s=r"$V=V(t)$", fontsize=16)

plt.savefig(fname=f"{ROOT_DIR}/plots/sig_level2_geometry_inter.pdf", bbox_inches='tight')


We see that $$ -S(P)^{1,2} = \int_{t_{initial}}^{t_{final}} \left(V(t)-V_{final}\right)\,dt; \quad -S(P)^{2,1} = \int_{t_{initial}}^{t_{final}} \left(V_{initial}-V(t)\right)\,dt.$$ The negative sign results from the orientation of the path $P$, which is traversed in a clockwise direction: $V(t)$ decreases as the time $t$ increases; see section 2.1.4 of the above-mentioned article.

In [None]:
# Let's check how these areas change across cycles
data = utils_gn.read_data("data_all.pkl")
fig, ax = plt.subplots(1, 2, figsize=(16, 6))

for cycle in ('2', '50', '100', '300', '400'):
    # Discharge current, voltage and time traces for this cycle
    i_values, v_values, t_values = (
        utils_gn.get_charge_discharge_values(data, quantity, sample_cell, cycle, 'di')
        for quantity in ('I', 'V', 't')
    )

    # Restrict to the constant-current segment (end index included)
    start_I, end_I = utils_gn.get_constant_indices(i_values, 'di')
    cc_segment = slice(start_I, end_I + 1)
    ccv = v_values[cc_segment]

    # Time axis of the segment, shifted to start at zero
    cct = t_values[cc_segment]
    cct = cct - min(cct)

    # Left panel fills down to the lowest voltage, right panel fills up to the highest
    for panel, baseline in zip(ax, (min(ccv), max(ccv))):
        panel.plot(cct, ccv, alpha=0.3)
        panel.fill_between(cct, y1=baseline, y2=ccv, alpha=0.2, label=f'Cycle {cycle}')

# Axis labels (the y-label is only set on the left panel)
ax[0].set_ylabel(r'CC Voltage at discharge, $V(t)$', fontsize=16)
for panel in ax:
    panel.set_xlabel(r"time, $t$", fontsize=16)

# Panel titles name the signature term each shaded area represents
for panel, title in zip(ax, (r"$-S^{1,2}$", r"$-S^{2,1}$")):
    panel.set_title(title, fontsize=16)

ax[0].legend()
ax[1].legend(loc='lower left')
plt.savefig(fname=f"{ROOT_DIR}/plots/sig_level2_geometry_inter2.pdf", bbox_inches='tight')


## Checking Pearson correlation of generated features with EOL and IRatEOL

In [None]:
# Signature-based features computed from the training data ...
signature_features = utils_sig.ccv_signature_features(data_dict=train_raw_data, n=100, multi_cycle=False)
# ... joined (on the index) with the knee/elbow data built from the same training data
knee_elbow_targets = utils_dgrd.create_knee_elbow_data(train_raw_data)
knee_elbow_df = signature_features.join(knee_elbow_targets)

In [None]:
# Work on a copy and discard the columns listed below; the columns that remain
# are the ones used in the correlation analysis that follows.
columns_to_discard = ['k-o', 'k-p', 'Qatk-o', 'Qatk-p', 'e-o', 'e-p', 'IRate-o', 'IRate-p']
df = knee_elbow_df.copy().drop(columns=columns_to_discard)

In [None]:
# Preview the column labels used as features below: every column except the last two.
# NOTE(review): assumes the last two columns are the 'EOL' and 'IRatEOL' targets — confirm the column order.
df.columns[:-2]

In [None]:
# Horizontal bar charts of the correlation between each feature and each target.
targets = ['EOL', 'IRatEOL']
features = df.columns[:-2]  # every column except the last two
corr_matrix = df.corr()

fig = plt.figure(figsize=(7, 11))

# Vertical reference lines drawn on every panel: (x position, line style)
reference_lines = ((0.5, '--'), (-0.5, '--'), (-0.0, '-'))

for panel_number, tar in enumerate(targets, start=1):

    ax = fig.add_subplot(1, 2, panel_number)
    ax.text(0.05, 0.99, tar, transform=ax.transAxes, fontsize=16, fontweight='bold', va='top')

    # The second panel hides its y tick labels
    if panel_number == 2:
        ax.set_yticklabels([])

    # Correlation of every feature with the current target
    corr_for_tar = corr_matrix.loc[features, tar]
    ax.barh(features, corr_for_tar, color='brown', ec='black', alpha=0.78)
    ax.set_xlim([-1, 1])
    ax.set_xlabel(r'Correlation coefficent ($\rho$)', fontsize=12)
    for x_position, line_style in reference_lines:
        ax.axvline(x=x_position, color='black', linestyle=line_style, alpha=0.5)

plt.tight_layout()
plt.savefig(fname=f"{ROOT_DIR}/plots/sig-table1&2-features-corr-eol-ir.pdf", bbox_inches='tight')

# Checking Pearson correlation of the signatures with the cycle numbers

In [None]:
# Get the signatures
# The training data is read again from the same file used at the top of the notebook.
train_raw_data = utils_gn.read_data('train_1238.pkl')
# return_sig=True is passed to obtain the signatures rather than the feature table.
# NOTE(review): the result is indexed below as signatures[cell][:, k] for k in 0..5 and paired with
# 100 cycle numbers — presumably one (100, 6) array per cell; confirm against utils_sig.
signatures =  utils_sig.ccv_signature_features(data_dict=train_raw_data, n=100, return_sig=True)

In [None]:
# Array of cycle numbers 1, 2, ..., 100
cycle_numbers = np.arange(100) + 1
cycle_numbers

In [None]:
# List the keys of the signatures mapping.
# NOTE(review): presumably cell identifiers, since later cells look entries up by cell name — confirm.
signatures.keys()

In [None]:

# Bar charts of the Pearson correlation between each signature component and
# the cycle number, one panel per selected cell
selected_cells = ['b1c30', 'b2c40', 'b3c22', 'b8c22']
signature_labels = [r'$S^1$', r'$S^2$', r'$S^{1,1}$', r'$S^{1,2}$', r'$S^{2,1}$', r'$S^{2,2}$']
fig = plt.figure(figsize=(16, 5))

for i, cell in enumerate(selected_cells):

    ax = fig.add_subplot(2, 2, i+1)
    ax.text(0.02, 0.97, cell, transform=ax.transAxes, fontsize=16, fontweight='bold', va='top')

    # 2x2 grid: even i is the left column (gets the y-label), odd i is the
    # right column (y tick labels hidden)
    if i % 2 == 0:
        ax.set_ylabel(r'Signatures', fontsize=16)
    else:
        ax.set_yticklabels([])

    # Only the bottom row (i = 2, 3) gets an x-label
    if i >= 2:
        ax.set_xlabel(r'Correlation coefficent ($\rho$)', fontsize=16)

    # Pearson's correlation between the cycle numbers and each of the six
    # signature components of the current cell
    corr_for_cell = []
    for component in range(6):
        component_values = signatures[cell][:, component]
        corr_for_cell.append(stats.pearsonr(cycle_numbers, component_values)[0])

    ax.barh(signature_labels, corr_for_cell, color='brown', ec='black', alpha=0.78)
    ax.set_xlim([-1, 1])
    # Reference lines at +/-0.5 (dashed) and at zero (solid)
    for x_position, line_style in ((0.5, '--'), (-0.5, '--'), (-0.0, '-')):
        ax.axvline(x=x_position, color='black', linestyle=line_style, alpha=0.5)

plt.tight_layout()

In [None]:
# Scatter plot of every signature component against the cycle number for one given cell
given_cell = 'b1c30'

# Look the cell up through `given_cell`. The previous version indexed `signatures`
# with `cell`, a loop variable left over from an earlier cell, so the figure showed
# whichever cell that loop ended on instead of the one chosen here.
cell_signatures = signatures[given_cell]

fig = plt.figure(figsize=(16, 8))

for i, sig in enumerate(signature_labels):

    # 3x2 grid, one panel per signature component
    ax = fig.add_subplot(3, 2, i+1)

    # Values of the current signature component (column i)
    component_values = cell_signatures[:, i]

    # Get the Pearson's correlation for the current signature component
    corr_for_sig = stats.pearsonr(cycle_numbers, component_values)[0]

    ax.scatter(cycle_numbers, component_values, color='brown', s=20, alpha=0.5)
    ax.text(
        0.02,
        # right-column panels (odd i) get the annotation near the top,
        # left-column panels near the bottom
        0.97 if i%2 !=0 else 0.2,
        r'$\rho = {}$'.format(np.round(corr_for_sig, 2)),
        transform=ax.transAxes,
        fontsize=16,
        fontweight='bold',
        va='top',
    )
    ax.set_ylabel(sig, fontsize=16)

    # Only the bottom row gets an x-label
    if i in [4, 5]:
        ax.set_xlabel('Cycles', fontsize=16)
plt.tight_layout()
plt.savefig(fname=f"{ROOT_DIR}/plots/sig-scatter-plot-sigcomponent-cyclenumber.pdf", bbox_inches='tight')