In [1]:
import numpy as np
import pandas as pd
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score, mean_absolute_percentage_error
import os
import plotly.express as px


In [19]:
def load_SoH_data(cell_id):
    """Read the saved SoH predictions for one cell.

    The file ``./Saved Predictions/SoH_Cell{cell_id}.csv`` is parsed without a
    header row. Its first row is returned as the reference values and its
    second row as the predictions, each converted to a float array.

    Returns
    -------
    tuple of numpy.ndarray
        ``(y_true, y_pred)``.
    """
    csv_path = f'./Saved Predictions/SoH_Cell{cell_id}.csv'
    table = pd.read_csv(csv_path, header=None)
    # Row 0 holds the reference values, row 1 the predictions.
    reference, predicted = (
        table.iloc[row_idx, :].to_numpy().astype(float) for row_idx in (0, 1)
    )
    return reference, predicted

In [20]:
def load_RUL_data(cell_id):
    """Read the saved RUL predictions for one cell.

    The file ``./Saved Predictions/RUL_Cell{cell_id}.csv`` is parsed without a
    header row. Its first row is returned as the reference values and its
    second row as the predictions, each converted to a float array.

    Returns
    -------
    tuple of numpy.ndarray
        ``(y_true, y_pred)``.
    """
    csv_path = f'./Saved Predictions/RUL_Cell{cell_id}.csv'
    table = pd.read_csv(csv_path, header=None)
    # Row 0 holds the reference values, row 1 the predictions.
    reference, predicted = (
        table.iloc[row_idx, :].to_numpy().astype(float) for row_idx in (0, 1)
    )
    return reference, predicted

In [4]:
def compute_chunked_metrics(y_true, y_pred, cell_id, chunk_size=10, ignore_last_n=0):
    """Score predictions over consecutive, non-overlapping chunks.

    Parameters
    ----------
    y_true, y_pred : sequence
        Reference and predicted values; both are sliced with the same indices.
    cell_id
        Value stored in the ``Cell`` column of every returned row.
    chunk_size : int, default 10
        Number of samples per chunk. Must be positive.
    ignore_last_n : int, default 0
        When positive, that many trailing samples are removed from both
        inputs before chunking.

    Returns
    -------
    pandas.DataFrame
        One row per complete chunk with columns ``Cell``, ``Chunk``, ``MSE``,
        ``RMSE``, ``MAPE``, ``R2`` and ``MAE``. Trailing samples that do not
        fill a complete chunk are not scored, so the frame is empty when the
        input is shorter than ``chunk_size``.

    Raises
    ------
    ValueError
        If ``chunk_size`` is not positive.
    """
    if chunk_size <= 0:
        # A zero step makes range() fail with an unrelated message and a
        # negative one silently yields no chunks; reject both explicitly.
        raise ValueError(f"chunk_size must be a positive integer, got {chunk_size!r}")

    if ignore_last_n > 0:
        y_true = y_true[:-ignore_last_n]
        y_pred = y_pred[:-ignore_last_n]

    metrics = {'Cell': [], 'Chunk': [], 'MSE': [], 'RMSE': [], 'MAPE': [], 'R2': [], 'MAE': []}
    # The stop value guarantees every slice below has exactly chunk_size samples.
    for start in range(0, len(y_true) - chunk_size + 1, chunk_size):
        yt = y_true[start:start + chunk_size]
        yp = y_pred[start:start + chunk_size]

        # Compute MSE once and derive RMSE from it instead of re-scoring the chunk.
        mse = mean_squared_error(yt, yp)

        metrics['Cell'].append(cell_id)
        metrics['Chunk'].append(start // chunk_size)
        metrics['MSE'].append(mse)
        metrics['MAE'].append(mean_absolute_error(yt, yp))
        metrics['RMSE'].append(np.sqrt(mse))
        metrics['MAPE'].append(mean_absolute_percentage_error(yt, yp))
        metrics['R2'].append(r2_score(yt, yp))

    return pd.DataFrame(metrics)

In [5]:
def plot_box(metric_name, df, SoH_RUL, Save = False, save_dir="Results"):
    """Draw one box per cell showing the distribution of ``metric_name``.

    Parameters
    ----------
    metric_name : str
        Column of ``df`` plotted on the y axis; also used in the title,
        the y-axis label and the saved file name.
    df : pandas.DataFrame
        Must contain a ``Cell`` column and the ``metric_name`` column.
    SoH_RUL : str
        Label used as a prefix in the title and the saved file name.
    Save : bool, default False
        When true, the figure is also written to
        ``{save_dir}/{SoH_RUL}_{metric_name}.jpg``.
    save_dir : str, default "Results"
        Output directory; created if it does not exist.

    The figure is always displayed with ``fig.show()``. Box colours come from
    the module-level ``custom_colors`` mapping.
    """
    # String labels force plotly to treat Cell as a categorical axis.
    cell_labels = df["Cell"].astype(str)

    layout_options = dict(
        template="ggplot2",
        boxmode='overlay',
        font=dict(size=26),
        showlegend=True,
        legend_title_text='Cell Number',
        legend=dict(title_font_size=16, font=dict(size=16)),
        xaxis_title=dict(text="Battery Cell Number", font=dict(size=20)),
        yaxis_title=dict(text=metric_name, font=dict(size=22)),
        xaxis=dict(tickfont=dict(size=20)),
        yaxis=dict(tickfont=dict(size=20)),
        title_font=dict(size=24),
    )

    fig = px.box(
        df,
        x=cell_labels,
        y=metric_name,
        color=cell_labels,
        points=False,
        title=f"{SoH_RUL} Deviation Distribution of {metric_name} Across Cells",
        labels={"x": "Battery Cell", metric_name: metric_name},
        color_discrete_map=custom_colors,
    )
    fig.update_layout(**layout_options)

    if Save:
        os.makedirs(save_dir, exist_ok=True)
        target = os.path.join(save_dir, f"{SoH_RUL}_{metric_name}.jpg")
        fig.write_image(target, width=800, height=600, scale=10)

    fig.show()

# Box colour per cell. Keys are strings because plot_box converts the Cell
# column to str before passing it to plotly as the colour dimension.
custom_colors = dict(zip(
    ('1', '3', '7', '8'),
    ('crimson', 'royalblue', 'seagreen', 'darkorange'),
))

## State of Health (SoH)

In [None]:
# Evaluation settings for this section.
test_cells = [1, 3, 7, 8]
chunk_size = 20
SoH_RUL = 'SoH'
ignore_last_n = 0

# Pick the loader once; SoH_RUL does not change inside the loop.
load_predictions = load_SoH_data if SoH_RUL == 'SoH' else load_RUL_data

# Collect results in plain lists and build each DataFrame once after the loop.
# Concatenating onto an initially empty DataFrame on every iteration triggers
# pandas' FutureWarning about empty entries in concat and re-copies all
# previous rows each time.
whole_metric_rows = []
chunk_metric_frames = []

for cell_id in test_cells:
    y_true, y_pred = load_predictions(cell_id)

    if ignore_last_n > 0:
        y_true = y_true[:-ignore_last_n]
        y_pred = y_pred[:-ignore_last_n]

    # Metrics over the full (already trimmed) series of this cell.
    mse = mean_squared_error(y_true, y_pred)
    whole_metrics = {
        'Cell': cell_id,
        'MSE': mse,
        'RMSE': np.sqrt(mse),
        'MAPE': mean_absolute_percentage_error(y_true, y_pred),
        'R2': r2_score(y_true, y_pred),
        'MAE': mean_absolute_error(y_true, y_pred),
    }
    whole_metric_rows.append(whole_metrics)

    # The series was trimmed above, so ignore_last_n is left at its default here.
    chunk_metric_frames.append(
        compute_chunked_metrics(y_true, y_pred, cell_id=cell_id, chunk_size=chunk_size)
    )

whole_metrics_df = pd.DataFrame(
    whole_metric_rows, columns=['Cell', 'MSE', 'RMSE', 'MAPE', 'R2', 'MAE']
)

# Skip empty per-cell frames (cells with fewer than chunk_size samples) so
# they neither affect the result dtypes nor raise the concat warning.
chunk_metric_frames = [frame for frame in chunk_metric_frames if not frame.empty]
all_metrics_df = (
    pd.concat(chunk_metric_frames, ignore_index=True)
    if chunk_metric_frames
    else pd.DataFrame()
)


The behavior of DataFrame concatenation with empty or all-NA entries is deprecated. In a future version, this will no longer exclude empty or all-NA columns when determining the result dtypes. To retain the old behavior, exclude the relevant entries before the concat operation.



In [7]:
# One box plot per chunk-level metric, drawn from the per-chunk results.
metric_cols = ['MAPE', 'MAE', 'RMSE', 'R2']
for metric in metric_cols:
    plot_box(metric_name=metric, df=all_metrics_df, SoH_RUL=SoH_RUL)

In [8]:
metric_cols = ['MAPE', 'MAE', 'RMSE', 'R2']
# describe() column -> heading used in the printed summary tables.
quantile_labels = {'min': 'Min', '25%': 'Q1', '50%': 'Median', '75%': 'Q3', 'max': 'Max'}

print(f"------- {SoH_RUL} Results --------")
print("\nFor Whole Data:")
print(whole_metrics_df)

# Coerce the metric columns to numeric in place; unparseable values become NaN.
all_metrics_df[metric_cols] = all_metrics_df[metric_cols].apply(pd.to_numeric, errors='coerce')

# Five-number summary of each chunk-level metric, grouped by cell.
per_cell = all_metrics_df.groupby('Cell')
metric_tables = {
    metric: (
        per_cell[metric]
        .describe()[list(quantile_labels)]
        .rename(columns=quantile_labels)
        .round(4)
    )
    for metric in metric_cols
}

for metric, table in metric_tables.items():
    print(f"\n Summary for {metric} per cell:\n")
    print(table)

------- SoH Results --------

For Whole Data:
  Cell       MSE      RMSE      MAPE        R2       MAE
0    1  0.000004  0.001920  0.001813  0.999196  0.001522
1    3  0.000002  0.001430  0.001367  0.999593  0.001173
2    7  0.000006  0.002499  0.002192  0.998404  0.001847
3    8  0.000002  0.001473  0.001408  0.999565  0.001203

 Summary for MAPE per cell:

         Min      Q1  Median      Q3     Max
Cell                                        
1     0.0012  0.0014  0.0015  0.0020  0.0024
3     0.0009  0.0013  0.0017  0.0018  0.0019
7     0.0010  0.0012  0.0014  0.0015  0.0016
8     0.0009  0.0012  0.0014  0.0016  0.0019

 Summary for MAE per cell:

         Min      Q1  Median      Q3     Max
Cell                                        
1     0.0010  0.0012  0.0014  0.0017  0.0021
3     0.0008  0.0011  0.0015  0.0016  0.0016
7     0.0009  0.0011  0.0012  0.0013  0.0014
8     0.0008  0.0010  0.0013  0.0014  0.0016

 Summary for RMSE per cell:

         Min      Q1  Median      Q3    

## Remaining Useful Life (RUL)

In [None]:
# Evaluation settings for this section.
test_cells = [1, 3, 7, 8]
chunk_size = 20
SoH_RUL = 'RUL'
ignore_last_n = 0

# Pick the loader once; SoH_RUL does not change inside the loop.
load_predictions = load_SoH_data if SoH_RUL == 'SoH' else load_RUL_data

# Collect results in plain lists and build each DataFrame once after the loop.
# Concatenating onto an initially empty DataFrame on every iteration triggers
# pandas' FutureWarning about empty entries in concat and re-copies all
# previous rows each time.
whole_metric_rows = []
chunk_metric_frames = []

for cell_id in test_cells:
    y_true, y_pred = load_predictions(cell_id)

    if ignore_last_n > 0:
        y_true = y_true[:-ignore_last_n]
        y_pred = y_pred[:-ignore_last_n]

    # Metrics over the full (already trimmed) series of this cell.
    mse = mean_squared_error(y_true, y_pred)
    whole_metrics = {
        'Cell': cell_id,
        'MSE': mse,
        'RMSE': np.sqrt(mse),
        'MAPE': mean_absolute_percentage_error(y_true, y_pred),
        'R2': r2_score(y_true, y_pred),
        'MAE': mean_absolute_error(y_true, y_pred),
    }
    whole_metric_rows.append(whole_metrics)

    # The series was trimmed above, so ignore_last_n is left at its default here.
    chunk_metric_frames.append(
        compute_chunked_metrics(y_true, y_pred, cell_id=cell_id, chunk_size=chunk_size)
    )

whole_metrics_df = pd.DataFrame(
    whole_metric_rows, columns=['Cell', 'MSE', 'RMSE', 'MAPE', 'R2', 'MAE']
)

# Skip empty per-cell frames (cells with fewer than chunk_size samples) so
# they neither affect the result dtypes nor raise the concat warning.
chunk_metric_frames = [frame for frame in chunk_metric_frames if not frame.empty]
all_metrics_df = (
    pd.concat(chunk_metric_frames, ignore_index=True)
    if chunk_metric_frames
    else pd.DataFrame()
)


The behavior of DataFrame concatenation with empty or all-NA entries is deprecated. In a future version, this will no longer exclude empty or all-NA columns when determining the result dtypes. To retain the old behavior, exclude the relevant entries before the concat operation.



In [9]:
# One box plot per chunk-level metric, drawn from the per-chunk results.
metric_cols = ['MAPE', 'MAE', 'RMSE', 'R2']
for metric in metric_cols:
    plot_box(metric_name=metric, df=all_metrics_df, SoH_RUL=SoH_RUL)

In [11]:
metric_cols = ['MAPE', 'MAE', 'RMSE', 'R2']
# describe() column -> heading used in the printed summary tables.
quantile_labels = {'min': 'Min', '25%': 'Q1', '50%': 'Median', '75%': 'Q3', 'max': 'Max'}

print(f"------- {SoH_RUL} Results --------")
print("\nFor Whole Data:")
print(whole_metrics_df)

# Coerce the metric columns to numeric in place; unparseable values become NaN.
all_metrics_df[metric_cols] = all_metrics_df[metric_cols].apply(pd.to_numeric, errors='coerce')

# Five-number summary of each chunk-level metric, grouped by cell.
per_cell = all_metrics_df.groupby('Cell')
metric_tables = {
    metric: (
        per_cell[metric]
        .describe()[list(quantile_labels)]
        .rename(columns=quantile_labels)
        .round(4)
    )
    for metric in metric_cols
}

for metric, table in metric_tables.items():
    print(f"\n Summary for {metric} per cell:\n")
    print(table)

------- RUL Results --------

For Whole Data:
  Cell            MSE        RMSE      MAPE        R2         MAE
0    1  108986.611315  330.131203  3.098211  0.980105  282.218551
1    3   89381.183821  298.966861  4.721766  0.984599  267.905784
2    7   74118.768036  272.247623  5.025312  0.986643  236.000660
3    8   25379.248600  159.308658  1.018574  0.995387  127.449118

 Summary for MAPE per cell:

         Min      Q1  Median      Q3     Max
Cell                                        
1     0.0237  0.0532  0.0826  0.1276  0.1727
3     0.0344  0.0443  0.0541  0.0955  0.1368
7     0.0397  0.0426  0.0455  0.0697  0.0940
8     0.0138  0.0183  0.0227  0.0603  0.0978

 Summary for MAE per cell:

           Min        Q1    Median        Q3       Max
Cell                                                  
1     153.9174  266.4655  379.0135  395.4336  411.8537
3     235.8270  236.8867  237.9464  255.5096  273.0729
7     185.5578  197.9557  210.3536  263.5735  316.7934
8      93.8349   95.