
'''
Double Exponential Model (DEM):
OCP_conv : converged OCV (threshold = 1e-9), prediction interval = 20 min (as an example)
OCP_test : OCV taken from the test data
voltage_extrap : the predicted trend (extrapolated part), including every point up to the end of the prediction period; it can extend to 1 h / 2 h / 3 h ...
voltage_fits : fitting result based on the DEM

!!!
input:  data_path, save_path, convergence (threshold, interval_t), extrapolation (pre_length)
output: the extrapolated OCV result
        the converged OCV result
'''

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt 
from sklearn.metrics import mean_squared_error,r2_score
from scipy.interpolate import interp1d
from scipy.ndimage import gaussian_filter
import warnings
from mpl_toolkits.axes_grid1.inset_locator import inset_axes, mark_inset
from scipy.optimize import curve_fit, OptimizeWarning
from matplotlib import cm
from matplotlib.ticker import LinearLocator, FormatStrFormatter
import matplotlib.ticker as ticker
import matplotlib.pyplot as plt
import scienceplots
plt.style.use(['science', 'nature', 'no-latex'])

In [None]:

# Input CSV files; they are read and concatenated in list order, and the first
# entry also provides the base name of the output files.
file = ['.../test_3_7_20C_10min10min_20250218.csv',]   # path(s) of the input file(s)
# Both values are prepended as plain string prefixes to the output file names,
# so a directory must end with a path separator.
save_pred_path = ''   # prefix for the extrapolated-voltage CSV
path_conv = '' # prefix for the converged-OCV CSV


In [None]:
# User-supplied settings.

# --- convergence search ---
threshold = 1e-9   # slope magnitude below which the OCV counts as converged
interval_t = 1200  # length of each prediction window, in seconds

# --- extrapolation ---
pre_length = 3600  # end of the extrapolated trend, in seconds

In [None]:

# Silence every warning for the rest of the session (this also hides
# curve_fit's OptimizeWarning and NumPy runtime warnings).
warnings.filterwarnings("ignore")
# Read every CSV listed in `file` and stack them into one frame with a fresh
# 0..n-1 index. The comprehension variable reuses the name `file`; it is local
# to the comprehension, so the module-level list is left untouched.
data = pd.concat([pd.read_csv(file) for file in file],ignore_index=True)
# Base name of the first input file: the text after the last '/' and before
# the first '.' that follows it. Used as the prefix of the output CSV names.
file_name = file[0].split('/')[-1].split('.')[0]
def file_read (file,data):
    '''
    Unpack the columns of a test record that the rest of the analysis needs.

    arg :
    file : not used by this function (kept for the existing call signature)
    data : DataFrame with the columns 'Steps', 'Relative Time(h:min:s.ms)',
           'Current(A)' and 'Voltage(V)'
    return :
    column_dict : {column name: list of that column's values}
    split_index : index of the first row of every step after the first one
    cccv_index : the second entry of split_index
    each_t : 'Relative Time' of every row converted to whole seconds
    current : values of the 'Current(A)' column
    voltage : values of the 'Voltage(V)' column
    '''
    def _to_seconds(stamp):
        # "h:min:s.ms" -> whole seconds; the fractional part is discarded.
        fields = stamp.split(":")
        hours = int(fields[0])
        minutes = int(fields[1])
        seconds = int(fields[2].split(".")[0])
        return hours*60*60 + minutes*60 + seconds

    column_dict = {}
    for column_name, column_data in data.items():
        column_dict[column_name] = list(column_data)

    # A non-zero difference between neighbouring 'Steps' values marks a step
    # change; +1 moves from the last row of a step to the first row of the next.
    step_change = np.diff(data['Steps'])
    split_index = np.flatnonzero(step_change) + 1
    cccv_index = split_index[1]

    each_t = list(map(_to_seconds, data['Relative Time(h:min:s.ms)'].values))

    current = data['Current(A)'].values
    voltage = data['Voltage(V)'].values
    return column_dict, split_index, cccv_index, each_t, current, voltage
# Unpack the loaded record into the module-level arrays used by the cells below.
column_dict, split_index, cccv_index, each_t, current, voltage = file_read(file,data)

In [31]:
'''extract rest data and corresponding time'''

def rest_seg (voltage,split_index,time):
    '''
    Slice the rest segments out of the full voltage/time record.

    The step starting at split_index[0] is never taken. After that, every
    even-numbered step (2, 4, ...) is taken up to the start of the next step,
    and the last step is always taken, running to the end of the record.

    arg :
    voltage : voltage data of the whole process
    split_index : the index of the start of each step
    time : the time of each point (second)
    return :
    rest : list with the voltage data of each selected segment
    rest_time : list with the matching time data of each selected segment
    '''
    final_step = len(split_index) - 1
    rest, rest_time = [], []
    for step in range(1, len(split_index)):
        begin = split_index[step]
        if step == final_step:
            end = None                      # open-ended: run to the end of the data
        elif step % 2 == 0:
            end = split_index[step + 1]
        else:
            continue                        # odd, non-final steps are skipped
        rest.append(voltage[begin:end])
        rest_time.append(time[begin:end])
    return rest, rest_time
# Per-pulse rest voltages and their times, used by every fitting cell below.
rest_v,rest_t = rest_seg(voltage,split_index,each_t)    

In [32]:
class DoubleExpModel():
    '''
    Fit and extrapolate a curve with a double exponential function.

    The model is
        f(x) = a * exp(-b * x) + c * exp(-f * x) + d
    and is fitted to the Gaussian-smoothed y against x rescaled to [0, 1].

    Typical use: call fit(), then predict(x_start, x_end) and/or
    error_measure(). Both of the latter raise RuntimeError when no successful
    fit is available.
    '''
    # Default starting point (a, b, c, d, f) for curve_fit, chosen for 2032 cells.
    # For 2016 cells the guess [0.03, 3, 0.03, 2.5, 40] was used instead; pass
    # it as fit(initial_guess=...).
    DEFAULT_INITIAL_GUESS = (4.5e-03, 3e+00, 5e-03, 3.5e+00, 5e+01)

    def __init__(self,x,y):
        '''
        Store the raw data and reset every derived result.

        x : time of each sample (list or array)
        y : voltage of each sample (list or array)

        Both are converted to float arrays, so lists are accepted and integer
        input is not truncated to integers by the Gaussian filter.
        '''
        self.x = np.asarray(x, dtype=float)      # input data - time
        self.y = np.asarray(y, dtype=float)      # input data - voltage
        self.x_norm = None      # normalized x
        self.y_smoothed = None      # smoothed y based on gaussian filter
        self.y_fit = None           # fitted y based on double exponential function
        self.popt = None            # fitted parameters of double exponential function
        self.y_pred = None           # predicted y based on double exponential function

    def filter(self, sigma=10):
        '''
        Smooth the raw data using a gaussian filter.

        sigma : standard deviation of the gaussian kernel, in samples
        return : the smoothed y (also stored in self.y_smoothed)
        '''
        self.y_smoothed = gaussian_filter(self.y, sigma=sigma)
        return self.y_smoothed

    def normfuc(self):
        '''
        Rescale x linearly so that min(x) maps to 0 and max(x) maps to 1.

        return : the normalized x (also stored in self.x_norm)
        raise : ValueError if x is empty, contains non-finite values, or has
                no spread (all samples at the same time), because the
                rescaling would then divide by zero.
        '''
        x = np.asarray(self.x, dtype=float)
        if x.size == 0:
            raise ValueError("The raw data is empty.")
        x_min = np.min(x)
        span = np.max(x) - x_min
        if not np.isfinite(span) or span <= 0:
            raise ValueError("The raw data must contain finite x values that are not all equal.")
        self.x_norm = (x - x_min) / span
        return self.x_norm

    def func(self, x, a, b, c, d, f):
        ''' define the double exponential function '''
        return a * np.exp(-b * x) + c * np.exp(-f * x) + d

    def fit(self, initial_guess=None):
        '''
        Smooth, normalize and fit the data with the double exponential function.

        initial_guess : starting values (a, b, c, d, f) for curve_fit;
                        DEFAULT_INITIAL_GUESS is used when omitted
        return : (y_fit, popt). On failure the error is printed and
                 (None, None) is returned; results of an earlier fit are
                 cleared so they cannot be mistaken for the current one.
        '''
        if initial_guess is None:
            initial_guess = self.DEFAULT_INITIAL_GUESS
        try:
            self.filter()
            self.normfuc()
            self.popt, _ = curve_fit(self.func, self.x_norm, self.y_smoothed,
                                     p0=list(initial_guess), maxfev=30000)
            self.y_fit = self.func(self.x_norm, *self.popt)
        except Exception as e:
            # Best effort: report the problem and leave the model "unfitted".
            self.popt = None
            self.y_fit = None
            print("Error during fitting:", e)
        return self.y_fit, self.popt

    def predict(self,x_start,x_end):
        '''
        Evaluate the model on an evenly spaced grid from x_start to x_end.

        x_start : first x to predict, in the units of the original x
        x_end : last x to predict (included in the grid)
        return : predicted y (also stored in self.y_pred). The grid has
                 round(x_end - x_start) points. Points up to the last fitted x
                 come from a cubic interpolation of the smoothed data; points
                 beyond it come from the fitted double exponential.
        raise : RuntimeError if no successful fit is available.
        '''
        if self.popt is None or self.x_norm is None or self.y_smoothed is None:
            raise RuntimeError("The model is not fitted; call fit() successfully before predict().")
        # np.linspace needs an integer sample count, so float bounds are rounded.
        n_points = int(round(x_end - x_start))
        x_extend = np.linspace(x_start, x_end, n_points)
        # Apply the same rescaling as normfuc() so the fitted parameters remain valid.
        x_min = np.min(self.x)
        x_ext = (x_extend - x_min) / (np.max(self.x) - x_min)
        intered = interp1d(self.x_norm, self.y_smoothed, kind='cubic',
                           bounds_error=False, fill_value="extrapolate")
        self.y_pred = np.where(x_ext > np.max(self.x_norm),
                               self.func(x_ext, *self.popt),
                               intered(x_ext))
        return self.y_pred

    def error_measure(self):
        '''
        Calculate the fitting error of the model against the smoothed data
        (not the raw data).

        return : dict with the keys "RSS", "MSE", "RMSE" and "R2"
        raise : RuntimeError if no successful fit is available.
        '''
        if self.y_fit is None or self.y_smoothed is None:
            raise RuntimeError("The model is not fitted; call fit() successfully before error_measure().")
        RSS = np.sum((self.y_smoothed - self.y_fit) ** 2)
        MSE = mean_squared_error(self.y_smoothed, self.y_fit)
        RMSE = np.sqrt(MSE)
        R2 = r2_score(self.y_smoothed, self.y_fit)
        return {
            "RSS": RSS,
            "MSE": MSE,
            "RMSE": RMSE,
            "R2": R2
            }

In [None]:
'''export the predicted voltage trend to predefined time'''
# Start of the prediction window, taken as the number of samples in the first
# rest segment and shared by every pulse.
# NOTE(review): this equals the rest duration in seconds only if the data is
# sampled once per second and all rests have the same length -- confirm.
start_point = len(rest_t[0]) ## the start point to be predicted
def Extra_pred_vol(rest_v,rest_t, pre_length):
    '''
    Fit every rest segment and predict its voltage trend up to pre_length.

    arg :
    rest_v : list with the voltage data of each rest segment
    rest_t : list with the time data of each rest segment
    pre_length : end of the prediction window
    return :
    list with one predicted voltage array per rest segment

    The prediction window starts at the module-level `start_point`.
    '''
    def _extrapolate(idx):
        # One independent model per pulse: fit it, then predict the window.
        model = DoubleExpModel(rest_t[idx], rest_v[idx])
        model.fit()
        return model.predict(start_point, pre_length)

    return [_extrapolate(idx) for idx in range(len(rest_v))]

# Predict the voltage trend of every pulse up to `pre_length` seconds.
voltage_extrap = Extra_pred_vol(rest_v,rest_t,pre_length)     # to one hour prediction

'''save the trend of predicted voltage'''
# One row per pulse, one column per predicted sample. The column labels are
# sized from the first pulse, so every pulse must have the same number of
# predicted points.
df = pd.DataFrame(voltage_extrap)
df.index = [f"Pulse_{i}" for i in range(len(voltage_extrap))] # set the row index
df.columns = [f"time_{j}" for j in range(len(voltage_extrap[0]))] # set the column index
# save_pred_path is a plain string prefix; the file is "<file_name>_extra.csv".
df.to_csv(save_pred_path + f"{file_name}_extra.csv",index=True, header=True)  # name changable


In [34]:
'''define a function to calculate a converged OCV based on the derivative of the predicted voltage'''
def ocv_converge(threshold, x, y, start_point, interval_t, max_iter=10000):
    '''
    Extend the fitted voltage trend window by window until it flattens out.

    A DoubleExpModel is fitted to (x, y). Its prediction is evaluated over
    consecutive windows of interval_t; the search stops as soon as the slope
    at the end of a window is no larger in magnitude than threshold. The sign
    of the slope does not matter, so rising and falling relaxation curves are
    handled the same way, as is a slope that changes sign during the search.

    threshold : largest slope magnitude (voltage per unit of x) that counts as converged
    x : time data of the rest process
    y : voltage data of the rest process
    start_point: the start point of the prediction(= end point of rest process)
    interval_t: the time interval of each prediction,unit:seconds
                (integer, at least 2 so that a gradient can be computed)
    max_iter : upper bound on the number of windows, so that a fit which
               never flattens cannot keep the search running indefinitely
    return : the predicted voltage at the end of the last evaluated window.
             If max_iter windows were evaluated without convergence, a
             RuntimeWarning is issued and the last value is still returned.
    '''
    model = DoubleExpModel(x, y)
    model.fit()

    window_start = start_point
    y_pred = None
    for _ in range(max(1, max_iter)):
        window_end = window_start + interval_t
        y_pred = model.predict(window_start, window_end)
        x_new = np.linspace(window_start, window_end, interval_t)
        end_slope = np.gradient(y_pred, x_new)[-1]
        # "not >" instead of "<=": a NaN slope also ends the search, as before.
        if not np.abs(end_slope) > threshold:
            return y_pred[-1]
        window_start = window_end

    warnings.warn(
        f"ocv_converge: slope did not drop below {threshold} within {max_iter} windows; "
        "returning the last predicted value.",
        RuntimeWarning,
    )
    return y_pred[-1]


In [None]:
'''export every fitting, error and corresponding OCP-- 
taking 20min interval as an example'''
# Per-pulse results, all indexed like rest_t / rest_v.
voltage_fits = []   # fitted curve of every pulse
Errs = []           # error metrics (RSS / MSE / RMSE / R2) of every fit
OCP_conv = []       # converged OCV predicted by the model
OCP_test = []       # last measured voltage of the rest segment
for i in range(len(rest_t)):
    x = np.array(rest_t[i])
    y = np.array(rest_v[i])
    OCP_test.append(rest_v[i][-1])
    # Fit once here to collect the fitted curve and its error metrics.
    model = DoubleExpModel(x,y)
    y_fit,popt = model.fit()
    errors = model.error_measure()
    voltage_fits.append(y_fit)
    Errs.append(errors)
    # ocv_converge builds and fits its own model from the same x and y.
    OCP = ocv_converge(threshold, x, y, start_point, interval_t)        # 20min interval
    OCP_conv.append(OCP)


''' save the converge OCV '''
# One row per pulse with a single "Converge OCV" column.
df_conv_OCP = pd.DataFrame(OCP_conv)
df_conv_OCP.index = [f"Row_{i}" for i in range(len(OCP_conv))]   
df_conv_OCP.columns = [f"Converge OCV"]  
# path_conv is a plain string prefix; the file is "<file_name>_conv.csv".
df_conv_OCP.to_csv(path_conv + file_name + "_conv.csv", index=True, header=True)  