# <img src="https://ukcatalysishub.co.uk/wp-content/uploads/2020/04/Diamond-light-source-logo-lowres.jpg" width="150px" class="pull-right" style="display: inline-block; margin-top: 5px;">
# <span style="font-size:larger;font-weight:bold;text-decoration:underline;">Linear Sweep Voltammogram(LSV) Integrator</span>

## Calculating regression models for LSV's and calculating definite integrals
### By Adam Morris

<details>
    <summary> <strong>⬇️ Click on me to see a summary of this script ⬇️</strong></summary>
<br>
    - This is a script set up for users to integrate peaks on a LSV. 
<br>
    1. Please alter the filename to the file you wish to visualise and integrate
<br>
    2. Then restart and run all cells
<br>
    3. The script will do some calculations and then promt you to enter the x-coordinates to define the definite integral
<br>
    4. To determine these coordinates you can interact with the graphs to zoom into the region of interest and place you cursor over the regression line to get the exact cooridnates.
<br>
    5. Once the coordinates are entered the integration will be calculated and results displayed at the end
    </details>

In [9]:
from sklearn.linear_model import LinearRegression
import pandas as pd
from sklearn.preprocessing import StandardScaler
import numpy as np
from sklearn.preprocessing import PolynomialFeatures
from sklearn.metrics import r2_score
from scipy.integrate import fixed_quad
import plotly.graph_objects as go
import csv
import os

# <div class="alert alert-block alert-info"><b>Change the directory path and filename:</b></div>
# Then
# <div class="alert alert-block alert-danger"><b>Restart and Run all cells</b></div>

In [10]:
filename = '../data/30cyc.csv'

In [11]:
data = pd.read_csv(filename)

# adding column names to the data
data.columns = ['L', 'Intensity2']
param1 = data['Intensity2']
param2 = data['L']
x_label = 'L'
y_label = 'Intensity2'

# getting the values of each row
param_1 = param1.values
param_2 = param2.values

# creating the plotly figure
fig = go.Figure()
# adding a trace of the current vs voltage
fig.add_trace(go.Scatter(x=param_2, y=param_1, mode='lines', name='data', line=dict(color='blue', width=1)))

In [12]:

####################################################################################
# Fitting a polynomial regression to the data
####################################################################################

# creating a dataframe to store the results
results = pd.DataFrame(columns=['degree order', 'R^2 score'])
volts = param_2.reshape(-1, 1)  # reshaping voltage data from 1D array to 2D array

scaler = StandardScaler().fit(
    volts)  # scales the data so it has a mean of 0 and a sd of 1. the fit method calculates the mean and sd
volts = scaler.transform(
    volts)  # applies the scaling to the volts data. trasnofrm standardises the data by substracting the mean of the data and dividing by the sd
for degree in range(1, 150):
    poly = PolynomialFeatures(degree=degree)  # degree is the polynomial degree to be calculated.
    X = poly.fit_transform(volts)  # transforms the data into a polynomial of the specified degree.
    # creating a matrix X that includes original volts data as well as all polynomial features up to degree, degree.

    reg = LinearRegression().fit(X,
                                 param_1)  # fit trains the model on the input data X and the output variable current.
    # LinearRegression fits a linear regression model to the data X (the input features) and y (the output features, current).

    r2 = r2_score(param_1, reg.predict(
        X))  # calculates the coefficient of determination (r^2). takes the dependent variable, current,
    # and the predicted values of dependent variable obtained from the regression
    results = results.append({'degree order': degree, 'R^2 score': r2},
                             ignore_index=True)  # appends the results to the dataframe

print(results)


The frame.append method is deprecated and will be removed from pandas in a future version. Use pandas.concat instead.


The frame.append method is deprecated and will be removed from pandas in a future version. Use pandas.concat instead.


The frame.append method is deprecated and will be removed from pandas in a future version. Use pandas.concat instead.


The frame.append method is deprecated and will be removed from pandas in a future version. Use pandas.concat instead.


The frame.append method is deprecated and will be removed from pandas in a future version. Use pandas.concat instead.


The frame.append method is deprecated and will be removed from pandas in a future version. Use pandas.concat instead.


The frame.append method is deprecated and will be removed from pandas in a future version. Use pandas.concat instead.


The frame.append method is deprecated and will be removed from pandas in a future version. Use pandas.concat instead.


The frame.append method is deprecated a

     degree order  R^2 score
0             1.0   0.009813
1             2.0   0.352474
2             3.0   0.355701
3             4.0   0.617596
4             5.0   0.619772
..            ...        ...
144         145.0   0.046619
145         146.0   0.046005
146         147.0   0.046058
147         148.0   0.045364
148         149.0   0.045306

[149 rows x 2 columns]



The frame.append method is deprecated and will be removed from pandas in a future version. Use pandas.concat instead.


The frame.append method is deprecated and will be removed from pandas in a future version. Use pandas.concat instead.


The frame.append method is deprecated and will be removed from pandas in a future version. Use pandas.concat instead.


The frame.append method is deprecated and will be removed from pandas in a future version. Use pandas.concat instead.


The frame.append method is deprecated and will be removed from pandas in a future version. Use pandas.concat instead.


The frame.append method is deprecated and will be removed from pandas in a future version. Use pandas.concat instead.


The frame.append method is deprecated and will be removed from pandas in a future version. Use pandas.concat instead.


The frame.append method is deprecated and will be removed from pandas in a future version. Use pandas.concat instead.


The frame.append method is deprecated a

In [13]:
print(results['R^2 score'].dtype)

float64


In [14]:
max_r2 = results['R^2 score'].max()

max_index = results['R^2 score'].idxmax()

max_degree = results.loc[max_index, 'degree order']

print(f'The maximum R^2 score is {max_r2} and the degree order is {max_degree}')

The maximum R^2 score is 0.999878747728988 and the degree order is 45.0


In [15]:
best_degree = int(max_degree)
poly = PolynomialFeatures(degree=best_degree)
X = poly.fit_transform(volts)

powers = poly.powers_
coef = np.polyfit(X[:, 0], param_1, best_degree)  # calculates the coefficients of the polynomial of degree best_degree

poly_func = ''
for i, power in enumerate(powers):
    if power[0] == 0:
        poly_func += f'{coef[i]:.2f}'
    else:
        poly_func += f'{coef[i]:.2f}x^{power[0]}+'

print(poly_func)

1788.321788.32x^1+1788.32x^2+1788.32x^3+1788.32x^4+1788.32x^5+1788.32x^6+1788.32x^7+1788.32x^8+1788.32x^9+1788.32x^10+1788.32x^11+1788.32x^12+1788.32x^13+1788.32x^14+1788.32x^15+1788.32x^16+1788.32x^17+1788.32x^18+1788.32x^19+1788.32x^20+1788.32x^21+1788.32x^22+1788.32x^23+1788.32x^24+1788.32x^25+1788.32x^26+1788.32x^27+1788.32x^28+1788.32x^29+1788.32x^30+1788.32x^31+1788.32x^32+1788.32x^33+1788.32x^34+1788.32x^35+1788.32x^36+1788.32x^37+1788.32x^38+1788.32x^39+1788.32x^40+1788.32x^41+1788.32x^42+1788.32x^43+1788.32x^44+1788.32x^45+



Polyfit may be poorly conditioned



In [16]:
reg = LinearRegression().fit(X, param_1)
fig.add_trace(go.Scatter(x=param2, y=reg.predict(X), mode='lines', name='regression', line=dict(color='red', width=1)))
fig.update_layout(title='Polynomial Regression of degree {} (R2={:.6f})'.format(best_degree, max_r2),
                  xaxis_title=x_label, yaxis_title=y_label)
fig.update_layout(legend=dict(yanchor="top", y=0.99, xanchor="left", x=0.0))
fig.show()

#########################################################################################

value = float(input('Enter an x-coordinate: '))  # user enters x coordinates
value_2 = float(input('Enter an x-coordinate: '))

idx = data.loc[data['L'] == value].index[0]  # determines the index of the x coordinate in the data
idx_2 = data.loc[data['L'] == value_2].index[0]

x_0 = param2.iloc[idx]  # extracts the exact x coordinate from the data
y_0 = param1.iloc[idx]

x_1 = param2.iloc[idx_2]
y_1 = param1.iloc[idx_2]

y_1 = reg.predict(poly.fit_transform(scaler.transform(np.array(value).reshape(-1,
                                                                              1))))  # predicting the y value for the user-provided x coordinate using the previously trained model
# firstly reshaped to 2D array (same shape as training data), and then standardised, then transformed into a polynomial feature matrix, then y value predicted.
y_2 = reg.predict(poly.fit_transform(scaler.transform(np.array(value_2).reshape(-1, 1))))

# calculations for a straight line between the two points
x = [x_0, x_1]

m = (y_2 - y_1) / (x_1 - x_0)
b = y_1 - m * x_1
m = np.isscalar(m)
b = np.isscalar(b)

y = [y_1[0], y_2[0]]
'''
y = [m * xi + b for xi in x]
'''

fig.add_trace(
    go.Scatter(x=x, y=y, mode='lines', name='Line between selected Potentials', line=dict(color='green', width=1)))
fig.update_layout(title='Polynomial Regression of degree {} (R2={:.6f})'.format(best_degree, max_r2),
                  xaxis_title=x_label, yaxis_title=y_label)
fig.update_layout(legend=dict(yanchor="top", y=0.99, xanchor="left", x=0.0))


fig.show()

In [17]:
y_pred = reg.predict(X)

x = np.linspace(x_0, x_1,
                len(y_pred))  # creates an array, x, containing a sequence of evenly spaced numbers between x_0 and x_1 with the same length as y_pred


def line(x, m, b):
    return [m * i + b for i in
            x]  # defines a function line that takes x, m and b as arguments and returns an array where each element is a value of the equation
    # for each element i in the array x


y_line = line(x, m,
              b)  # calls the line function and assigns the result to y_line. produces an array that represents the line of best fit


# Define the line function
def line_2(x, m, b):
    return m * x + b


# Compute the coefficients of the line of best fit
m = (y_1 - y_2) / (x_1 - x_0)
b = y_1 - m * x_1

# Compute the integral of the line of best fit between x_0 and x_1
area_2 = fixed_quad(lambda x: line_2(x, m, b), x_0, x_1, n=1000)[0]

# convesrion into arrays
volts = np.array(volts)
y_pred = np.array(y_pred)
y_line = np.array(y_line)

# converts into 1D arrays
volts_flat = volts.flatten()
y_pred_flat = y_pred.flatten()
y_line_flat = y_line.flatten()

# calculate the area
# area = fixed_quad(lambda x: abs(np.interp(x, volts, y_pred) - np.interp(x, volts, y_line)), x_0, x_1, n=1000)
area = fixed_quad(lambda x: abs(np.interp(x.flatten(), param2, y_pred_flat)), x_0, x_1, n=1000)

area_calc = area[0] - area_2


In [18]:
print(f'The area of the specified region is {area_calc}')

charge_C = area_calc
first = param2.iloc[0]
last = param2.iloc[-1]
E_range = last-first
scan_rate = 0.050
t = (2*E_range)/ scan_rate
I = charge_C/t
F = 96485
moles_elec = 2
num_elec_mol = charge_C/(F*2)
Avogadro = 6.022E23
num_elec = num_elec_mol*Avogadro

calcs = {'Area': [area_calc],
         'Current (A)': [I],
        'Charge (C)': [charge_C],
        'Number of Electrons': [num_elec]}

calculations = pd.DataFrame(calcs)
display(calculations)


The area of the specified region is 4629.91519055565


Unnamed: 0,Area,Current (A),Charge (C),Number of Electrons
0,4629.915191,145.827775,4629.915191,1.444854e+22


# Results and corresponding parameters in the following file:

In [19]:
results_filename = 'Electrochemical_stripping_data.csv'
if os.path.isfile(results_filename):
    with open(results_filename, 'a', newline='') as file:
        writer = csv.writer(file)
        writer.writerow([filename, value, value_2, area_calc, charge_C, num_elec])
else:
    with open(results_filename, 'w', newline='') as file_2:
        writer = csv.writer(file_2)
        writer.writerow(['Filename', 'x_coordinate_1', 'x_coordinate_2', 'Area', 'Charge (C)', 'Number of electrons'])
        writer.writerow([filename, value, value_2, area_calc, charge_C, num_elec])

In [21]:
df = pd.read_csv('Electrochemical_stripping_data.csv')
df.tail()

Unnamed: 0,Filename,x_coordinate_1,x_coordinate_2,Area,Charge (C),Number of electrons
34,data/integration_test_data.csv,-1.0,1.0,-1.329999,-1.329999,-4.150518e+18
35,data/Cd_strip_8_eald.csv,-0.8003,-0.6003,0.18133,0.009066,5.659477e+16
36,../data/30cyc.csv,1.94721,2.24586,4598.231279,229.911564,1.435153e+21
37,../data/30cyc.csv,1.94721,2.25372,4629.915191,231.49576,1.445042e+21
38,../data/30cyc.csv,1.94721,2.25372,4629.915191,4629.915191,1.444854e+22
