# Evaluate fitting of real world data using the ACBP dataset

To evaluate PyChemelt's performance on a real-world dataset, the ACBP dataset from Hamborg et al. was processed and used. This dataset provides 16 thermal denaturation curves recorded at differing concentrations of GuHCl, so that the parameters can be fitted to the curves in a two-dimensional fashion, i.e. with respect to both temperature and chemical denaturation.

In [1]:
import pychemelt as pychem
import numpy as np
import pandas as pd
from plotly.subplots import make_subplots

from pychemelt.utils.math import (
    linear_baseline,
    exponential_baseline
)

from pychemelt.utils.plotting import PlotConfig, AxisConfig, plot_unfolding
import plotly.graph_objs as go
from pychemelt import Sample


def kcal_to_kJ(value):
    """Convert an energy value from kilocalories to kilojoules.

    Multiplies ``value`` by 4.184 (kJ per kcal) and returns the product.
    """
    kj_per_kcal = 4.184
    converted = value * kj_per_kcal
    return converted

def C_to_K(value):
    """Convert a temperature from degrees Celsius to Kelvin.

    Adds the 273.15 offset to ``value`` and returns the sum.
    """
    kelvin_offset = 273.15
    converted = value + kelvin_offset
    return converted


In [2]:

# Create a Sample object for the storage and processing of the DSF data
sample = Sample()

# Read the processed measurement file
sample.read_file('../data/20191202_ACBP_15C_95C_processed.xlsx')

# Select conditions and data: both the 350 nm and the 330 nm signals are used
sample.set_denaturant_concentrations()
sample.set_signal(['350nm', '330nm'])
# NOTE(review): the list holds 15 x True followed by one False; presumably this
# keeps the first 15 conditions and drops the last one - confirm against
# Sample.select_conditions
sample.select_conditions([True]*15 + [False])


# Plot the selected signals against the temperature
plot_unfolding(sample)




In [3]:
# Reduce the amount of data and the preprocessing steps for a faster analysis
sample.pre_fit = False
sample.max_points = 200


sample.expand_multiple_signal()
# Baseline models: linear for the native state, exponential for the unfolded state
sample.estimate_baseline_parameters(
    native_baseline_type='linear',
    unfolded_baseline_type='exponential',
)

# When using the quadratic unfolded baseline estimator, no fit was achieved, even when only
# considering 200 data points and after 50 min on a regular laptop

# Estimation of starting parameters
sample.estimate_derivative()
sample.guess_Tm()

# Set the number of residues of the protein for an initial estimate of the Cp value
sample.n_residues = 86 # doi: 10.1042/bj2450857
sample.guess_Cp()


sample.set_signal_id()

# All local fit
sample.fit_thermal_unfolding_local()

# TODO: check out local unfolding and why it only appears after running global, and then not accurately
# NOTE(review): this attribute is set by hand right before plotting; presumably a
# workaround for the issue in the TODO above - confirm what plot_unfolding expects here
sample.global_fit_params = 1
plot_unfolding(sample)

In [4]:

# Collects one entry per fitting strategy for every reported quantity (SI-style units)
fitting_results = {
    column: []
    for column in (
        "Tm (K)",
        "ΔH (kJ/mol)",
        "Cp (kJ/mol/K)",
        "m-value (kJ/mol/M)",
    )
}

# Strategy 1: global thermodynamic parameters, local slopes and local baselines
sample.fit_thermal_unfolding_global()

# Rows 0-3 of params_df hold Tm, ΔH, Cp and the m-value (column 1 is the fitted value);
# Tm is converted from °C to K, the remaining three from kcal to kJ
Tm, dH, Cp, m = (
    convert(sample.params_df.iloc[row, 1])
    for row, convert in enumerate((C_to_K, kcal_to_kJ, kcal_to_kJ, kcal_to_kJ))
)

# Store the converted values
for label, value in (
    ("Tm (K)", Tm),
    ("ΔH (kJ/mol)", dH),
    ("Cp (kJ/mol/K)", Cp),
    ("m-value (kJ/mol/M)", m),
):
    fitting_results[label].append(value)

# Report the converted values
for label, value in (
    ("Tm (K)", Tm),
    ("ΔH (kJ/mol)", dH),
    ("Cp (kJ/mol/K)", Cp),
    ("m-value (kJ/mol/M)", m),
):
    print(label + ": ", value)

# Show the untouched fit table (first four parameters, first three columns) for reference
print(sample.params_df.iloc[:4, :3])


plot_unfolding(sample)


Tm (K):  327.6603055647827
ΔH (kJ/mol):  307.7390127437517
Cp (kJ/mol/K):  2.6859518422303057
m-value (kJ/mol/M):  13.859770122923006
              Parameter      Value  Relative error (%)
0               Tm (°C)  54.510306            0.127975
1         ΔH (kcal/mol)  73.551389            0.853341
2      Cp (kcal/mol/°C)   0.641958            4.939478
3  m-value (kcal/mol/M)   3.312565            0.530950


In [5]:

# Strategy 2: global thermodynamic parameters and slopes, local baselines
sample.fit_thermal_unfolding_global_global()

# Rows 0-3 of params_df hold Tm, ΔH, Cp and the m-value (column 1 is the fitted value);
# Tm is converted from °C to K, the remaining three from kcal to kJ
Tm, dH, Cp, m = (
    convert(sample.params_df.iloc[row, 1])
    for row, convert in enumerate((C_to_K, kcal_to_kJ, kcal_to_kJ, kcal_to_kJ))
)

# Store the converted values
for label, value in (
    ("Tm (K)", Tm),
    ("ΔH (kJ/mol)", dH),
    ("Cp (kJ/mol/K)", Cp),
    ("m-value (kJ/mol/M)", m),
):
    fitting_results[label].append(value)

# Report the converted values
for label, value in (
    ("Tm (K)", Tm),
    ("ΔH (kJ/mol)", dH),
    ("Cp (kJ/mol/K)", Cp),
    ("m-value (kJ/mol/M)", m),
):
    print(label + ": ", value)

# Show the untouched fit table (first four parameters, first three columns) for reference
print(sample.params_df.iloc[:4, :3])

plot_unfolding(sample)

Tm (K):  326.2768328545378
ΔH (kJ/mol):  331.52805101996574
Cp (kJ/mol/K):  4.488092601008022
m-value (kJ/mol/M):  13.605548665040539
              Parameter      Value  Relative error (%)
0               Tm (°C)  53.126833            0.072626
1         ΔH (kcal/mol)  79.237106            0.647203
2      Cp (kcal/mol/°C)   1.072680            2.156267
3  m-value (kcal/mol/M)   3.251804            0.440289


In [6]:
# Strategy 3: global thermodynamic parameters, slopes and baselines
sample.fit_thermal_unfolding_global_global_global()

# Rows 0-3 of params_df hold Tm, ΔH, Cp and the m-value (column 1 is the fitted value);
# Tm is converted from °C to K, the remaining three from kcal to kJ
Tm, dH, Cp, m = (
    convert(sample.params_df.iloc[row, 1])
    for row, convert in enumerate((C_to_K, kcal_to_kJ, kcal_to_kJ, kcal_to_kJ))
)

# Store the converted values
for label, value in (
    ("Tm (K)", Tm),
    ("ΔH (kJ/mol)", dH),
    ("Cp (kJ/mol/K)", Cp),
    ("m-value (kJ/mol/M)", m),
):
    fitting_results[label].append(value)

# Report the converted values
for label, value in (
    ("Tm (K)", Tm),
    ("ΔH (kJ/mol)", dH),
    ("Cp (kJ/mol/K)", Cp),
    ("m-value (kJ/mol/M)", m),
):
    print(label + ": ", value)

# Show the untouched fit table (first four parameters, first three columns) for reference
print(sample.params_df.iloc[:4, :3])

plot_unfolding(sample)

Tm (K):  326.76111618774354
ΔH (kJ/mol):  317.802051913693
Cp (kJ/mol/K):  4.3181030116241095
m-value (kJ/mol/M):  13.368582414381846
              Parameter      Value  Relative error (%)
0               Tm (°C)  53.611116            0.068691
1         ΔH (kcal/mol)  75.956513            0.392750
2      Cp (kcal/mol/°C)   1.032051            0.844102
3  m-value (kcal/mol/M)   3.195168            0.322933


In [7]:
# One row per fitting strategy, in the order in which the fits were run
strategy_labels = ["global", "global global", "global global global"]
df = pd.DataFrame(data=fitting_results, index=strategy_labels)

print(df)

                          Tm (K)  ΔH (kJ/mol)  Cp (kJ/mol/K)  \
global                327.660306   307.739013       2.685952   
global global         326.276833   331.528051       4.488093   
global global global  326.761116   317.802052       4.318103   

                      m-value (kJ/mol/M)  
global                         13.859770  
global global                  13.605549  
global global global           13.368582  


The three fitting methods agree roughly on the melting temperature Tm and the m-value. They differ, however, in the enthalpy ΔH and the heat capacity Cp. The fit with global thermodynamic parameters but local slopes and baselines yields the lowest enthalpy and Cp. The other two methods agree on the Cp but differ in the enthalpy; the fitting method with global thermodynamic parameters and slopes yields the highest enthalpy.
The fitting method with global parameters and slopes but local baselines comes close to the results in the paper of Hamborg et al. (Tm: 326.2 ± 0.1 K, ΔH: 345 ± 15 kJ/mol, Cp: 4.6 ± 0.6 kJ/mol/K, m-value: 14.8 ± 0.3 kJ/mol/M). All three methods performed well with respect to the melting temperature Tm; they could not accurately reproduce the heat capacity Cp and the m-value, although they were not far off.
The fitting method used by Hamborg et al. was the method where everything was global, begging the 