In [1]:
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
from scipy.optimize import curve_fit
from scipy.stats import linregress
from scipy import stats
from uncertainties import ufloat, unumpy
from uncertainties.umath import *
from uncertainties import unumpy as unp
from collections import OrderedDict, defaultdict
import matplotlib
# Figure export / styling configuration for the whole notebook.
# fonttype 42 makes the PDF and PS backends embed TrueType (Type 42) fonts
# instead of the default Type 3 fonts.
matplotlib.rcParams['pdf.fonttype'] = 42
matplotlib.rcParams['ps.fonttype'] = 42
# use Arial for all figure text
matplotlib.rcParams['font.family'] = 'Arial'
# seaborn "poster" plotting context, with the font scale left at 1
sns.set_context('poster', font_scale=1)
%matplotlib inline

In [2]:
# Physical constants expressed in kcal-based units; they are read as
# module-level globals by the helper functions defined below.
R = 0.00198588 # gas constant in kcal/(mol*K)

# 0.000239006 is the J -> kcal conversion factor applied to the SI value
bol = 1.380649e-23 * 0.000239006# Boltzmann constant in kcal/K

# same J -> kcal conversion applied to the SI value
plank = 6.62607015e-34 * 0.000239006 # Planck constant in kcal*s

# convert a free energy dG into the Boltzmann factor exp(-dG/(R*T))
def dg_to_pB(dG, T = 65+273.15):
    """Return exp(-dG / (R*T)).

    dG : free energy, in the energy units of the module-level gas
         constant R (kcal/mol).
    T  : temperature in kelvin; defaults to 65 C (65 + 273.15).

    ``exp`` is the function star-imported from uncertainties.umath at the
    top of the notebook, and R is only read here, never reassigned.
    """
    return exp(-dG/(R*T))

def k_to_dg(kon, koff, T = 65+273.15):
    """Return dG = -R*T*ln(kon/koff).

    kon, koff : forward and reverse rate constants (same units, so that
                their ratio is dimensionless).
    T         : temperature in kelvin; defaults to 65 C (65 + 273.15).

    ``log`` is the natural logarithm star-imported from
    uncertainties.umath; R is the module-level gas constant.
    """
    rate_ratio = kon/koff
    return -R*T*log(rate_ratio)

def pB_to_dg(pb, ct, T = 65+273.15):
    """Return dG = -R*T*ln((ct*pb)**2 / (ct*(1 - pb))).

    pb : population (fraction between 0 and 1) entering the ratio above.
    ct : total concentration multiplying both populations.
    T  : temperature in kelvin; defaults to 65 C (65 + 273.15).

    NOTE(review): the squared numerator looks like a two-species
    equilibrium expression -- confirm the intended model with the author.
    """
    numerator = (ct*pb)**2
    denominator = ct*(1-pb)
    return -R*T*log((numerator/denominator))

# coefficient of determination (R squared) of a fitted model
def r_squred(x, y, func, parms):
    """Return R^2 = 1 - SS_res/SS_tot for ``func(x, *parms)`` against ``y``.

    x, y  : numpy arrays of the independent and observed values.
    func  : model callable, invoked as ``func(x, *parms)``.
    parms : iterable of fitted parameters unpacked into ``func``.
    """
    predicted = func(x, *parms)
    deviation_from_fit = y - predicted
    deviation_from_mean = y - np.mean(y)

    ss_res = np.sum(deviation_from_fit**2)
    ss_tot = np.sum(deviation_from_mean**2)

    return 1-ss_res/ss_tot

# R squared for the equation using the harmonic mean of temperature
def r_squred_new(x, y, func, parms):
    """Return R^2 where the model prediction is ``log(func(x, *parms)/x)``.

    x, y  : numpy arrays; ``y`` is compared against the natural log of the
            model output divided by ``x`` (presumably y = ln(k/T) -- confirm
            against the caller).
    func  : model callable, invoked as ``func(x, *parms)``.
    parms : iterable of fitted parameters unpacked into ``func``.
    """
    log_prediction = np.log(func(x, *parms)/x)
    deviation_from_fit = y - log_prediction
    deviation_from_mean = y - np.mean(y)

    ss_res = np.sum(deviation_from_fit**2)
    ss_tot = np.sum(deviation_from_mean**2)

    return 1-ss_res/ss_tot

# recover kon from dG and koff
def get_kon(dG, koff, T = 37+273.15):
    """Return kon = koff * exp(dG / (-R*T)).

    dG   : free energy in the energy units of the module-level R (kcal/mol).
    koff : reverse rate constant.
    T    : temperature in kelvin; defaults to 37 C (37 + 273.15), which
           differs from the 65 C default of the other dG helpers.
    """
    boltzmann_factor = exp(dG/(-R*T))
    return koff*(boltzmann_factor)

# rate equation referenced to the harmonic mean temperature, no error propagation
def vanhoff(x, a, b, tharm):
    """Return (bol*x/plank) * exp(-a/(R*tharm) - (b/R)*(1/x - 1/tharm)).

    x     : temperature(s) in kelvin; float or numpy array (np.exp is used).
    a, b  : fit parameters in the energy units of R (presumably the
            activation free energy at ``tharm`` and the activation
            enthalpy -- confirm with the author).
    tharm : reference (harmonic mean) temperature in kelvin.

    bol, plank and R are the module-level constants.
    """
    prefactor = bol*x/plank
    exponent = -a/(R*tharm) - (b/R) * (1/x - 1/tharm)
    return (prefactor)*np.exp(exponent)

# same rate equation as vanhoff, but with error propagation
def vanhoff_err(x, a, b, tharm):
    """Return (bol*x/plank) * exp(-a/(R*tharm) - (b/R)*(1/x - 1/tharm)).

    Identical formula to ``vanhoff`` but evaluated with ``exp`` from
    uncertainties.umath, so ``a`` and ``b`` may be ufloat values and the
    result carries their propagated uncertainty.

    x     : temperature in kelvin (scalar).
    a, b  : fit parameters, float or ufloat.
    tharm : reference (harmonic mean) temperature in kelvin.
    """
    prefactor = bol*x/plank
    exponent = -a/(R*tharm) - (b/R) * (1/x - 1/tharm)
    return (prefactor)*exp(exponent)

# straight line used for the dH, dS extrapolation (dG = dH - T*dS)
def line(x, a, b):
    """Return ``a * x + b`` (slope ``a``, intercept ``b``)."""
    scaled = a * x
    return scaled + b

def get_parm_for_linear_k(kon_ss, koff_ss, T_ss, tharm):
    '''
    Fit kon and koff against temperature with the harmonic-mean-temperature
    rate equation implemented by ``vanhoff``.

    Parameters
    ----------
    kon_ss, koff_ss : sequence of ufloat
        On and off rate constants, one per temperature. The nominal values
        (``.n``) are the fitted data and the standard deviations (``.s``)
        are handed to ``curve_fit`` as ``sigma``.
    T_ss : array-like of float
        Experimental temperatures; the callers in this notebook pass a plain
        float numpy array in kelvin (not ufloat).
    tharm : float
        Harmonic mean of ``T_ss``; held fixed during the fit.

    Returns
    -------
    tuple
        ``(fiton_result, fitoff_result, on_a_fl, on_b_fl, off_a_fl, off_b_fl)``
        where the first two items are the raw ``curve_fit`` parameter arrays
        ``[a, b]`` for kon and koff, and the remaining four are the same
        parameters as ufloat values whose standard deviation is the square
        root of the corresponding diagonal element of the covariance matrix.
    '''
    def _fit_rates(rates):
        # Fit one set of rate constants; returns (popt, a, b) with a, b as ufloat.
        values = np.array([k.n for k in rates])
        errors = np.array([k.s for k in rates])

        # tharm is fixed through the closure, only a and b are optimised
        popt, pcov = curve_fit(lambda x, a, b : vanhoff(x, a, b, tharm),
                               T_ss, values, sigma=errors)

        # parameter standard errors from the covariance diagonal
        err_a, err_b = np.sqrt(np.diag(pcov))
        fit_a, fit_b = popt

        return popt, ufloat(fit_a, err_a), ufloat(fit_b, err_b)

    # Each fit is weighted by the uncertainties of its OWN data set; the kon
    # fit must use the kon standard deviations, not those of koff.
    fiton_result, on_a_fl, on_b_fl = _fit_rates(kon_ss)
    fitoff_result, off_a_fl, off_b_fl = _fit_rates(koff_ss)

    return fiton_result, fitoff_result, on_a_fl, on_b_fl, off_a_fl, off_b_fl

# linear extrapolation for methyl rotation rates in monomer

In [3]:
# kex and pB for the monomer at 20C, 25C and 37C, stored as
# ufloat(nominal value, standard deviation); pB is entered as percent/100
kex_m = [ufloat(1400.4, 19.8), ufloat(2124.5, 72),ufloat(5817., 285.)]

pB_m = [ufloat(6.6/100, 0.2/100), ufloat(6.134/100, 0.1/100),ufloat(5.3/100, 0.4/100)]

# x axis: the three measurement temperatures (20C, 25C and 37C) in kelvin
T_m = np.array([273.15+20, 273.15 + 25.0, 273.15 + 37.0])

# harmonic mean of the temperatures: n / sum(1/T)
tharm_m = (1/(np.sum(1/T_m)))*len(T_m)

# kon = kex*pB at each temperature (uncertainties propagate through ufloat)
kon_m = [kex_m[i]*pB_m[i] for i in range(len(T_m))]

# koff = kex*(1-pB) at each temperature
koff_m = [kex_m[i]*(1-pB_m[i]) for i in range(len(T_m))]

# fit the a, b parameters of the rate equation for kon and koff
fiton_result_m, fitoff_result_m, on_a_fl_m, on_b_fl_m, off_a_fl_m, off_b_fl_m = get_parm_for_linear_k(kon_m, koff_m, T_m, tharm_m)

# evaluate the fitted kon/koff at 55C (328.15 K) with error propagation
kon_55_m = vanhoff_err(55+273.15, on_a_fl_m, on_b_fl_m, tharm_m)
koff_55_m = vanhoff_err(55+273.15, off_a_fl_m, off_b_fl_m, tharm_m)

# persist the fitted parameters with IPython's %store so other notebooks can load them
%store on_a_fl_m
%store on_b_fl_m
%store off_a_fl_m
%store off_b_fl_m
%store tharm_m

Stored 'on_a_fl_m' (Variable)
Stored 'on_b_fl_m' (Variable)
Stored 'off_a_fl_m' (Variable)
Stored 'off_b_fl_m' (Variable)
Stored 'tharm_m' (float64)


# linear extrapolation for methyl rotation rates in ssRNA

In [4]:
# kex and pB for ssGGACU at 20C, 25C, 37C and 45C, stored as
# ufloat(nominal value, standard deviation); pB is entered as percent/100
kex = [ufloat(506, 14), ufloat(640.08, 14.077),ufloat(1957.09, 60.0), ufloat(4413.46, 238.0)]

pB = [ufloat(9.2/100, 0.5/100), ufloat(9.2/100, 0.01/100),ufloat(9.0/100, 0.2/100),ufloat(7.0/100, 0.4/100)]

# x axis: the four measurement temperatures (20C, 25C, 37C and 45C) in kelvin
T_ss = np.array([273.15+20, 273.15 + 25.0, 273.15 + 37.0, 
                 273.15 + 45.0])

# harmonic mean of the temperatures: n / sum(1/T)
tharm_ss = (1/(np.sum(1/T_ss)))*len(T_ss)

# kon = kex*pB at each temperature (uncertainties propagate through ufloat)
kon_ss = [kex[i]*pB[i] for i in range(len(T_ss))]

# koff = kex*(1-pB) at each temperature
koff_ss = [kex[i]*(1-pB[i]) for i in range(len(T_ss))]

# fit the a, b parameters of the rate equation for kon and koff
fiton_result_ss, fitoff_result_ss, on_a_fl_ss, on_b_fl_ss, off_a_fl_ss, off_b_fl_ss = get_parm_for_linear_k(kon_ss, koff_ss, T_ss, tharm_ss)

# evaluate the fitted kon/koff at 55C (328.15 K) with error propagation
# NOTE(review): the dsRNA cell further down assigns the same names
# kon_55/koff_55, so these ssRNA values are lost once that cell runs.
kon_55 = vanhoff_err(55+273.15, on_a_fl_ss, on_b_fl_ss, tharm_ss)
koff_55 = vanhoff_err(55+273.15, off_a_fl_ss, off_b_fl_ss, tharm_ss)

# persist the fitted parameters with IPython's %store so other notebooks can load them
%store on_a_fl_ss
%store on_b_fl_ss
%store off_a_fl_ss
%store off_b_fl_ss
%store tharm_ss

Stored 'on_a_fl_ss' (Variable)
Stored 'on_b_fl_ss' (Variable)
Stored 'off_a_fl_ss' (Variable)
Stored 'off_b_fl_ss' (Variable)
Stored 'tharm_ss' (float64)


# linear extrapolation for methyl rotation rates in dsRNA

In [5]:
# dsRNA rates measured at three temperatures are fitted here so they can be
# extrapolated to other temperatures (the original note mentions DNA melting
# at 50C).
# NOTE(review): the note says 50C but the evaluation below uses 55C -- confirm
# which temperature is intended.

# x axis: three temperatures (37C, 55C and 65C) in kelvin
T_ds = np.array([273.15+37, 273.15 + 55.0, 273.15 + 65.0])

# harmonic mean of the temperatures: n / sum(1/T)
tharm_ds = (1/(np.sum(1/T_ds)))*len(T_ds)

# kon and koff at each temperature as ufloat(nominal value, standard deviation)
kon_ds = [ufloat(1.26, 0.34), ufloat(6.4, 0.4), ufloat(17.4, 0.4)]

koff_ds = [ufloat(209.1, 44.0), ufloat(572.2 , 23.9), ufloat(1095.6, 12.0)]

# fit the a, b parameters of the rate equation for kon and koff
fiton_result_ds, fitoff_result_ds, on_a_fl_ds, on_b_fl_ds, off_a_fl_ds, off_b_fl_ds = get_parm_for_linear_k(kon_ds, koff_ds, T_ds, tharm_ds)

# evaluate the fitted kon/koff at 55C (328.15 K) with error propagation
# NOTE(review): kon_55/koff_55 are the same names the ssRNA cell assigns, so
# running this cell overwrites the ssRNA values; consider a _ds suffix.
kon_55 = vanhoff_err(55+273.15, on_a_fl_ds, on_b_fl_ds, tharm_ds)
koff_55 = vanhoff_err(55+273.15, off_a_fl_ds, off_b_fl_ds, tharm_ds)

# persist the fitted parameters with IPython's %store so other notebooks can load them
%store on_a_fl_ds
%store on_b_fl_ds
%store off_a_fl_ds
%store off_b_fl_ds
%store tharm_ds

Stored 'on_a_fl_ds' (Variable)
Stored 'on_b_fl_ds' (Variable)
Stored 'off_a_fl_ds' (Variable)
Stored 'off_b_fl_ds' (Variable)
Stored 'tharm_ds' (float64)
