In [2]:
import sqlite3
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
from matplotlib.ticker import FuncFormatter
import seaborn as sns
from pathlib import Path
import matplotlib as mpl
from matplotlib.lines import Line2D
from scipy import stats
# Global matplotlib text/font settings, applied in a single update call.
mpl.rcParams.update({
    # base font size for all figure text
    'font.size': 8,
    # use Helvetica as the sans-serif face everywhere
    'font.family': 'sans-serif',
    'font.sans-serif': ['Helvetica'],
    # fonttype 42 avoids Type 3 fonts in PDF / PS output
    'pdf.fonttype': 42,
    'ps.fonttype': 42,
})

In [65]:
# Location of the NERD SQLite database (relative paths resolve against the
# current working directory).
nerd_sqlite = '../../../Core_nerd_analysis/nerd.sqlite'

# Resolve to an absolute path up front so the error below names the exact
# file that was looked for.
db_path = Path(nerd_sqlite).expanduser().resolve()
if db_path.exists():
    conn = sqlite3.connect(db_path)
    # rows are returned as sqlite3.Row objects
    conn.row_factory = sqlite3.Row
else:
    raise FileNotFoundError(f"Database not found: {db_path}")

def read_sql(query: str, params: dict | None = None) -> pd.DataFrame:
    """Run a SQL query against the configured database and return a DataFrame."""
    return pd.read_sql_query(query, conn, params=params or {})

rg_ids = [123, 124, 129, 130]  # P4P6 rg_ids

# One '?' per rg_id so the IN (...) list stays fully parameterized.
placeholders = ",".join("?" for _ in rg_ids)
query = f"""
SELECT *
FROM probe_tc_fits_view
WHERE fit_kind = 'round3_constrained'
  AND rg_id IN ({placeholders})
  AND rt_protocol = 'MRT'
ORDER BY rg_id
"""

df = read_sql(query, params=rg_ids)
# All rows are in memory now; the connection is not needed past this point.
conn.close()

# Largest log_kobs observed for each nucleotide identity, keyed by nt_base.
max_logkobs = {
    base: df.loc[df['nt_base'] == base, 'log_kobs'].max()
    for base in df['nt_base'].unique()
}
max_logkobs

def calc_dG(logkobs, logkadd, T=298.15):
    """Convert a log rate ratio into a free energy in kcal/mol.

    ``exp(logkobs - logkadd)`` is treated as the fraction ``K / (K + 1)``;
    the function solves for ``K`` and returns ``dG = -R * T * ln(K)``.

    Parameters
    ----------
    logkobs : float or array-like
        Natural log of the observed rate.
    logkadd : float or array-like
        Natural log of the reference (maximum) rate; broadcasts against
        ``logkobs``.
    T : float, optional
        Temperature in kelvin (default 298.15).

    Returns
    -------
    float or array-like
        dG in kcal/mol. Limits are returned silently instead of raising
        RuntimeWarnings: ``-inf`` when ``logkobs == logkadd`` (fraction of 1,
        K is infinite) and ``NaN`` when ``logkobs > logkadd`` (fraction above
        1, K is undefined).
    """
    R = 1.9872036e-3  # kcal/(mol*K)

    log_frac = logkobs - logkadd  # ln[K / (K + 1)]

    # ln K = ln f - ln(1 - f). Evaluating 1 - f as -expm1(ln f) stays in log
    # space, so no division by (1 - f) is performed and precision is kept
    # when f is close to 0.
    with np.errstate(divide='ignore', invalid='ignore'):
        log_K = log_frac - np.log(-np.expm1(log_frac))

    return -R * T * log_K

# calculate dG using base-specific max log_kobs.
# Each row's nt_base is mapped to that base's maximum, then calc_dG is called
# once on whole columns instead of once per row through apply(axis=1).
df['dG'] = calc_dG(df['log_kobs'], df['nt_base'].map(max_logkobs))

# Spot check: dG at nt_site 170 in buffer_id 3. A single .loc selection
# replaces chained indexing; .iloc[0] raises IndexError if no row matches.
site_mask = (df['nt_site'] == 170) & (df['buffer_id'] == 3)
df.loc[site_mask, 'dG'].iloc[0]

  K = KKp1 / (1 - KKp1)


np.float64(-1.2055878012962968)

In [66]:
# The CSV has no header row, so its columns are labelled 0, 1, 2, ...
fr3d_contacts = pd.read_csv('1GID_fr3d_all.csv', header=None)

# Columns 0 and 7 hold '|'-delimited identifiers for the two residues of each
# contact. Split each column once and pick fields from the end: -1 is the
# site and -3 is the chain.
res1_fields = fr3d_contacts[0].str.split('|')
res2_fields = fr3d_contacts[7].str.split('|')

# Only the fields kept in the final table are derived. Sites stay as strings,
# so compare against '170', not 170.
fr3d_contacts['res1_chain'] = res1_fields.str[-3]
fr3d_contacts['res1_site'] = res1_fields.str[-1]
fr3d_contacts['res2_site'] = res2_fields.str[-1]

# Keep the parsed residue fields plus columns 2-6 of the original file.
fr3d_contacts = fr3d_contacts[['res1_chain', 'res1_site', 'res2_site', 2, 3, 4, 5, 6]]

In [67]:
# All contacts whose first residue is site 170 (sites are stored as strings).
fr3d_contacts.loc[fr3d_contacts['res1_site'].eq('170')]

Unnamed: 0,res1_chain,res1_site,res2_site,2,3,4,5,6
251,A,170,171,s35,,,,
252,A,170,254,,,,cWW,
253,A,170,255,ns55,,,,
254,A,170,169,s53,,,,
830,B,170,171,s35,,,,
831,B,170,255,ns55,,,,
832,B,170,254,,,,cWW,
833,B,170,169,s53,,,,


C109 ribose zipper