### RNA folding kinetics control riboswitch sensitivity in vivo 
#### David Z. Bushhouse1,3, Jiayu Fu1,3, & Julius B. Lucks1,2,3,4,5,6* 

 

1 Interdisciplinary Biological Sciences Graduate Program, Northwestern University, Evanston, Illinois 60208, USA 

2 Department of Chemical and Biological Engineering, Northwestern University, Evanston, Illinois 60208, USA 

3 Center for Synthetic Biology, Northwestern University, Evanston, Illinois 60208, USA 

4 Center for Water Research, Northwestern University, Evanston, Illinois 60208, USA 

5 Center for Engineering Sustainability and Resilience, Northwestern University, Evanston, Illinois 60208, USA 

6 International Institute for Nanotechnology, Northwestern University, Evanston, Illinois 60208, USA 


* To whom correspondence should be addressed. Tel: 1-847-467-2943; Email: jblucks@northwestern.edu  

## Fitting Dose-Response Data to the Hill Function


#### Import Packages

In [2]:
import pandas as pd
from scipy.optimize import curve_fit
import numpy as np
from sklearn.metrics import r2_score

### Fitting with free n

In [None]:
# Input: the fluorescence table laid out as in Supplementary Data File 1
# (one row per dose/mutant pair, one column per replicate).
data = pd.read_excel('XXX')

# Columns that identify a measurement vs. columns that hold replicate values
id_vars = ['dose', 'mutant']
value_vars = [
    'Rep1', 'Rep2', 'Rep3',
    'Rep4', 'Rep5', 'Rep6',
    'Rep7', 'Rep8', 'Rep9',
]

# Reshape wide -> long (one row per replicate measurement, value in 'MEF'),
# then order the rows by mutant and dose.
melted_df = data.melt(
    id_vars=id_vars,
    value_vars=value_vars,
    var_name='Replicate',
    value_name='MEF',
).sort_values(by=['mutant', 'dose'])

# Distinct mutants, in order of first appearance in the sorted table
mutant_names = melted_df['mutant'].unique()

# Per-mutant dose (x) and MEF (y) series, index-aligned with mutant_names
all_xdata = []
all_ydata = []

for mutant in mutant_names:
    # Rows of the long table that belong to this mutant
    mutant_df = melted_df.loc[melted_df['mutant'] == mutant]

    # Dose values and the matching fluorescence readings
    xdata, ydata = mutant_df['dose'], mutant_df['MEF']

    all_xdata.append(xdata)
    all_ydata.append(ydata)

# all_xdata[i] / all_ydata[i] now hold the doses / MEF values of mutant_names[i]


def sigmoid(x, EC50, n, bottom, top):
    """Evaluate the four-parameter Hill function at dose ``x``.

    Computes ``bottom + (top - bottom) * x**n / (EC50**n + x**n)``.

    Parameters:
        x: dose value(s); anything that supports ``**`` and arithmetic.
        EC50: dose at which the response is halfway between bottom and top.
        n: Hill coefficient (exponent applied to both x and EC50).
        bottom: response value when the ``x**n`` term is zero.
        top: response value approached as ``x**n`` dominates ``EC50**n``.

    Returns:
        The response, with the same shape as ``x``.
    """
    dose_term = x**n
    half_max_term = EC50**n
    span = top - bottom
    return bottom + (span * dose_term) / (half_max_term + dose_term)

# Initial guess for the Hill parameters, in the order sigmoid() takes them:
# [EC50, n, bottom, top]. Adjust as needed for the data being fitted.
p0 = [0.0000001, 1, 1, 10]

# Column layout of the results table. 'Fold Change' and 'Fold Change SE' are
# listed here to fix their position; they are computed after all fits are done.
result_columns = ['mutant', 'EC50', 'EC50 SE', 'n', 'n SE', 'Bottom', 'Bottom SE', 'Top', 'Top SE', 'Fold Change', 'Fold Change SE', 'R-squared']

# Per-fit columns; these are reported as NaN when a mutant cannot be fitted
fit_columns = ['EC50', 'EC50 SE', 'n', 'n SE', 'Bottom', 'Bottom SE', 'Top', 'Top SE', 'R-squared']

# One dict per mutant. DataFrame.append() was removed in pandas 2.0, so the
# rows are collected in a list and the DataFrame is built once after the loop.
fit_rows = []

# Iterate over each mutant
for mutant in mutant_names:
    # Filter the melted DataFrame for the current mutant
    mutant_df = melted_df[melted_df['mutant'] == mutant]

    # Extract xdata (dose values) and ydata (MEF values) for the current mutant
    xdata = mutant_df['dose']
    ydata = mutant_df['MEF']

    # Remove rows with NaN values (missing replicates)
    valid_indices = ~ydata.isnull()
    xdata = xdata[valid_indices]
    ydata = ydata[valid_indices]

    # Start from an all-NaN row; it is only filled in if the fit succeeds
    row = dict.fromkeys(fit_columns, np.nan)
    row['mutant'] = mutant

    # With fewer valid points than fitted parameters curve_fit raises an
    # exception that is not a RuntimeError, which would abort the whole run.
    # Such mutants are reported as failed fits instead.
    if len(ydata) >= len(p0):
        try:
            popt, pcov = curve_fit(sigmoid, xdata, ydata, p0=p0)
        except RuntimeError:
            # Raised by curve_fit when the least-squares minimisation fails;
            # the NaN row prepared above records the failure.
            pass
        else:
            EC50, n, bottom, top = popt
            # Standard errors: square roots of the covariance diagonal
            EC50_se, n_se, bottom_se, top_se = np.sqrt(np.diag(pcov))

            # Calculate R-squared of the fitted curve against the data
            y_pred = sigmoid(xdata, *popt)
            r_squared = r2_score(ydata, y_pred)

            row.update({
                'EC50': EC50,
                'EC50 SE': EC50_se,
                'n': n,
                'n SE': n_se,
                'Bottom': bottom,
                'Bottom SE': bottom_se,
                'Top': top,
                'Top SE': top_se,
                'R-squared': r_squared,
            })

    fit_rows.append(row)

# Build the results table; columns missing from the rows (the fold-change
# columns) are created as NaN and filled in below.
fit_results = pd.DataFrame(fit_rows, columns=result_columns)

# Calculate fold change
fit_results['Fold Change'] = fit_results['Top'] / fit_results['Bottom']

# Calculate standard error of fold change using error propagation for a ratio:
# relative errors of Top and Bottom are added in quadrature (no covariance
# term between the two parameters is included).
top_std = fit_results['Top SE']
bottom_std = fit_results['Bottom SE']
fit_results['Fold Change SE'] = np.sqrt((top_std / fit_results['Top'])**2 + (bottom_std / fit_results['Bottom'])**2) * fit_results['Fold Change']


# Now fit_results DataFrame contains the fit results for each mutant
output_path = 'XXX'
fit_results.to_excel(output_path, index=False)

### Fitting with fixed n

In [3]:
# Input: the fluorescence table laid out as in Supplementary Data File 1
# (one row per dose/mutant pair, one column per replicate).
data = pd.read_excel('XXX')

# Columns that identify a measurement vs. columns that hold replicate values
id_vars = ['dose', 'mutant']
value_vars = [
    'Rep1', 'Rep2', 'Rep3',
    'Rep4', 'Rep5', 'Rep6',
    'Rep7', 'Rep8', 'Rep9',
]

# Reshape wide -> long (one row per replicate measurement, value in 'MEF'),
# then order the rows by mutant and dose.
melted_df = data.melt(
    id_vars=id_vars,
    value_vars=value_vars,
    var_name='Replicate',
    value_name='MEF',
).sort_values(by=['mutant', 'dose'])

# Distinct mutants, in order of first appearance in the sorted table
mutant_names = melted_df['mutant'].unique()

# Per-mutant dose (x) and MEF (y) series, index-aligned with mutant_names
all_xdata = []
all_ydata = []

for mutant in mutant_names:
    # Rows of the long table that belong to this mutant
    mutant_df = melted_df.loc[melted_df['mutant'] == mutant]

    # Dose values and the matching fluorescence readings
    xdata, ydata = mutant_df['dose'], mutant_df['MEF']

    all_xdata.append(xdata)
    all_ydata.append(ydata)

# all_xdata[i] / all_ydata[i] now hold the doses / MEF values of mutant_names[i]


# Hill function whose exponent 'n' is held fixed rather than fitted
def sigmoid(x, EC50, bottom, top, n='n'):  # Fill 'n' with fixed value
    """Evaluate the Hill function at dose ``x`` with a fixed exponent ``n``.

    Computes ``bottom + (top - bottom) * x**n / (EC50**n + x**n)``.

    The default ``n='n'`` is a placeholder string: replace it with the
    numeric Hill coefficient to hold fixed before using the default, or pass
    ``n`` explicitly. Only EC50, bottom and top are positional parameters
    ahead of ``n``.

    Parameters:
        x: dose value(s); anything that supports ``**`` and arithmetic.
        EC50: dose at which the response is halfway between bottom and top.
        bottom: response value when the ``x**n`` term is zero.
        top: response value approached as ``x**n`` dominates ``EC50**n``.
        n: fixed Hill coefficient (exponent applied to both x and EC50).

    Returns:
        The response, with the same shape as ``x``.
    """
    dose_term = x**n
    half_max_term = EC50**n
    span = top - bottom
    return bottom + (span * dose_term) / (half_max_term + dose_term)

# Initial guess for the fitted parameters [EC50, bottom, top], followed by the
# fixed Hill coefficient. Replace the 'n' placeholder with the same number
# used as the default of sigmoid()'s n argument; it is only copied into the
# results table here and is never passed to curve_fit.
p0 = [0.000001, 1, 10, 'n']

# Column layout of the results table. 'Fold Change' and 'Fold Change SE' are
# listed here to fix their position; they are computed after all fits are done.
result_columns = ['mutant', 'EC50', 'EC50 SE', 'n', 'Bottom', 'Bottom SE', 'Top', 'Top SE', 'Fold Change', 'Fold Change SE', 'R-squared']

# Per-fit columns; these are reported as NaN when a mutant cannot be fitted
fit_columns = ['EC50', 'EC50 SE', 'n', 'Bottom', 'Bottom SE', 'Top', 'Top SE', 'R-squared']

# One dict per mutant. DataFrame.append() was removed in pandas 2.0, so the
# rows are collected in a list and the DataFrame is built once after the loop.
fit_rows = []

# Iterate over each mutant
for mutant in mutant_names:
    # Filter the melted DataFrame for the current mutant
    mutant_df = melted_df[melted_df['mutant'] == mutant]

    # Extract xdata (dose values) and ydata (MEF values) for the current mutant
    xdata = mutant_df['dose']
    ydata = mutant_df['MEF']

    # Remove rows with NaN values (missing replicates)
    valid_indices = ~ydata.isnull()
    xdata = xdata[valid_indices]
    ydata = ydata[valid_indices]

    # Start from an all-NaN row; it is only filled in if the fit succeeds
    row = dict.fromkeys(fit_columns, np.nan)
    row['mutant'] = mutant

    # Only EC50, bottom and top are fitted, so the guess excludes 'n'
    fitted_guess = p0[:3]

    # With fewer valid points than fitted parameters curve_fit raises an
    # exception that is not a RuntimeError, which would abort the whole run.
    # Such mutants are reported as failed fits instead.
    if len(ydata) >= len(fitted_guess):
        try:
            popt, pcov = curve_fit(sigmoid, xdata, ydata, p0=fitted_guess)
        except RuntimeError:
            # Raised by curve_fit when the least-squares minimisation fails;
            # the NaN row prepared above records the failure.
            pass
        else:
            EC50, bottom, top = popt
            # Standard errors: square roots of the covariance diagonal
            EC50_se, bottom_se, top_se = np.sqrt(np.diag(pcov))

            # Fixed 'n' value
            n = p0[3]

            # Calculate R-squared; sigmoid() uses its default (fixed) n here
            y_pred = sigmoid(xdata, EC50, bottom, top)
            r_squared = r2_score(ydata, y_pred)

            row.update({
                'EC50': EC50,
                'EC50 SE': EC50_se,
                'n': n,
                'Bottom': bottom,
                'Bottom SE': bottom_se,
                'Top': top,
                'Top SE': top_se,
                'R-squared': r_squared,
            })

    fit_rows.append(row)

# Build the results table; columns missing from the rows (the fold-change
# columns) are created as NaN and filled in below.
fit_results = pd.DataFrame(fit_rows, columns=result_columns)

# Calculate fold change
fit_results['Fold Change'] = fit_results['Top'] / fit_results['Bottom']

# Calculate standard error of fold change using error propagation for a ratio:
# relative errors of Top and Bottom are added in quadrature (no covariance
# term between the two parameters is included).
top_std = fit_results['Top SE']
bottom_std = fit_results['Bottom SE']
fit_results['Fold Change SE'] = np.sqrt((top_std / fit_results['Top'])**2 + (bottom_std / fit_results['Bottom'])**2) * fit_results['Fold Change']


output_path = 'XXX'
fit_results.to_excel(output_path, index=False)