In [29]:
import os
import pandas as pd
import requests
import numpy as np
from Bio.PDB import MMCIFParser, Superimposer, PDBIO
from optparse import OptionParser
from scipy.spatial.distance import squareform
from scipy.cluster.hierarchy import linkage, dendrogram, fcluster
from collections import Counter
from Bio.PDB import MMCIFParser, Superimposer, PDBIO
from optparse import OptionParser
from scipy.spatial.distance import squareform
from scipy.cluster.hierarchy import linkage, dendrogram, fcluster
from collections import Counter
import matplotlib.pyplot as plt
import seaborn as sns
import re
from scipy.stats import norm
from pymol import cmd
import glob #need for removing cif files
import json #need for loading json files

# Import raw reactivities data

In [43]:
# Raw reactivity profiles, one tab-separated table per organism / probing reagent.
raw_ecoli_DMS = pd.read_csv(
    '../../data/reactivities_analysis/reactivities/DMS_ecoli_profile.txt', sep='\t'
)
raw_yeast_DMS = pd.read_csv(
    '../../data/reactivities_analysis/reactivities/DMS_yeast_profile.txt', sep='\t'
)
raw_human_DMS = pd.read_csv(
    '../../data/reactivities_analysis/reactivities/DMS_human_profile.txt', sep='\t'
)
raw_ecoli_ETC = pd.read_csv(
    '../../data/reactivities_analysis/reactivities/ETC_ecoli_profile.txt', sep='\t'
)

In [18]:
#removing organism name from the RNA_type column (RNA)
# regex=False: the organism names are literal substrings, so the replacement
# behaves the same on every pandas version (the default of `regex` differs
# between pandas 1.x and 2.x).
# NOTE(review): the E. coli DMS table is not stripped here; the saved output of
# the E. coli DMS split further down finds rows labelled '5S'/'16S'/'23S', so
# its labels presumably carry no organism prefix -- confirm against the file.
# NOTE(review): this cell has to run AFTER the cell that loads the raw tables.
# The saved execution counts show it was run before the tables were reloaded,
# which matches the zero-row yeast/human/ETC splits in the saved outputs below.
raw_yeast_DMS['RNA'] = raw_yeast_DMS['RNA'].str.replace('yeast', '', regex=False)
raw_human_DMS['RNA'] = raw_human_DMS['RNA'].str.replace('human', '', regex=False)
raw_ecoli_ETC['RNA'] = raw_ecoli_ETC['RNA'].str.replace('ecoli', '', regex=False)

# Import base pair information

In [37]:
# Paths of the JSON files holding the DSSR characterization of each
# reference structure selected in this work.

# Escherichia coli: PDB_ID 4YBB (X-ray, 2.10 Å)
ecoli_js = '../../data/reactivities_analysis/bp_infos/4YBB-dssr.json'

# Saccharomyces cerevisiae: PDB_ID 4V88 (X-ray, 3.00 Å)
yeast_js = '../../data/reactivities_analysis/bp_infos/4V88-dssr.json'

# Homo sapiens: PDB_ID 8QOI (cryo-EM, 1.90 Å)
human_js = '../../data/reactivities_analysis/bp_infos/8QOI-dssr.json'

In [38]:
#this function takes the path of the json file containing the DSSR output of a structure file and returns its base pair table as a dataframe
def json_2_bp_csv(dr):
    """
    Loads the base-pair records of a DSSR JSON output file into a DataFrame.

    Parameters:
    dr (str): Path of the DSSR JSON file.

    Returns:
    pd.DataFrame: One row per entry of the top-level "pairs" list, flattened
    with pd.json_normalize. The shape of the table is printed as a side effect.
    """
    with open(dr) as handle:
        dssr_output = json.load(handle)

    pairs_table = pd.json_normalize(dssr_output, record_path=['pairs'])
    print(pairs_table.shape)
    return pairs_table

In [39]:
# Base-pair tables parsed from the DSSR output of each reference structure
# (json_2_bp_csv prints the shape of every table as it is loaded).
ecoli_bp, yeast_bp, human_bp = (
    json_2_bp_csv(dssr_path) for dssr_path in (ecoli_js, yeast_js, human_js)
)

(4589, 8)
(4865, 8)
(2577, 8)


# Functions

In [7]:
#normalization function
def normalize_nt(df):
    """
    Adds nucleotide-wise normalized reactivities to a reactivity table.

    For each of A, C, G and U separately, the raw reactivities that lie between
    that nucleotide's 90th and 98th percentile (both inclusive) are averaged,
    and every raw reactivity of that nucleotide is divided by this average.
    The normalized values are then capped to the interval [-0.1, 1].

    Parameters:
    df (pd.DataFrame): Table with a 'Sequence' column (nucleotide letter) and a
        'raw_reactivity' column whose values can be converted to float.

    Returns:
    pd.DataFrame: The same object that was passed in (modified in place), with
    a new float column 'norm_reactivity_nt'. Rows whose 'Sequence' is not one
    of A/C/G/U, and rows of a nucleotide with no values in the percentile
    window, get NaN.
    """
    lower_q, upper_q = 0.90, 0.98
    lower_cap, upper_cap = -0.1, 1.0

    # Cast once, so that the percentiles, the window filter, the average and the
    # division all operate on the same float values (previously only the
    # percentiles were computed on the cast column).
    raw = df['raw_reactivity'].astype(float)

    # Positional (numpy) bookkeeping keeps this independent of the index labels.
    normalized = np.full(len(df), np.nan)

    for nt in ('A', 'C', 'G', 'U'):
        is_nt = (df['Sequence'] == nt).to_numpy()
        nt_raw = raw[is_nt]

        # 90-98th percentile window (same as the 2-8% filtration in 2-8% normalization)
        window_low = nt_raw.quantile(lower_q)
        window_high = nt_raw.quantile(upper_q)
        in_window = nt_raw.between(window_low, window_high, inclusive='both')

        # average of the raw reactivities inside the window = normalization factor
        scale = nt_raw[in_window].mean()
        normalized[is_nt] = (nt_raw / scale).to_numpy()

    # values > 1 are set to 1, values < -0.1 are set to -0.1 (NaN stays NaN)
    df['norm_reactivity_nt'] = np.clip(normalized, lower_cap, upper_cap)
    return df

In [8]:
#one-tailed p-value
def calculate_one_tailed_pvalues(df):
    """
    Adds an upper-tail p-value for every normalized reactivity.

    For each of A, C, G and U separately, a normal distribution is
    parameterized with the mean and the sample standard deviation of that
    nucleotide's 'norm_reactivity_nt' values, and each value x receives
    P(X >= x) under that distribution.

    Parameters:
    df (pd.DataFrame): Table with 'Sequence' and 'norm_reactivity_nt' columns.

    Returns:
    pd.DataFrame: The same object that was passed in (modified in place), with
    a float column 'p_value'. Rows whose 'Sequence' is not A/C/G/U get NaN; if
    all values of a nucleotide are identical, its rows get 1.0.
    """
    values = df['norm_reactivity_nt'].astype(float)

    # Float array (NaN = "no p-value") instead of a None-initialised column,
    # so that 'p_value' ends up with a numeric dtype.
    p_values = np.full(len(df), np.nan)

    for nt in ('A', 'C', 'G', 'U'):
        is_nt = (df['Sequence'] == nt).to_numpy()
        if not is_nt.any():
            continue  # nothing to score for this nucleotide

        subset = values[is_nt]
        mean_val = subset.mean()
        std_val = subset.std()

        if std_val == 0:
            # Avoid division by zero: all values are identical
            p_values[is_nt] = 1.0
        else:
            # Survival function = 1 - cdf, evaluated for the whole subset at once
            p_values[is_nt] = norm.sf(subset.to_numpy(), loc=mean_val, scale=std_val)

    df['p_value'] = p_values
    return df

In [9]:
#this function will create a base pair dataframe for each rRNA of ecoli, yeast and human
#each dataframe will contain four columns
#column 1: RNA ---> type of rRNA: 5S, 16S, 23S, 5.8S, 18S, 25S, 28S
#column 2: residue_index
#column 3: residue_ID
#column 4: pairing ----> '1' if the residue forms a base pair of interest (only WC/Wobble pairs when b=0), '0' if it does not
def _extract_residue_name(nt_id):
    """Return the non-digit residue name that follows the '.' in a nucleotide id, or None."""
    match = re.search(r'\.(\D+)\d', nt_id)
    return match.group(1) if match else None


def _paired_residue_indices(df_bp, chain_id):
    """Collect the residue numbers of `chain_id` that take part in any pair listed in `df_bp`."""
    # Nucleotide ids are matched on "<chain>." so that chain 'AA' does not also
    # match an id that belongs to a chain such as 'AAB'.
    # NOTE(review): assumes ids look like "<chain>.<name><number>" -- confirm for your DSSR version.
    chain_prefix = f"{chain_id}."
    paired = set()
    # Each side of a pair is checked against the chain separately: the partner of
    # a residue may sit in another chain, and its number must not be attributed
    # to the chain of interest.
    for nt_col, index_col in (('nt1', 'res_IND1'), ('nt2', 'res_IND2')):
        in_chain = df_bp[nt_col].str.startswith(chain_prefix, na=False)
        paired.update(df_bp.loc[in_chain, index_col].dropna().astype(int).tolist())
    return paired


def _download_and_extract_residues(pdb_id, chain_id=None):
    """
    Fetches a structure with PyMOL and maps residue numbers to residue names.

    Parameters:
    pdb_id (str): PDB ID handed to PyMOL's fetch.
    chain_id (str or None): Chain to restrict the selection to (None: all chains).

    Returns:
    dict: {residue number (int): residue name} for the selected atoms.
    """
    # Start from an empty PyMOL session so that objects left over from an
    # earlier call cannot end up in the selection.
    cmd.delete('all')

    cmd.fetch(pdb_id, async_=0)  # Fetch the structure

    # NOTE(review): kept from the original; for plain PDB IDs this branch is not taken.
    if pdb_id.endswith('.cif'):
        cmd.create(pdb_id, pdb_id)

    cmd.remove("solvent")  # Removes water molecules (solvent)
    cmd.remove("resn MG+CA+ZN+FE+NA+K+SPD+OHX+PUT+MPD+PG4")

    selection = f"{pdb_id} and chain {chain_id}" if chain_id else pdb_id

    # cmd.iterate evaluates the expression for every selected atom; atoms of the
    # same residue simply overwrite the same dictionary entry.
    res_dict = {}
    cmd.iterate(selection, "res_dict[int(resi)] = resn", space={'res_dict': res_dict})
    return res_dict


def pair_df(df, p, c, R, b):
    """
    Builds a per-residue pairing table for one rRNA chain.

    Parameters:
    df (pd.DataFrame): Base-pair table with 'nt1', 'nt2' and 'name' columns.
        It is modified in place: the helper columns 'res_IND1', 'res_IND2',
        'res_ID1' and 'res_ID2' are added.
    p (str): PDB ID of the structure (fetched through PyMOL).
    c (str): Chain ID of the rRNA of interest.
    R (str): RNA type written to the 'RNA' column, such as 5S, 5.8S, 16S, 18S, 23S, 25S, 28S.
    b (int): 0 -> only pairs named 'WC' or 'Wobble' are considered;
        1 -> all base pairs are considered.

    Returns:
    pd.DataFrame: Columns 'RNA', 'residue_index', 'residue_ID' and 'pairing',
    one row per residue number found in chain `c` of the structure. 'pairing'
    is 1.0 if the residue takes part in a considered base pair, otherwise 0.0.

    Raises:
    ValueError: If `b` is neither 0 nor 1.
    """
    if b not in (0, 1):
        raise ValueError(f"b must be 0 (canonical pairs only) or 1 (all pairs), got {b!r}")

    #adding columns for the residue index and residue ID
    df['res_IND1'] = df['nt1'].str.extract(r'(\d+)$', expand=False)
    df['res_IND2'] = df['nt2'].str.extract(r'(\d+)$', expand=False)
    df['res_ID1'] = df['nt1'].apply(_extract_residue_name)
    df['res_ID2'] = df['nt2'].apply(_extract_residue_name)

    #filtering base pairs of interest
    df_bp = df[df['name'].isin(['WC', 'Wobble'])] if b == 0 else df

    #residue numbers of chain c that participate in any base pair of interest
    bp_res = _paired_residue_indices(df_bp, c)

    #residue number -> residue name for the residues within the chain of interest
    ind_id_dict = _download_and_extract_residues(p, c)

    bp_pair = pd.DataFrame(list(ind_id_dict.items()), columns=['residue_index', 'residue_ID'])
    bp_pair.insert(0, 'RNA', R)

    #assigning paired (1) or unpaired (0) to every residue in one vectorized step
    bp_pair['pairing'] = bp_pair['residue_index'].isin(bp_res).astype(float)

    return bp_pair

In [10]:
#fills the gaps in an integer index column by inserting placeholder rows for the missing values
def fill_missing_res_index(df, column_name, start=1):
    """
    Adds missing integers in a specified column and inserts new rows in a DataFrame.

    NOTE(review): this function is defined a second time further down in the
    notebook; when the cells are run in order, the later definition replaces
    this one. Consider keeping a single definition.

    Parameters:
    df (pd.DataFrame): The input DataFrame (not modified).
    column_name (str): The column containing integer values.
    start (int): First integer of the range that must be present (default 1).

    Returns:
    pd.DataFrame: A new DataFrame sorted by `column_name` with a fresh 0..n-1
    index, containing one extra row for every integer between `start` and the
    column maximum that was missing. In the inserted rows every other column
    is NaN. An input without any value in `column_name` is returned unchanged
    (as a copy with a fresh index).
    """
    present = df[column_name].dropna()
    if present.empty:
        # No maximum to build a range from; nothing can be filled.
        return df.reset_index(drop=True)

    existing_values = set(present)
    missing_values = [
        value for value in range(start, int(present.max()) + 1)
        if value not in existing_values
    ]

    if missing_values:
        missing_df = pd.DataFrame({column_name: missing_values})
        df_filled = pd.concat([df, missing_df], ignore_index=True)
    else:
        # Nothing to insert: skip the concat with an empty frame altogether.
        df_filled = df

    return df_filled.sort_values(by=column_name, kind='stable').reset_index(drop=True)

In [11]:
# shifts every value of an index column that lies above a threshold down by one
def subtract_after_value(df, column_name, threshold_value):
    """
    Returns a copy of a DataFrame in which every value of one column that is
    greater than a threshold has been decreased by 1.

    Parameters:
    df (pd.DataFrame): The input DataFrame (left untouched).
    column_name (str): The column containing integer values.
    threshold_value (int): Values strictly above this threshold are decreased.

    Returns:
    pd.DataFrame: The adjusted copy.
    """
    shifted = df.copy()
    above_threshold = shifted[column_name].gt(threshold_value)
    shifted.loc[above_threshold, column_name] = shifted.loc[above_threshold, column_name] - 1
    return shifted

# Normalizing reactivities by nucleotides

In [53]:
# normalizing reactivities by nucleotides
# (normalize_nt adds the 'norm_reactivity_nt' column to the table it is given
# and returns that table)
raw_ecoli_DMS1, raw_yeast_DMS1, raw_human_DMS1, raw_ecoli_ETC1 = (
    normalize_nt(profile)
    for profile in (raw_ecoli_DMS, raw_yeast_DMS, raw_human_DMS, raw_ecoli_ETC)
)

# Calculating one-tailed p-values for reactivities

In [54]:
#calculating one-tailed p-values for all reactivities
# (calculate_one_tailed_pvalues adds the 'p_value' column to the table it is
# given and returns that table)
raw_ecoli_DMS2, raw_yeast_DMS2, raw_human_DMS2, raw_ecoli_ETC2 = (
    calculate_one_tailed_pvalues(profile)
    for profile in (raw_ecoli_DMS1, raw_yeast_DMS1, raw_human_DMS1, raw_ecoli_ETC1)
)

# Extracting base pair information for different rRNAs of corresponding organisms

In [46]:
#extracting base pair information for ecoli rRNAs (5S, 16S, and 23S)
#structure: PDB_ID 4YBB; b=0 -> only canonical base pairs are considered

#ecoli_5S (chain CB)
e5S = pair_df(ecoli_bp, p='4YBB', c='CB', R='5S', b=0)

#ecoli_16S (chain AA)
e16S = pair_df(ecoli_bp, p='4YBB', c='AA', R='16S', b=0)

#ecoli_23S (chain CA)
e23S = pair_df(ecoli_bp, p='4YBB', c='CA', R='23S', b=0)

 ExecutiveLoad-Detail: Detected mmCIF
 ExecutiveLoad-Detail: Detected mmCIF
 ExecutiveLoad-Detail: Detected mmCIF


In [48]:
#extracting base pair information for yeast rRNAs (5S, 5.8S, 18S, 25S)
#structure: PDB_ID 4V88; b=0 -> only canonical base pairs are considered

#yeast_5S (chain A3)
y5S = pair_df(yeast_bp, p='4V88', c='A3', R='5S', b=0)

#yeast_5.8S (chain A4)
y58S = pair_df(yeast_bp, p='4V88', c='A4', R='5.8S', b=0)

#yeast_18S (chain A2)
y18S = pair_df(yeast_bp, p='4V88', c='A2', R='18S', b=0)

#yeast_25S (chain A1)
y25S = pair_df(yeast_bp, p='4V88', c='A1', R='25S', b=0)

 ExecutiveLoad-Detail: Detected mmCIF
 ExecutiveLoad-Detail: Detected mmCIF
 ExecutiveLoad-Detail: Detected mmCIF
 ExecutiveLoad-Detail: Detected mmCIF


In [49]:
#extracting base pair information for human rRNAs (5S, 18S, 28S)
#structure: PDB_ID 8QOI; b=0 -> only canonical base pairs are considered

#human_5S (chain L7)
h5S = pair_df(human_bp, p='8QOI', c='L7', R='5S', b=0)

#human_18S (chain S2)
h18S = pair_df(human_bp, p='8QOI', c='S2', R='18S', b=0)

#human_28S (chain L5)
h28S = pair_df(human_bp, p='8QOI', c='L5', R='28S', b=0)

 ExecutiveLoad-Detail: Detected mmCIF
 ExecutiveLoad-Detail: Detected mmCIF
 ExecutiveLoad-Detail: Detected mmCIF


In [51]:
#remove the structure files (in .cif format) generated in the steps above
# NOTE(review): the pattern matches EVERY .cif file in the current working
# directory, not only the ones fetched by this notebook.
cif_files = glob.glob("*.cif")

for cif_path in cif_files:
    try:
        os.remove(cif_path)
    except Exception as err:
        # Report the failure and carry on with the remaining files
        print(f"Error removing {cif_path}: {err}")
    else:
        print(f"Removed: {cif_path}")

# Splitting reactivities by rRNA types to compare with the base pair information

In [56]:
#splitting by RNA type for E. coli (for DMS reactivities)
# For every rRNA two shapes are printed: the base pair table first, then the
# matching slice of the reactivity table.

raw_ecoli_DMS2_5S = raw_ecoli_DMS2.loc[raw_ecoli_DMS2['RNA'] == '5S'].reset_index(drop=True)
print('-------------------------------------------ecoli---->5S')
print(e5S.shape)
print(raw_ecoli_DMS2_5S.shape)

raw_ecoli_DMS2_16S = raw_ecoli_DMS2.loc[raw_ecoli_DMS2['RNA'] == '16S'].reset_index(drop=True)
print('-------------------------------------------ecoli---->16S')
print(e16S.shape)
print(raw_ecoli_DMS2_16S.shape)

raw_ecoli_DMS2_23S = raw_ecoli_DMS2.loc[raw_ecoli_DMS2['RNA'] == '23S'].reset_index(drop=True)
print('-------------------------------------------ecoli---->23S')
print(e23S.shape)
print(raw_ecoli_DMS2_23S.shape)

-------------------------------------------ecoli---->5S
(118, 4)
(120, 29)
-------------------------------------------ecoli---->16S
(1534, 4)
(1542, 29)
-------------------------------------------ecoli---->23S
(2898, 4)
(2904, 29)


In [65]:
#splitting by RNA type for E. coli (for ETC reactivities)
# NOTE(review): the saved output of this cell shows zero rows for every rRNA,
# i.e. no 'RNA' label was exactly '5S'/'16S'/'23S' when it ran. Make sure the
# organism prefix has been stripped from the freshly loaded table before
# running this cell.

raw_ecoli_ETC2_5S = raw_ecoli_ETC2.loc[raw_ecoli_ETC2['RNA'] == '5S'].reset_index(drop=True)
print('-------------------------------------------ecoli---->5S')
print(e5S.shape)
print(raw_ecoli_ETC2_5S.shape)

raw_ecoli_ETC2_16S = raw_ecoli_ETC2.loc[raw_ecoli_ETC2['RNA'] == '16S'].reset_index(drop=True)
print('-------------------------------------------ecoli---->16S')
print(e16S.shape)
print(raw_ecoli_ETC2_16S.shape)

raw_ecoli_ETC2_23S = raw_ecoli_ETC2.loc[raw_ecoli_ETC2['RNA'] == '23S'].reset_index(drop=True)
print('-------------------------------------------ecoli---->23S')
print(e23S.shape)
print(raw_ecoli_ETC2_23S.shape)

-------------------------------------------ecoli---->5S
(118, 4)
(0, 29)
-------------------------------------------ecoli---->16S
(1534, 4)
(0, 29)
-------------------------------------------ecoli---->23S
(2898, 4)
(0, 29)


In [58]:
#splitting by RNA type for yeast
# NOTE(review): the saved output of this cell shows zero rows for every rRNA,
# i.e. no 'RNA' label was exactly '5S'/'5.8S'/'18S'/'25S' when it ran. Make
# sure the organism prefix has been stripped from the freshly loaded table
# before running this cell.

raw_yeast_DMS2_5S = raw_yeast_DMS2.loc[raw_yeast_DMS2['RNA'] == '5S'].reset_index(drop=True)
print('-------------------------------------------yeast---->5S')
print(y5S.shape)
print(raw_yeast_DMS2_5S.shape)

raw_yeast_DMS2_58S = raw_yeast_DMS2.loc[raw_yeast_DMS2['RNA'] == '5.8S'].reset_index(drop=True)
print('-------------------------------------------yeast---->5.8S')
print(y58S.shape)
print(raw_yeast_DMS2_58S.shape)

raw_yeast_DMS2_18S = raw_yeast_DMS2.loc[raw_yeast_DMS2['RNA'] == '18S'].reset_index(drop=True)
print('-------------------------------------------yeast---->18S')
print(y18S.shape)
print(raw_yeast_DMS2_18S.shape)

raw_yeast_DMS2_25S = raw_yeast_DMS2.loc[raw_yeast_DMS2['RNA'] == '25S'].reset_index(drop=True)
print('-------------------------------------------yeast---->25S')
print(y25S.shape)
print(raw_yeast_DMS2_25S.shape)

-------------------------------------------yeast---->5S
(121, 4)
(0, 29)
-------------------------------------------yeast---->5.8S
(158, 4)
(0, 29)
-------------------------------------------yeast---->18S
(1781, 4)
(0, 29)
-------------------------------------------yeast---->25S
(3149, 4)
(0, 29)


In [61]:
#splitting by RNA type for human
# NOTE(review): the saved output of this cell shows zero rows for every rRNA,
# i.e. no 'RNA' label was exactly '5S'/'18S'/'28S' when it ran. Make sure the
# organism prefix has been stripped from the freshly loaded table before
# running this cell.

raw_human_DMS2_5S = raw_human_DMS2.loc[raw_human_DMS2['RNA'] == '5S'].reset_index(drop=True)
print('-------------------------------------------human---->5S')
print(h5S.shape)
print(raw_human_DMS2_5S.shape)

raw_human_DMS2_18S = raw_human_DMS2.loc[raw_human_DMS2['RNA'] == '18S'].reset_index(drop=True)
print('-------------------------------------------human---->18S')
print(h18S.shape)
print(raw_human_DMS2_18S.shape)

raw_human_DMS2_28S = raw_human_DMS2.loc[raw_human_DMS2['RNA'] == '28S'].reset_index(drop=True)
print('-------------------------------------------human---->28S')
print(h28S.shape)
print(raw_human_DMS2_28S.shape)

-------------------------------------------human---->5S
(120, 4)
(0, 29)
-------------------------------------------human---->18S
(1740, 4)
(0, 29)
-------------------------------------------human---->28S
(3773, 4)
(0, 29)


In [66]:
def fill_missing_res_index(df, column_name, start=1):
    """
    Adds missing integers in a specified column and inserts new rows in a DataFrame.

    NOTE(review): a function with this name is also defined earlier in the
    notebook (Functions section); this later definition is the one in effect
    for the calls below. Consider keeping a single definition.

    Parameters:
    df (pd.DataFrame): The input DataFrame (not modified).
    column_name (str): The column containing integer values.
    start (int): First integer of the range that must be present (default 1).

    Returns:
    pd.DataFrame: A new DataFrame sorted by `column_name` with a fresh 0..n-1
    index, containing one extra row for every integer between `start` and the
    column maximum that was missing. In the inserted rows every other column
    is NaN. An input without any value in `column_name` is returned unchanged
    (as a copy with a fresh index).
    """
    present = df[column_name].dropna()
    if present.empty:
        # No maximum to build a range from; nothing can be filled.
        return df.reset_index(drop=True)

    existing_values = set(present)
    missing_values = [
        value for value in range(start, int(present.max()) + 1)
        if value not in existing_values
    ]

    if missing_values:
        missing_df = pd.DataFrame({column_name: missing_values})
        df_filled = pd.concat([df, missing_df], ignore_index=True)
    else:
        # Nothing to insert: skip the concat with an empty frame altogether.
        df_filled = df

    return df_filled.sort_values(by=column_name, kind='stable').reset_index(drop=True)

In [67]:
def subtract_after_value(df, column_name, threshold_value):
    """
    Returns a copy of a DataFrame in which every value of one column that is
    greater than a threshold has been decreased by 1.

    Parameters:
    df (pd.DataFrame): The input DataFrame (left untouched).
    column_name (str): The column containing integer values.
    threshold_value (int): Values strictly above this threshold are decreased.

    Returns:
    pd.DataFrame: The adjusted copy.
    """
    shifted = df.copy()
    above_threshold = shifted[column_name].gt(threshold_value)
    shifted.loc[above_threshold, column_name] = shifted.loc[above_threshold, column_name] - 1
    return shifted

In [68]:
# Insert placeholder rows for residue numbers that are absent from the E. coli
# pair tables, so that 'residue_index' runs without gaps from 1 to its maximum.
e5S, e16S, e23S = (
    fill_missing_res_index(pairs, 'residue_index') for pairs in (e5S, e16S, e23S)
)

In [69]:
# Same gap filling for the yeast 18S and 25S pair tables.
y18S, y25S = (
    fill_missing_res_index(pairs, 'residue_index') for pairs in (y18S, y25S)
)

In [70]:
# Shift every human 28S residue number above 880 down by one.
# NOTE(review): 880 is a magic number; presumably it reconciles the structure
# numbering with the numbering of the reactivity profile -- confirm and document.
# NOTE(review): re-running this cell shifts the numbers again (not idempotent).
h28S = subtract_after_value(h28S, column_name='residue_index', threshold_value=880)

In [71]:
# Same gap filling for the human 18S and 28S pair tables.
h18S, h28S = (
    fill_missing_res_index(pairs, 'residue_index') for pairs in (h18S, h28S)
)

In [73]:
# One worksheet per table; the dictionary order is the order of the sheets in the workbook.
excel_sheets = {
    # reactivities, E. coli (DMS)
    'ecoli_5S_DMS': raw_ecoli_DMS2_5S,
    'ecoli_16S_DMS': raw_ecoli_DMS2_16S,
    'ecoli_23S_DMS': raw_ecoli_DMS2_23S,
    # reactivities, E. coli (ETC)
    'ecoli_5S_ETC': raw_ecoli_ETC2_5S,
    'ecoli_16S_ETC': raw_ecoli_ETC2_16S,
    'ecoli_23S_ETC': raw_ecoli_ETC2_23S,
    # reactivities, yeast (DMS)
    'yeast_5S_DMS': raw_yeast_DMS2_5S,
    'yeast_58S_DMS': raw_yeast_DMS2_58S,
    'yeast_18S_DMS': raw_yeast_DMS2_18S,
    'yeast_25S_DMS': raw_yeast_DMS2_25S,
    # reactivities, human (DMS)
    'human_5S_DMS': raw_human_DMS2_5S,
    'human_18S_DMS': raw_human_DMS2_18S,
    'human_28S_DMS': raw_human_DMS2_28S,
    # base pair tables, E. coli
    'ecoli_5S_pairs': e5S,
    'ecoli_16S_pairs': e16S,
    'ecoli_23S_pairs': e23S,
    # base pair tables, yeast
    'yeast_5S_pairs': y5S,
    'yeast_58S_pairs': y58S,
    'yeast_18S_pairs': y18S,
    'yeast_25S_pairs': y25S,
    # base pair tables, human
    'human_5S_pairs': h5S,
    'human_18S_pairs': h18S,
    'human_28S_pairs': h28S,
}

with pd.ExcelWriter('../../results/reactivities_and_base_pairs.xlsx', engine='xlsxwriter') as writer:
    for sheet_label, sheet_table in excel_sheets.items():
        sheet_table.to_excel(writer, sheet_name=sheet_label, index=False)