# Thermostability

Importing necessary libraries:

In [None]:
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from pathlib import Path
from scipy.stats import linregress
import seaborn as sns
from typing import List, Dict, Tuple

from utils import *

%matplotlib inline

## Reading the files

Reading the melting curves:

In [None]:
def _read_fluorescence_curves(path):
    """Load one tab-separated fluorescence export and clean it.

    The first line of the file is skipped on read. The frame is then passed
    through rename_columns_fl, subtract_blanks_fl and filter_extra_fl, in
    that order, and the result is returned.
    """
    raw = pd.read_csv(path, sep="\t", skiprows=1, low_memory=False)
    renamed = rename_columns_fl(raw)
    blank_corrected = subtract_blanks_fl(renamed)
    return filter_extra_fl(blank_corrected)


# One melting-curve frame per condition: PBS first, then urea at 1-8 M.
pbs_fl = _read_fluorescence_curves("data/220627_cgre_50aq-deg_pbs_fluocurves2.txt")
urea1M_fl = _read_fluorescence_curves(
    "data/220627_cgre_50aq-deg_urea1M_fluocurves2.txt"
)
urea2M_fl = _read_fluorescence_curves(
    "data/220628_cgre_50aq-deg_urea2M_fluocurves2.txt"
)
urea3M_fl = _read_fluorescence_curves(
    "data/220628_cgre_50aq-deg_urea3M_fluocurves.txt"
)
urea4M_fl = _read_fluorescence_curves(
    "data/220630_melting_cgre_50acq-deg_urea4M_fluocurves.txt"
)
urea5M_fl = _read_fluorescence_curves(
    "data/220701_melting_cgre_50acq-deg_urea5M_fluocurves.txt"
)
urea6M_fl = _read_fluorescence_curves(
    "data/220701_melting_cgre_50acq-deg_urea6M_fluorcurves.txt"
)
urea7M_fl = _read_fluorescence_curves(
    "data/220704_cgre_50aq-deg_urea7M_fluocurves.txt"
)
urea8M_fl = _read_fluorescence_curves(
    "data/220704_cgre_50aq-deg_urea8M_fluocurves.txt"
)

Reading the derivative of melting curves, $\frac{dF}{dT}$

In [None]:
def _read_melting_derivatives(path):
    """Load one tab-separated dF/dT export and clean it.

    The frame is passed through subtract_blanks_df, filter_extra_df and
    reformat_df, in that order, and the result is returned.
    """
    raw = pd.read_csv(path, sep="\t")
    blank_corrected = subtract_blanks_df(raw)
    filtered = filter_extra_df(blank_corrected)
    return reformat_df(filtered)


# One dF/dT frame per condition: PBS first, then urea at 1-8 M.
pbs_df = _read_melting_derivatives("data/220627_cgre_50aq-deg_pbs_meltingcurves.txt")
urea1M_df = _read_melting_derivatives(
    "data/220627_cgre_50aq-deg_urea1M_meltingcurves.txt"
)
urea2M_df = _read_melting_derivatives(
    "data/220628_cgre_50aq-deg_urea2M_meltingcurves.txt"
)
urea3M_df = _read_melting_derivatives(
    "data/220628_cgre_50aq-deg_urea3M_meltingcurves.txt"
)
urea4M_df = _read_melting_derivatives(
    "data/220630_melting_cgre_50acq-deg_urea4M_meltingcurves.txt"
)
urea5M_df = _read_melting_derivatives(
    "data/220701_melting_cgre_50acq-deg_urea5M_meltingcurves.txt"
)
urea6M_df = _read_melting_derivatives(
    "data/220701_cgre_50aq-deg_urea6M_meltingcurves.txt"
)
urea7M_df = _read_melting_derivatives(
    "data/220704_cgre_50aq-deg_urea7M_meltingcurves.txt"
)
urea8M_df = _read_melting_derivatives(
    "data/220704_cgre_50aq-deg_urea8M_meltingcurves.txt"
)

## Plotting the curves

All melting curves and their derivatives:

PBS

In [None]:
# PBS: raw fluorescence curves first, then the dF/dT derivative curves.
# NOTE(review): the last argument looks like a name/tag for the figure — confirm in utils.
plot_curves(pbs_fl, "Fluorescence", "PBS")
plot_curves(pbs_df, "dF/dT", "PBS_dF")

Urea 1M:

In [None]:
# Urea 1 M: raw fluorescence curves first, then the dF/dT derivative curves.
# NOTE(review): the last argument looks like a name/tag for the figure — confirm in utils.
plot_curves(urea1M_fl, "Fluorescence", "Urea_1M")
plot_curves(urea1M_df, "dF/dT", "Urea_1M_dF")

Urea 2M:

In [None]:
# Urea 2 M: raw fluorescence curves first, then the dF/dT derivative curves.
# NOTE(review): the last argument looks like a name/tag for the figure — confirm in utils.
plot_curves(urea2M_fl, "Fluorescence", "Urea_2M")
plot_curves(urea2M_df, "dF/dT", "Urea_2M_dF")

Urea 3M:

In [None]:
# Urea 3 M: raw fluorescence curves first, then the dF/dT derivative curves.
# The tags are "Urea_3M" / "Urea_3M_dF"; they previously reused the 2 M tags, so the
# 3 M output was indistinguishable from (and could clobber) the 2 M output.
plot_curves(urea3M_fl, "Fluorescence", "Urea_3M")
plot_curves(urea3M_df, "dF/dT", "Urea_3M_dF")

Urea 4M:

In [None]:
# Urea 4 M: raw fluorescence curves first, then the dF/dT derivative curves.
# NOTE(review): the last argument looks like a name/tag for the figure — confirm in utils.
plot_curves(urea4M_fl, "Fluorescence", "Urea_4M")
plot_curves(urea4M_df, "dF/dT", "Urea_4M_dF")

Urea 5M:

In [None]:
# Urea 5 M: raw fluorescence curves first, then the dF/dT derivative curves.
# NOTE(review): the last argument looks like a name/tag for the figure — confirm in utils.
plot_curves(urea5M_fl, "Fluorescence", "Urea_5M")
plot_curves(urea5M_df, "dF/dT", "Urea_5M_dF")

Urea 6M:

In [None]:
# Urea 6 M: raw fluorescence curves first, then the dF/dT derivative curves.
# NOTE(review): the last argument looks like a name/tag for the figure — confirm in utils.
plot_curves(urea6M_fl, "Fluorescence", "Urea_6M")
plot_curves(urea6M_df, "dF/dT", "Urea_6M_dF")

Urea 7M:

In [None]:
# Urea 7 M: raw fluorescence curves first, then the dF/dT derivative curves.
# NOTE(review): the last argument looks like a name/tag for the figure — confirm in utils.
plot_curves(urea7M_fl, "Fluorescence", "Urea_7M")
plot_curves(urea7M_df, "dF/dT", "Urea_7M_dF")

Urea 8M:

In [None]:
# Urea 8 M: raw fluorescence curves first, then the dF/dT derivative curves.
# NOTE(review): the last argument looks like a name/tag for the figure — confirm in utils.
plot_curves(urea8M_fl, "Fluorescence", "Urea_8M")
plot_curves(urea8M_df, "dF/dT", "Urea_8M_dF")

### Plotting curves per gene, separated for each well


Here, we inspect each well separately to spot outlier wells.

PBS:

In [None]:
# PBS: per-gene curves with wells kept separate, for fluorescence and then dF/dT.
# These plots are what the outlier-well lists further down are based on.
plot_curves_per_gene(pbs_fl, "Fluorescence", "PBS")
plot_curves_per_gene(pbs_df, "dF/dT", "PBS_dF")

Urea 1M:

In [None]:
# Urea 1 M: per-gene curves with wells kept separate, for fluorescence and then dF/dT.
# These plots are what the outlier-well lists further down are based on.
plot_curves_per_gene(urea1M_fl, "Fluorescence", "Urea_1M")
plot_curves_per_gene(urea1M_df, "dF/dT", "Urea_1M_dF")

Urea 2M:

In [None]:
# Urea 2 M: per-gene curves with wells kept separate, for fluorescence and then dF/dT.
# These plots are what the outlier-well lists further down are based on.
plot_curves_per_gene(urea2M_fl, "Fluorescence", "Urea_2M")
plot_curves_per_gene(urea2M_df, "dF/dT", "Urea_2M_dF")

Urea 3M:

In [None]:
# Urea 3 M: per-gene curves with wells kept separate, for fluorescence and then dF/dT.
# These plots are what the outlier-well lists further down are based on.
plot_curves_per_gene(urea3M_fl, "Fluorescence", "Urea_3M")
plot_curves_per_gene(urea3M_df, "dF/dT", "Urea_3M_dF")

Urea 4M:

In [None]:
# Urea 4 M: per-gene curves with wells kept separate, for fluorescence and then dF/dT.
# These plots are what the outlier-well lists further down are based on.
plot_curves_per_gene(urea4M_fl, "Fluorescence", "Urea_4M")
plot_curves_per_gene(urea4M_df, "dF/dT", "Urea_4M_dF")

Urea 5M:

In [None]:
# Urea 5 M: per-gene curves with wells kept separate, for fluorescence and then dF/dT.
# These plots are what the outlier-well lists further down are based on.
plot_curves_per_gene(urea5M_fl, "Fluorescence", "Urea_5M")
plot_curves_per_gene(urea5M_df, "dF/dT", "Urea_5M_dF")

Urea 6M:

In [None]:
# Urea 6 M: per-gene curves with wells kept separate, for fluorescence and then dF/dT.
# These plots are what the outlier-well lists further down are based on.
plot_curves_per_gene(urea6M_fl, "Fluorescence", "Urea_6M")
plot_curves_per_gene(urea6M_df, "dF/dT", "Urea_6M_dF")

Urea 7M:

In [None]:
# Urea 7 M: per-gene curves with wells kept separate, for fluorescence and then dF/dT.
# These plots are what the outlier-well lists further down are based on.
plot_curves_per_gene(urea7M_fl, "Fluorescence", "Urea_7M")
plot_curves_per_gene(urea7M_df, "dF/dT", "Urea_7M_dF")

Urea 8M:

In [None]:
# Urea 8 M: per-gene curves with wells kept separate, for fluorescence and then dF/dT.
# These plots are what the outlier-well lists further down are based on.
plot_curves_per_gene(urea8M_fl, "Fluorescence", "Urea_8M")
plot_curves_per_gene(urea8M_df, "dF/dT", "Urea_8M_dF")

## Wells to exclude:

Outliers based on the curves above:

In [None]:
# Well IDs judged to be outliers, one list per condition (order kept as recorded).
pbs_exclude = "A9 A10 A1 A8".split()
urea1M_exclude = "A3 A9 B11 A11 A10 A8".split()
urea2M_exclude = "E6 A9 B11 A11 A10".split()
urea3M_exclude = "E11 A11 F1 A8 A4".split()
urea4M_exclude = "B11 A11 A4 A9 B9 B7 A10".split()
urea5M_exclude = "A11 A10 A8 A4 A9 A7".split()
urea6M_exclude = "C11 A11 A8 A4 A9 A10".split()
urea7M_exclude = "A11 F1 A4 A9 A10".split()
urea8M_exclude = "A11 B11 A9 A10".split()

In [None]:
# Drop the outlier wells from the fluorescence and dF/dT frames of every condition.
# All calls are evaluated first (same order as before: PBS, then urea 1-8 M); the
# resulting pairs are then unpacked back onto the original names.
_wells_removed = [
    exclude_wells(pbs_fl, pbs_df, pbs_exclude),
    exclude_wells(urea1M_fl, urea1M_df, urea1M_exclude),
    exclude_wells(urea2M_fl, urea2M_df, urea2M_exclude),
    exclude_wells(urea3M_fl, urea3M_df, urea3M_exclude),
    exclude_wells(urea4M_fl, urea4M_df, urea4M_exclude),
    exclude_wells(urea5M_fl, urea5M_df, urea5M_exclude),
    exclude_wells(urea6M_fl, urea6M_df, urea6M_exclude),
    exclude_wells(urea7M_fl, urea7M_df, urea7M_exclude),
    exclude_wells(urea8M_fl, urea8M_df, urea8M_exclude),
]
(
    (pbs_fl, pbs_df),
    (urea1M_fl, urea1M_df),
    (urea2M_fl, urea2M_df),
    (urea3M_fl, urea3M_df),
    (urea4M_fl, urea4M_df),
    (urea5M_fl, urea5M_df),
    (urea6M_fl, urea6M_df),
    (urea7M_fl, urea7M_df),
    (urea8M_fl, urea8M_df),
) = _wells_removed

## Normalizing signals

In [None]:
# Apply normalize_signal_per_well to the (fluorescence, dF/dT) pair of every condition.
# Each pair is evaluated in the same order as before: fluorescence first, then dF/dT.
pbs_fl_norm, pbs_df_norm = map(normalize_signal_per_well, (pbs_fl, pbs_df))
urea1M_fl_norm, urea1M_df_norm = map(normalize_signal_per_well, (urea1M_fl, urea1M_df))
urea2M_fl_norm, urea2M_df_norm = map(normalize_signal_per_well, (urea2M_fl, urea2M_df))
urea3M_fl_norm, urea3M_df_norm = map(normalize_signal_per_well, (urea3M_fl, urea3M_df))
urea4M_fl_norm, urea4M_df_norm = map(normalize_signal_per_well, (urea4M_fl, urea4M_df))
urea5M_fl_norm, urea5M_df_norm = map(normalize_signal_per_well, (urea5M_fl, urea5M_df))
urea6M_fl_norm, urea6M_df_norm = map(normalize_signal_per_well, (urea6M_fl, urea6M_df))
urea7M_fl_norm, urea7M_df_norm = map(normalize_signal_per_well, (urea7M_fl, urea7M_df))
urea8M_fl_norm, urea8M_df_norm = map(normalize_signal_per_well, (urea8M_fl, urea8M_df))

### Plotting curves as mean of each well:

PBS:

In [None]:
# PBS, un-normalized signals. The return value is stored as pbs_melting_temperatures,
# but that name is reassigned by the normalized PBS cell further down.
pbs_melting_temperatures = plot_melting_curves_per_gene_mean(pbs_fl, pbs_df, "PBS")

Urea 1M:

In [None]:
# Urea 1 M, un-normalized signals. The return value is stored as urea1M_melting_temperatures,
# but that name is reassigned by the normalized urea 1 M cell further down.
urea1M_melting_temperatures = plot_melting_curves_per_gene_mean(urea1M_fl, urea1M_df, "Urea_1M")

Urea 2M:

In [None]:
# Urea 2 M, un-normalized signals. The return value is stored as urea2M_melting_temperatures,
# but that name is reassigned by the normalized urea 2 M cell further down.
urea2M_melting_temperatures = plot_melting_curves_per_gene_mean(urea2M_fl, urea2M_df, "Urea_2M")

Urea 3M:

In [None]:
# Urea 3 M, un-normalized signals. The return value is stored as urea3M_melting_temperatures,
# but that name is reassigned by the normalized urea 3 M cell further down.
urea3M_melting_temperatures = plot_melting_curves_per_gene_mean(urea3M_fl, urea3M_df, "Urea_3M")

Urea 4M:

In [None]:
# Urea 4 M, un-normalized signals. The return value is stored as urea4M_melting_temperatures,
# but that name is reassigned by the normalized urea 4 M cell further down.
urea4M_melting_temperatures = plot_melting_curves_per_gene_mean(urea4M_fl, urea4M_df, "Urea_4M")

Urea 5M:

In [None]:
# Urea 5 M, un-normalized signals. The return value is stored as urea5M_melting_temperatures,
# but that name is reassigned by the normalized urea 5 M cell further down.
urea5M_melting_temperatures = plot_melting_curves_per_gene_mean(urea5M_fl, urea5M_df, "Urea_5M")

Urea 6M:

In [None]:
# Urea 6 M, un-normalized signals. The return value is stored as urea6M_melting_temperatures,
# but that name is reassigned by the normalized urea 6 M cell further down.
urea6M_melting_temperatures = plot_melting_curves_per_gene_mean(urea6M_fl, urea6M_df, "Urea_6M")

Urea 7M:

In [None]:
# Urea 7 M, un-normalized signals. The return value is stored as urea7M_melting_temperatures,
# but that name is reassigned by the normalized urea 7 M cell further down.
urea7M_melting_temperatures = plot_melting_curves_per_gene_mean(urea7M_fl, urea7M_df, "Urea_7M")

Urea 8M:

In [None]:
# Urea 8 M, un-normalized signals. The return value is stored as urea8M_melting_temperatures,
# but that name is reassigned by the normalized urea 8 M cell further down.
urea8M_melting_temperatures = plot_melting_curves_per_gene_mean(urea8M_fl, urea8M_df, "Urea_8M")

#### Normalized

PBS:

In [None]:
# PBS, per-well normalized signals. This reassigns pbs_melting_temperatures, so when the
# notebook is run top to bottom the melting-temperature table below uses this value.
pbs_melting_temperatures = plot_melting_curves_per_gene_mean(pbs_fl_norm, pbs_df_norm, "PBS_norm")

Urea 1M:

In [None]:
# Urea 1 M, per-well normalized signals. This reassigns urea1M_melting_temperatures, so when
# the notebook is run top to bottom the melting-temperature table below uses this value.
urea1M_melting_temperatures = plot_melting_curves_per_gene_mean(urea1M_fl_norm, urea1M_df_norm, "Urea_1M_norm")

Urea 2M:

In [None]:
# Urea 2 M, per-well normalized signals. This reassigns urea2M_melting_temperatures, so when
# the notebook is run top to bottom the melting-temperature table below uses this value.
urea2M_melting_temperatures = plot_melting_curves_per_gene_mean(urea2M_fl_norm, urea2M_df_norm, "Urea_2M_norm")

Urea 3M:

In [None]:
# Urea 3 M, per-well normalized signals. This reassigns urea3M_melting_temperatures, so when
# the notebook is run top to bottom the melting-temperature table below uses this value.
urea3M_melting_temperatures = plot_melting_curves_per_gene_mean(urea3M_fl_norm, urea3M_df_norm, "Urea_3M_norm")

Urea 4M:

In [None]:
# Urea 4 M, per-well normalized signals. This reassigns urea4M_melting_temperatures, so when
# the notebook is run top to bottom the melting-temperature table below uses this value.
urea4M_melting_temperatures = plot_melting_curves_per_gene_mean(urea4M_fl_norm, urea4M_df_norm, "Urea_4M_norm")

Urea 5M:

In [None]:
# Urea 5 M, per-well normalized signals. This reassigns urea5M_melting_temperatures, so when
# the notebook is run top to bottom the melting-temperature table below uses this value.
urea5M_melting_temperatures = plot_melting_curves_per_gene_mean(urea5M_fl_norm, urea5M_df_norm, "Urea_5M_norm")

Urea 6M:

In [None]:
# Urea 6 M, per-well normalized signals. This reassigns urea6M_melting_temperatures, so when
# the notebook is run top to bottom the melting-temperature table below uses this value.
urea6M_melting_temperatures = plot_melting_curves_per_gene_mean(urea6M_fl_norm, urea6M_df_norm, "Urea_6M_norm")

Urea 7M:

In [None]:
# Urea 7 M, per-well normalized signals. This reassigns urea7M_melting_temperatures, so when
# the notebook is run top to bottom the melting-temperature table below uses this value.
urea7M_melting_temperatures = plot_melting_curves_per_gene_mean(urea7M_fl_norm, urea7M_df_norm, "Urea_7M_norm")

Urea 8M:

In [None]:
# Urea 8 M, per-well normalized signals. This reassigns urea8M_melting_temperatures, so when
# the notebook is run top to bottom the melting-temperature table below uses this value.
urea8M_melting_temperatures = plot_melting_curves_per_gene_mean(urea8M_fl_norm, urea8M_df_norm, "Urea_8M_norm")

## Plotting melting temperature against urea concentration

In [None]:
# Stack the per-condition melting temperatures into one table. Rows follow the order
# PBS, urea 1 M ... 8 M, so the default row index 0-8 lines up with the urea molarity.
_melting_temperatures_by_condition = [
    pbs_melting_temperatures,
    urea1M_melting_temperatures,
    urea2M_melting_temperatures,
    urea3M_melting_temperatures,
    urea4M_melting_temperatures,
    urea5M_melting_temperatures,
    urea6M_melting_temperatures,
    urea7M_melting_temperatures,
    urea8M_melting_temperatures,
]
df_melting_temperatures = pd.DataFrame(_melting_temperatures_by_condition)

In [None]:
# Melting temperature against urea concentration, one line per gene (table column).
# The x values are the row index of df_melting_temperatures, which is used here as
# the urea molarity.

# seaborn ignores `palette` when no `hue` variable is given, so every line would fall
# back to the default colour cycle and colours would repeat. Build one colour per gene
# and pass it explicitly instead.
gene_colors = sns.hls_palette(len(df_melting_temperatures.columns), s=0.6)

fig, ax = plt.subplots()
for gene_color, col in zip(gene_colors, df_melting_temperatures.columns):
    sns.lineplot(
        x=df_melting_temperatures.index,
        y=df_melting_temperatures[col],
        label=col,
        legend="brief",
        color=gene_color,
        ax=ax,
    )

# Legend and axis labels only need to be set once, after all lines exist.
ax.legend(frameon=False, bbox_to_anchor=(1, 0.5), loc="center left")
ax.set_ylabel("Melting temperature [°C]")
ax.set_xlabel("Urea concentration [M]")

# The legend sits outside the axes; a tight bounding box keeps it in the saved file.
fig.savefig("melt_temp_for_urea_conc", bbox_inches="tight")

### Linear regression of melting temperature on urea concentration

Calculate slope:

In [None]:
# For every gene (table column), fit a straight line of melting temperature against the
# row index of df_melting_temperatures and keep only the fitted slope.
temp_urea_slope_per_gene = {
    gene: linregress(
        df_melting_temperatures.index, df_melting_temperatures[gene]
    ).slope
    for gene in df_melting_temperatures.columns
}

In [None]:
# Display the fitted slope for every gene.
temp_urea_slope_per_gene

In [None]:
# Remove the "wt" entry from the slopes before the correlation below.
# pop() with a default (instead of `del`) keeps this cell safe to re-run and tolerant of
# a missing "wt" key; the trailing semicolon suppresses the popped value as cell output.
temp_urea_slope_per_gene.pop("wt", None);

### Correlation with mutational robustness

In [None]:
# "half_n_aamut" value per gene, keyed by gene ID as a string.
# NOTE(review): presumably a mutational-robustness measure computed elsewhere — the
# origin of these numbers is not visible in this notebook; confirm and document it.
half_mut_rob_per_gene = {
    "cgreGFP": 2.3598224708239237,
    "2880": 2.0475472322138573,
    "575": 1.648929743078233,
    "83": 0.4876710628582579,
    "121": 2.087983470121052,
    "13": 1.7655853131041812,
    "567": 0.38997176295167435,
    "3224": 1.364926200088591,
    "900": 1.6385128907623048,
    "626": 1.4664978833021196,
    "985": 1.8639110762878979,
    "911": 2.1317630470636817,
    "1414": 4.5209917944211,
    "1338": 0.9603722537683815,
    "132": 3.2927772016988577,
    "9708": 3.077040183325697,
    "4111": 1.072414674710171,
}

# One-column table indexed by gene ID, rows sorted by the (string) index.
df_half_mut_rob = (
    pd.Series(half_mut_rob_per_gene, name="half_n_aamut").to_frame().sort_index()
)

In [None]:
# Correlate "half_n_aamut" with the per-gene slope (Series.corr, Pearson by default).
# The two series are aligned on their index labels, so only labels present in both count.
# NOTE(review): assumes the slope dict is keyed by the same gene-ID strings — confirm.
df_half_mut_rob["half_n_aamut"].corr(pd.Series(temp_urea_slope_per_gene))

## Plot dF/dT for each gene with different Urea concentrations

In [None]:
# Urea concentration list
urea_concs = list(range(0, 9))

# All dF/dT dataframes in one list
df_df = [
    pbs_df_norm,
    urea1M_df_norm,
    urea2M_df_norm,
    urea3M_df_norm,
    urea4M_df_norm,
    urea5M_df_norm,
    urea6M_df_norm,
    urea7M_df_norm,
    urea8M_df_norm,
]

# Dictionary urea_conc: dF/dT
urea_to_df = dict(zip(urea_concs, df_df))
genes = set(pbs_df.gene)

# Ploting
fig = plt.figure(figsize=[15, 18], dpi=200)
palette = sns.color_palette("mako", n_colors=len(urea_concs) + 1)
plots = []

for i, gene in enumerate(genes):
    plot = plt.subplot(6, 3, i + 1)
    for urea_conc in urea_concs:
        # Taking dataframe for the according urea concentration
        data_df = urea_to_df[urea_conc]

        # Creating dataframe with data only for one gene
        df_df = data_df[data_df.gene == gene]

        # Taking mean over all wells for the gene
        mean_per_gene_df = df_df.groupby(["gene", "temperature"], as_index=False).mean()

        # Index for melting temperature
        i_melt_temp = mean_per_gene_df.groupby("gene").idxmax()["signal"][gene]

        # Taking the melting temperature and appending it to the dictionary
        melting_temperature = mean_per_gene_df.iloc[i_melt_temp, 1]

        # According absorbance
        max_absorbance = mean_per_gene_df.iloc[i_melt_temp, 2]

        # dF/dT curve
        line = sns.lineplot(
            data=df_df,
            x=df_df["temperature"],
            y=df_df["signal"],
            ci=None,
            #legend="brief",
            ax=plot,
            #label=f"{urea_conc}M Urea",
            color=palette[urea_conc + 1],
        )
        plots.append(line)

        # Line along the melting temperature
        plot.axvline(
            melting_temperature, ymax=0.9, color=palette[0], linestyle="--", linewidth=1
        )

        # Labels
        plt.title(f"cgre{gene if gene != 'wt' else 'WT'}", fontsize=18)
        plt.xlabel(None)
        plt.ylabel(None)
        # plot.set_xlabel("Temperature [°C]", fontsize=16)
        # plot.set_ylabel("dF/dT", fontsize=16)
        # plot.set_ylim(0, max_absorbance + max_absorbance * 0.1)
        # plot.legend(frameon=False, bbox_to_anchor=(1, 1))
    plt.tight_layout()
#plt.tight_layout()

fig.legend(
    handles=[line for line in plots[0].get_lines() if line._linestyle != "--"],
    labels=[f"{conc} M Urea" for conc in range(9)],
    bbox_to_anchor=(0.827, 0.155),
    borderaxespad=0,
    fontsize=14,
    frameon=False,
)
fig.supxlabel("Temperature [°C]", fontsize=18, y=-0.01)
fig.supylabel("dF/dT", fontsize=18, x=-0.)

plt.savefig("curves_per_gene_per_urea_conc.png", dpi=200)

In [None]:
# Debug check: line style of the fourth line on the axes stored at plots[1], read through
# the public getter (the legend code relies on "--" marking the melting-temperature lines).
plots[1].get_lines()[3].get_linestyle()

## Correlation for each urea concentration with mutational robustness

In [None]:
from scipy.stats import pearsonr

In [None]:
# pearsonr between melting temperature and "half_n_aamut", separately for every urea
# concentration. Row `urea_conc` of df_melting_temperatures is selected by position, and
# only the labels present in both series enter the correlation.
corr_mut_rob_per_urea = {}
_robustness = df_half_mut_rob["half_n_aamut"]
for urea_conc in urea_concs:
    _temps_at_conc = df_melting_temperatures.iloc[urea_conc]
    common_ids = np.intersect1d(_temps_at_conc.index, _robustness.index)
    corr_mut_rob_per_urea[urea_conc] = pearsonr(
        _temps_at_conc[common_ids], _robustness[common_ids]
    )

In [None]:
# Display the raw pearsonr results, keyed by urea concentration.
corr_mut_rob_per_urea

In [None]:
# Tabulate the pearsonr results: one row per urea concentration, with the correlation
# coefficient (element 0) and the p-value (element 1) of each result.
# The row index is taken from the keys of corr_mut_rob_per_urea rather than a
# hard-coded range, so it always matches the data; the table is built in one step so
# that corr_results is never a plain dict.
corr_results = pd.DataFrame(
    {
        "Corr. coef.": [result[0] for result in corr_mut_rob_per_urea.values()],
        "p-value": [result[1] for result in corr_mut_rob_per_urea.values()],
    },
    index=list(corr_mut_rob_per_urea),
)
corr_results