In [5]:
%matplotlib inline
import pandas as pd
import seaborn as sns
from matplotlib import pyplot as plt

from statannotations.Annotator import Annotator

from pandas import CategoricalDtype

In [None]:
def genotype_plots(excel_file: str):
    """Plot Pearson's R per genotype/foci group for every sheet of a workbook.

    All sheets of ``excel_file`` are read; the sheet name is used as the stain
    label. For each sheet, and for each of the "channel 2" values ``"HP1B"``
    and ``"H3K4me3"`` that has rows, one panel is drawn: a box plot overlaid
    with a swarm plot of "Pearson's R (above threshold)" grouped by the derived
    ``genotype_foci`` column, annotated with Welch's t-tests between groups.

    Parameters
    ----------
    excel_file : str
        Path of the Excel workbook. Each sheet must contain the columns
        "channel 2", "Genotype", "Has foci", "Germ cell",
        "Pearson's R (above threshold)", "Cell too small/damaged" and
        "Cell overlapping with somatic cell".

    Returns
    -------
    matplotlib.figure.Figure
        Figure holding one vertically stacked panel per (stain, channel)
        combination that has data. The grid is sized for two panels per
        sheet, so skipped channels leave unused space at the bottom.
    """
    y_col = "Pearson's R (above threshold)"
    categories = ["WT_NoFoci", "WT_Foci", "KA_NoFoci", "KA_Foci"]
    sheets = pd.read_excel(excel_file, sheet_name=None, header=0)

    pairs_basic = [("WT_NoFoci", "KA_NoFoci"),]
    pairs_all = [("WT_NoFoci", "KA_NoFoci"), ("WT_NoFoci", "KA_Foci"), ("KA_NoFoci", "KA_Foci")]

    def contrast_channel(sheet: pd.DataFrame, channel2: str, stain: str, ax):
        """Draw one annotated panel for ``channel2`` on ``ax``; None if no rows."""
        data = sheet[sheet["channel 2"] == channel2]

        if data.empty:
            return None

        # Comparisons against KA_Foci are only requested when that group has rows.
        pairs = pairs_basic if data[data["genotype_foci"] == "KA_Foci"].empty else pairs_all

        ax = sns.boxplot(
            data=data,
            x="genotype_foci",
            y=y_col,
            hue="Genotype",
            dodge=False,
            ax=ax,
        )
        ax = sns.swarmplot(
            data=data,
            x="genotype_foci",
            y=y_col,
            hue="Genotype",
            alpha=0.5,
            linewidth=1,
            dodge=False,
            ax=ax,
        )

        annotator = Annotator(ax, pairs, data=data, x="genotype_foci", y=y_col)
        annotator.configure(test="t-test_welch", text_format='full', loc='inside', show_test_name=False)
        annotator.apply_and_annotate()

        ax.set_ylabel(f"Pearson's R ({stain} ~ {channel2})")
        ax.set_xlabel("")
        ax.axhline(y=0.0, color="black")
        ax.set_ylim(ymin=-1.0, ymax=1.0)
        ax.set_title(stain)
        # Guarded like the sibling plotting functions: get_legend() may be None.
        if legend := ax.get_legend():
            legend.remove()
        return ax

    num_fig_rows = len(sheets) * 2

    fig = plt.figure(dpi=300, figsize=(5, num_fig_rows * 5))
    ax_idx = 1
    for stain, sheet in sheets.items():
        # Drop rows without a measurement first. The string round-trip below
        # turns a blank cell into "" -> False -> 0.0, which would otherwise be
        # plotted and tested as a genuine correlation of zero.
        sheet = sheet.dropna(subset=[y_col])
        sheet = (
            sheet.fillna("")
            .astype(str)
            .replace({"Yes": True, "No": False, "": False})
            .convert_dtypes()
        )
        sheet["Germ cell"] = sheet["Germ cell"].astype(int)
        sheet["Genotype"] = sheet["Genotype"].astype(
            CategoricalDtype(categories=["WT", "KA"], ordered=True)
        )
        sheet[y_col] = sheet[y_col].astype(float)
        # Explicit copy: a column is added to the filtered frame just below.
        sheet = sheet[
            (sheet["Cell too small/damaged"] == False)
            & (sheet["Cell overlapping with somatic cell"] == False)
        ].copy()
        sheet["genotype_foci"] = (
            sheet["Genotype"].astype(str)
            + "_"
            + sheet["Has foci"].map({True: "Foci", False: "NoFoci"})
        ).astype(
            CategoricalDtype(
                categories=categories,
                ordered=True,
            )
        )

        for channel2 in ("HP1B", "H3K4me3"):
            # Create an Axes only when the channel has rows, so an empty
            # channel neither leaves a blank frame nor ends up sharing its
            # grid cell with the next panel.
            if not (sheet["channel 2"] == channel2).any():
                continue
            contrast_channel(sheet, channel2, stain, fig.add_subplot(num_fig_rows, 1, ax_idx))
            ax_idx += 1
    return fig


In [7]:
def timeline_plots(excel_file: str):
    """Plot Pearson's R across developmental time points for every sheet.

    All sheets of ``excel_file`` are read; the sheet name is used as the stain
    label. Each sheet gets two vertically stacked panels, one for "channel 2"
    equal to ``"HP1B"`` and one for ``"H3K4me3"``. A panel shows a box plot
    overlaid with a swarm plot of "Pearson's R (above threshold)" grouped by
    "Time Point". A channel without rows leaves its panel empty.

    Parameters
    ----------
    excel_file : str
        Path of the Excel workbook. Each sheet must contain the columns
        "channel 2", "Time Point", "Germ cell",
        "Pearson's R (above threshold)", "Cell too small/damaged" and
        "Cell overlapping with somatic cell".

    Returns
    -------
    matplotlib.figure.Figure
        Figure with ``2 * number_of_sheets`` stacked panels.
    """
    y_col = "Pearson's R (above threshold)"
    # Display order of the x axis; values outside this list become missing
    # when the column is cast to the categorical dtype below.
    categories = ["E14.5", "E15.5", "E16.5", "E18.5", "P1"]
    sheets = pd.read_excel(excel_file, sheet_name=None, header=0)

    def contrast_channel(sheet: pd.DataFrame, channel2: str, stain: str, ax):
        """Draw the panel for ``channel2`` on ``ax``; return None if no rows."""
        data = sheet[sheet["channel 2"] == channel2]

        if data.empty:
            return None

        ax = sns.boxplot(
            data=data,
            x="Time Point",
            y=y_col,
            dodge=False,
            ax=ax,
        )
        ax = sns.swarmplot(
            data=data,
            x="Time Point",
            y=y_col,
            alpha=0.5,
            linewidth=1,
            dodge=False,
            ax=ax,
        )

        ax.set_ylabel(f"Pearson's R ({stain} ~ {channel2})")
        ax.set_xlabel("")
        ax.axhline(y=0.0, color="black")
        ax.set_ylim(ymin=-1.0, ymax=1.0)
        ax.set_title(stain)
        if legend := ax.get_legend():
            legend.remove()
        return ax

    num_fig_rows = len(sheets) * 2

    fig = plt.figure(dpi=300, figsize=(5, num_fig_rows * 5))
    ax_idx = 1
    for stain, sheet in sheets.items():
        # Drop rows without a measurement first. The string round-trip below
        # turns a blank cell into "" -> False -> 0.0, which would otherwise be
        # plotted as a genuine correlation of zero.
        sheet = sheet.dropna(subset=[y_col])
        sheet = (
            sheet.fillna("")
            .astype(str)
            .replace({"Yes": True, "No": False, "": False})
            .convert_dtypes()
        )
        sheet["Germ cell"] = sheet["Germ cell"].astype(int)
        sheet["Time Point"] = sheet["Time Point"].astype(
            CategoricalDtype(categories=categories, ordered=True)
        )
        sheet[y_col] = sheet[y_col].astype(float)
        # Keep only cells not flagged as too small/damaged or overlapping.
        sheet = sheet[
            (sheet["Cell too small/damaged"] == False)
            & (sheet["Cell overlapping with somatic cell"] == False)
        ]

        contrast_channel(sheet, "HP1B", stain, fig.add_subplot(num_fig_rows, 1, ax_idx))
        contrast_channel(sheet, "H3K4me3", stain, fig.add_subplot(num_fig_rows, 1, ax_idx + 1))
        ax_idx += 2
    return fig

In [None]:
def basic_plots(excel_file: str):
    """Plot Pearson's R per "channel 1" value, pooled over all sheets.

    All sheets of ``excel_file`` are read, cleaned and concatenated into one
    table (the sheet name is stored in a ``stain`` column). Two vertically
    stacked panels are drawn, one for "channel 2" equal to ``"HP1B"`` and one
    for ``"H3K4me3"``. Each shows a box plot overlaid with a swarm plot of
    "Pearson's R (above threshold)" grouped by "channel 1". A channel without
    rows leaves its panel empty.

    Parameters
    ----------
    excel_file : str
        Path of the Excel workbook. Each sheet must contain the columns
        "channel 1", "channel 2", "Germ cell",
        "Pearson's R (above threshold)", "Cell too small/damaged" and
        "Cell overlapping with somatic cell".

    Returns
    -------
    matplotlib.figure.Figure
        Figure with two stacked panels.
    """
    y_col = "Pearson's R (above threshold)"
    sheets = pd.read_excel(excel_file, sheet_name=None, header=0)

    def contrast_channel(sheet: pd.DataFrame, channel2: str, ax):
        """Draw the panel for ``channel2`` on ``ax``; return None if no rows."""
        data = sheet[sheet["channel 2"] == channel2]

        if data.empty:
            return None

        ax = sns.boxplot(
            data=data,
            x="channel 1",
            y=y_col,
            color='gray',
            dodge=False,
            ax=ax,
        )
        ax = sns.swarmplot(
            data=data,
            x="channel 1",
            y=y_col,
            color='gray',
            alpha=0.5,
            linewidth=1,
            dodge=False,
            ax=ax,
        )

        ax.set_ylabel(f"Pearson's R ( ~ {channel2})")
        ax.set_xlabel("")
        ax.axhline(y=0.0, color="black")
        ax.set_ylim(ymin=-1.0, ymax=1.0)
        ax.set_title(f"~ {channel2}")
        if legend := ax.get_legend():
            legend.remove()
        return ax

    num_fig_rows = 2

    fig = plt.figure(dpi=300, figsize=(5, num_fig_rows * 5))

    def prep_dfs():
        """Yield one cleaned, filtered frame per sheet, tagged with its stain."""
        for stain, sheet in sheets.items():
            # Drop rows without a measurement first. The string round-trip
            # below turns a blank cell into "" -> False -> 0.0, which would
            # otherwise be plotted as a genuine correlation of zero.
            sheet = sheet.dropna(subset=[y_col])
            sheet = (
                sheet.fillna("")
                .astype(str)
                .replace({"Yes": True, "No": False, "": False})
                .convert_dtypes()
            )
            sheet["Germ cell"] = sheet["Germ cell"].astype(int)
            sheet[y_col] = sheet[y_col].astype(float)
            # Explicit copy: a column is added to the filtered frame just below.
            sheet = sheet[
                (sheet["Cell too small/damaged"] == False)
                & (sheet["Cell overlapping with somatic cell"] == False)
            ].copy()
            sheet['stain'] = stain
            yield sheet

    # Every sheet carries its own row labels, so renumber the pooled table
    # instead of keeping repeated index values.
    sheet = pd.concat(prep_dfs(), ignore_index=True)
    # Display order of the x axis; "channel 1" values outside this list become
    # missing when the column is cast to the categorical dtype.
    categories = ['SPOCD1', 'SPIN1', 'C19ORF84', 'MIWI2', 'DNMT3L', 'DNMT3C']
    sheet['channel 1'] = sheet["channel 1"].astype(CategoricalDtype(categories=categories, ordered=True))
    contrast_channel(sheet, "HP1B", fig.add_subplot(num_fig_rows, 1, 1))
    contrast_channel(sheet, "H3K4me3", fig.add_subplot(num_fig_rows, 1, 2))

    return fig

In [None]:
# Build the genotype/foci figure and export it as a PDF.
plt.close('all')  # discard figures left open by earlier cells
# Bound to a descriptive name rather than `_`, which IPython uses for the
# most recent cell output.
genotype_fig = genotype_plots("./FT - Spocd1 K464A/colocalisation_plotfile_WT_delTPR.xlsx")
genotype_fig.savefig("WT-dTPR_HP1B coloc.pdf", transparent=True)

In [None]:
# Build the time-course figure and export it as a PDF.
plt.close('all')  # discard figures left open by earlier cells
# Bound to a descriptive name rather than `_`, which IPython uses for the
# most recent cell output.
timeline_fig = timeline_plots("./FT - Spocd1 K464A/colocalisation_plotfile_timeline.xlsx")
timeline_fig.savefig("timeline coloc.pdf", transparent=True)

In [None]:
# Build the pooled per-"channel 1" figure and export it as a PDF.
plt.close('all')  # discard figures left open by earlier cells
# Bound to a descriptive name rather than `_`, which IPython uses for the
# most recent cell output.
basic_fig = basic_plots("./FT - Spocd1 K464A/colocalisation_plotfile_WT_HA.xlsx")
basic_fig.savefig("WT_HA coloc.pdf", transparent=True)