In [None]:
from pathlib import Path
import pickle as pkl
import re

from datetime import datetime

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import matplotlib.colors as mcolors
import seaborn as sns

In [None]:
# Machine-specific locations. Every directory must already exist; the
# assertions stop the notebook early with the offending path if one does not.

# Input: one sub-directory per sample type holding the extracted grain pickles.
DATA_DIR = Path("/Users/sylvi/topo_data/hariborings/extracted_grains/")

# Output root for the figure. Previous location kept for reference:
# FIG_SAVE_DIR = Path("/Volumes/shared-3/pyne_group/Shared/Papers/cas9_minicircles/figure_1/")
FIG_SAVE_DIR = Path(
    "/Users/sylvi/Library/CloudStorage/GoogleDrive-sylvia.whittle@sheffield.ac.uk/Shared drives/Pyne_group_data/Papers/Cas9/figures/fig_1_revised/"
)
assert FIG_SAVE_DIR.exists(), f"figure directory not found: {FIG_SAVE_DIR}"

# Both output sub-directories live under the figure root, so derive them from
# it instead of repeating the full path.
CSV_SAVE_DIR = FIG_SAVE_DIR / "csv_data"
assert CSV_SAVE_DIR.exists(), f"csv directory not found: {CSV_SAVE_DIR}"
HIGH_RES_GRAPH_SAVE_DIR = FIG_SAVE_DIR / "high_res_graphs"
assert HIGH_RES_GRAPH_SAVE_DIR.exists(), f"high-res graph directory not found: {HIGH_RES_GRAPH_SAVE_DIR}"

# LOAD_DATE selects the "date_<LOAD_DATE>" folder the pickles are read from;
# TODAY_DATE is stamped into the name of the csv written by the loading step.
LOAD_DATE = "2024-05-21"
TODAY_DATE = datetime.today().strftime("%Y-%m-%d")
assert DATA_DIR.exists(), f"data directory not found: {DATA_DIR}"

# Upper bound on a grain's "p_to_nm" value (presumably the pixel-to-nm scale
# of its source image); grains above it are left out of the dataframe.
MAX_P_TO_NM = 10.0

# Sample types to load, in loading order. Each name is also the name of the
# sub-directory of DATA_DIR that holds that sample's pickle.
SAMPLES = [
    # "unbound_" samples: three sites (ON, OT1, OT2), each as REL and SC
    "unbound_ON_REL",
    "unbound_ON_SC",
    "unbound_OT1_REL",
    "unbound_OT1_SC",
    "unbound_OT2_REL",
    "unbound_OT2_SC",
] + [
    # "cas9_" samples: SC only
    "cas9_ON_SC",
    "cas9_OT1_SC",
    "cas9_OT2_SC",
]

# Build one dataframe with a row per accepted grain, holding:
# - sample type
# - p_to_nm
# - min / max feret, their ratio, contour length, protein area and volume
#
# A grain is accepted when its p_to_nm is at most MAX_P_TO_NM and its min feret
# lies within [min_feret_lower_threshold, min_feret_upper_threshold]. Grains
# that pass the p_to_nm check but fail the feret check are kept, together with
# their image and mask, in bad_feret_list so they can be inspected visually.

min_feret_lower_threshold = 4
min_feret_upper_threshold = 20

data_list = []
bad_feret_list = []

for sample_type in SAMPLES:
    print(f"loading {sample_type}")

    # The mask is stored under a different key for cas9 and unbound samples.
    # Resolve it once per sample so an unknown sample type fails immediately.
    if "cas9_" in sample_type:
        mask_key = "predicted_mask"
    elif "unbound_" in sample_type:
        mask_key = "mask"
    else:
        raise ValueError(f"cannot tell which mask key to use for sample type {sample_type!r}")

    pickle_path = DATA_DIR / sample_type / f"date_{LOAD_DATE}" / "feret_grain_dict_fig1_with_contour_length.pkl"
    # NOTE: unpickling can execute arbitrary code; only load files you produced yourself.
    with open(pickle_path, "rb") as f:
        feret_data = pkl.load(f)

    for grain in feret_data.values():
        p_to_nm = grain["p_to_nm"]
        if not p_to_nm <= MAX_P_TO_NM:
            continue

        min_feret = grain["min_feret"]
        max_feret = grain["max_feret"]

        if min_feret < min_feret_lower_threshold or min_feret > min_feret_upper_threshold:
            bad_feret_list.append(
                {
                    "sample_type": sample_type,
                    "image": grain["image"],
                    "mask": grain[mask_key],
                    "p_to_nm": p_to_nm,
                    "min_feret": min_feret,
                    "max_feret": max_feret,
                    "min_feret_coords": grain["min_feret_coords"],
                    "max_feret_coords": grain["max_feret_coords"],
                }
            )
        else:
            data_list.append(
                {
                    "sample_type": sample_type,
                    "p_to_nm": p_to_nm,
                    "min_feret": min_feret,
                    "max_feret": max_feret,
                    "contour_length": grain["contour_length"],
                    "protein_area": grain["protein_area"],
                    "protein_volume": grain["protein_volume"],
                    "feret_ratio": min_feret / max_feret,
                }
            )

print(f"num grains: {len(data_list)}")

df = pd.DataFrame(data_list)

print(df.head())

# save df to csv
df.to_csv(CSV_SAVE_DIR / f"plotting_data_{TODAY_DATE}_max_p_to_nm_{MAX_P_TO_NM}.csv", index=False)

print(f"num bad feret: {len(bad_feret_list)}")

In [None]:
# Violin plot of the min feret for each sample group.

# Order of the violins along the x axis.
# (Alternative tried earlier: the three REL samples first, then the three
# unbound SC samples, then the cas9 samples.)
sample_order = [
    "unbound_ON_SC",
    "unbound_OT1_SC",
    "unbound_OT2_SC",
    "unbound_ON_REL",
    "unbound_OT1_REL",
    "unbound_OT2_REL",
    "cas9_ON_SC",
    "cas9_OT1_SC",
    "cas9_OT2_SC",
]

# Tick labels, paired with sample_order by position.
x_ticks = [
    "ON SC",
    "OT1 SC",
    "OT2 SC",
    "ON REL",
    "OT1 REL",
    "OT2 REL",
    "ON SC +dCas9",
    "OT1 SC +dCas9",
    "OT2 SC +dCas9",
]
assert len(x_ticks) == len(sample_order), "every sample in sample_order needs exactly one tick label"

# Earlier flat colour list, kept for reference:
# colours = ["#D81B60", "#CE5782", "#1E88E5", "#6396C3", "#FFC107", "#F3D16D", "#C1879C", "#99ACBD", "#ECDFB6"]
colour_dict = {
    "unbound_ON_SC": "#C1879C",
    "unbound_OT1_SC": "#99ACBD",
    "unbound_OT2_SC": "#ECDFB6",
    "unbound_ON_REL": "#CE5782",
    "unbound_OT1_REL": "#6396C3",
    "unbound_OT2_REL": "#F3D16D",
    "cas9_ON_SC": "#D81B60",
    "cas9_OT1_SC": "#1E88E5",
    "cas9_OT2_SC": "#FFC107",
}

# Print ns for each sample
sample_counts = df["sample_type"].value_counts()
for sample in sample_order:
    print(f"{sample}: {sample_counts.get(sample, 0)}")

fig, ax = plt.subplots(figsize=(12, 8))

sns.violinplot(
    data=df, ax=ax, x="sample_type", y="min_feret", hue="sample_type", palette=colour_dict, order=sample_order
)

ax.set_ylabel("Minimum width (nm)", fontsize=20)
ax.set_xlabel("Sample type", fontsize=20)

# Only change the size of the y tick labels. Replacing the labels with the raw
# tick values would freeze them as unformatted floats and detach them from the
# tick locator.
ax.tick_params(axis="y", labelsize=18)

# Pin the x tick positions before relabelling them so labels and ticks cannot
# drift apart.
ticks = ax.get_xticks()
ax.set_xticks(ticks)
ax.set_xticklabels(x_ticks, fontsize=18, rotation=45, ha="right")
# ax.set_title(f"Min Feret width for grains with p_to_nm < {MAX_P_TO_NM}", fontsize=20)
# ax.set_ylim(0, 20)
fig.tight_layout()
# plt.savefig(FIG_SAVE_DIR / f"min_feret_violin_plot_{TODAY_DATE}_max_p_to_nm_{MAX_P_TO_NM}.png", dpi=500)
plt.show()

In [None]:
# Show every grain that was rejected by the min-feret thresholds: the image on
# the left, the mask with the two min-feret points on the right.

for grain in bad_feret_list:
    sample_type = grain["sample_type"]
    p_to_nm = grain["p_to_nm"]
    min_feret_coords = grain["min_feret_coords"]

    print(sample_type)
    fig, ax = plt.subplots(1, 2, figsize=(12, 6))

    ax[0].imshow(grain["image"])
    ax[0].set_title(f"sample: {sample_type} image")

    ax[1].imshow(grain["mask"])
    ax[1].set_title(f"sample: {sample_type} mask")
    # NOTE(review): column 0 is plotted as x and column 1 as y; confirm the
    # stored coordinates are not (row, col), which would need swapping.
    ax[1].scatter(min_feret_coords[0, 0], min_feret_coords[0, 1], color="r")
    ax[1].scatter(min_feret_coords[1, 0], min_feret_coords[1, 1], color="r")

    # Label the axes with pixel position * p_to_nm. A formatter function is
    # applied to whatever ticks end up being drawn, unlike a fixed label list.
    def pixel_to_scaled_label(pixel_position, _tick_index, scale=p_to_nm):
        return f"{pixel_position * scale:.1f}"

    for panel in ax:
        panel.xaxis.set_major_formatter(pixel_to_scaled_label)
        panel.yaxis.set_major_formatter(pixel_to_scaled_label)

    # The title reports scalar metadata only; the "image" entry is the pixel
    # array itself and must not be interpolated into the title.
    fig.suptitle(f"sample: {sample_type} min_feret: {grain['min_feret']:.2f} p_to_nm: {p_to_nm}")
    plt.show()
    # One figure per rejected grain: release it once it has been shown.
    plt.close(fig)

In [None]:
# Shared styling for the KDE figures.
# figsize is handed to plt.subplots by create_figure; (4, 2.5) was used before.
figsize = (4, 4)
# Font size for the axis labels.
axlabel_font_size = 14
# Resolution passed to savefig for the saved graphs.
dpi = 500


def hsv_to_hex(h, s, v):
    """Convert one HSV colour into a hex colour string.

    The three components are packed as ``[h, s, v]`` and passed through
    ``mcolors.hsv_to_rgb`` followed by ``mcolors.rgb2hex``; the value returned
    by ``rgb2hex`` is returned unchanged.
    """
    return mcolors.rgb2hex(mcolors.hsv_to_rgb([h, s, v]))


# Three palettes, each a single hue at three intensities. The
# (saturation, value) steps are shared: (0.5, 1.0), (0.75, 0.9), (1.0, 0.8).
# Hues, in order: 0.9 pink (sc), 0.6 blue (sc + cas9), 0.45 green (rel).
sc_palette, sc_cas9_palette, rel_palette = (
    [hsv_to_hex(hue, saturation, value) for saturation, value in ((0.5, 1.0), (0.75, 0.9), (1.0, 0.8))]
    for hue in (0.9, 0.6, 0.45)
)

# Plotting funcs

In [None]:
def print_break(title=""):
    """Print a section divider: a blank line, the title between two 20-dash rules, a blank line."""
    rule = "-" * 20
    print("\n" + " ".join((rule, title, rule)) + "\n")


def print_mean_and_std(type: str, data: np.ndarray):
    """Print ``type`` followed by ``np.mean(data)`` and ``np.std(data)``, each to two decimals."""
    mean_value = np.mean(data)
    std_value = np.std(data)
    print(f"{type} | mean: {mean_value:.2f} std: {std_value:.2f}")


def create_figure():
    """Create a new figure with one axes, sized by the notebook-level ``figsize``.

    Returns the ``(fig, ax)`` pair produced by ``plt.subplots``.
    """
    return plt.subplots(figsize=figsize)


# def save_csv_multi_column(
#     col_names: list[str], data: list[pd.Series], csv_save_name: str, csv_save_path: Path = CSV_SAVE_DIR
# ):
#     """Save multiple series to a csv file"""
#     assert len(data) == len(col_names)
#     # Save the data to a csv file with each name as a column

#     to_save = pd.DataFrame()
#     for col_name, data_series in zip(col_names, data):
#         pd.concat([to_save, data_series], axis=1)

#     if not csv_save_name.endswith(".csv"):
#         csv_save_name += ".csv"
#     df.to_csv(csv_save_path / csv_save_name, index=False)


def plot_kde_individual(
    name: str,
    data: pd.Series,
    ax: plt.Axes,
    color: str,
    fill: bool = True,
    common_norm: bool = False,
    xlabel: str = "",
    csv_save_path: Path = None,
    legend_label: str = None,
):
    """Draw the KDE of one series on ``ax`` and print its mean and std.

    Parameters
    ----------
    name
        Identifier used in the printed summary, as the fallback x label, and in
        the csv file name (``kde_<name>.csv``).
    data
        Values to estimate the density of.
    ax
        Axes to draw on. The x and y labels of this axes are overwritten.
    color, fill, common_norm
        Forwarded unchanged to ``sns.kdeplot``.
    xlabel
        Label for the x axis; when empty, ``name`` is used instead.
    csv_save_path
        Directory to write ``data`` into as csv. Nothing is written when None.
    legend_label
        Label forwarded to ``sns.kdeplot`` for use by a legend.
    """
    print(f"Plotting {name} KDE | mean: {np.mean(data):.2f} std: {np.std(data):.2f}")
    sns.kdeplot(data, ax=ax, color=color, fill=fill, common_norm=common_norm, label=legend_label)

    # Both axis labels use the shared font size, including the fallback x label.
    ax.set_xlabel(xlabel if xlabel else name, fontsize=axlabel_font_size)
    ax.set_ylabel("Density", fontsize=axlabel_font_size)

    if csv_save_path is not None:
        # Path() also lets callers pass the directory as a plain string.
        data.to_csv(Path(csv_save_path) / f"kde_{name}.csv", index=False)


def plot_kde_multiple(
    names: list,
    data: pd.DataFrame,
    ax: plt.Axes,
    colors: list,
    fill: bool = True,
    common_norm: bool = False,
    legend_labels: list = None,
    xlabel: str = "",
    csv_save_path: Path = None,
):
    """Draw one KDE per column of ``data`` listed in ``names``, then show the figure.

    Parameters
    ----------
    names
        Column names of ``data`` to plot, one KDE each.
    data
        Dataframe holding the columns in ``names``.
    ax
        Either a single axes, on which all KDEs are overlaid, or an indexable
        collection of axes, where column ``i`` is drawn on ``ax[i]``.
    colors
        One colour per name.
    fill, common_norm
        Forwarded to ``plot_kde_individual`` for every column.
    legend_labels
        One legend label per name; defaults to ``names``.
    xlabel
        When non-empty, x label of the first axes.
    csv_save_path
        Directory to write the whole of ``data`` into as one csv whose name is
        ``kde_`` followed by all names concatenated. Nothing is written when None.
    """
    assert len(names) == len(colors)
    if legend_labels is not None:
        assert len(names) == len(legend_labels)
    else:
        legend_labels = names

    # A single Axes cannot be indexed, so reuse it for every column; a
    # collection of axes is indexed per column.
    if isinstance(ax, plt.Axes):
        axes = [ax] * len(names)
    else:
        axes = ax

    for i, name in enumerate(names):
        plot_kde_individual(
            name,
            data[name],
            axes[i],
            colors[i],
            fill=fill,
            common_norm=common_norm,
            legend_label=legend_labels[i],
        )

    if xlabel != "":
        axes[0].set_xlabel(xlabel, fontsize=axlabel_font_size)

    axes[0].set_ylabel("Density", fontsize=axlabel_font_size)

    plt.show()

    if csv_save_path is not None:
        names_as_string = "".join(names)
        data.to_csv(csv_save_path / f"kde_{names_as_string}.csv", index=False)

# Feret ratios

In [None]:
# Compare feret ratios (min feret / max feret, labelled "Aspect ratio" on the
# plots) between groups of samples.
#
# Every comparison follows the same recipe: print the group sizes, overlay one
# KDE per group on a fresh figure, save the figure. The groups and comparisons
# are therefore declared once as data and drawn by a single loop.

sample_types = df["sample_type"]
is_rel = sample_types.str.contains("REL")
is_sc = sample_types.str.contains("SC")
is_cas9 = sample_types.str.contains("cas9")

# Row selectors for each group. The key doubles as the csv file suffix.
feret_ratio_selectors = {
    "rel": is_rel,
    "sc": is_sc & ~is_cas9,
    "sc_cas9": is_sc & is_cas9,
    "rel_on": sample_types == "unbound_ON_REL",
    "rel_ot1": sample_types == "unbound_OT1_REL",
    "rel_ot2": sample_types == "unbound_OT2_REL",
    "sc_on": sample_types == "unbound_ON_SC",
    "sc_ot1": sample_types == "unbound_OT1_SC",
    "sc_ot2": sample_types == "unbound_OT2_SC",
    "sc_cas9_on": sample_types == "cas9_ON_SC",
    "sc_cas9_ot1": sample_types == "cas9_OT1_SC",
    "sc_cas9_ot2": sample_types == "cas9_OT2_SC",
}
feret_ratio_groups = {
    key: df.loc[selector, ["sample_type", "feret_ratio"]] for key, selector in feret_ratio_selectors.items()
}

# Each group is written exactly once, however many comparisons it appears in.
for key, group in feret_ratio_groups.items():
    group.to_csv(CSV_SAVE_DIR / f"kde_feret_ratio_{key}.csv", index=False)

# (title, series, label of the printed total or None, png file name)
# where each series is (count label, plot name / legend label, group key, colour).
feret_ratio_comparisons = [
    (
        "relaxed vs sc without cas9",
        [
            ("num relaxed", "rel", "rel", rel_palette[1]),
            ("num sc", "sc", "sc", sc_palette[1]),
        ],
        "num total",
        "kde_feret_ratio_rel_sc.png",
    ),
    (
        "relaxed vs sc vs cas9",
        [
            ("num relaxed", "rel", "rel", rel_palette[1]),
            ("num sc", "sc", "sc", sc_palette[1]),
            ("num sc cas9", "sc+dCas9", "sc_cas9", sc_cas9_palette[1]),
        ],
        "num total",
        "kde_feret_ratio_rel_sc_cas9.png",
    ),
    (
        "unbound rel on ot1 ot2",
        [
            ("num on rel", "on rel", "rel_on", rel_palette[0]),
            ("num ot1 rel", "ot1 rel", "rel_ot1", rel_palette[1]),
            ("num ot2 rel", "ot2 rel", "rel_ot2", rel_palette[2]),
        ],
        None,
        "kde_feret_ratio_rel_on_ot1_ot2.png",
    ),
    (
        "unbound sc on, ot1, ot2",
        [
            ("num on sc", "on sc", "sc_on", sc_palette[0]),
            ("num ot1 sc", "ot1 sc", "sc_ot1", sc_palette[1]),
            ("num ot2 sc", "ot2 sc", "sc_ot2", sc_palette[2]),
        ],
        None,
        "kde_feret_ratio_sc_on_ot1_ot2.png",
    ),
    (
        "cas9 on, ot1, ot2",
        [
            ("num on sc", "on sc", "sc_cas9_on", sc_cas9_palette[0]),
            ("num ot1 sc", "ot1 sc", "sc_cas9_ot1", sc_cas9_palette[1]),
            ("num ot2 sc", "ot2 sc", "sc_cas9_ot2", sc_cas9_palette[2]),
        ],
        None,
        "kde_feret_ratio_sc_cas9_on_ot1_ot2.png",
    ),
    (
        "rel vs sc vs cas9 but on only for cas9",
        [
            ("num relaxed", "rel", "rel", rel_palette[1]),
            ("num sc", "sc", "sc", sc_palette[1]),
            ("num sc cas9 on", "sc+dCas9 on", "sc_cas9_on", sc_cas9_palette[0]),
        ],
        "num total",
        "kde_feret_ratio_rel_sc_cas9_on.png",
    ),
    (
        "rel vs sc vs cas9 but all only on",
        [
            ("num relaxed on", "rel on", "rel_on", rel_palette[1]),
            ("num sc on", "sc on", "sc_on", sc_palette[1]),
            ("num sc cas9 on", "sc+dCas9 on", "sc_cas9_on", sc_cas9_palette[0]),
        ],
        "num total",
        "kde_feret_ratio_rel_on_sc_on_cas9_on.png",
    ),
    (
        "rel on vs sc on",
        [
            ("num relaxed on", "rel on", "rel_on", rel_palette[1]),
            ("num sc on", "sc on", "sc_on", sc_palette[1]),
        ],
        "num total",
        "kde_feret_ratio_rel_on_sc_on.png",
    ),
]

for title, series, total_label, png_name in feret_ratio_comparisons:
    print_break(title)

    counts = [len(feret_ratio_groups[group_key]) for _, _, group_key, _ in series]
    for (count_label, _, _, _), count in zip(series, counts):
        print(f"{count_label}: {count}")
    if total_label is not None:
        print(f"{total_label}: {sum(counts)}")

    fig, ax = create_figure()
    for _, plot_name, group_key, colour in series:
        plot_kde_individual(
            name=plot_name,
            data=feret_ratio_groups[group_key]["feret_ratio"],
            ax=ax,
            color=colour,
            xlabel="Aspect ratio",
            legend_label=plot_name,
        )
    ax.set_xlabel("Aspect ratio", fontsize=axlabel_font_size)
    ax.set_ylabel("Density", fontsize=axlabel_font_size)
    ax.legend(loc="upper left")
    fig.tight_layout()
    # Save through the figure object rather than pyplot's "current figure".
    fig.savefig(HIGH_RES_GRAPH_SAVE_DIR / png_name, dpi=dpi)
    plt.show()
    plt.close(fig)

# Min feret

In [None]:
# Compare min ferets (labelled "Min width (nm)" on the plots) between groups of
# samples.
#
# Every comparison follows the same recipe: print the group sizes, overlay one
# KDE per group on a fresh figure, save the figure. The groups and comparisons
# are therefore declared once as data and drawn by a single loop.

sample_types = df["sample_type"]
is_rel = sample_types.str.contains("REL")
is_sc = sample_types.str.contains("SC")
is_cas9 = sample_types.str.contains("cas9")

# Row selectors for each group. The key doubles as the csv file suffix.
min_feret_selectors = {
    "rel": is_rel,
    "sc": is_sc & ~is_cas9,
    "sc_cas9": is_sc & is_cas9,
    "sc_cas9_on": sample_types == "cas9_ON_SC",
    "sc_cas9_ot1": sample_types == "cas9_OT1_SC",
    "sc_cas9_ot2": sample_types == "cas9_OT2_SC",
    "rel_on": sample_types == "unbound_ON_REL",
    "rel_ot1": sample_types == "unbound_OT1_REL",
    "rel_ot2": sample_types == "unbound_OT2_REL",
    "sc_on": sample_types == "unbound_ON_SC",
    "sc_ot1": sample_types == "unbound_OT1_SC",
    "sc_ot2": sample_types == "unbound_OT2_SC",
}
min_feret_groups = {
    key: df.loc[selector, ["sample_type", "min_feret"]] for key, selector in min_feret_selectors.items()
}

# Each group is written exactly once, however many comparisons it appears in.
for key, group in min_feret_groups.items():
    group.to_csv(CSV_SAVE_DIR / f"kde_min_feret_{key}.csv", index=False)

# (title, series, label of the printed total or None, legend location, png file name)
# where each series is (count label, plot name / legend label, group key, colour).
min_feret_comparisons = [
    (
        "rel vs sc with no cas9",
        [
            ("num relaxed", "rel", "rel", rel_palette[1]),
            ("num sc no cas9", "sc", "sc", sc_palette[1]),
        ],
        "total",
        "upper left",
        "kde_min_feret_rel_sc.png",
    ),
    (
        "relaxed vs sc vs cas9",
        [
            ("num relaxed", "rel", "rel", rel_palette[1]),
            ("num sc no cas9", "sc", "sc", sc_palette[1]),
            ("num sc cas9", "sc+dCas9", "sc_cas9", sc_cas9_palette[1]),
        ],
        "total",
        "upper right",
        "kde_min_feret_rel_sc_cas9.png",
    ),
    (
        # on_sc vs ot1_sc vs ot2_sc for cas9 bound samples
        "cas9 on, ot1, ot2",
        [
            ("num on sc", "on sc", "sc_cas9_on", sc_cas9_palette[0]),
            ("num ot1 sc", "ot1 sc", "sc_cas9_ot1", sc_cas9_palette[1]),
            ("num ot2 sc", "ot2 sc", "sc_cas9_ot2", sc_cas9_palette[2]),
        ],
        None,
        "upper right",
        "kde_min_feret_sc_cas9_on_ot1_ot2.png",
    ),
    (
        "min feret rel on ot1 ot2",
        [
            ("num on rel", "on rel", "rel_on", rel_palette[0]),
            ("num ot1 rel", "ot1 rel", "rel_ot1", rel_palette[1]),
            ("num ot2 rel", "ot2 rel", "rel_ot2", rel_palette[2]),
        ],
        None,
        "upper right",
        "kde_min_feret_rel_on_ot1_ot2.png",
    ),
    (
        "min feret sc on ot1 ot2",
        [
            ("num on sc", "on sc", "sc_on", sc_palette[0]),
            ("num ot1 sc", "ot1 sc", "sc_ot1", sc_palette[1]),
            ("num ot2 sc", "ot2 sc", "sc_ot2", sc_palette[2]),
        ],
        None,
        "upper right",
        "kde_min_feret_sc_on_ot1_ot2.png",
    ),
]

for title, series, total_label, legend_loc, png_name in min_feret_comparisons:
    print_break(title)

    counts = [len(min_feret_groups[group_key]) for _, _, group_key, _ in series]
    for (count_label, _, _, _), count in zip(series, counts):
        print(f"{count_label}: {count}")
    if total_label is not None:
        print(f"{total_label}: {sum(counts)}")

    fig, ax = create_figure()
    for _, plot_name, group_key, colour in series:
        plot_kde_individual(
            name=plot_name,
            data=min_feret_groups[group_key]["min_feret"],
            ax=ax,
            color=colour,
            xlabel="Min width (nm)",
            legend_label=plot_name,
        )
    ax.set_xlabel("Min width (nm)", fontsize=axlabel_font_size)
    ax.set_ylabel("Density", fontsize=axlabel_font_size)
    ax.legend(loc=legend_loc)
    fig.tight_layout()
    # Save through the figure object rather than pyplot's "current figure".
    fig.savefig(HIGH_RES_GRAPH_SAVE_DIR / png_name, dpi=dpi)
    plt.show()
    plt.close(fig)

# Contour lengths

In [None]:
# Contour length KDEs: relaxed vs sc without cas9, then relaxed vs sc vs cas9.
# Both plots share the same groups, so the groups are selected and saved once
# and the two figures are drawn by one loop.

contour_columns = ["sample_type", "contour_length"]
contour_is_sc = df["sample_type"].str.contains("SC")
contour_is_cas9 = df["sample_type"].str.contains("cas9")

df_contour_rel = df.loc[df["sample_type"].str.contains("REL"), contour_columns]
df_contour_sc = df.loc[contour_is_sc & ~contour_is_cas9, contour_columns]
df_contour_sc_cas9 = df.loc[contour_is_sc & contour_is_cas9, contour_columns]

df_contour_rel.to_csv(CSV_SAVE_DIR / "kde_contour_length_rel.csv", index=False)
df_contour_sc.to_csv(CSV_SAVE_DIR / "kde_contour_length_sc.csv", index=False)
df_contour_sc_cas9.to_csv(CSV_SAVE_DIR / "kde_contour_length_sc_cas9.csv", index=False)

# Each series is (count label, plot name / legend label, dataframe, colour).
contour_rel_series = ("num relaxed", "rel", df_contour_rel, rel_palette[1])
contour_sc_series = ("num sc no cas9", "sc", df_contour_sc, sc_palette[1])
contour_sc_cas9_series = ("num sc cas9", "sc+dCas9", df_contour_sc_cas9, sc_cas9_palette[1])

# (title, series, legend location, png file name)
contour_comparisons = [
    (
        "relaxed vs sc without cas9",
        [contour_rel_series, contour_sc_series],
        "upper left",
        "kde_contour_length_rel_sc.png",
    ),
    (
        "relaxed vs sc vs cas9",
        [contour_rel_series, contour_sc_series, contour_sc_cas9_series],
        "upper right",
        "kde_contour_length_rel_sc_cas9.png",
    ),
]

for title, series, legend_loc, png_name in contour_comparisons:
    print_break(title)

    for count_label, _, group, _ in series:
        print(f"{count_label}: {len(group)}")
    print(f"total: {sum(len(group) for _, _, group, _ in series)}")

    fig, ax = create_figure()
    for _, plot_name, group, colour in series:
        plot_kde_individual(
            name=plot_name,
            data=group["contour_length"],
            ax=ax,
            color=colour,
            xlabel="Contour length (nm)",
            legend_label=plot_name,
        )
    ax.set_xlabel("Contour length (nm)", fontsize=axlabel_font_size)
    ax.set_ylabel("Density", fontsize=axlabel_font_size)
    ax.legend(loc=legend_loc)
    fig.tight_layout()
    # Save through the figure object rather than pyplot's "current figure".
    fig.savefig(HIGH_RES_GRAPH_SAVE_DIR / png_name, dpi=dpi)
    plt.show()
    plt.close(fig)

print_break("unbound rel on ot1 ot2")
# One contour-length frame per unbound relaxed sample (ON, OT1, OT2).
_cols = ["sample_type", "contour_length"]
df_contour_length_unbound_rel_on = df.loc[df["sample_type"].eq("unbound_ON_REL"), _cols]
df_contour_length_unbound_ot1_rel = df.loc[df["sample_type"].eq("unbound_OT1_REL"), _cols]
df_contour_length_unbound_ot2_rel = df.loc[df["sample_type"].eq("unbound_OT2_REL"), _cols]

# (label, frame, curve colour, csv file name)
_series = [
    ("on rel", df_contour_length_unbound_rel_on, rel_palette[0], "kde_contour_length_rel_on.csv"),
    ("ot1 rel", df_contour_length_unbound_ot1_rel, rel_palette[1], "kde_contour_length_rel_ot1.csv"),
    ("ot2 rel", df_contour_length_unbound_ot2_rel, rel_palette[2], "kde_contour_length_rel_ot2.csv"),
]
for _label, _frame, _color, _csv_name in _series:
    print(f"num {_label}: {len(_frame)}")

fig, ax = create_figure()
for _idx, (_label, _frame, _color, _csv_name) in enumerate(_series):
    # The xlabel argument is passed for the first curve only.
    _extra = {"xlabel": "Contour length (nm)"} if _idx == 0 else {}
    plot_kde_individual(
        name=_label,
        data=_frame["contour_length"],
        ax=ax,
        color=_color,
        legend_label=_label,
        **_extra,
    )
plt.xlabel("Contour length (nm)", fontsize=axlabel_font_size)
plt.ylabel("Density", fontsize=axlabel_font_size)
# Export each plotted frame to its own CSV.
for _label, _frame, _color, _csv_name in _series:
    _frame.to_csv(CSV_SAVE_DIR / _csv_name, index=False)
plt.legend(loc="upper right")
fig.tight_layout()
plt.savefig(HIGH_RES_GRAPH_SAVE_DIR / "kde_contour_length_rel_on_ot1_ot2.png", dpi=dpi)
plt.show()

print_break("unbound sc on, ot1, ot2")
# One contour-length frame per unbound supercoiled sample (ON, OT1, OT2).
_cols = ["sample_type", "contour_length"]
df_contour_length_unbound_on_sc = df.loc[df["sample_type"].eq("unbound_ON_SC"), _cols]
df_contour_length_unbound_ot1_sc = df.loc[df["sample_type"].eq("unbound_OT1_SC"), _cols]
df_contour_length_unbound_ot2_sc = df.loc[df["sample_type"].eq("unbound_OT2_SC"), _cols]

# (label, frame, curve colour, csv file name)
_series = [
    ("on sc", df_contour_length_unbound_on_sc, sc_palette[0], "kde_contour_length_sc_on.csv"),
    ("ot1 sc", df_contour_length_unbound_ot1_sc, sc_palette[1], "kde_contour_length_sc_ot1.csv"),
    ("ot2 sc", df_contour_length_unbound_ot2_sc, sc_palette[2], "kde_contour_length_sc_ot2.csv"),
]
for _label, _frame, _color, _csv_name in _series:
    print(f"num {_label}: {len(_frame)}")

fig, ax = create_figure()
for _idx, (_label, _frame, _color, _csv_name) in enumerate(_series):
    # The xlabel argument is passed for the first curve only.
    _extra = {"xlabel": "Contour length (nm)"} if _idx == 0 else {}
    plot_kde_individual(
        name=_label,
        data=_frame["contour_length"],
        ax=ax,
        color=_color,
        legend_label=_label,
        **_extra,
    )
plt.xlabel("Contour length (nm)", fontsize=axlabel_font_size)
plt.ylabel("Density", fontsize=axlabel_font_size)
# Export each plotted frame to its own CSV.
for _label, _frame, _color, _csv_name in _series:
    _frame.to_csv(CSV_SAVE_DIR / _csv_name, index=False)
plt.legend(loc="upper right")
fig.tight_layout()
plt.savefig(HIGH_RES_GRAPH_SAVE_DIR / "kde_contour_length_sc_on_ot1_ot2.png", dpi=dpi)
plt.show()

print_break("cas9 sc on, ot1, ot2")
# Contour lengths of the three cas9 supercoiled samples (ON, OT1, OT2), one frame each.
_cols = ["sample_type", "contour_length"]
df_contour_cas9_on_sc = df.loc[df["sample_type"].eq("cas9_ON_SC"), _cols]
df_contour_cas9_ot1_sc = df.loc[df["sample_type"].eq("cas9_OT1_SC"), _cols]
df_contour_cas9_ot2_sc = df.loc[df["sample_type"].eq("cas9_OT2_SC"), _cols]

# (label, frame, curve colour, csv file name)
_series = [
    ("on sc", df_contour_cas9_on_sc, sc_cas9_palette[0], "kde_contour_length_sc_cas9_on.csv"),
    ("ot1 sc", df_contour_cas9_ot1_sc, sc_cas9_palette[1], "kde_contour_length_sc_cas9_ot1.csv"),
    ("ot2 sc", df_contour_cas9_ot2_sc, sc_cas9_palette[2], "kde_contour_length_sc_cas9_ot2.csv"),
]
for _label, _frame, _color, _csv_name in _series:
    print(f"num {_label}: {len(_frame)}")

fig, ax = create_figure()
for _idx, (_label, _frame, _color, _csv_name) in enumerate(_series):
    # The xlabel argument is passed for the first curve only.
    _extra = {"xlabel": "Contour length (nm)"} if _idx == 0 else {}
    plot_kde_individual(
        name=_label,
        data=_frame["contour_length"],
        ax=ax,
        color=_color,
        legend_label=_label,
        **_extra,
    )
plt.xlabel("Contour length (nm)", fontsize=axlabel_font_size)
plt.ylabel("Density", fontsize=axlabel_font_size)
# Export each plotted frame to its own CSV.
for _label, _frame, _color, _csv_name in _series:
    _frame.to_csv(CSV_SAVE_DIR / _csv_name, index=False)
plt.legend(loc="upper right")
fig.tight_layout()
plt.savefig(HIGH_RES_GRAPH_SAVE_DIR / "kde_contour_length_sc_cas9_on_ot1_ot2.png", dpi=dpi)
plt.show()

print_break("rel on vs sc on")
# Contour length of the unbound ON sample: relaxed vs supercoiled.
_cols = ["sample_type", "contour_length"]
df_contour_length_rel_on = df.loc[df["sample_type"].eq("unbound_ON_REL"), _cols]
df_contour_length_sc_on = df.loc[df["sample_type"].eq("unbound_ON_SC"), _cols]

_n_rel_on, _n_sc_on = len(df_contour_length_rel_on), len(df_contour_length_sc_on)
print(f"num relaxed on: {_n_rel_on}")
print(f"num sc on: {_n_sc_on}")
print(f"total: {_n_rel_on + _n_sc_on}")

fig, ax = create_figure()
# Only the first curve is given an xlabel argument.
for _kde_kwargs in (
    dict(
        name="rel on",
        data=df_contour_length_rel_on["contour_length"],
        color=rel_palette[1],
        xlabel="Contour length (nm)",
        legend_label="rel on",
    ),
    dict(
        name="sc on",
        data=df_contour_length_sc_on["contour_length"],
        color=sc_palette[1],
        legend_label="sc on",
    ),
):
    plot_kde_individual(ax=ax, **_kde_kwargs)
plt.xlabel("Contour length (nm)", fontsize=axlabel_font_size)
plt.ylabel("Density", fontsize=axlabel_font_size)
# NOTE(review): the per-sample rel and sc contour-length sections of this notebook write these
# same two file names from the same row selections, so these writes overwrite them with equal data.
for _frame, _csv_name in (
    (df_contour_length_rel_on, "kde_contour_length_rel_on.csv"),
    (df_contour_length_sc_on, "kde_contour_length_sc_on.csv"),
):
    _frame.to_csv(CSV_SAVE_DIR / _csv_name, index=False)
plt.legend(loc="upper right")
fig.tight_layout()
plt.savefig(HIGH_RES_GRAPH_SAVE_DIR / "kde_contour_length_rel_on_sc_on.png", dpi=dpi)
plt.show()

# Protein areas

In [None]:
# Protein area distributions for the three cas9 supercoiled samples

print_break("protein areas | cas9 sc on, ot1, ot2")
_cols = ["sample_type", "protein_area"]
df_protein_area_cas9_on_sc = df.loc[df["sample_type"].eq("cas9_ON_SC"), _cols]
df_protein_area_cas9_ot1_sc = df.loc[df["sample_type"].eq("cas9_OT1_SC"), _cols]
df_protein_area_cas9_ot2_sc = df.loc[df["sample_type"].eq("cas9_OT2_SC"), _cols]

# (name, legend label, frame, curve colour, csv file name)
_series = [
    ("on sc", "on sc+dCas9", df_protein_area_cas9_on_sc, sc_cas9_palette[0], "kde_protein_area_sc_cas9_on.csv"),
    ("ot1 sc", "ot1 sc+dCas9", df_protein_area_cas9_ot1_sc, sc_cas9_palette[1], "kde_protein_area_sc_cas9_ot1.csv"),
    ("ot2 sc", "ot2 sc+dCas9", df_protein_area_cas9_ot2_sc, sc_cas9_palette[2], "kde_protein_area_sc_cas9_ot2.csv"),
]
for _name, _legend, _frame, _color, _csv_name in _series:
    print(f"num {_name}: {len(_frame)}")

fig, ax = create_figure()
for _idx, (_name, _legend, _frame, _color, _csv_name) in enumerate(_series):
    # The xlabel argument is passed for the first curve only.
    _extra = {"xlabel": "Protein area (nm^2)"} if _idx == 0 else {}
    plot_kde_individual(
        name=_name,
        data=_frame["protein_area"],
        ax=ax,
        color=_color,
        legend_label=_legend,
        **_extra,
    )
plt.xlabel("Protein area (nm^2)", fontsize=axlabel_font_size)
plt.ylabel("Density", fontsize=axlabel_font_size)
# Export each plotted frame to its own CSV.
for _name, _legend, _frame, _color, _csv_name in _series:
    _frame.to_csv(CSV_SAVE_DIR / _csv_name, index=False)
plt.legend(loc="upper right")
fig.tight_layout()
plt.savefig(HIGH_RES_GRAPH_SAVE_DIR / "kde_protein_area_sc_cas9_on_ot1_ot2.png", dpi=dpi)
plt.show()

print_break("protein volumes | cas9 sc on, ot1, ot2")

# Protein volume distributions for the three cas9 supercoiled samples, one frame each.
_cols = ["sample_type", "protein_volume"]
df_protein_volume_cas9_on_sc = df.loc[df["sample_type"].eq("cas9_ON_SC"), _cols]
df_protein_volume_cas9_ot1_sc = df.loc[df["sample_type"].eq("cas9_OT1_SC"), _cols]
df_protein_volume_cas9_ot2_sc = df.loc[df["sample_type"].eq("cas9_OT2_SC"), _cols]

# (label, frame, curve colour, csv file name)
_series = [
    ("on sc", df_protein_volume_cas9_on_sc, sc_cas9_palette[0], "kde_protein_volume_sc_cas9_on.csv"),
    ("ot1 sc", df_protein_volume_cas9_ot1_sc, sc_cas9_palette[1], "kde_protein_volume_sc_cas9_ot1.csv"),
    ("ot2 sc", df_protein_volume_cas9_ot2_sc, sc_cas9_palette[2], "kde_protein_volume_sc_cas9_ot2.csv"),
]
for _label, _frame, _color, _csv_name in _series:
    print(f"num {_label}: {len(_frame)}")

fig, ax = create_figure()
for _idx, (_label, _frame, _color, _csv_name) in enumerate(_series):
    # The xlabel argument is passed for the first curve only.
    _extra = {"xlabel": "Protein volume (nm^3)"} if _idx == 0 else {}
    plot_kde_individual(
        name=_label,
        data=_frame["protein_volume"],
        ax=ax,
        color=_color,
        legend_label=_label,
        **_extra,
    )
plt.xlabel("Protein volume (nm^3)", fontsize=axlabel_font_size)
plt.ylabel("Density", fontsize=axlabel_font_size)
# Export each plotted frame to its own CSV.
for _label, _frame, _color, _csv_name in _series:
    _frame.to_csv(CSV_SAVE_DIR / _csv_name, index=False)
plt.legend(loc="upper right")
fig.tight_layout()
plt.savefig(HIGH_RES_GRAPH_SAVE_DIR / "kde_protein_volume_sc_cas9_on_ot1_ot2.png", dpi=dpi)
plt.show()

print_break("all protein areas cas9")
# Pool the protein areas of every sample whose sample_type contains "cas9".
_is_cas9 = df["sample_type"].str.contains("cas9")
df_protein_area_cas9 = df.loc[_is_cas9, ["sample_type", "protein_area"]]
_n_cas9 = len(df_protein_area_cas9)
print(f"num cas9: {_n_cas9}")

fig, ax = create_figure()
_area_xlabel = "Protein area (nm^2)"
plot_kde_individual(
    name="cas9",
    data=df_protein_area_cas9["protein_area"],
    ax=ax,
    color=sc_cas9_palette[1],
    xlabel=_area_xlabel,
    legend_label="cas9",
)
plt.xlabel(_area_xlabel, fontsize=axlabel_font_size)
plt.ylabel("Density", fontsize=axlabel_font_size)
# Export the pooled frame that backs the figure.
_csv_path = CSV_SAVE_DIR / "kde_protein_area_sc_cas9.csv"
df_protein_area_cas9.to_csv(_csv_path, index=False)
plt.legend(loc="upper right")
fig.tight_layout()
plt.savefig(HIGH_RES_GRAPH_SAVE_DIR / "kde_protein_area_sc_cas9.png", dpi=dpi)
plt.show()

print_break("all protein volumes cas9")
# Pool the protein volumes of every sample whose sample_type contains "cas9"
# and plot them as a single KDE curve.
df_protein_volume_cas9 = df.loc[df["sample_type"].str.contains("cas9"), ["sample_type", "protein_volume"]]
print(f"num cas9: {len(df_protein_volume_cas9)}")
fig, ax = create_figure()
plot_kde_individual(
    name="cas9",
    data=df_protein_volume_cas9["protein_volume"],
    ax=ax,
    color=sc_cas9_palette[1],
    xlabel="Protein volume (nm^3)",
    legend_label="cas9",
)
plt.xlabel("Protein volume (nm^3)", fontsize=axlabel_font_size)
plt.ylabel("Density", fontsize=axlabel_font_size)
# Export the pooled frame that is plotted above, not the cas9_ON_SC-only frame, so the
# CSV holds exactly the data behind kde_protein_volume_sc_cas9.png.
df_protein_volume_cas9.to_csv(CSV_SAVE_DIR / "kde_protein_volume_sc_cas9.csv", index=False)
plt.legend(loc="upper right")
fig.tight_layout()
plt.savefig(HIGH_RES_GRAPH_SAVE_DIR / "kde_protein_volume_sc_cas9.png", dpi=dpi)
plt.show()