In [None]:
import pickle as pkl
import re
from datetime import datetime
from pathlib import Path

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
from matplotlib.ticker import FuncFormatter

In [None]:
# --- Configuration ---
# NOTE(review): both directories are absolute, machine-specific paths; edit them
# here when running the notebook on another machine.
DATA_DIR = Path("/Users/sylvi/topo_data/hariborings/extracted_grains/")
# Only referenced by the (currently commented-out) savefig calls in this notebook.
FIG_SAVE_DIR = Path("/Volumes/shared-3/pyne_group/Shared/Papers/cas9_minicircles/figure_1/")
# Selects the `date_<LOAD_DATE>` sub-folder of each sample that the grain pickle is read from.
LOAD_DATE = "2024-05-21"
# Today's date as YYYY-MM-DD, used to stamp output file names.
TODAY_DATE = datetime.today().strftime("%Y-%m-%d")
# Fail early, and say which path is missing, rather than part-way through loading.
assert DATA_DIR.exists(), f"DATA_DIR does not exist: {DATA_DIR}"

# Grains whose `p_to_nm` is above this value are left out of the analysis.
MAX_P_TO_NM = 10.0

# Sub-folder names under DATA_DIR, one per sample; every name must contain
# either "cas9_" or "unbound_" (this decides which mask key is read).
SAMPLES = [
    "unbound_ON_REL",
    "unbound_ON_SC",
    "unbound_OT1_REL",
    "unbound_OT1_SC",
    "unbound_OT2_REL",
    "unbound_OT2_SC",
    "cas9_ON_SC",
    "cas9_OT1_SC",
    "cas9_OT2_SC",
]

# Build one dataframe with a row per accepted grain, holding:
# - sample type
# - p_to_nm
# - min feret, max feret and their ratio
# - contour length, protein area and protein volume
# Grains whose min feret falls outside the thresholds below are not added to the
# dataframe; they are kept (with image and mask) in `bad_feret_list` for inspection.

# NOTE(review): presumably in nm, matching the "(nm)" axis labels used for min_feret - confirm.
min_feret_lower_threshold = 4
min_feret_upper_threshold = 20

data_list = []
bad_feret_list = []

for sample_type in SAMPLES:
    print(f"loading {sample_type}")

    # cas9 samples keep their mask under "predicted_mask", unbound samples under "mask".
    # Resolved before touching the file so an unknown sample name fails immediately
    # and the error says which name was rejected.
    if "cas9_" in sample_type:
        mask_key = "predicted_mask"
    elif "unbound_" in sample_type:
        mask_key = "mask"
    else:
        raise ValueError(f"unrecognised sample type {sample_type!r}: expected 'cas9_' or 'unbound_' in its name")

    pickle_path = DATA_DIR / sample_type / f"date_{LOAD_DATE}" / "feret_grain_dict_fig1_with_contour_length.pkl"
    # NOTE(review): pickle.load can execute arbitrary code from the file - only load pickles you produced yourself.
    with open(pickle_path, "rb") as f:
        feret_data = pkl.load(f)

    # The dict keys (grain indices) are not needed, only the per-grain records.
    for grain in feret_data.values():
        p_to_nm = grain["p_to_nm"]
        if p_to_nm > MAX_P_TO_NM:
            continue

        min_feret = grain["min_feret"]
        max_feret = grain["max_feret"]

        if min_feret < min_feret_lower_threshold or min_feret > min_feret_upper_threshold:
            # Out-of-range width: keep everything needed to plot the grain later.
            bad_feret_list.append(
                {
                    "sample_type": sample_type,
                    "image": grain["image"],
                    "mask": grain[mask_key],
                    "p_to_nm": p_to_nm,
                    "min_feret": min_feret,
                    "max_feret": max_feret,
                    "min_feret_coords": grain["min_feret_coords"],
                    "max_feret_coords": grain["max_feret_coords"],
                }
            )
            continue

        data_list.append(
            {
                "sample_type": sample_type,
                "p_to_nm": p_to_nm,
                "min_feret": min_feret,
                "max_feret": max_feret,
                "contour_length": grain["contour_length"],
                "protein_area": grain["protein_area"],
                "protein_volume": grain["protein_volume"],
                "feret_ratio": min_feret / max_feret,
            }
        )

print(f"num grains: {len(data_list)}")

df = pd.DataFrame(data_list)

print(df.head())

print(f"num bad feret: {len(bad_feret_list)}")

# Save the dataframe
# df.to_csv(DATA_DIR / f"feret_data_{TODAY_DATE}_max_p_to_nm_{MAX_P_TO_NM}.csv", index=False)

In [None]:
# Violin plots of the min feret (minimum width) for each sample group

# Tick labels, listed in the same order as `sample_order` below.
x_ticks = [
    "ON SC",
    "OT1 SC",
    "OT2 SC",
    "ON REL",
    "OT1 REL",
    "OT2 REL",
    "ON SC +dCas9",
    "OT1 SC +dCas9",
    "OT2 SC +dCas9",
]

# Left-to-right order of the violins.
# Alternative ordering: list the three unbound REL samples before the unbound SC ones.
sample_order = [
    "unbound_ON_SC",
    "unbound_OT1_SC",
    "unbound_OT2_SC",
    "unbound_ON_REL",
    "unbound_OT1_REL",
    "unbound_OT2_REL",
    "cas9_ON_SC",
    "cas9_OT1_SC",
    "cas9_OT2_SC",
]

# Fill colour of each sample's violin.
colour_dict = {
    "unbound_ON_SC": "#C1879C",
    "unbound_OT1_SC": "#99ACBD",
    "unbound_OT2_SC": "#ECDFB6",
    "unbound_ON_REL": "#CE5782",
    "unbound_OT1_REL": "#6396C3",
    "unbound_OT2_REL": "#F3D16D",
    "cas9_ON_SC": "#D81B60",
    "cas9_OT1_SC": "#1E88E5",
    "cas9_OT2_SC": "#FFC107",
}

# The tick labels are applied positionally, so the two lists must pair up one-to-one.
assert len(x_ticks) == len(sample_order), "x_ticks and sample_order must have the same length"

# Print ns for each sample (counted once, then looked up per sample)
sample_counts = df["sample_type"].value_counts()
for sample in sample_order:
    print(f"{sample}: {sample_counts.get(sample, 0)}")

fig, ax = plt.subplots(figsize=(12, 8))

sns.violinplot(
    data=df, ax=ax, x="sample_type", y="min_feret", hue="sample_type", palette=colour_dict, order=sample_order
)

ax.set_ylabel("Minimum width (nm)", fontsize=20)
ax.set_xlabel("Sample type", fontsize=20)
# Only resize the y tick labels. Replacing them with set_yticklabels(get_yticks())
# would freeze the label text while the tick positions can still move (e.g. after
# tight_layout), leaving labels that no longer match their ticks.
ax.tick_params(axis="y", labelsize=18)
# Pin the categorical tick positions before giving them human-readable labels.
ticks = ax.get_xticks()
ax.set_xticks(ticks)
ax.set_xticklabels(x_ticks, fontsize=18, rotation=45, ha="right")
# ax.set_title(f"Min Feret width for grains with p_to_nm < {MAX_P_TO_NM}", fontsize=20)
# ax.set_ylim(0, 20)
fig.tight_layout()
# plt.savefig(FIG_SAVE_DIR / f"min_feret_violin_plot_{TODAY_DATE}_max_p_to_nm_{MAX_P_TO_NM}.png", dpi=500)
plt.show()

In [None]:
# Plot every grain that was rejected for an out-of-range min feret:
# the image on the left, its mask (with the min feret end points) on the right.


def _scaled_tick_formatter(p_to_nm):
    """Return a tick formatter that labels a pixel position as ``pixel * p_to_nm`` to one decimal place.

    NOTE(review): `p_to_nm` is presumably the pixel-to-nanometre scale, going by its name - confirm.
    """
    return FuncFormatter(lambda pixel, _pos: f"{pixel * p_to_nm:.1f}")


for grain in bad_feret_list:
    sample_type = grain["sample_type"]
    p_to_nm = grain["p_to_nm"]
    min_feret_coords = grain["min_feret_coords"]

    print(sample_type)
    fig, ax = plt.subplots(1, 2, figsize=(12, 6))
    ax[0].imshow(grain["image"])
    ax[0].set_title(f"sample: {sample_type} image")

    ax[1].imshow(grain["mask"])
    ax[1].set_title(f"sample: {sample_type} mask")
    # Mark the two end points of the min feret.
    # NOTE(review): column 0 is plotted as x and column 1 as y - confirm the stored
    # coordinates are (x, y) and not (row, col), otherwise the points are transposed.
    ax[1].scatter(min_feret_coords[0, 0], min_feret_coords[0, 1], color="r")
    ax[1].scatter(min_feret_coords[1, 0], min_feret_coords[1, 1], color="r")

    # Rescale the tick labels with a formatter instead of overwriting the label text:
    # labels written with set_xticklabels are fixed strings taken from the tick
    # positions that exist before the figure is drawn, so they can end up on the wrong ticks.
    for panel in ax:
        panel.xaxis.set_major_formatter(_scaled_tick_formatter(p_to_nm))
        panel.yaxis.set_major_formatter(_scaled_tick_formatter(p_to_nm))

    # Show the image shape in the title; interpolating the image itself would dump the whole array.
    fig.suptitle(f"sample: {sample_type} image shape: {np.shape(grain['image'])} p_to_nm: {p_to_nm}")
    plt.show()
    # One figure is created per rejected grain; close each one so they do not pile up in memory.
    plt.close(fig)

In [None]:
import matplotlib.colors as mcolors


def hsv_to_hex(h, s, v):
    """Convert a hue / saturation / value triple into a hex colour string.

    The three components are packed into one sequence, converted to RGB with
    ``matplotlib.colors.hsv_to_rgb`` and formatted with ``matplotlib.colors.rgb2hex``.
    Every call in this notebook passes components between 0 and 1.
    """
    hsv_triplet = [h, s, v]
    return mcolors.rgb2hex(mcolors.hsv_to_rgb(hsv_triplet))


def print_mean_and_std(type: str, data: np.ndarray):
    """Print ``data``'s mean and standard deviation, each to two decimal places.

    Parameters
    ----------
    type : str
        Text printed in front of the statistics to identify the data. The name
        shadows the ``type`` builtin inside this function; it is kept so that
        existing calls keep working.
    data : np.ndarray
        Values to summarise. The notebook also passes pandas Series here.

    The statistics come from ``np.mean`` and ``np.std`` called with their default arguments.
    """
    mean_value = np.mean(data)
    std_value = np.std(data)
    print(f"{type} | mean: {mean_value:.2f} std: {std_value:.2f}")

In [None]:
# Figure size and axis-label font size shared by the KDE figures in the cells below.
axlabel_font_size = 14
figsize = (4, 2.5)

# Each palette holds one hue at three intensities: saturation rises
# (0.5, 0.75, 1.0) while value falls (1.0, 0.9, 0.8).
_intensity_steps = ((0.5, 1.0), (0.75, 0.9), (1.0, 0.8))

# pink
sc_palette = [hsv_to_hex(0.9, saturation, value) for saturation, value in _intensity_steps]
# blue
sc_cas9_palette = [hsv_to_hex(0.6, saturation, value) for saturation, value in _intensity_steps]
# green
rel_palette = [hsv_to_hex(0.45, saturation, value) for saturation, value in _intensity_steps]

# Feret ratios

In [None]:
# Compare feret ratios (min feret / max feret) between groups of samples


def _plot_feret_ratio_kdes(groups, legend_loc="upper left"):
    """Overlay filled KDEs of the ``feret_ratio`` column for several groups on one figure.

    Parameters
    ----------
    groups : list of tuple
        One ``(legend_label, stats_name, group_df, colour)`` tuple per curve, drawn in
        list order. ``stats_name`` is the prefix used when the group's mean and
        standard deviation are printed.
    legend_loc : str
        Legend location passed to ``ax.legend``.
    """
    fig, ax = plt.subplots(figsize=figsize)
    for legend_label, stats_name, group_df, colour in groups:
        # Each curve is a separate single-series call, so every curve is normalised on its own.
        sns.kdeplot(group_df["feret_ratio"], ax=ax, label=legend_label, fill=True, color=colour)
        print_mean_and_std(stats_name, group_df["feret_ratio"])
    ax.set_xlabel("Aspect ratio", fontsize=axlabel_font_size)
    ax.set_ylabel("Density", fontsize=axlabel_font_size)
    ax.legend(loc=legend_loc)
    plt.show()


# Row masks reused by the groupings below.
is_rel = df["sample_type"].str.contains("REL")
is_sc = df["sample_type"].str.contains("SC")
is_cas9 = df["sample_type"].str.contains("cas9")

# relaxed vs supercoiled, with the dCas9-bound samples counted as supercoiled
df_relaxed = df[is_rel]
df_sc = df[is_sc]
print(f"num relaxed: {len(df_relaxed)}")
print(f"num sc: {len(df_sc)}")

_plot_feret_ratio_kdes(
    [
        ("rel", "all rel", df_relaxed, rel_palette[1]),
        ("sc", "all sc", df_sc, sc_palette[1]),
    ]
)

# relaxed vs supercoiled, with the dCas9-bound samples as their own group
df_relaxed = df[is_rel]
df_sc_no_cas9 = df[is_sc & ~is_cas9]
df_sc_cas9 = df[is_sc & is_cas9]
print(f"num relaxed: {len(df_relaxed)}")
print(f"num sc no cas9: {len(df_sc_no_cas9)}")
print(f"num sc cas9: {len(df_sc_cas9)}")

_plot_feret_ratio_kdes(
    [
        ("rel", "all rel", df_relaxed, rel_palette[1]),
        ("sc", "all sc", df_sc_no_cas9, sc_palette[1]),
        ("sc+dCas9", "all sc cas9", df_sc_cas9, sc_cas9_palette[1]),
    ]
)

# on_sc vs ot1_sc vs ot2_sc for non cas9 bound supercoiled
df_on_sc = df[df["sample_type"] == "unbound_ON_SC"]
df_ot1_sc = df[df["sample_type"] == "unbound_OT1_SC"]
df_ot2_sc = df[df["sample_type"] == "unbound_OT2_SC"]
print(f"num on sc: {len(df_on_sc)}")
print(f"num ot1 sc: {len(df_ot1_sc)}")
print(f"num ot2 sc: {len(df_ot2_sc)}")

_plot_feret_ratio_kdes(
    [
        ("on sc", "on sc", df_on_sc, sc_palette[0]),
        ("ot1 sc", "ot1 sc", df_ot1_sc, sc_palette[1]),
        ("ot2 sc", "ot2 sc", df_ot2_sc, sc_palette[2]),
    ]
)

# dcas9 on vs ot1 vs ot2, using the three shades of sc_cas9_palette
df_cas9_on = df[df["sample_type"] == "cas9_ON_SC"]
df_cas9_ot1 = df[df["sample_type"] == "cas9_OT1_SC"]
df_cas9_ot2 = df[df["sample_type"] == "cas9_OT2_SC"]
print(f"num cas9 on: {len(df_cas9_on)}")
print(f"num cas9 ot1: {len(df_cas9_ot1)}")
print(f"num cas9 ot2: {len(df_cas9_ot2)}")

_plot_feret_ratio_kdes(
    [
        ("on sc\n+dCas9", "on sc cas9", df_cas9_on, sc_cas9_palette[0]),
        ("ot1 sc\n+dCas9", "ot1 sc cas9", df_cas9_ot1, sc_cas9_palette[1]),
        ("ot2 sc\n+dCas9", "ot2 sc cas9", df_cas9_ot2, sc_cas9_palette[2]),
    ]
)

# Min feret

In [None]:
# Min feret distributions: relaxed vs supercoiled


def _plot_min_feret_kdes(groups, legend_loc):
    """Overlay filled KDEs of the ``min_feret`` column for several groups on one figure.

    ``groups`` is a list of ``(legend_label, stats_name, group_df, colour)`` tuples,
    drawn in list order; ``stats_name`` prefixes the printed mean / std of each group.
    ``legend_loc`` is the legend location.
    """
    fig, ax = plt.subplots(figsize=figsize)
    for legend_label, stats_name, group_df, colour in groups:
        sns.kdeplot(group_df["min_feret"], ax=ax, label=legend_label, fill=True, color=colour)
        print_mean_and_std(stats_name, group_df["min_feret"])
    ax.set_xlabel("Min width (nm)", fontsize=axlabel_font_size)
    ax.set_ylabel("Density", fontsize=axlabel_font_size)
    ax.legend(loc=legend_loc)
    plt.show()


# Row masks reused by both groupings below.
rel_rows = df["sample_type"].str.contains("REL")
sc_rows = df["sample_type"].str.contains("SC")
cas9_rows = df["sample_type"].str.contains("cas9")

# relaxed vs all supercoiled (dCas9-bound samples included)
df_relaxed = df[rel_rows]
df_sc = df[sc_rows]
print(f"num relaxed: {len(df_relaxed)}")
print(f"num sc: {len(df_sc)}")

_plot_min_feret_kdes(
    [
        ("rel", "all rel", df_relaxed, rel_palette[1]),
        ("sc", "all sc", df_sc, sc_palette[1]),
    ],
    legend_loc="upper left",
)

# same comparison with the dCas9-bound samples as their own group
df_relaxed = df[rel_rows]
df_sc_no_cas9 = df[sc_rows & ~cas9_rows]
df_sc_cas9 = df[sc_rows & cas9_rows]
print(f"num relaxed: {len(df_relaxed)}")
print(f"num sc no cas9: {len(df_sc_no_cas9)}")
print(f"num sc cas9: {len(df_sc_cas9)}")

_plot_min_feret_kdes(
    [
        ("rel", "all rel", df_relaxed, rel_palette[1]),
        ("sc", "all sc", df_sc_no_cas9, sc_palette[1]),
        ("sc cas9", "all sc cas9", df_sc_cas9, sc_cas9_palette[1]),
    ],
    legend_loc="upper right",
)

# Contour lengths

In [None]:
# Contour length distributions for different groupings of the samples

x_ticks = [
    "ON SC",
    "OT1 SC",
    "OT2 SC",
    "ON REL",
    "OT1 REL",
    "OT2 REL",
    "ON SC +dCas9",
    "OT1 SC +dCas9",
    "OT2 SC +dCas9",
]

sample_order = [
    "unbound_ON_SC",
    "unbound_OT1_SC",
    "unbound_OT2_SC",
    "unbound_ON_REL",
    "unbound_OT1_REL",
    "unbound_OT2_REL",
    "cas9_ON_SC",
    "cas9_OT1_SC",
    "cas9_OT2_SC",
]

colour_dict = {
    "unbound_ON_SC": "#C1879C",
    "unbound_OT1_SC": "#99ACBD",
    "unbound_OT2_SC": "#ECDFB6",
    "unbound_ON_REL": "#CE5782",
    "unbound_OT1_REL": "#6396C3",
    "unbound_OT2_REL": "#F3D16D",
    "cas9_ON_SC": "#D81B60",
    "cas9_OT1_SC": "#1E88E5",
    "cas9_OT2_SC": "#FFC107",
}


def _plot_contour_length_kdes(groups, legend_loc=None):
    """Overlay filled KDEs of the ``contour_length`` column for several groups on one figure.

    Parameters
    ----------
    groups : list of tuple
        One ``(legend_label, stats_name, group_df, colour)`` tuple per curve, drawn in
        list order. ``stats_name`` is the prefix used when the group's mean and
        standard deviation are printed.
    legend_loc : str or None
        Legend location passed to ``ax.legend``; ``None`` keeps matplotlib's default placement.
    """
    fig, ax = plt.subplots(figsize=figsize)
    for legend_label, stats_name, group_df, colour in groups:
        sns.kdeplot(
            data=group_df, ax=ax, x="contour_length", common_norm=False, fill=True, label=legend_label, color=colour
        )
        print_mean_and_std(stats_name, group_df["contour_length"])
    ax.set_ylabel("Density", fontsize=axlabel_font_size)
    ax.set_xlabel("Contour length (nm)", fontsize=axlabel_font_size)
    ax.legend(loc=legend_loc)
    plt.show()


# Print ns for each sample.
# The rows are selected on `sample_type`: comparing `contour_length` with the
# sample name can never match, so every count would be reported as 0.
for sample in sample_order:
    print(f"{sample}: {len(df[df['sample_type'] == sample])}")

# Row masks reused by the groupings below.
is_rel = df["sample_type"].str.contains("REL")
is_sc = df["sample_type"].str.contains("SC")
is_cas9 = df["sample_type"].str.contains("cas9")

# Separate by just cas9 bound and unbound and plot the kde
df_cas9 = df[is_cas9]
df_unbound = df[df["sample_type"].str.contains("unbound")]
_plot_contour_length_kdes(
    [
        ("dCas9 bound", "sc cas9", df_cas9, sc_cas9_palette[1]),
        # df_unbound holds the REL samples as well as the SC ones, so it is not reported as "sc".
        ("unbound", "all unbound", df_unbound, sc_palette[1]),
    ]
)

# Relaxed vs sc
df_relaxed = df[is_rel]
df_sc = df[is_sc]
_plot_contour_length_kdes(
    [
        ("rel", "all rel", df_relaxed, rel_palette[1]),
        ("Supercoiled (including +dCas9)", "all sc", df_sc, sc_palette[1]),
    ]
)

# Relaxed vs sc with dCas9 separate
df_relaxed = df[is_rel]
df_sc_no_cas9 = df[is_sc & ~is_cas9]
df_sc_cas9 = df[is_sc & is_cas9]
_plot_contour_length_kdes(
    [
        ("rel", "all rel", df_relaxed, rel_palette[1]),
        ("sc", "all sc", df_sc_no_cas9, sc_palette[1]),
        ("sc+dCas9", "all sc cas9", df_sc_cas9, sc_cas9_palette[1]),
    ],
    legend_loc="upper right",
)

# sc vs rel without cas9
df_relaxed = df[is_rel & ~is_cas9]
df_sc = df[is_sc & ~is_cas9]
_plot_contour_length_kdes(
    [
        ("rel", "all rel", df_relaxed, rel_palette[1]),
        ("sc", "all sc", df_sc, sc_palette[1]),
    ],
    legend_loc="upper left",
)

# cas9 bound on_sc, ot1_sc, ot2_sc
df_cas9_on = df[df["sample_type"] == "cas9_ON_SC"]
df_cas9_ot1 = df[df["sample_type"] == "cas9_OT1_SC"]
df_cas9_ot2 = df[df["sample_type"] == "cas9_OT2_SC"]
print(f"num cas9 on: {len(df_cas9_on)}")
print(f"num cas9 ot1: {len(df_cas9_ot1)}")
print(f"num cas9 ot2: {len(df_cas9_ot2)}")

_plot_contour_length_kdes(
    [
        ("on sc+dCas9", "on sc cas9", df_cas9_on, sc_cas9_palette[0]),
        ("ot1 sc+dCas9", "ot1 sc cas9", df_cas9_ot1, sc_cas9_palette[1]),
        ("ot2 sc+dCas9", "ot2 sc cas9", df_cas9_ot2, sc_cas9_palette[2]),
    ],
    legend_loc="upper right",
)

# Protein areas

In [None]:
# Protein area and protein volume distributions for the three dCas9-bound samples

df_cas9_on_sc = df[df["sample_type"] == "cas9_ON_SC"]
df_cas9_ot1_sc = df[df["sample_type"] == "cas9_OT1_SC"]
df_cas9_ot2_sc = df[df["sample_type"] == "cas9_OT2_SC"]

# (legend label, name printed with the stats, rows, colour) for each curve, in drawing order
cas9_groups = [
    ("on sc+dCas9", "on sc cas9", df_cas9_on_sc, sc_cas9_palette[0]),
    ("ot1 sc+dCas9", "ot1 sc cas9", df_cas9_ot1_sc, sc_cas9_palette[1]),
    ("ot2 sc+dCas9", "ot2 sc cas9", df_cas9_ot2_sc, sc_cas9_palette[2]),
]

# One figure per measurement: protein areas first, then protein volumes
for column, axis_label in (
    ("protein_area", "Protein area (nm^2)"),
    ("protein_volume", "Protein volume (nm^3)"),
):
    fig, ax = plt.subplots(figsize=figsize)

    for legend_label, stats_name, group_df, colour in cas9_groups:
        sns.kdeplot(
            data=group_df,
            ax=ax,
            x=column,
            common_norm=False,
            fill=True,
            label=legend_label,
            color=colour,
        )
        print_mean_and_std(stats_name, group_df[column])

    ax.set_ylabel("Density", fontsize=axlabel_font_size)
    ax.set_xlabel(axis_label, fontsize=axlabel_font_size)
    ax.legend(loc="upper right")
    plt.show()