In [None]:
from pathlib import Path
import pickle as pkl
import re

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

In [None]:
# NOTE(review): absolute, machine-specific path — point this at your own copy of
# the extracted grains before running the notebook elsewhere.
DATA_DIR = Path("/Users/sylvi/topo_data/hariborings/extracted_grains/")
DATE = "2024-05-21"
assert DATA_DIR.exists(), f"data directory not found: {DATA_DIR}"

# Grains whose p_to_nm value is above this are left out of the dataframe.
MAX_P_TO_NM = 0.59

SAMPLES = [
    "unbound_ON_REL",
    "unbound_ON_SC",
    "unbound_OT1_REL",
    "unbound_OT1_SC",
    "unbound_OT2_REL",
    "unbound_OT2_SC",
    "cas9_ON_SC",
    "cas9_OT1_SC",
    "cas9_OT2_SC",
]

# Create a big dataframe with one row per retained grain, holding:
# - sample type
# - p_to_nm
# - min_feret
# - max_feret
FERET_COLUMNS = ["sample_type", "p_to_nm", "min_feret", "max_feret"]

data_list = []

for sample_type in SAMPLES:
    # Only the cas9_* and unbound_* sample families are expected; fail with a
    # descriptive message on anything else instead of a bare ValueError.
    if "cas9_" not in sample_type and "unbound_" not in sample_type:
        raise ValueError(f"unrecognised sample type: {sample_type!r}")

    print(f"loading {sample_type}")
    # Load the data from pickle.
    # NOTE: unpickling can execute arbitrary code — only load files you produced yourself.
    # feret_data is rebound on every iteration, so after the loop it holds the
    # dictionary of the last entry in SAMPLES.
    # NOTE(review): later cells may rely on that leftover value — confirm.
    with open(DATA_DIR / sample_type / f"date_{DATE}" / "feret_grain_dict_fig1.pkl", "rb") as f:
        feret_data = pkl.load(f)

    # Each grain also carries an "image" and a mask (key "predicted_mask" for
    # cas9_* samples, "mask" for unbound_* samples); neither is needed for the
    # table built here, so only the scalar measurements are read.
    for grain in feret_data.values():
        p_to_nm = grain["p_to_nm"]

        if p_to_nm <= MAX_P_TO_NM:
            data_list.append(
                {
                    "sample_type": sample_type,
                    "p_to_nm": p_to_nm,
                    "min_feret": grain["min_feret"],
                    "max_feret": grain["max_feret"],
                }
            )

print(f"num grains: {len(data_list)}")

# Passing the columns explicitly keeps the frame (and the CSV header) well-formed
# even when no grain passes the p_to_nm filter.
df = pd.DataFrame(data_list, columns=FERET_COLUMNS)

print(df.head())


# Save the dataframe
df.to_csv(DATA_DIR / f"feret_data_{DATE}_max_p_to_nm_{MAX_P_TO_NM}.csv", index=False)

In [None]:
# Plot violin plots of the min ferets for each sample group.

# Order of the violins along the x axis: the unbound REL samples first, then the
# unbound SC samples, then the cas9 SC samples.
sample_order = [
    "unbound_ON_REL",
    "unbound_OT1_REL",
    "unbound_OT2_REL",
    "unbound_ON_SC",
    "unbound_OT1_SC",
    "unbound_OT2_SC",
    "cas9_ON_SC",
    "cas9_OT1_SC",
    "cas9_OT2_SC",
]

# Tick label for every sample type. The labels are looked up through
# sample_order rather than kept as a separately ordered list, so a change to
# the plotting order can never leave a violin carrying another sample's label.
tick_label_dict = {
    "unbound_ON_REL": "ON REL",
    "unbound_OT1_REL": "OT1 REL",
    "unbound_OT2_REL": "OT2 REL",
    "unbound_ON_SC": "ON SC",
    "unbound_OT1_SC": "OT1 SC",
    "unbound_OT2_SC": "OT2 SC",
    "cas9_ON_SC": "ON SC +dCas9",
    "cas9_OT1_SC": "OT1 SC +dCas9",
    "cas9_OT2_SC": "OT2 SC +dCas9",
}
x_ticks = [tick_label_dict[sample] for sample in sample_order]

# Fill colour of each violin, keyed by sample type.
colour_dict = {
    "unbound_ON_SC": "#C1879C",
    "unbound_OT1_SC": "#99ACBD",
    "unbound_OT2_SC": "#ECDFB6",
    "unbound_ON_REL": "#CE5782",
    "unbound_OT1_REL": "#6396C3",
    "unbound_OT2_REL": "#F3D16D",
    "cas9_ON_SC": "#D81B60",
    "cas9_OT1_SC": "#1E88E5",
    "cas9_OT2_SC": "#FFC107",
}

# Print ns for each sample (0 for a sample with no rows in df).
sample_counts = df["sample_type"].value_counts()
for sample in sample_order:
    print(f"{sample}: {sample_counts.get(sample, 0)}")

fig, ax = plt.subplots(figsize=(12, 8))

sns.violinplot(
    data=df, ax=ax, x="sample_type", y="min_feret", hue="sample_type", palette=colour_dict, order=sample_order
)

ax.set_ylabel("Minimum width (nm)", fontsize=20)
ax.set_xlabel("Sample type", fontsize=20)
# Resize the automatically generated y tick labels instead of overwriting them
# with fixed strings, so labels always stay in sync with the tick positions.
ax.tick_params(axis="y", labelsize=18)
# Pin the x tick positions before replacing their labels, then apply the
# human-readable labels in the same order as the violins.
ticks = ax.get_xticks()
ax.set_xticks(ticks)
ax.set_xticklabels(x_ticks, fontsize=18, rotation=45, ha="right")
plt.show()

In [None]:
# Plot the largest 3 cas9 OT2_SC grains in terms of min feret

min_feret_threshold = 14

# Filter the dictionary
large_feret_dict = {}