In [None]:
from pathlib import Path
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
import re

In [None]:
# Hex colour for every "<TEL construct> <protein>" label passed as the
# ``palette`` of the seaborn plots in this notebook. Entries are listed as
# ordered (label, colour) pairs, grouped by construct family.
# NOTE(review): the last three labels all map to "#000000" — presumably
# placeholder colours; confirm before publishing figures.
customPalette = dict(
    [
        # Shelterin complex
        ("TEL12 Shelterin", "#9254BF"),
        ("TEL5 Shelterin", "#D4B1ED"),
        ("TEL80 Shelterin", "#46106D"),
        # TRF1 / TRF2 constructs
        ("TEL12 TRF2", "#DA2828"),
        ("TEL12 TRF1cTRFH", "#6799F0"),
        ("TEL12 TRF2ΔTRFH", "#D37B7B"),
        ("TEL12 TRF1", "#0B4FC3"),
        ("TEL12 TRF2cTRFH", "#DF3E3E"),
        ("TEL80 TRF2", "#7B1414"),
        ("TEL5 TRF2", "#FF8F8F"),
        # Labels currently drawn in black
        ("TEL5 TRF2ΔTRFH", "#000000"),
        ("TEL12 Trf2cB", "#000000"),
        ("TEL12 Trf2cHinge", "#000000"),
    ]
)

In [None]:
# --- Load the rupture statistics table -------------------------------------
# Both the results directory and the CSV inside it must exist; fail early
# with a readable message otherwise.
results_dir = Path("/Users/sylvi/optical_data/shelterin/processed")
assert results_dir.exists(), f"Results directory {results_dir} does not exist."
stats_file = results_dir / "rupture_stats.csv"
assert stats_file.exists(), f"Stats file {stats_file} does not exist."
df_rupture_stats = pd.read_csv(stats_file)

# --- Overview: peak force for every condition ------------------------------
# One strip of points per "protein_telreps_concentration" value, coloured by
# "protein_telreps" through the shared palette. seaborn returns the Axes it
# drew on, which is then used for the remaining tweaks.
overview_ax = sns.stripplot(
    x="protein_telreps_concentration",
    y="peak_force_pN",
    hue="protein_telreps",
    data=df_rupture_stats,
    palette=customPalette,
    dodge=False,
)
# Rotate the category labels so they are drawn vertically.
plt.setp(overview_ax.get_xticklabels(), rotation=90)
# Anchor the legend just outside the right-hand edge of the axes.
overview_ax.legend(
    title="protein and tel reps",
    loc="upper left",
    bbox_to_anchor=(1.05, 1),
)
plt.show()

In [None]:
# Plot just shelterin tel5, tel12 and tel80
df_rupture_stats_shelterin = df_rupture_stats[df_rupture_stats["protein_name"] == "Shelterin"]


def _telreps_tick_label(tick_text: str) -> str:
    """Reduce a tick label to its telomeric repeat count.

    Returns the digits following the first ``TEL`` in ``tick_text``
    (e.g. ``"TEL12 ..."`` -> ``"12"``). Text without a ``TEL<digits>`` token
    is returned unchanged.
    """
    telreps_match = re.search(r"TEL(\d+)", tick_text)
    return telreps_match.group(1) if telreps_match else tick_text


def plot_shelterin_metric(data: pd.DataFrame, y: str, ylabel: str) -> plt.Axes:
    """Draw a box plot with the individual points overlaid for one metric.

    Categories on the x axis come from the ``protein_telreps_concentration``
    column and colours from ``protein_telreps`` via the notebook-level
    ``customPalette``. The x tick labels are shortened to the repeat count
    only and the legend is removed.

    Parameters
    ----------
    data
        Table with the columns ``protein_telreps_concentration``,
        ``protein_telreps`` and the column named by ``y``.
    y
        Name of the column plotted on the y axis.
    ylabel
        Text used as the y-axis label.

    Returns
    -------
    matplotlib Axes
        The axes the plot was drawn on; the caller decides when to show it.
    """
    # A dedicated figure per call, so consecutive metrics never end up on the
    # same axes regardless of whether the previous figure was closed.
    _, ax = plt.subplots()

    # Arguments shared by the box layer and the point layer.
    shared_kwargs = dict(
        data=data,
        x="protein_telreps_concentration",
        hue="protein_telreps",
        y=y,
        dodge=False,
        palette=customPalette,
    )
    # Outlier markers are disabled on the boxes because every observation is
    # drawn by the strip plot on top of them.
    sns.boxplot(**shared_kwargs, showfliers=False, ax=ax)
    sns.stripplot(**shared_kwargs, ax=ax)

    # Drop the legend seaborn added (if any) instead of creating a new one
    # only to hide it.
    legend = ax.get_legend()
    if legend is not None:
        legend.remove()

    # For each xtick, only show the number of telreps. Tick positions are
    # re-applied first so the new labels are attached to fixed locations.
    tick_positions = ax.get_xticks()
    tick_labels = [_telreps_tick_label(tick.get_text()) for tick in ax.get_xticklabels()]
    ax.set_xticks(tick_positions)
    ax.set_xticklabels(tick_labels, rotation=90)

    ax.set_xlabel("Telomeric repeats")
    ax.set_ylabel(ylabel)
    return ax


# One figure per metric: (column to plot, y-axis label).
for metric_column, metric_label in (
    ("work_done_fJ", "Work done (fJ)"),
    ("peak_force_pN", "Peak force (pN)"),
):
    ax = plot_shelterin_metric(df_rupture_stats_shelterin, y=metric_column, ylabel=metric_label)
    plt.show()