In [None]:
import os
import re
from pathlib import Path

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns

In [None]:
# Colour lookup keyed by "<TEL construct> <protein>" strings. The plotting cells
# pass it as `palette=` together with hue="protein_telreps", so the keys are
# expected to match the values of that column.
customPalette = {
    "TEL12 Shelterin": "#9254BF",
    "TEL5 Shelterin": "#D4B1ED",
    "TEL80 Shelterin": "#46106D",
    "TEL12 TRF2": "#DA2828",
    "TEL12 TRF1cTRFH": "#6799F0",
    "TEL12 TRF2ΔTRFH": "#D37B7B",
    "TEL12 TRF1": "#0B4FC3",
    "TEL12 TRF2cTRFH": "#DF3E3E",
    "TEL80 TRF2": "#7B1414",
    "TEL5 TRF2": "#FF8F8F",
    # The remaining entries are all plain black — presumably placeholders until
    # distinct colours are chosen; TODO confirm.
    "TEL5 TRF2ΔTRFH": "#000000",
    # NOTE(review): "Trf2" is cased differently from the "TRF2" keys above —
    # confirm these two keys match the labels in the data.
    "TEL12 Trf2cB": "#000000",
    "TEL12 Trf2cHinge": "#000000",
}

# Colour lookup keyed by protein name only. The comparison cells pass it as
# `palette=` when colouring by hue="protein_name". The TRF1cTRFH and TRF2cTRFH
# colours are the same hex values as their TEL12 entries in customPalette.
proteinPalette = {
    "Shelterin": "#9060A8",
    "TRF1": "#2357A7",
    "TRF2": "#C63F40",
    "TRF1cTRFH": "#6799F0",
    "TRF2cTRFH": "#DF3E3E",
    
}

In [None]:
# Load the rupture statistics table and take a first look at every condition.
#
# The results directory can be overridden with the SHELTERIN_RESULTS_DIR
# environment variable so the notebook is not tied to one machine; when the
# variable is unset the original location is used.
results_dir = Path(
    os.environ.get("SHELTERIN_RESULTS_DIR", "/Users/sylvi/optical_data/shelterin/processed")
)
assert results_dir.exists(), f"Results directory {results_dir} does not exist."
stats_file = results_dir / "rupture_stats.csv"
assert stats_file.exists(), f"Stats file {stats_file} does not exist."
df_rupture_stats = pd.read_csv(stats_file)

# Fail here, with a readable message, rather than deep inside a plotting call
# if the CSV lacks one of the columns the cells below rely on.
required_columns = {
    "protein_name",
    "telreps",
    "protein_telreps",
    "protein_telreps_concentration",
    "peak_force_pN",
    "work_done_fJ",
}
missing_columns = required_columns - set(df_rupture_stats.columns)
assert not missing_columns, f"Stats file {stats_file} is missing columns: {sorted(missing_columns)}"

# Overview: peak force for every protein/construct/concentration combination.
fig, ax = plt.subplots()
sns.stripplot(
    data=df_rupture_stats,
    x="protein_telreps_concentration",
    hue="protein_telreps",
    y="peak_force_pN",
    dodge=False,
    palette=customPalette,
    ax=ax,
)
ax.tick_params(axis="x", labelrotation=0)
# Place the legend outside the plot area so it does not cover any points.
ax.legend(title="protein and tel reps", bbox_to_anchor=(1.05, 1), loc="upper left")
plt.show()

print("Protein and tel reps counts:")
print(df_rupture_stats["protein_telreps"].value_counts())

In [None]:
# Shelterin only: work done and peak force across the TEL5, TEL12 and TEL80 constructs.
df_rupture_stats_shelterin = df_rupture_stats.loc[df_rupture_stats["protein_name"] == "Shelterin"]

# The two figures are identical apart from the plotted column and its axis label.
for metric_column, metric_label in (
    ("work_done_fJ", "Work done (fJ)"),
    ("peak_force_pN", "Peak force (pN)"),
):
    shelterin_plot_kwargs = dict(
        data=df_rupture_stats_shelterin,
        x="protein_telreps_concentration",
        hue="protein_telreps",
        y=metric_column,
        dodge=False,
        palette=customPalette,
    )
    # Box summary (outliers hidden) with the individual points drawn on top.
    sns.boxplot(showfliers=False, **shelterin_plot_kwargs)
    sns.stripplot(**shelterin_plot_kwargs)
    plt.legend().set_visible(False)

    # Reduce every tick label to the number following "TEL"; a label without
    # that pattern is left unchanged.
    ax = plt.gca()
    tick_positions = ax.get_xticks()
    short_labels = []
    for tick_label in ax.get_xticklabels():
        full_text = tick_label.get_text()
        found = re.search(r"TEL(\d+)", full_text)
        short_labels.append(found.group(1) if found else full_text)
    ax.set_xticks(tick_positions)
    ax.set_xticklabels(short_labels, rotation=0)

    plt.xlabel("Telomeric repeats")
    plt.ylabel(metric_label)
    plt.show()
# Shelterin peak force against the numeric repeat count, with a straight-line
# fit drawn over the points.
fig, ax = plt.subplots()
sns.scatterplot(
    data=df_rupture_stats_shelterin,
    x="telreps",
    hue="protein_telreps",
    y="peak_force_pN",
    palette=customPalette,
    ax=ax,
)
# Only the regression line is drawn here (scatter=False); the points come from
# the scatterplot call above.
sns.regplot(
    data=df_rupture_stats_shelterin,
    x="telreps",
    y="peak_force_pN",
    scatter=False,
    ci=None,
    line_kws={"color": "black", "linestyle": "--"},
    order=1,
    label="Linear fit",
    ax=ax,
)
# scatterplot built its legend before the fit line existed, so the
# "Linear fit" label would never be shown. Rebuild the legend from every
# labelled artist, keeping whatever title the hue legend had.
hue_legend = ax.get_legend()
legend_title = hue_legend.get_title().get_text() if hue_legend is not None else None
ax.legend(title=legend_title)
ax.set_xlabel("Telomeric repeats")
ax.set_ylabel("Peak force (pN)")
plt.show()

In [None]:
# TRF2 only: work done and peak force across the TEL5, TEL12 and TEL80 constructs.
df_rupture_stats_trf2 = df_rupture_stats[df_rupture_stats["protein_name"] == "TRF2"]

# Both figures share everything except the plotted column and its axis label,
# so they are drawn by one loop to keep their formatting in step.
for y_column, y_label in [
    ("work_done_fJ", "Work done (fJ)"),
    ("peak_force_pN", "Peak force (pN)"),
]:
    fig, ax = plt.subplots()
    # Box summary (outliers hidden) with the individual points drawn on top.
    sns.boxplot(
        data=df_rupture_stats_trf2,
        x="protein_telreps_concentration",
        hue="protein_telreps",
        y=y_column,
        dodge=False,
        palette=customPalette,
        showfliers=False,
        ax=ax,
    )
    sns.stripplot(
        data=df_rupture_stats_trf2,
        x="protein_telreps_concentration",
        hue="protein_telreps",
        y=y_column,
        dodge=False,
        palette=customPalette,
        ax=ax,
    )
    # The x axis already identifies each group, so drop the hue legend if one was drawn.
    hue_legend = ax.get_legend()
    if hue_legend is not None:
        hue_legend.remove()

    # Reduce every tick label to the number following "TEL"; a label without
    # that pattern is left unchanged.
    xticks = ax.get_xticks()
    xticklabels = []
    for tick_label in ax.get_xticklabels():
        tick_text = tick_label.get_text()
        telreps_match = re.search(r"TEL(\d+)", tick_text)
        xticklabels.append(telreps_match.group(1) if telreps_match else tick_text)
    ax.set_xticks(xticks)
    # The shortened labels are only a few characters long, so they stay
    # horizontal in both figures.
    ax.set_xticklabels(xticklabels, rotation=0)

    ax.set_xlabel("Telomeric repeats")
    ax.set_ylabel(y_label)
    plt.show()

# TRF2 peak force against the numeric repeat count, with a straight-line fit
# drawn over the points.
fig, ax = plt.subplots()
sns.scatterplot(
    data=df_rupture_stats_trf2,
    x="telreps",
    hue="protein_telreps",
    y="peak_force_pN",
    palette=customPalette,
    ax=ax,
)
# Only the regression line is drawn here (scatter=False); the points come from
# the scatterplot call above.
sns.regplot(
    data=df_rupture_stats_trf2,
    x="telreps",
    y="peak_force_pN",
    scatter=False,
    ci=None,
    line_kws={"color": "black", "linestyle": "--"},
    order=1,
    label="Linear fit",
    ax=ax,
)
# scatterplot built its legend before the fit line existed, so the
# "Linear fit" label would never be shown. Rebuild the legend from every
# labelled artist, keeping whatever title the hue legend had.
hue_legend = ax.get_legend()
legend_title = hue_legend.get_title().get_text() if hue_legend is not None else None
ax.legend(title=legend_title)
ax.set_xlabel("Telomeric repeats")
ax.set_ylabel("Peak force (pN)")
plt.show()

In [None]:
# Skipped for now: not enough TRF1 data for this comparison.

# TRF2 vs TRF2ΔTRFH vs TRF1
# df_rupture_stats_trf2_trf2_deltah_trf1 = df_rupture_stats[
#     (df_rupture_stats["protein_name"] == "TRF2ΔTRFH")
#     | (df_rupture_stats["protein_name"] == "TRF2")
#     | (df_rupture_stats["protein_name"] == "TRF1")
# ]
# print(df_rupture_stats_trf2_trf2_deltah_trf1["protein_name"].value_counts())

In [None]:
# Every row measured on the 12-repeat construct, grouped on the x axis by
# protein_telreps_concentration.
df_rupture_stats_tel12 = df_rupture_stats.loc[df_rupture_stats["telreps"] == 12]

# Work done first, then peak force; the y column is the only difference.
for metric_column in ("work_done_fJ", "peak_force_pN"):
    tel12_plot_kwargs = dict(
        data=df_rupture_stats_tel12,
        x="protein_telreps_concentration",
        hue="protein_telreps",
        y=metric_column,
        dodge=False,
        palette=customPalette,
    )
    # Box summary (outliers hidden) with the individual points drawn on top.
    sns.boxplot(showfliers=False, **tel12_plot_kwargs)
    sns.stripplot(**tel12_plot_kwargs)
    plt.legend().set_visible(False)
    # Full condition names are kept as tick labels here, so turn them vertical.
    plt.xticks(rotation=90)
    plt.show()

In [None]:
# All Shelterin rows against all TRF2 rows, regardless of construct.
is_shelterin_or_trf2 = df_rupture_stats["protein_name"].isin(["Shelterin", "TRF2"])
df_rupture_stats_shelterin_trf2 = df_rupture_stats[is_shelterin_or_trf2]
print("counts:")
print(df_rupture_stats_shelterin_trf2["protein_name"].value_counts())

# The x axis is split by protein_name alone (not protein_telreps_concentration)
# and coloured from the protein palette. Work done is drawn first, then peak force.
for metric_column, metric_label in (
    ("work_done_fJ", "Work done (fJ)"),
    ("peak_force_pN", "Peak force (pN)"),
):
    protein_plot_kwargs = dict(
        data=df_rupture_stats_shelterin_trf2,
        x="protein_name",
        hue="protein_name",
        y=metric_column,
        dodge=False,
        palette=proteinPalette,
    )
    # Box summary (outliers hidden) with the individual points drawn on top.
    sns.boxplot(showfliers=False, **protein_plot_kwargs)
    sns.stripplot(**protein_plot_kwargs)
    plt.legend().set_visible(False)
    plt.xticks(rotation=0)
    # The tick labels already name the proteins, so the axis title is blanked.
    plt.xlabel("")
    plt.ylabel(metric_label)
    plt.show()

In [None]:
# Same comparison widened to Shelterin, TRF1, TRF2, TRF1cTRFH and TRF2cTRFH.
compared_proteins = ["Shelterin", "TRF1", "TRF2", "TRF1cTRFH", "TRF2cTRFH"]
df_rupture_stats_shelterin_trf1_trf2_trf1ctrfh_trf2ctrfh = df_rupture_stats[
    df_rupture_stats["protein_name"].isin(compared_proteins)
]
print("counts:")
print(df_rupture_stats_shelterin_trf1_trf2_trf1ctrfh_trf2ctrfh["protein_name"].value_counts())

# The x axis is split by protein_name alone (not protein_telreps_concentration)
# and coloured from the protein palette. Work done is drawn first, then peak force.
# NOTE(review): the legend is not hidden in this cell — confirm that is intended.
for metric_column, metric_label in (
    ("work_done_fJ", "Work done (fJ)"),
    ("peak_force_pN", "Peak force (pN)"),
):
    protein_plot_kwargs = dict(
        data=df_rupture_stats_shelterin_trf1_trf2_trf1ctrfh_trf2ctrfh,
        x="protein_name",
        hue="protein_name",
        y=metric_column,
        dodge=False,
        palette=proteinPalette,
    )
    # Box summary (outliers hidden) with the individual points drawn on top.
    sns.boxplot(showfliers=False, **protein_plot_kwargs)
    sns.stripplot(**protein_plot_kwargs)
    # Five protein names on the axis, so the tick labels are turned vertical.
    plt.xticks(rotation=90)
    # The tick labels already name the proteins, so the axis title is blanked.
    plt.xlabel("")
    plt.ylabel(metric_label)
    plt.show()