In [None]:
from pathlib import Path

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

In [None]:
# Root folder of the shelterin dataset; stop here if it is not present on this machine.
base_dir = Path("/Users/sylvi/topo_data/shelterin")
assert base_dir.exists()

# Statistics CSV inside the "output-libby-curated" folder; it must exist before loading.
all_stats_file = base_dir.joinpath("output-libby-curated", "all_statistics.csv")
assert all_stats_file.exists()

# Load the table once; the cells below all filter this DataFrame.
all_stats = pd.read_csv(all_stats_file)

# List the available column names for reference.
print(all_stats.columns)

# TEL80S protein volumes

In [None]:
# Protein volumes for the TEL80 shelterin sample.
# Rows are picked by the substring "TEL80shelterin" in the "basename" column.
is_tel80s = all_stats["basename"].str.contains("TEL80shelterin")
data_tel80s = all_stats[is_tel80s]
print(f"tel80s: {len(data_tel80s)}")

# Rows with class_number 2 are the ones treated as protein in this cell.
is_protein = data_tel80s["class_number"] == 2
data_tel80s_protein = data_tel80s[is_protein]
print(f"tel80s protein: {len(data_tel80s_protein)}")

# One strip of points, one point per selected row's "volume" value.
plt.figure()
ax = sns.stripplot(data_tel80s_protein["volume"])
ax.set_title("Volume of TEL80shelterin proteins")
plt.show()

# Total branch lengths (TBL) for TEL80C & TEL80S

In [None]:
# Compare total branch lengths (TBL) between the TEL80 control and TEL80 shelterin samples.
# Samples are selected by substring match on the "basename" column.
data_tel80c = all_stats[all_stats["basename"].str.contains("TEL80control")]
print(f"tel80c: {len(data_tel80c)}")
data_tel80s = all_stats[all_stats["basename"].str.contains("TEL80shelterin")]
print(f"tel80s: {len(data_tel80s)}")

# grab only class 1 subgrain 0 since data is duplicated for each subgrain and class
data_tel80c_s0_c1 = data_tel80c[(data_tel80c["subgrain_number"] == 0) & (data_tel80c["class_number"] == 1)]
data_tel80s_s0_c1 = data_tel80s[(data_tel80s["subgrain_number"] == 0) & (data_tel80s["class_number"] == 1)]
# Labels name the filter that is actually applied (subgrain 0, class 1).
print(f"tel80c_s0_c1: {len(data_tel80c_s0_c1)}")
print(f"tel80s_s0_c1: {len(data_tel80s_s0_c1)}")

# Tag each subset with its condition and stack them, so the two samples get their own
# position on the categorical axis instead of being drawn on top of each other.
data_tbl_tel80 = pd.concat(
    [
        data_tel80c_s0_c1.assign(condition="TEL80control"),
        data_tel80s_s0_c1.assign(condition="TEL80shelterin"),
    ]
)

# plot TBL for both; the x tick labels identify the samples, so no legend is needed
plt.figure()
ax = sns.stripplot(x="condition", y="total_branch_lengths", data=data_tbl_tel80)
ax.set_title("Total branch lengths of TEL80control and TEL80shelterin")
plt.show()

In [None]:
# Relate total branch length (TBL) to class-2 volume for each TEL80 shelterin grain.
data_tel80s = all_stats[all_stats["basename"].str.contains("TEL80shelterin")]

tbl_total_volume_pairs = []
tbl_maximum_volume_pairs = []
# sort=False keeps images, and grains within an image, in order of first appearance.
for image, image_rows in data_tel80s.groupby("image", sort=False):
    for grain_number, grain_rows in image_rows.groupby("grain_number", sort=False):
        classes_present = grain_rows["class_number"].unique()
        # Only grains that have both a class-1 row and a class-2 row contribute a point.
        if not (1 in classes_present and 2 in classes_present):
            continue

        # TBL is read from the grain's first row.
        tbl = grain_rows.iloc[0]["total_branch_lengths"]
        class2_volumes = grain_rows.loc[grain_rows["class_number"] == 2, "volume"]
        tbl_total_volume_pairs.append((tbl, class2_volumes.sum()))
        tbl_maximum_volume_pairs.append((tbl, class2_volumes.max()))

# Split the (tbl, volume) pairs into parallel lists for plotting.
tbls = [tbl for tbl, _volume in tbl_total_volume_pairs]
total_volumes = [volume for _tbl, volume in tbl_total_volume_pairs]
maximum_volumes = [volume for _tbl, volume in tbl_maximum_volume_pairs]

# One scatter per volume summary: summed class-2 volume first, then the largest one.
scatter_specs = (
    (total_volumes, "Total branch lengths vs total volume", "Total volume"),
    (maximum_volumes, "Total branch lengths vs maximum volume", "Maximum volume"),
)
for volumes, plot_title, y_label in scatter_specs:
    plt.figure()
    plt.scatter(tbls, volumes)
    plt.title(plot_title)
    plt.xlabel("Total branch lengths")
    plt.ylabel(y_label)
    plt.show()

# TBL & height for TEL80C

In [None]:
# Total branch length (TBL) against median height for the TEL80 control sample,
# one point per class-1 row.
data_tel80c = all_stats[all_stats["basename"].str.contains("TEL80control")]
images = data_tel80c["image"].unique()
tbl_height_pairs = []
for image in images:
    subdata_images = data_tel80c[data_tel80c["image"] == image]
    grain_numbers = subdata_images["grain_number"].unique()
    for grain_number in grain_numbers:
        subdata_grain = subdata_images[subdata_images["grain_number"] == grain_number]
        subdata_subgrains = subdata_grain["subgrain_number"].unique()
        for subgrain_number in subdata_subgrains:
            subdata_subgrain = subdata_grain[subdata_grain["subgrain_number"] == subgrain_number]
            # Each (image, grain, subgrain) combination is expected to be exactly one row.
            assert len(subdata_subgrain) == 1, f"len(subdata_subgrain) = {len(subdata_subgrain)}"
            class_number = subdata_subgrain["class_number"].values[0]
            if class_number == 1:
                tbl = subdata_subgrain["total_branch_lengths"].values[0]
                # Take the scalar, the same way tbl is read; appending the one-row Series
                # itself would leave `heights` as a list of Series rather than numbers.
                median_height = subdata_subgrain["height_median"].values[0]
                tbl_height_pairs.append((tbl, median_height))

# Split the (tbl, height) pairs into parallel lists for plotting.
tbls = [pair[0] for pair in tbl_height_pairs]
heights = [pair[1] for pair in tbl_height_pairs]

plt.figure()
plt.scatter(tbls, heights)
plt.title("Total branch lengths vs median height")
plt.xlabel("Total branch lengths")
plt.ylabel("Median height")
plt.show()

# TBL for TEL12

In [None]:
def _rows_with_basename(fragment):
    """Return the rows of all_stats whose "basename" contains fragment."""
    return all_stats[all_stats["basename"].str.contains(fragment)]


def _subgrain0_class1(frame):
    """Return the rows of frame with subgrain_number 0 and class_number 1."""
    keep = (frame["subgrain_number"] == 0) & (frame["class_number"] == 1)
    return frame[keep]


# Split the table into the four TEL12 samples and report how many rows each one has.
data_tel12c = _rows_with_basename("TEL12control")
print(f"tel12c: {len(data_tel12c)}")
data_tel12s = _rows_with_basename("TEL12shelterin")
print(f"tel12s: {len(data_tel12s)}")
data_tel12trf1 = _rows_with_basename("TEL12TRF1")
print(f"tel12trf1: {len(data_tel12trf1)}")
data_tel12trf2 = _rows_with_basename("TEL12TRF2")
print(f"tel12trf2: {len(data_tel12trf2)}")

# Same subgrain 0 / class 1 restriction for every sample.
data_tel12c_s0_c1 = _subgrain0_class1(data_tel12c)
print(f"tel12c_s0_c1: {len(data_tel12c_s0_c1)}")
data_tel12s_s0_c1 = _subgrain0_class1(data_tel12s)
print(f"tel12s_s0_c1: {len(data_tel12s_s0_c1)}")
data_tel12trf1_s0_c1 = _subgrain0_class1(data_tel12trf1)
print(f"tel12trf1_s0_c1: {len(data_tel12trf1_s0_c1)}")
data_tel12trf2_s0_c1 = _subgrain0_class1(data_tel12trf2)
print(f"tel12trf2_s0_c1: {len(data_tel12trf2_s0_c1)}")

# Stack the filtered samples so one strip plot can show them side by side by basename.
tel12_subsets = [data_tel12c_s0_c1, data_tel12s_s0_c1, data_tel12trf1_s0_c1, data_tel12trf2_s0_c1]
data_tbl_tel12 = pd.concat(tel12_subsets)

plt.figure()
ax = sns.stripplot(x="basename", y="total_branch_lengths", data=data_tbl_tel12)
# Rotate the tick labels to vertical.
plt.xticks(rotation=90)
ax.set_title("Total branch lengths of TEL12 proteins")
plt.show()