In [1]:
import numpy as np
import plotly.graph_objects as go
import polars as pl
import umap
from pipeline import cluster, find_lowest_common_ancestor

from .LKH import save_EDGE_WEIGHT_SECTION, save_NODE_COORD_SECTION

In [2]:
YOX1 = "YML027W"
YHP1 = "YDR451C"
CLN3 = "YAL040C"

df_data = pl.read_csv("../data/preprocessed_2090_combined_timeseries.csv", null_values=["NA"])
genes = np.array(df_data.columns)

target_gene_indecies = np.array(
    [
        np.argwhere(genes == YOX1).squeeze(),
        np.argwhere(genes == YHP1).squeeze(),
        np.argwhere(genes == CLN3).squeeze(),
    ]
)

df_ccm = pl.read_csv("../data/Cell_Cycle_5690_CCM_all_vs_all.csv", null_values=["NA"]).drop("")

best_rho = df_ccm.group_by(["lib_column", "target_column"]).agg(pl.max("rho")).sort("lib_column", "target_column")

N = len(df_ccm.unique(subset="lib_column"))
ccm = best_rho["rho"].to_numpy().reshape((N, N))

ccm_clustered, Z_ccm = cluster(ccm, method="average")

lca, paths, leaves = find_lowest_common_ancestor(
    Z_ccm[0],
    target_gene_indecies,
)

ccm_extracted_cluster = ccm[leaves][:, leaves]
ccm_extracted_cluster.shape

(1146, 1146)

In [3]:
fit = umap.UMAP(n_neighbors=3)
embedding_of_ccm_extracted_cluster: np.ndarray = fit.fit_transform(ccm_extracted_cluster)  # type: ignore

layout = go.Layout(
    title="UMAP of CCM genes",
    xaxis=dict(title="UMAP1"),
    yaxis=dict(title="UMAP2"),
    width=800,
    height=800,
)

data = [
    go.Scatter(
        x=embedding_of_ccm_extracted_cluster[:, 0],
        y=embedding_of_ccm_extracted_cluster[:, 1],
        mode="markers",
        marker=dict(color="blue"),
        text=[genes[i] for i in leaves],
        hoverinfo="text",
    )
]

fig = go.Figure(data=data, layout=layout)
fig.show()

OMP: Info #276: omp_set_nested routine deprecated, please use omp_set_max_active_levels instead.


In [4]:
DEPOT = np.argwhere(leaves == np.argwhere(genes == CLN3).squeeze()).squeeze() + 1
DEPOT

np.int64(1042)

In [5]:
save_NODE_COORD_SECTION(embedding_of_ccm_extracted_cluster, "../outputs/NODE_COORD_SECTION.txt")

In [6]:
save_EDGE_WEIGHT_SECTION(ccm_extracted_cluster, "../outputs/EDGE_WEIGHT_SECTION.txt")

In [7]:
with open("../LKH-3.0.10/gene.tour", "r") as f:
    for line in f:
        if line.startswith("TOUR_SECTION"):
            break
    tour = np.array([int(line) - 1 for line in f.readlines()[:-2]])


layout = go.Layout(
    title="UMAP of CCM genes",
    xaxis=dict(title="UMAP1"),
    yaxis=dict(title="UMAP2"),
    width=800,
    height=800,
)

data = [
    go.Scatter(
        x=embedding_of_ccm_extracted_cluster[:, 0],
        y=embedding_of_ccm_extracted_cluster[:, 1],
        mode="markers",
        marker=dict(color="blue"),
        text=[genes[leaves][i] for i in leaves],
        hoverinfo="text",
    ),
    go.Scatter(
        x=embedding_of_ccm_extracted_cluster[tour, 0],
        y=embedding_of_ccm_extracted_cluster[tour, 1],
        mode="lines+markers",
        line=dict(color="red"),
        marker=dict(size=10, symbol="arrow", angleref="previous"),
    ),
]

fig = go.Figure(data=data, layout=layout)

fig.show()

IndexError: index 2931 is out of bounds for axis 0 with size 1146

In [None]:
with open("../LKH-3.0.10/umap.tour", "r") as f:
    for line in f:
        if line.startswith("TOUR_SECTION"):
            break
    tour = np.array([int(line) - 1 for line in f.readlines()[:-2]])


layout = go.Layout(
    title="UMAP of CCM genes",
    xaxis=dict(title="UMAP1"),
    yaxis=dict(title="UMAP2"),
    width=800,
    height=800,
)

data = [
    go.Scatter(
        x=embedding_of_ccm_extracted_cluster[tour, 0],
        y=embedding_of_ccm_extracted_cluster[tour, 1],
        mode="lines+markers",
        line=dict(color="red"),
        marker=dict(size=10, symbol="arrow", angleref="previous"),
        hoverinfo="skip",
        showlegend=False,
    ),
    go.Scatter(
        x=embedding_of_ccm_extracted_cluster[:, 0],
        y=embedding_of_ccm_extracted_cluster[:, 1],
        mode="markers",
        marker=dict(
            color=[
                "green"
                if i == tour[0]
                else "black"
                if i == tour[-1]
                else "yellow"
                if i in target_gene_indecies
                else "blue"
                for i in range(len(leaves))
            ],
            size=[10 if i == tour[0] or i == tour[-1] or i in target_gene_indecies else 6 for i in range(len(leaves))],
        ),
        text=[genes[i] for i in leaves],
        showlegend=False,
        hoverinfo="text",
    ),
]

fig = go.Figure(data=data, layout=layout)

fig.show()

In [None]:
signaling_pathway = genes[leaves[tour]]
signaling_pathway[:10]