In [None]:
import os
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

from adjustText import adjust_text

# Working directory for this analysis. Every relative "input/..." and
# "output/..." path used in the cells below is resolved against it.
# NOTE(review): this is a hard-coded absolute path, so the notebook only runs
# where this directory exists.
ANALYSIS_DIR = "/media/kyle_storage/kyle_ferchen/grimes_lab_main/analysis/2023_06_07_plot_data_over_transcriptome_r7_merged_umap/"
os.chdir(ANALYSIS_DIR)

In [None]:
# Lookup Series: indexed by the first CSV column, valued by the column after
# it. Used further down to translate cluster labels via map_r7_names[label].
r7_name_table = pd.read_csv(
    "input/map_r7-v1_to_r7-v2_names.csv",
    index_col=0)
map_r7_names = r7_name_table.iloc[:, 0]

In [None]:
## Load the UMAP coordinates of the R7 data (tab-separated, first column is
## used as the row index)
umap = pd.read_csv(
    "input/cite_hive_integration_umap/hive_cite_umap_coordinates.tsv",
    sep="\t",
    index_col=0)

## Load the matching annotation table. The file has no header row: its first
## column becomes the index and the two remaining columns are named here.
umap_anno = pd.read_csv(
    "input/cite_hive_integration_umap/groups.r7-10x-hive.txt",
    sep="\t",
    header=None,
    index_col=0)
umap_anno.columns = ["port", "cluster"]
umap_anno.index.name = None
umap_anno.head()

In [None]:
# Keep only the second-to-last column of the cellHarmony results table as a
# Series indexed by the file's first column.
hive_anno = pd.read_csv(
    "input/cite_hive_integration_umap/hive_cellharmony_results.txt",
    sep="\t",
    index_col=0).iloc[:, -2]

# Remove every "KF." substring from the index labels.
hive_anno.index = [
    cell_id.replace("KF.", "") for cell_id in hive_anno.index.values]

def rename_hive_cell_info(x):
    """Collapse an underscore-delimited identifier to "<first>.<last>".

    Parameters
    ----------
    x : str
        Identifier made of fields separated by "_".

    Returns
    -------
    str
        The first and last fields of ``x`` joined by ".". When ``x`` has no
        underscore the single field is used for both positions.
    """
    fields = x.split("_")
    return ".".join((fields[0], fields[-1]))

# Rebuild hive_anno (a Series up to this point) as a two-column table:
#   "port"    - last "_"-separated field of each original index label
#   "cluster" - the Series value translated through map_r7_names
# The new index is the original label passed through rename_hive_cell_info.
hive_cell_ids = hive_anno.index.values
hive_cluster_calls = hive_anno.values

hive_anno = pd.DataFrame(
    {
        "port": [cell_id.split("_")[-1] for cell_id in hive_cell_ids],
        "cluster": [map_r7_names[call] for call in hive_cluster_calls],
    },
    index=[rename_hive_cell_info(cell_id) for cell_id in hive_cell_ids])

hive_anno.head()

In [None]:
# Stack the two annotation tables row-wise (umap_anno rows first, then
# hive_anno rows); both carry the columns "port" and "cluster".
cite_hive_anno = pd.concat(objs=[umap_anno, hive_anno], axis=0)
cite_hive_anno.head()

In [None]:
# Attach each row's cluster label to the UMAP table, looked up by index label
# in the combined annotation table.
cluster_by_cell = cite_hive_anno["cluster"]
umap["cluster"] = cluster_by_cell.loc[umap.index.values]
umap.head()

In [None]:
# Per-cluster centroid: mean UMAP_1 / UMAP_2 over all rows sharing a cluster.
# The aggregation is given as the string "mean" rather than the np.mean
# callable: recent pandas emits a FutureWarning for the callable form and
# documents the string as the way to keep the current behaviour.
umap_centroids = pd.pivot_table(
    umap,
    index="cluster",
    values=["UMAP_1", "UMAP_2"],
    aggfunc="mean")
umap_centroids.head()

In [None]:
# Plot limits: per-column min/max of the first two umap columns, widened on
# each side by 5% of that column's range. Result has one row per coordinate
# column and the columns "min" and "max".
coord_min = umap.iloc[:, :2].min(axis=0)
coord_max = umap.iloc[:, :2].max(axis=0)
padding = 0.05 * (coord_max - coord_min)
umap_bounds = pd.DataFrame({
    "min": coord_min - padding,
    "max": coord_max + padding})
umap_bounds

In [None]:
# Overview figure: all rows of `umap` as a faint gray background, with one
# labelled red marker per cluster centroid.
fig, ax = plt.subplots(figsize=(12,12))

# NOTE(review): edgecolors=None selects matplotlib's default edge colour; it
# does not disable marker edges (that would be edgecolors="none").
ax.scatter(umap.iloc[:,0], umap.iloc[:,1], color="gray", alpha=0.01,
    edgecolors=None, s=4)

# Centroid coordinates are pulled out once as positional arrays. The previous
# `umap_centroids.loc[item][0]` indexed a string-labelled Series with an
# integer, which pandas deprecates (integer keys will be treated as labels).
centroid_x = umap_centroids.iloc[:,0].values
centroid_y = umap_centroids.iloc[:,1].values
ax.scatter(centroid_x, centroid_y, color="red", edgecolors="black")

# One text label per centroid, then let adjustText move the labels apart and
# draw arrows back to the centroid positions.
texts = [
    ax.text(x_pos, y_pos, label)
    for label, x_pos, y_pos in zip(
        umap_centroids.index.values, centroid_x, centroid_y)]
adjust_text(
    texts,
    x=centroid_x,
    y=centroid_y,
    arrowprops=dict(arrowstyle='->', color='blue'))

# Row 0 / row 1 of umap_bounds hold (min, max) for the first / second
# coordinate column.
ax.set_xlim(umap_bounds.iloc[0,:].values)
ax.set_ylim(umap_bounds.iloc[1,:].values)

# Hide the frame and the ticks.
for side in ("right", "top", "bottom", "left"):
    ax.spines[side].set_visible(False)
ax.set_xticks([])
ax.set_yticks([])

In [None]:
# Number of rows per "port" value, printed in full as plain text.
port_counts = cite_hive_anno["port"].value_counts()
print(port_counts.to_string())

In [None]:
# Colour (hex string) for each port that defines a sorted population in the
# cite-seq titration experiment. The two Multilin ports share one colour.
multilin_color = "#9233F5"
map_port_to_gate_color = {
    "AS_CITE_Kit": "#EBC856",
    "AS_CITE_CD127": "#54D444",
    "AS_CITE_HSC": "#4FCCF7",
    "AS_CITE_Multilin1": multilin_color,
    "AS_CITE_Multilin2": multilin_color,
}

In [None]:
# UMAP of the titrated cite-seq cells only, coloured by port, saved as a PDF.
plt.close("all")

fig, ax = plt.subplots(figsize=(12,12))

# Filter to cells in titrated mix dataset
filtered_cells = cite_hive_anno.loc[
    cite_hive_anno["port"].isin(
        list(map_port_to_gate_color.keys()))].index.values

# Filter umap and order. The explicit copy makes seg_anno an independent
# table before new columns are written to it.
seg_anno = umap.loc[filtered_cells].copy()
seg_anno["port"] = cite_hive_anno.loc[filtered_cells, "port"].values

# Sort key per port: rows are sorted ascending by this value before plotting,
# so the port with the largest value ends up last in the scatter input.
# Series.map is used for the lookup instead of Series.replace: replacing
# strings by integers relies on silent downcasting of the object column,
# which pandas has deprecated. Every port present is a key of this dict
# because of the isin() filter above.
port_draw_order = {
    "AS_CITE_Kit": 5,
    "AS_CITE_CD127": 1,
    "AS_CITE_HSC": 2,
    "AS_CITE_Multilin1": 3,
    "AS_CITE_Multilin2": 4}
seg_anno["tmp_order"] = seg_anno["port"].map(port_draw_order)
seg_anno = seg_anno.sort_values(by="tmp_order")

# Define color vector for plotting by port
tmp_color_vector = seg_anno["port"].map(map_port_to_gate_color).values

# NOTE(review): edgecolors=None selects matplotlib's default edge colour; it
# does not disable marker edges (that would be edgecolors="none").
ax.scatter(
    seg_anno["UMAP_1"].values,
    seg_anno["UMAP_2"].values,
    c = tmp_color_vector,
    alpha = 0.5,
    s = 8,
    edgecolors=None)

# Row 0 / row 1 of umap_bounds hold (min, max) for the first / second
# coordinate column.
ax.set_xlim(umap_bounds.iloc[0,:].values)
ax.set_ylim(umap_bounds.iloc[1,:].values)

# Hide the frame and the ticks.
for side in ("right", "top", "bottom", "left"):
    ax.spines[side].set_visible(False)
ax.set_xticks([])
ax.set_yticks([])

plt.savefig("output/integrated_umap_titrated_cite_seq_umap_plot_ports.pdf")