In [None]:
# Standard-library, data-handling and plotting dependencies for this notebook.
import os
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
# Render all figure text in Arial.
plt.rcParams["font.family"] = "Arial"
import matplotlib
# fonttype 42 asks the PDF backend to embed TrueType (Type 42) fonts; see
# matplotlib's documentation of the `pdf.fonttype` rcParam.
matplotlib.rcParams['pdf.fonttype'] = 42


# All relative "input/..." and "output/..." paths used in later cells resolve
# against this analysis directory.
os.chdir("/media/kyle_storage/kyle_ferchen/grimes_lab_main/analysis/2023_06_07_plot_data_over_transcriptome_r7_merged_umap/")

In [None]:
# Cluster -> color lookup for R7, read from a header-less CSV: the first
# column becomes the index and the first remaining column supplies the values.
path_r7_colors = (
    "/media/kyle_storage/kyle_ferchen/grimes_lab_main/analysis/"
    "2022_12_07_new_multilin_panel/output/automated_py3_cellharmony_r7/"
    "r7_clusters_to_colors_defined_2023_07_13.csv"
)
r7_colors = (
    pd.read_csv(path_r7_colors, header=None, index_col=0)
    .iloc[:, 0]
    .rename_axis(None)  # clear the index label left behind by index_col
)

r7_colors

In [None]:
# Lookup Series read from the r7-v1 -> r7-v2 name-mapping CSV: the first column
# is the index, the first remaining column holds the mapped values.
map_r7_names = (
    pd.read_csv("input/map_r7-v1_to_r7-v2_names.csv", index_col=0)
    .iloc[:, 0]
)

In [None]:
## UMAP coordinates of the R7 data, indexed by the file's first column
umap = pd.read_csv(
    "input/cite_hive_integration_umap/hive_cite_umap_coordinates.tsv",
    sep="\t",
    index_col=0,
)

## Per-cell annotation table (no header row): index column, then the two
## columns that are labelled "port" and "cluster" below
umap_anno = pd.read_csv(
    "input/cite_hive_integration_umap/groups.r7-10x-hive.txt",
    sep="\t",
    header=None,
    index_col=0,
).rename_axis(None)
umap_anno.columns = ["port", "cluster"]
umap_anno.head()

In [None]:
# Per-cell annotation for the HIVE data: the second-to-last column of the
# results table, indexed by the table's first column.
# NOTE(review): the column is selected by position, not by name — confirm the
# file layout is stable.
hive_anno = pd.read_csv(
    "input/cite_hive_integration_umap/hive_cellharmony_results.txt",
    sep="\t",
    index_col=0,
).iloc[:, -2]

# Strip every "KF." occurrence from the cell identifiers.
hive_anno.index = [cell_id.replace("KF.", "") for cell_id in hive_anno.index]

def rename_hive_cell_info(x):
    """Join the first and last "_"-separated fields of `x` with a ".".

    When `x` contains no "_", both fields are the whole string, so the
    result is `x + "." + x`.
    """
    head = x.partition("_")[0]
    tail = x.rpartition("_")[2]
    return f"{head}.{tail}"

# Rebuild hive_anno as a two-column table:
#   port    - the text after the last "_" of each cell identifier
#   cluster - each annotation label looked up in map_r7_names
# Rows are re-labelled with rename_hive_cell_info(identifier).
hive_anno = pd.DataFrame(
    {
        "port": [cell_id.rsplit("_", 1)[-1] for cell_id in hive_anno.index],
        "cluster": [map_r7_names[label] for label in hive_anno.values],
    },
    index=list(map(rename_hive_cell_info, hive_anno.index)),
)

hive_anno.head()

In [None]:
# Row-wise stack of the two annotation tables into a single lookup.
cite_hive_anno = pd.concat(objs=[umap_anno, hive_anno], axis=0)
cite_hive_anno.head()

In [None]:
# Attach each embedded cell's cluster label, looked up by cell identifier.
umap["cluster"] = cite_hive_anno["cluster"].loc[umap.index]
umap.head()

In [None]:
# Per-cluster centroid of the embedding: the mean UMAP_1 / UMAP_2 over the
# cells of each cluster.
# The aggregation is given by name: recent pandas releases emit a
# FutureWarning when a NumPy callable such as np.mean is passed as aggfunc,
# while the string "mean" yields the same result without the warning.
umap_centroids = pd.pivot_table(
    umap,
    index="cluster",
    values=["UMAP_1", "UMAP_2"],
    aggfunc="mean")
umap_centroids.head()

In [None]:
# Plot limits: min / max of the first two umap columns, widened on both sides
# by 5 % of the data range.
# NOTE(review): assumes the first two columns are UMAP_1 and UMAP_2 (in that
# order) — confirm against the coordinates file.
umap_bounds = pd.DataFrame({
    "min": umap.iloc[:, :2].min(),
    "max": umap.iloc[:, :2].max(),
})
padding = (umap_bounds["max"] - umap_bounds["min"]) * 0.05
umap_bounds["min"] = umap_bounds["min"].sub(padding)
umap_bounds["max"] = umap_bounds["max"].add(padding)
umap_bounds

In [None]:
# Number of annotated cells per port.
cite_hive_anno.loc[:, "port"].value_counts()

In [None]:
# Label every annotated cell with its capture technology: cells from the six
# ports listed below are marked "HIVE", all remaining cells "10X".
mask_hive = cite_hive_anno["port"].isin(
    ["EOS", "kit2", "ML1", "ML2", "kit1", "BMCP"])
cite_hive_anno["tech"] = np.where(mask_hive, "HIVE", "10X")

In [None]:
# Hex color used for each capture technology in the 10X-vs-HIVE plots.
colors_10x_hive = dict([
    ("10X", "#e0e0e0"),
    ("HIVE", "#cf4927"),
])

In [None]:
# Integrated UMAP colored by capture technology (10X vs HIVE), with the cells
# drawn in random order.
plt.close("all")

fig, ax = plt.subplots(figsize=(12,12))

# Keep only cells that have both UMAP coordinates and an annotation row.
filtered_cells = np.intersect1d(umap.index.values, cite_hive_anno.index.values)

# Work on an explicit copy so the added column never writes back into `umap`.
seg_anno = umap.loc[filtered_cells].copy()
seg_anno["Technology"] = cite_hive_anno.loc[filtered_cells, "tech"].values

# Shuffle the draw order so neither technology is systematically plotted on
# top of the other. The fixed seed makes the saved figure identical on every
# run, and sample(frac=1) shuffles the rows directly instead of sampling the
# index and looking the rows up again. The order is purely random here, so no
# ordering key column is needed.
seg_anno = seg_anno.sample(frac=1, random_state=0)

# One color per cell, chosen by its technology.
tmp_color_vector = seg_anno["Technology"].map(colors_10x_hive).values

# NOTE(review): edgecolors=None keeps matplotlib's default marker edges; if
# border-less markers were intended the string "none" is required — confirm.
ax.scatter(
    seg_anno["UMAP_1"].values,
    seg_anno["UMAP_2"].values,
    c = tmp_color_vector,
    alpha = 0.5,
    s = 8,
    edgecolors=None)
# umap_bounds rows are taken positionally: row 0 -> x limits, row 1 -> y limits.
ax.set_xlim(umap_bounds.iloc[0,:].values)
ax.set_ylim(umap_bounds.iloc[1,:].values)
# Bare canvas: no frame, no ticks.
ax.spines.right.set_visible(False)
ax.spines.top.set_visible(False)
ax.spines.bottom.set_visible(False)
ax.spines.left.set_visible(False)
ax.set_xticks([])
ax.set_yticks([])

plt.savefig("output/main_figure_1_plots/"
    "integrated_umap_highlight_10x_vs_hive_shuffled.png",
    dpi=800)

In [None]:
# Integrated UMAP colored by capture technology (10X vs HIVE), with all 10X
# cells drawn first and the HIVE cells drawn on top of them.
plt.close("all")

fig, ax = plt.subplots(figsize=(12,12))

# Keep only cells that have both UMAP coordinates and an annotation row.
filtered_cells = np.intersect1d(umap.index.values, cite_hive_anno.index.values)

# Work on an explicit copy so the added columns never write back into `umap`.
seg_anno = umap.loc[filtered_cells].copy()
seg_anno["Technology"] = cite_hive_anno.loc[filtered_cells, "tech"].values

# Draw-order key: lower values are plotted first (underneath). map() yields an
# integer column directly, avoiding the object -> int replace() that newer
# pandas versions warn about.
seg_anno["tmp_order"] = seg_anno["Technology"].map({
    "10X": 1,
    "HIVE": 2})

# Shuffle with a fixed seed (reproducible figure), then sort by technology.
# The stable sort keeps the shuffled order inside each technology group.
seg_anno = seg_anno.sample(frac=1, random_state=0).sort_values(
    by="tmp_order", kind="stable")

# One color per cell, chosen by its technology.
tmp_color_vector = seg_anno["Technology"].map(colors_10x_hive).values

# NOTE(review): edgecolors=None keeps matplotlib's default marker edges; if
# border-less markers were intended the string "none" is required — confirm.
ax.scatter(
    seg_anno["UMAP_1"].values,
    seg_anno["UMAP_2"].values,
    c = tmp_color_vector,
    alpha = 0.5,
    s = 8,
    edgecolors=None)
# umap_bounds rows are taken positionally: row 0 -> x limits, row 1 -> y limits.
ax.set_xlim(umap_bounds.iloc[0,:].values)
ax.set_ylim(umap_bounds.iloc[1,:].values)
# Bare canvas: no frame, no ticks.
ax.spines.right.set_visible(False)
ax.spines.top.set_visible(False)
ax.spines.bottom.set_visible(False)
ax.spines.left.set_visible(False)
ax.set_xticks([])
ax.set_yticks([])

plt.savefig("output/main_figure_1_plots/"
    "integrated_umap_highlight_10x_vs_hive_ordered.png",
    dpi=800)

In [None]:
# UMAP of the cells from the listed CITE ports, colored by R7 cluster.
plt.close("all")

fig, ax = plt.subplots(figsize=(12,12))

# Restrict to embedded cells whose port is one of the ports listed below.
filtered_cells = np.intersect1d(
    umap.index.values,
    cite_hive_anno.loc[
        cite_hive_anno["port"].isin([
            "AS_3CITE_Kitx2",
            "AS_CITE_CD127",
            "AS_3CITE_Kit",
            "AS_CITE_HSC",
            "AS_CITE_Multilin1",
            "AS_CITE_Multilin2"])].index.values)

# Work on an explicit copy so the column assignment never writes into `umap`.
seg_anno = umap.loc[filtered_cells].copy()
seg_anno["cluster"] = cite_hive_anno.loc[filtered_cells, "cluster"].values

# Shuffle the draw order so no cluster is systematically plotted on top; the
# fixed seed makes the saved figure identical on every run.
seg_anno = seg_anno.sample(frac=1, random_state=0)

# One color per cell, chosen by its cluster. map() leaves NaN for a cluster
# without an entry in r7_colors, so the problem is reported here by name
# instead of the cluster label reaching matplotlib as an invalid color.
tmp_color_vector = seg_anno["cluster"].map(r7_colors.to_dict())
if tmp_color_vector.isna().any():
    missing_clusters = sorted(
        seg_anno.loc[tmp_color_vector.isna(), "cluster"].astype(str).unique())
    raise KeyError(
        "No color defined in r7_colors for cluster(s): "
        + ", ".join(missing_clusters))
tmp_color_vector = tmp_color_vector.values

# NOTE(review): edgecolors=None keeps matplotlib's default marker edges; if
# border-less markers were intended the string "none" is required — confirm.
ax.scatter(
    seg_anno["UMAP_1"].values,
    seg_anno["UMAP_2"].values,
    c = tmp_color_vector,
    alpha = 0.25,
    s = 8,
    edgecolors=None)
# umap_bounds rows are taken positionally: row 0 -> x limits, row 1 -> y limits.
ax.set_xlim(umap_bounds.iloc[0,:].values)
ax.set_ylim(umap_bounds.iloc[1,:].values)
# Bare canvas: no frame, no ticks.
ax.spines.right.set_visible(False)
ax.spines.top.set_visible(False)
ax.spines.bottom.set_visible(False)
ax.spines.left.set_visible(False)
ax.set_xticks([])
ax.set_yticks([])

plt.savefig("output/main_figure_1_plots/"
    "curated_cite_seq_with_r7_cluster_colors_umap.png",
    dpi=800)

In [None]:
# Display the per-cell table left over from the preceding plotting cell.
seg_anno

In [None]:
# Integrated UMAP with 10X cells as a grey background and HIVE cells colored
# by capture (port), drawn on top.
plt.close("all")

fig, ax = plt.subplots(figsize=(12,12))

# Keep only cells that have both UMAP coordinates and an annotation row.
filtered_cells = np.intersect1d(umap.index.values, cite_hive_anno.index.values)

# Work on an explicit copy so the added columns never write back into `umap`.
seg_anno = umap.loc[filtered_cells].copy()
seg_anno["Technology"] = cite_hive_anno.loc[filtered_cells, "tech"].values

# Plot label per cell: "10X" for 10X cells, the port name for HIVE cells, with
# kit1/kit2 merged into "Kit" and ML1/ML2 merged into "MultiLin".
idx_hive = seg_anno.loc[seg_anno["Technology"] == "HIVE"].index.values
seg_anno["10X_v_HIVE"] = seg_anno["Technology"].values
seg_anno.loc[idx_hive, "10X_v_HIVE"] = cite_hive_anno.loc[
    idx_hive, "port"].values
seg_anno["10X_v_HIVE"] = seg_anno["10X_v_HIVE"].replace({
    "kit1": "Kit",
    "kit2": "Kit",
    "ML1": "MultiLin",
    "ML2": "MultiLin"})

# Color assigned to each plot label.
map_capture_to_color = {
    "10X": "#DADADA",
    "MultiLin": "#FF00F0",
    "BMCP": "#0070FF",
    "Kit": "#00FF0F",
    "EOS": "#FF8F00"}

# Draw-order key: lower values are plotted first (underneath). map() yields an
# integer column directly, avoiding the object -> int replace() that newer
# pandas versions warn about.
seg_anno["tmp_order"] = seg_anno["Technology"].map({
    "10X": 1,
    "HIVE": 2})

# Shuffle with a fixed seed (reproducible figure), then sort by technology.
# The stable sort keeps the shuffled order inside each technology group, so
# the HIVE captures stay randomly interleaved among themselves.
seg_anno = seg_anno.sample(frac=1, random_state=0).sort_values(
    by="tmp_order", kind="stable")

# One color per cell, chosen by its plot label.
tmp_color_vector = seg_anno["10X_v_HIVE"].map(map_capture_to_color).values

# NOTE(review): edgecolors=None keeps matplotlib's default marker edges; if
# border-less markers were intended the string "none" is required — confirm.
ax.scatter(
    seg_anno["UMAP_1"].values,
    seg_anno["UMAP_2"].values,
    c = tmp_color_vector,
    alpha = 0.5,
    s = 8,
    edgecolors=None)
# umap_bounds rows are taken positionally: row 0 -> x limits, row 1 -> y limits.
ax.set_xlim(umap_bounds.iloc[0,:].values)
ax.set_ylim(umap_bounds.iloc[1,:].values)
# Bare canvas: no frame, no ticks.
ax.spines.right.set_visible(False)
ax.spines.top.set_visible(False)
ax.spines.bottom.set_visible(False)
ax.spines.left.set_visible(False)
ax.set_xticks([])
ax.set_yticks([])

# NOTE(review): this figure is saved at dpi=600 while the other UMAP figures
# in this notebook use 800 — confirm that the difference is intended.
plt.savefig("output/main_figure_1_plots/"
    "curated_cite_seq_with_hive_vs_10x_comparison.png",
    dpi=600)

In [None]:
import matplotlib

# Stand-alone horizontal colorbar (jet, values 0..1) saved as its own PDF.
fig, ax = plt.subplots(figsize=(12, 1), layout='constrained')

# NOTE(review): `cmap` is assigned but never used in this cell — the colorbar
# below is built with "jet". Kept in case a later cell reads it.
cmap = matplotlib.cm.cool
norm = matplotlib.colors.Normalize(vmin=0, vmax=1)

# The mappable carries only the norm and colormap; no data is attached.
jet_mappable = matplotlib.cm.ScalarMappable(norm=norm, cmap="jet")
fig.colorbar(
    jet_mappable,
    cax=ax,
    orientation='horizontal',
    label='Relative Expression',
)

plt.savefig(
    "output/main_figure_1_plots/"
    "jet_colorbar_for_relative_expression_0_1.pdf"
)