In [None]:
import sklearn
from sklearn.preprocessing import StandardScaler
from sklearn.neighbors import kneighbors_graph
from sklearn.decomposition import PCA

import matplotlib.pyplot as plt
from anndata import AnnData
import pandas as pd
import numpy as np
import seaborn as sns
import colorcet as cc
import scanpy as sc
import igraph as ig
import colorcet as cc
import leidenalg
import umap
import copy
import os

from functions import map_scatter, glasbey

In [None]:
# read in dimred_clstr data
data_path = os.path.join('..', 'outputs', 'dimred_clstr.csv')

# Fail fast: the cells below all read `df`, so a missing input file has to
# stop the run here rather than show up later as a NameError on `df`.
if not os.path.exists(data_path):
    raise FileNotFoundError(f"File not found: {data_path}.\nCheck data path.")

df = pd.read_csv(data_path)

In [None]:
# Columns that describe a cell rather than measure a marker.
metadata = [
    'Image', 'Object ID', 'Classification', 'Parent',
    'Centroid X µm', 'Centroid Y µm',
    'UMAP1', 'UMAP2', 'PC1', 'PC2',
    'Cluster_UMAP',
]

# Every remaining column of `df` is treated as a marker; column order is kept.
markers = df.columns[~df.columns.isin(metadata)].tolist()
print(markers)

In [None]:
# Reshape wide -> long: the metadata columns stay as identifiers, and every
# other column is stacked into a ("Marker", "Intensity") pair of columns.
melted = pd.melt(
    df,
    id_vars=metadata,
    var_name="Marker",
    value_name="Intensity",
)
melted.head()

In [None]:
# One facet per marker (5 per row), drawn in UMAP1/UMAP2 space.
# `map_scatter` comes from the local `functions` module; it is handed the
# "Intensity" column plus the styling keywords below — presumably to colour
# the points by intensity (TODO confirm against functions.map_scatter).
f = sns.FacetGrid(data=melted, col="Marker", col_wrap=5).map_dataframe(
    map_scatter,
    "UMAP1",
    "UMAP2",
    "Intensity",
    s=1,
    cmap="viridis",
    vmin=0,
)

In [None]:
# One facet per marker (5 per row), drawn in PC1/PC2 space.
# `map_scatter` comes from the local `functions` module; it is handed the
# "Intensity" column plus the styling keywords below — presumably to colour
# the points by intensity (TODO confirm against functions.map_scatter).
f = sns.FacetGrid(data=melted, col="Marker", col_wrap=5).map_dataframe(
    map_scatter,
    "PC1",
    "PC2",
    "Intensity",
    s=1,
    cmap="viridis",
    vmin=0,
)

In [None]:
# Dot plot of the markers, grouped by UMAP cluster.

# Per-cell annotations; scanpy groups on this column, so store it as a categorical.
# (Other obs columns such as a cluster or patient label could be used instead.)
obs = df[metadata].copy()
obs['Cluster_UMAP'] = obs['Cluster_UMAP'].astype('category')

# Marker intensities as a plain numpy array (rows follow `df`, columns follow `markers`).
X = df[markers].values

# Name the variables after the markers at construction time.
adata = AnnData(X=X, obs=obs, var=pd.DataFrame(index=markers))

sc.pl.dotplot(
    adata,
    groupby='Cluster_UMAP',
    var_names=markers,  # markers shown along the x-axis
    title='Marker proportions in each cluster (from UMAP)',
)

In [None]:
# Dot plot of the markers, grouped by lymph node ('Parent').

# Per-cell annotations; scanpy groups on this column, so store it as a categorical.
# (Other obs columns such as a cluster or patient label could be used instead.)
obs = df[metadata].copy()
obs['Parent'] = obs['Parent'].astype('category')

# Marker intensities as a plain numpy array (rows follow `df`, columns follow `markers`).
X = df[markers].values

# Name the variables after the markers at construction time.
adata = AnnData(X=X, obs=obs, var=pd.DataFrame(index=markers))

sc.pl.dotplot(
    adata,
    groupby='Parent',
    var_names=markers,  # markers shown along the x-axis
    title='Marker proportions in each lymph node',
)

In [None]:
# for each lymph node, plot cluster on top of the xy coordinates
grouped = df.groupby('Parent')
for name, group in grouped:
    # one figure per lymph node, cells placed at their centroid coordinates
    f, ax = plt.subplots(figsize=(10, 10))

    sns.scatterplot(
        x="Centroid X µm",
        y="Centroid Y µm",
        # Colour by the cluster column this notebook actually declares in
        # `metadata` and uses for the cluster dot plot; no cell sets up a
        # plain "Cluster" column.
        hue="Cluster_UMAP",
        legend="full",
        palette="rainbow",
        data=group,
        ax=ax,
        s=10
    ).set(title=f'Lymph node {name} xy plot')

    sns.despine()
    # place the legend outside the axes, to the right of the plot
    ax.legend(bbox_to_anchor=(1.05, 1), loc=2, markerscale=1, borderaxespad=0.)
    plt.show()
    # release the figure so a long list of lymph nodes does not pile up open figures
    plt.close(f)

In [None]:
# One spatial scatter per lymph node ('Parent'), points coloured by 'Classification'.
grouped = df.groupby('Parent')
for name, group in grouped:
    f, ax = plt.subplots(figsize=(10, 10))

    # Draw the cells of this lymph node at their centroid coordinates.
    sns.scatterplot(
        data=group,
        x="Centroid X µm",
        y="Centroid Y µm",
        hue="Classification",
        palette="rainbow",
        legend="full",
        s=10,
        ax=ax,
    )
    ax.set(title=f'Lymph node {name} xy plot')

    sns.despine()
    # Legend sits outside the axes, anchored at its upper-left corner.
    ax.legend(
        loc='upper left',
        bbox_to_anchor=(1.05, 1),
        borderaxespad=0.,
        markerscale=1,
    )
    plt.show()

In [None]:
# Heatmap of mean marker intensity per lymph node ('Parent'); intended for
# comparing nodes (e.g. pre vs post) at a glance.

# Long format: one row per (cell, marker) with the node it belongs to.
df_long = df.melt(
    id_vars=['Parent'],
    value_vars=markers,
    var_name='Marker',
    value_name='Intensity',
)

# Rows = lymph nodes, columns = markers, values = mean intensity.
mean_by_node = df_long.pivot_table(
    index='Parent',
    columns='Marker',
    values='Intensity',
    aggfunc='mean',
)

plt.figure(figsize=(10, 8))
heat_ax = sns.heatmap(mean_by_node, cmap='coolwarm')
heat_ax.set_xlabel('Markers')
heat_ax.set_ylabel('Lymph Node')
heat_ax.set_title('Heatmap of Marker Intensities Grouped by Lymph Node')
plt.show()