This notebook takes the AnnData object produced by the adata_convertion notebook, computes 3D PHATE embeddings, saves a range of plots to a PowerPoint presentation, and finally writes out a modified AnnData object with the 3D PHATE coordinates appended for downstream use.

In [None]:
import pickle
import os
import numpy as np
import matplotlib.pyplot as plt
from sklearn.decomposition import PCA
from sklearn import preprocessing
from matplotlib.backends.backend_pdf import PdfPages
import seaborn as sns
import pandas as pd
import phate
import math
import random
import gc
import scprep
from datetime import datetime, time
from matplotlib.animation import ImageMagickWriter
import matplotlib.animation as animation
import zipfile
from urllib.request import urlopen
import scipy.stats as st
from scipy.stats import norm
from scipy.stats import gaussian_kde
from scipy.stats import kde
from scipy.stats import binned_statistic
from scipy.stats import f_oneway
from matplotlib.colors import LogNorm
from mpl_toolkits.axes_grid1 import make_axes_locatable
# Font type 42 is TrueType in Matplotlib's PDF backend; presumably chosen so text in
# exported PDFs stays editable -- NOTE(review): confirm this is the intent.
plt.rcParams['pdf.fonttype'] = 42
# Show the installed seaborn version.
print(sns.__version__)
from anndata import AnnData
import scanpy as sc
from delve import *
import anndata as ad
from sklearn.preprocessing import MinMaxScaler
from kh import sketch
from sklearn.cluster import KMeans
import umap
# Show the installed scanpy version.
print(sc.__version__)
# Timestamp string (month, day, year, then hour and minute) taken when this cell runs.
today = datetime.now().strftime("%m%d%Y-%H%M")

In [2]:
# Read the subsampled AnnData file back in.
# The import cell binds the anndata package only as `ad` (`import anndata as ad`)
# and the class `AnnData`; the bare name `anndata` is not imported explicitly, so
# it could only reach this cell via `from delve import *`. Use the explicit alias.
adata_save_path = r'your/save/path/here.h5ad'
standard_adata_sub_no_sparse = ad.read_h5ad(adata_save_path)

Remove the total nuclear protein features (variables whose names contain "total_nuc_protein") before computing the PHATE structure. This avoids oversampling the nuclear area measurement.

In [3]:
# Keep every variable whose name does not contain the substring "total_nuc_protein".
columns_to_keep = [
    var_name
    for var_name in standard_adata_sub_no_sparse.var_names
    if "total_nuc_protein" not in var_name
]

# Column-subset the AnnData object down to the retained variables.
standard_trimmed_noPSTAT5_noTotal_adata_sub = standard_adata_sub_no_sparse[:, columns_to_keep]


In [4]:
def laplacian_score_fs(adata=None, k: int = None, n_jobs: int = -1):
    """Build a table of Laplacian scores, one row per feature, sorted ascending.

    The helpers ``parse_input``, ``construct_affinity`` and ``laplacian_score``
    are not defined in this notebook; presumably they arrive through
    ``from delve import *`` -- confirm against the installed package.

    Parameters
    ----------
    adata
        Object handed to ``parse_input`` (the notebook passes an AnnData object).
    k : int
        Forwarded to ``construct_affinity`` as ``k``
        (presumably the neighbor count -- TODO confirm).
    n_jobs : int
        Forwarded to ``construct_affinity`` as ``n_jobs``.

    Returns
    -------
    pandas.DataFrame
        Indexed by feature name, with a single ``'laplacian_score'`` column,
        ordered from the smallest score to the largest.
    """
    # The third value returned by parse_input (observation names) is not needed here.
    X, feature_names, _ = parse_input(adata)
    affinity = construct_affinity(X=X, k=k, n_jobs=n_jobs)

    score_table = pd.DataFrame(
        laplacian_score(X=X, W=affinity),
        index=feature_names,
        columns=['laplacian_score'],
    )
    return score_table.sort_values(by='laplacian_score', ascending=True)

In [5]:
# Rank features on the trimmed object so that the "total_nuc_protein" variables
# removed above never enter the ranking, and therefore cannot be picked up by the
# top-N feature selection (`l_score_standard.index[:N]`) used for PHATE below.
# Ranking the untrimmed object left the trimmed one unused and let those
# variables back in.
l_score_standard = laplacian_score_fs(standard_trimmed_noPSTAT5_noTotal_adata_sub, k = 100)

In [None]:
# Number of rows in the score table (one row per ranked feature).
l_score_standard.shape[0]

In [None]:
# Names of the first 46 entries of the score table (smallest scores first).
l_score_standard.head(46).index

Plot 3D PHATE structures for each combination of parameters and save the plots to a PowerPoint presentation.

In [None]:
import os
from io import BytesIO
from itertools import product

import numpy as np
import phate
import matplotlib.pyplot as plt
from mpl_toolkits.mplot3d import Axes3D
from pptx import Presentation
from pptx.util import Inches

# Define parameter ranges
count_list = [50, 100]    # values passed to PHATE as `knn`
index_ranges = [10, 20]   # how many leading entries of l_score_standard.index are used as features
t_values = ['auto']       # values passed to PHATE as `t`
gamma_values = [1]        # values passed to PHATE as `gamma`

# Camera positions as (elevation, azimuth) in degrees; one image per entry,
# placed on the slide in a 2x2 grid.
angles = [(30, 30), (30, 120), (30, 210), (30, 300)]

# Treatment labels and their colors do not depend on the PHATE parameters,
# so they are built once instead of on every loop iteration.
treatment = np.asarray(standard_adata_sub_no_sparse.obs['treatment'])
labels = np.unique(treatment)
color_map = plt.get_cmap('viridis', len(labels))
colors = color_map(np.linspace(0, 1, len(labels)))
color_dict = dict(zip(labels, colors))

# Create a PowerPoint presentation object
presentation = Presentation()

# Visit every parameter combination (same order as four nested loops:
# count -> index_range -> t -> gamma).
for count, index_range, t, gamma in product(count_list, index_ranges, t_values, gamma_values):
    # Initialize PHATE operator, projecting to three dimensions
    phate_op = phate.PHATE(knn=count, t=t, gamma=gamma, n_components=3)

    # Restrict the data matrix to the first `index_range` features of the ranking
    feature_mask = np.isin(standard_adata_sub_no_sparse.var_names,
                           l_score_standard.index[:index_range])
    X_phate = phate_op.fit_transform(standard_adata_sub_no_sparse.X[:, feature_mask])

    # Store the embedding on the AnnData object. This entry is overwritten on
    # every iteration, so after the loop it holds the LAST combination only.
    standard_adata_sub_no_sparse.obsm['X_phate'] = X_phate

    # Generate plot title
    plot_title = f'PHATE: Neighbors={count}, Index Range={index_range}, t={t}, gamma={gamma}'

    # One slide per parameter combination; layout index 5 of the default
    # template is expected to provide the title placeholder used below.
    slide = presentation.slides.add_slide(presentation.slide_layouts[5])
    title = slide.shapes.title
    title.text = plot_title

    # Draw the point cloud once, then re-render the same figure from each
    # camera angle rather than re-plotting every point for every view.
    fig = plt.figure()
    try:
        ax = fig.add_subplot(111, projection='3d')
        # Plot each label with a unique color
        for label, color in color_dict.items():
            idx = treatment == label  # plain NumPy boolean mask over observations
            ax.scatter(X_phate[idx, 0], X_phate[idx, 1], X_phate[idx, 2], color=color, label=label, s=5)
        ax.set_title(plot_title)
        ax.legend(title='Treatment', bbox_to_anchor=(1.05, 1), loc='upper left')

        for i, (elev, azim) in enumerate(angles):
            ax.view_init(elev=elev, azim=azim)

            # Render this view into an in-memory PNG
            img_stream = BytesIO()
            fig.savefig(img_stream, format='png', bbox_inches='tight')
            img_stream.seek(0)

            # Add the image to the current slide in a 2x2 grid
            row, col = divmod(i, 2)
            left = Inches(1 + col * 4)
            top = Inches(1.5 + row * 3.5)
            slide.shapes.add_picture(img_stream, left, top, width=Inches(3.5))
    finally:
        # Always release the figure, even if plotting or export fails.
        plt.close(fig)

# Save the PowerPoint presentation
pptx_filename = 'phate_plots_presentation.pptx'
presentation.save(pptx_filename)

print(f"3D PHATE plots saved with multiple angles and legends. PowerPoint presentation saved as {pptx_filename}.")


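After you've decided on a final structure, you can save the AnnData file. Note that the plotting loop overwrites the stored embedding on every iteration, so `obsm['X_phate']` holds the coordinates from the last parameter combination that was run; re-run the loop with only your chosen parameters before saving. The saved file will then include the PHATE coordinates, removing the need to recompute the PHATE structure if you want to plot different things onto it later.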

In [None]:
# Save the entire AnnData object, including the most recent embedding assigned
# to `.obsm['X_phate']` by the plotting cell.
# NOTE(review): this placeholder path uses backslashes, unlike the forward-slash
# path used when reading the file -- confirm the separator for your platform.
adata_save_path = r'my\save\path\standard_adata_sub_sub.h5ad'
standard_adata_sub_no_sparse.write_h5ad(adata_save_path)