In [None]:
# Script to load cell information from a QuPath project, cluster the data with e.g. PhenoGraph,
# and add the result as a new measurement to the cell measurements.
# using documentation from https://paquo.readthedocs.io/ & https://github.com/dpeerlab/PhenoGraph
#
# ewelina.bartoszek@unibas.ch with help from @tibuch & @lacan
# 20241122

In [None]:
#import libraries

from paquo.projects import QuPathProject
import pandas as pd
import anndata as ad
import numpy as np
from matplotlib import pyplot as plt
import phenograph
import scanpy as sc


In [None]:
# Open the QuPath project. mode='a' is kept from the original call; further down the
# notebook writes measurements into the detections and saves the images through `qp`.
project_file = 'path to folder/project.qpproj'  # placeholder: point this at your .qpproj file
qp = QuPathProject(project_file, mode='a')


In [None]:
# Collect the measurements of every detection in every image of the project into a
# single table: one row per detection, one column per measurement name, plus an
# "imageN" column holding the name of the image the detection came from.
per_image_frames = []
for image in qp.images:
    detections = image.hierarchy.detections  # detections are stored in a set-like proxy object
    # Each detection's measurements mapping becomes one row of the per-image frame.
    df = pd.DataFrame(detection.measurements for detection in detections)
    df["imageN"] = image.image_name
    per_image_frames.append(df)

if not per_image_frames:
    # pd.concat would fail on an empty list with a less helpful message.
    raise ValueError("The QuPath project contains no images - nothing to load.")

# ignore_index=True gives every cell a unique row label. Without it each image's rows
# are numbered from 0 again, so labels repeat across images and end up as non-unique
# observation names once the table is converted to AnnData.
allCells = pd.concat(per_image_frames, ignore_index=True)

In [None]:
# Quick look at the combined table: one row per detection, with the measurement
# columns plus the "imageN" column added while loading.
display(allCells)

In [None]:
# Preview only the columns whose name ends in "Cell: Mean".
mean_column_pattern = '.*Cell: Mean$'
df1 = allCells.filter(regex=mean_column_pattern)
display(df1)

In [None]:
# Select the measurements used for clustering.
# df1: every column whose name ends in "Cell: Mean".
# df2: the subset of those columns listed in `marker_columns`; edit the list so it
#      matches the measurement names of your project (a missing name raises KeyError).
marker_columns = [
    'Marker1: Cell: Mean',
    'Marker2: Cell: Mean',
    'Marker3: Cell: Mean',
    'Marker4: Cell: Mean',
    'Marker5: Cell: Mean',
]
df1 = allCells.filter(regex='.*Cell: Mean$')
df2 = df1.loc[:, marker_columns]

# Fail early, with a readable message, if any cell lacks one of the markers (for
# example because an image has no such measurement). Rows cannot simply be dropped
# here: the cluster labels are later written back to allCells by position, so df2
# must keep exactly the rows of allCells.
missing_values = df2.isna().sum()
if missing_values.any():
    raise ValueError(
        "Missing marker measurements (NaN) found, count per column: "
        f"{missing_values[missing_values > 0].to_dict()}"
    )

display(df2)

In [None]:
# Wrap the marker table in an AnnData object so it can be passed to scanpy; the
# numeric matrix is then available as adata.X for PhenoGraph.
adata = ad.AnnData(X=df2)
display(adata)

In [None]:
# Principal component analysis with 3 components. copy=True makes scanpy return a
# new AnnData object, which replaces `adata` from here on.
adata = sc.tl.pca(
    adata,
    n_comps=3,
    copy=True,
)

In [None]:
# Cumulative fraction of variance explained by the principal components.
variance_ratio = adata.uns['pca']['variance_ratio']
cml_var_explained = np.cumsum(variance_ratio)
# Number the components from 1 (PC1, PC2, ...) instead of from 0 so the x axis
# matches the usual naming of principal components.
x = np.arange(1, len(variance_ratio) + 1)
y = cml_var_explained
plt.scatter(x, y, s=4)
plt.xticks(x)  # one integer tick per component, no fractional PC numbers
plt.xlabel('PC')
plt.ylabel('Cumulative variance explained')
plt.title('Cumulative variance explained by PCs')
plt.show()

In [None]:
#display(adata.X)

In [None]:
# Cluster the cells with PhenoGraph.
# Note that the input is adata.X (the marker matrix), not the PCA coordinates.
k = 60  # number of nearest neighbours used by PhenoGraph
communities, graph, Q = phenograph.cluster(adata.X, k=k)

# Keep the result next to the data: the per-cell labels go into adata.obs as a
# categorical column; the returned Q value and the k that was used go into adata.uns.
adata.obs['PhenoGraph_clusters'] = pd.Categorical(communities)
adata.uns.update({'PhenoGraph_Q': Q, 'PhenoGraph_k': k})

In [None]:
# Configure scanpy output and record the package versions used for this run.
sc.settings.verbosity = 0            # verbosity: errors (0), warnings (1), info (2), hints (3)
# Print scanpy's version header, then the PhenoGraph version next to it.
sc.logging.print_header()
print("phenograph==", phenograph.__version__)
# Figure defaults for the scanpy plots that follow.
sc.settings.set_figure_params(dpi=80, facecolor='white')

In [None]:
# Neighbour graph and UMAP embedding, coloured by PhenoGraph cluster.
# use_rep='X' builds the neighbour graph on the marker matrix itself, i.e. on the
# same data PhenoGraph clustered. Only 3 principal components are computed earlier
# in this notebook, so requesting 10 PCs here (n_pcs=10) could not be honoured.
# NOTE(review): if the graph should be built on the PCs instead, pass
# use_rep='X_pca' together with an n_pcs no larger than the number computed.
sc.pp.neighbors(adata, n_neighbors=15, use_rep='X')
sc.tl.umap(adata)
sc.pl.umap(
    adata,
    color=['PhenoGraph_clusters'],
    palette='tab20',  # colour palette used for the cluster labels
    title=["Clusters"],
)

In [None]:
# UMAP coloured by each marker, one panel per marker.
# The names come from adata.var_names (the columns that were used to build adata),
# so this cell stays in sync with the marker selection instead of repeating the list.
sc.pl.umap(adata, color=list(adata.var_names))

In [None]:
# Attach the PhenoGraph label of every cell to the combined table.
# The assignment is positional: it relies on allCells still having the same rows,
# in the same order, as the marker matrix that was clustered.
# 'cluster' is only the DataFrame column name. The measurement name that appears in
# QuPath is the 'NewCluster' key used when the values are written back to the
# detections; change it there if you want a different measurement name.
allCells['cluster'] = communities

In [None]:
# Preview the rows that belong to a single image.
# `image` is the variable left behind by the earlier `for image in qp.images` loop,
# i.e. the last image that loop visited.
# A boolean mask is used instead of DataFrame.query with an f-string, so image names
# containing quotes or other special characters cannot break the filter expression.
NewDet = allCells[allCells['imageN'] == image.image_name]
display(NewDet)

In [None]:
# Write the PhenoGraph cluster ID of every cell back into QuPath as the measurement
# 'NewCluster' and save each image.
for image in qp.images:
    # Rows of allCells that belong to this image. A boolean mask (rather than
    # DataFrame.query with an f-string) also works for image names containing quotes.
    NewDet = allCells[allCells['imageN'] == image.image_name]
    detections = image.hierarchy.detections
    assert len(detections) == len(NewDet), (
        f"Image '{image.image_name}': {len(detections)} detections in QuPath but "
        f"{len(NewDet)} rows in allCells - were detections changed after loading, "
        "or do several images share this name?"
    )
    # Send back to QuPath and save.
    # NOTE(review): values are paired with detections by position; this assumes the
    # detections are iterated in the same order as when allCells was built - confirm.
    for detection, result in zip(detections, NewDet['cluster']):
        detection.measurements['NewCluster'] = float(result)

    image.save()
# NOTE(review): the project-level save below is left commented out - confirm that
# image.save() alone is sufficient.
#qp.save()
    