### Load needed libraries

In [1]:
import os
import numpy as np
import anndata
import re
import pandas as pd
import seaborn as sns
import scanpy as sc
import matplotlib.pyplot as plt
from datetime import datetime
import warnings
%matplotlib inline
from helper_functions import *

# Number of parallel jobs scanpy is allowed to use for this session.
sc.settings.n_jobs = 32
# Suppress every Python warning from here on; this also hides deprecation
# notices raised by pandas / seaborn / scanpy in later cells.
warnings.filterwarnings("ignore")

### Load needed datasets/files

In [None]:
# Load the donor pseudotime array and reorder its columns so that row 1 is ascending.
# Later cells read row 0 as the donor case number and row 1 as the pseudotime value.
# `pwd` is assigned here so that this cell runs on a fresh kernel without
# depending on any later cell having been executed first.
pwd = os.getcwd()
pseudotime = np.load(os.path.join(pwd, "../Quantitative neuropathology/pseudotime.npy"))
ptime = pseudotime[:, np.argsort(pseudotime[1, :])]

In [None]:
# All input paths are resolved relative to the current working directory.
pwd = os.getcwd()

# Quantitative neuropathology table; the first CSV column is used as the index.
quant_neuropath = pd.read_csv(os.path.join(pwd, 
                                           "../Quantitative neuropathology/input/MTG/processed/all_quant_neuropath_by_donor_pivoted.2022-05-25.csv"),
                              index_col=0)

# Pseudotime array with columns reordered so that row 1 is ascending.
pseudotime = np.load(os.path.join(pwd, "../Quantitative neuropathology/pseudotime.npy"))
ptime = pseudotime[:, np.argsort(pseudotime[1, :])]

# From Manuscript Supplementary Table 1
Donors = pd.read_excel(os.path.join(pwd, "input", "Figure 2 and Extended Data Figure 3 and 4", "Supplementary Table 1.xlsx"), sheet_name="SEA-AD_Cohort_Metadata")
Donors = Donors[['Donor ID', 'Last CASI Score', 'Overall AD neuropathological Change', 'Thal', 'Braak', 'CERAD score']]

# From https://sea-ad-single-cell-profiling.s3.amazonaws.com/index.html#MTG/RNAseq/
# The path components must be joined into a single path: read_csv takes one
# file path, and any further positional arguments are not path fragments.
metadata = pd.read_csv(os.path.join(pwd, "input", "Figure 2 and Extended Data Figure 3 and 4", "SEAAD_MTG_RNAseq_all-nuclei_metadata.2024-02-13.csv"))
metadata = metadata[['Donor ID', 'Continuous Pseudo-progression Score']]

### Figure 2c

In [None]:
# Work on a copy of the neuropathology table without any "Grey" columns.
grey_cols = quant_neuropath.columns[quant_neuropath.columns.str.contains("Grey")]
qn = quant_neuropath.drop(columns=grey_cols)

# Index by case number, then reorder the rows to follow the column order of `ptime`.
qn.index = qn['case number']
case_matches = [np.where(qn.index == int(case_id))[0] for case_id in ptime[0, :]]
idx = np.array(case_matches).flatten()
qn = qn.iloc[idx, :]

# Store one minus the pseudotime value next to the measurements.
qn['ptime'] = 1. - ptime[1, :]

In [None]:
# Figure 2c: clustered heatmap of the negated pairwise column correlations of `qn`.
# NOTE(review): seaborn's clustermap accepts its own `figsize` argument; confirm
# that the rcParams value below actually controls the size of this figure.
plt.rcParams['figure.figsize'] = [20, 20]
negated_corr = -qn.corr()
a = sns.clustermap(negated_corr, cmap="RdBu", metric="correlation")

plt.savefig('output/Figure_2c_correlation_plot.pdf', bbox_inches='tight')
plt.show()

### Figure 2d, e

In [None]:
# Pre-processing

def _zscore(column):
    """Standardize one column: subtract its mean and divide by its standard deviation."""
    return (column - column.mean()) / column.std()


# Keep only the "Grey" columns, indexed by case number and ordered like `ptime`.
grey_matter = quant_neuropath.filter(like="Grey")
grey_matter.index = quant_neuropath['case number']
donor_rows = [np.where(grey_matter.index == int(case_id))[0] for case_id in ptime[0, :]]
idx = np.array(donor_rows).flatten()
grey_matter = grey_matter.iloc[idx, :]

selection = [
    "percent 6e10 positive area_Grey matter",
    "percent AT8 positive area_Grey matter",
    "percent pTDP43 positive area_Grey matter",
    "percent aSyn positive area_Grey matter",
    "number of Hematoxylin positive nuclei per area_Grey matter",
    "percent GFAP positive area_Grey matter",
    "number of Iba1 positive cells per area_Grey matter",
    "number of NeuN positive cells per area_Grey matter",
]

# Column-wise z-scores of the selected measurements as a plain array.
mm = grey_matter[selection].apply(_zscore, axis=0).to_numpy()

# Smooth every column except positions 2 and 3 (pTDP43 and aSyn), which stay as-is.
smooth_mm = mm.copy()
for col in (0, 1, 4, 5, 6, 7):
    smooth_mm[:, col] = movingaverage_nan(mm[:, col], 10)

In [None]:
# Creating Plots

plt.rcParams['figure.figsize'] = [5, 5]

# Three stacked heatmaps, each showing a consecutive group of the selected columns
# (first two, next two, remaining four). Rows are reversed and values negated
# before plotting.
column_groups = (slice(None, 2), slice(2, 4), slice(4, None))
for panel_no, cols in enumerate(column_groups, start=1):
    plt.subplot(3, 1, panel_no)
    sns.heatmap(-smooth_mm[::-1, cols].T, cmap="RdBu", yticklabels=selection[cols])

plt.savefig('output/Figure_2d_2e_PathologyCellComposition.pdf', bbox_inches='tight')
plt.show()

### Figure 2f

In [None]:
# One row per donor: de-duplicate the per-nucleus metadata, drop rows with
# missing values, attach the donor-level scores and order donors by their
# pseudo-progression score.
# `axis` is passed by keyword because recent pandas releases no longer accept
# it positionally in `dropna`.
md = (
    metadata.drop_duplicates()
    .dropna(axis=0)
    .merge(Donors, on='Donor ID')
    .sort_values('Continuous Pseudo-progression Score')
)
# Keep the four score columns (positions 2-5 of the merged frame) by name so the
# selection does not depend on column order.
md = md[['Last CASI Score', 'Overall AD neuropathological Change', 'Thal', 'Braak']]
md = md.to_numpy()

In [None]:
# Replace the categorical labels in columns 1-3 of `md` with numeric codes,
# one column at a time. A label missing from its table raises KeyError.
# NOTE(review): the Braak table has no entry for a stage between 'Braak 0' and
# 'Braak II' - confirm that no such label occurs in the data.
label_codes = (
    (1, {'Not AD': 0.0, 'Low': 1.0, 'Intermediate': 2.0, 'High':3.0}),
    (2, {'Thal 0': 0.0, 'Thal 1': 1.0, 'Thal 2': 2.0, 'Thal 3':3.0,
         'Thal 4':4.0, 'Thal 5': 5.0}),
    (3, {'Braak 0':0.0, 'Braak II':1.0, 'Braak III':2.0,
         'Braak IV':3.0, 'Braak V':4.0, 'Braak VI':5.0}),
)
for col, my_dict in label_codes:
    for row in range(md.shape[0]):
        md[row, col] = my_dict[md[row, col]]

# Smooth each of the four columns with the moving-average helper (second argument 10).
smooth_md = md.copy()
for col in range(4):
    smooth_md[:, col] = movingaverage_nan(md[:, col], 10)


In [None]:
# Generate Figures

# Single-row heatmap of the smoothed first column (labelled CASI), negated.
# reshape(-1, 1) lets numpy infer the row count instead of assuming exactly 84 donors.
plt.rcParams['figure.figsize'] = [5, 0.6]
sns.heatmap(np.float64(-smooth_md[:, 0].reshape(-1, 1)).T, cmap="RdBu", yticklabels=['CASI'])
plt.savefig('output/Figure_2f_DonorDemographicsA.pdf', bbox_inches='tight')
plt.show()

# Three-row heatmap of the remaining smoothed columns (ADNC, Thal, Braak), negated.
plt.rcParams['figure.figsize'] = [5, 2.5]
sns.heatmap(np.float64(-smooth_md[:, 1:]).T, cmap="RdBu", yticklabels=['ADNC', 'THAL', 'BRAAK'])
plt.savefig('output/Figure_2f_DonorDemographicsB.pdf', bbox_inches='tight')
plt.show()


### Figure 2 g, h, i

In [None]:
# Panel 2g Top
plt.rcParams['figure.figsize'] = [5, 4]

# One column per cortical layer, sharing a common measurement prefix.
layer_suffixes = ('Layer1', 'Layer2', 'Layer3', 'Layer4', 'Layer5-6')
sel_names = ['number of 6e10 positive objects per area_' + layer for layer in layer_suffixes]
layer_colours = sns.color_palette("hls", 5)

# Per layer: raw points, a faint band of mean +/- 2 * stderr, and the smoothed mean.
for column, colour in zip(sel_names, layer_colours):
    plt.plot(1. - ptime[1, :], qn[column].to_numpy(), '.', c=colour)
    xgrid, mean, stderr = preprocess_smooth("ptime", column, qn)
    plt.fill_between(xgrid, mean-2*stderr, mean+2*stderr,  alpha=0.01, color="black")
    plt.plot(xgrid, mean, c=colour)
plt.title("Plaque Number per area Cluster 3")

plt.savefig('output/Figure_2g_Top_PlaqueNumber.pdf', bbox_inches='tight')

In [None]:
# Panel 2g Bottom
plt.rcParams['figure.figsize'] = [5, 4]

# One column per cortical layer, sharing a common measurement prefix.
layer_suffixes = ('Layer1', 'Layer2', 'Layer3', 'Layer4', 'Layer5-6')
sel_names = ['number of AT8 positive cells per area_' + layer for layer in layer_suffixes]
layer_colours = sns.color_palette("hls", 5)

# Per layer: raw points, a faint band of mean +/- 2 * stderr, and the smoothed mean.
for column, colour in zip(sel_names, layer_colours):
    plt.plot(1-ptime[1, :], qn[column].to_numpy(), '.', c=colour)
    xgrid, mean, stderr = preprocess_smooth("ptime", column, qn)
    plt.fill_between(xgrid, mean-2*stderr, mean+2*stderr,  alpha=0.01, color="black")
    plt.plot(xgrid, mean, c=colour)
plt.title("Tangles per area Cluster 3")

plt.savefig('output/Figure_2g_Bottom_Tangles.pdf', bbox_inches='tight')

In [None]:
# Panel 2h
plt.rcParams['figure.figsize'] = [5, 4]
sel_names = ['average 6e10 positive object median diameter_Layer1', 'average 6e10 positive object median diameter_Layer2', 
             'average 6e10 positive object median diameter_Layer3', 'average 6e10 positive object median diameter_Layer4', 
             'average 6e10 positive object median diameter_Layer5-6']

for i in np.arange(5):
    # The scatter x-values are 1 - pseudotime, i.e. exactly what is stored in
    # qn['ptime'] and used for the smoothed curve. A reversed copy of the
    # pseudotime row would pair each donor's measurement with another donor's
    # pseudotime unless the values happen to be symmetric about 0.5.
    plt.plot(1. - ptime[1, :], qn[sel_names[i]].to_numpy(), '.', c=sns.color_palette("hls", 5)[i])
    xgrid, mean, stderr = preprocess_smooth("ptime", sel_names[i], qn)
    # Faint band of mean +/- 2 * stderr around the smoothed mean.
    plt.fill_between(xgrid, mean-2*stderr, mean+2*stderr,  alpha=0.01, color="black")
    plt.plot(xgrid, mean, c=sns.color_palette("hls", 5)[i])
    plt.title("Plaque size Cluster 3")

plt.savefig('output/Figure_2h_PlaqueSize.pdf', bbox_inches='tight')

In [None]:
# Panel 2i
plt.rcParams['figure.figsize'] = [5, 4]

# One column per cortical layer, sharing a common measurement prefix.
layer_suffixes = ('Layer1', 'Layer2', 'Layer3', 'Layer4', 'Layer5-6')
sel_names = ['number of NeuN positive cells per area_' + layer for layer in layer_suffixes]
layer_colours = sns.color_palette("hls", 5)

# Per layer: raw points, a faint band of mean +/- 2 * stderr, and the smoothed mean.
for column, colour in zip(sel_names, layer_colours):
    plt.plot(1. - ptime[1, :], qn[column].to_numpy(), '.', c=colour)
    xgrid, mean, stderr = preprocess_smooth("ptime", column, qn)
    plt.fill_between(xgrid, mean-2*stderr, mean+2*stderr,  alpha=0.01, color="black")
    plt.plot(xgrid, mean, c=colour)
plt.title("Neurons per area Cluster 7")

plt.savefig('output/Figure_2i_NeuronsPerArea.pdf', bbox_inches='tight')


### Figure 2 g, h, i - GAM Calculation

In [None]:
# Rebuild `qn` from the raw table: drop the "Grey" columns, index by case
# number, order rows like `ptime`, and attach one minus the pseudotime value.
grey_cols = quant_neuropath.columns[quant_neuropath.columns.str.contains("Grey")]
qn = quant_neuropath.drop(columns=grey_cols)

qn.index = qn['case number']
case_matches = [np.where(qn.index == int(case_id))[0] for case_id in ptime[0, :]]
idx = np.array(case_matches).flatten()
qn = qn.iloc[idx, :]
qn['ptime'] = 1. - ptime[1, :]

# Set the Layer2 and Layer4 co-localization values of case 7237 to zero.
case_rows = np.where(qn.index == 7237)[0]
for layer in ("Layer2", "Layer4"):
    target_col = np.where(qn.columns == "percent of Iba1 and 6e10 positive co-localized objects_" + layer)[0]
    qn.iloc[case_rows, target_col] = 0

# Constant all-zero column plus the two names set for the GAM cells below.
# NOTE(review): presumably `formula` and `spline_var` are read by
# generate_results - confirm against helper_functions.
qn['nothing'] = 0

formula = "nothing"
spline_var = "ptime"

In [None]:
# Figure 2g bottom - GAM for AT8 positive cells per area (tangles), one column per layer
layer_suffixes = ('Layer1', 'Layer2', 'Layer3', 'Layer4', 'Layer5-6')
sel_names = ['number of AT8 positive cells per area_' + layer for layer in layer_suffixes]
generate_results(sel_names, 'output/Figure_2g_Bottom_Tangles-GAM.pdf')

In [None]:
# Figure 2g top - GAM for 6e10 positive objects per area (plaque number), one column per layer
layer_suffixes = ('Layer1', 'Layer2', 'Layer3', 'Layer4', 'Layer5-6')
sel_names = ['number of 6e10 positive objects per area_' + layer for layer in layer_suffixes]

generate_results(sel_names, 'output/Figure_2g_Top_PlaqueNumber-GAM.pdf')

In [None]:
# Figure 2h - GAM for plaque size (6e10 positive object median diameter), one column per layer
layer_suffixes = ('Layer1', 'Layer2', 'Layer3', 'Layer4', 'Layer5-6')
sel_names = ['average 6e10 positive object median diameter_' + layer for layer in layer_suffixes]
generate_results(sel_names, "output/Figure_2h_PlaqueSize-GAM.pdf")

In [None]:
# Figure 2i - GAM for NeuN positive cells per area (neurons), one column per layer
layer_suffixes = ('Layer1', 'Layer2', 'Layer3', 'Layer4', 'Layer5-6')
sel_names = ['number of NeuN positive cells per area_' + layer for layer in layer_suffixes]
generate_results(sel_names, "output/Figure_2i_NeuronsPerArea-GAM.pdf")

### Extended Data Figure 4b,c

In [None]:
# Extended Data Figure 4b,c: seven stacked panels of donor-level variables
# plotted against one minus the pseudotime value.
# NOTE(review): `metadata` is reduced to two columns in the loading cell, so the
# positional columns 2, 4, 59, 70, 71, 72 selected here cannot exist on that
# frame; this cell appears to expect a wider metadata table with columns such
# as 'age_at_death', 'ch_lastcasiscore', 'adneurochange', 'thal' and 'braak' -
# confirm which table is intended.
# NOTE(review): `idx` holds row positions computed against the neuropathology
# table; confirm they are valid row positions for `metadata` as well.
# NOTE(review): `new_prs` is not defined anywhere in the visible notebook.
md = metadata.iloc[idx, [2, 4, 59, 70, 71, 72]]
md['ptime'] = 1. - ptime[1, :]
md_np = md.to_numpy()
# Relabel one category of column 0 before plotting.
md_np[md_np[:, 0] == 'Other / Special Interest', 0]='ADRC Clinical Core'
# Append `new_prs` as an extra column; with the seven columns of `md` it lands at index 7.
md_np = np.hstack([md_np, new_prs[:, None]])

plt.rcParams['figure.figsize'] = [5, 10]

# Panel 1: column 0 as raw points only (no smoothed curve).
plt.subplot(7, 1, 1)
plt.plot(1-ptime[1, :], md_np[:, 0], '.', c="black")

# Panel 2: column 1 with the smoothed mean of "age_at_death" and a mean +/- 2 * stderr band.
plt.subplot(7, 1, 2)
plt.plot(1-ptime[1, :],md_np[:, 1], '.', c="black")
xgrid, mean, stderr = preprocess_smooth("ptime", "age_at_death", md)
plt.fill_between(xgrid, mean-2*stderr, mean+2*stderr,  alpha=0.1, color="black")
plt.plot(xgrid, mean, c="black")

# Panel 3: column 2 with the smoothed mean of "ch_lastcasiscore".
plt.subplot(7, 1, 3)
plt.plot(1-ptime[1, :], md_np[:, 2], '.', c="black")
xgrid, mean, stderr = preprocess_smooth("ptime", "ch_lastcasiscore", md)
plt.fill_between(xgrid, mean-2*stderr, mean+2*stderr,  alpha=0.1, color="black")
plt.plot(xgrid, mean, c="black")

# Panel 4: column 3 as points, plus a smoothed curve of the numerically coded
# 'adneurochange' labels. The coding runs from 'Not AD' = 3.0 down to 'High' = 0.0.
plt.subplot(7, 1, 4)
plt.plot(1-ptime[1, :], md_np[:, 3], '.', c="black")
my_dict = {'Not AD': 3.0, 'Low': 2.0, 'Intermediate': 1.0, 'High':0.0}
md['numeric_adnc']=md['adneurochange']
md['numeric_adnc'] = md['numeric_adnc'].replace(my_dict)
xgrid, mean, stderr = preprocess_smooth("ptime", "numeric_adnc", md)
plt.fill_between(xgrid, mean-2*stderr, mean+2*stderr,  alpha=0.1, color="black")
plt.plot(xgrid, mean, c="black")


# Panel 5: column 4 as points, plus a smoothed curve of the numerically coded
# 'thal' labels ('Thal 0' = 5.0 down to 'Thal 5' = 0.0).
plt.subplot(7, 1, 5)
plt.plot(1-ptime[1, :], md_np[:, 4], '.', c="black")
my_dict = {'Thal 0': 5.0, 'Thal 1': 4.0, 'Thal 2': 3.0, 'Thal 3':2.0,
      'Thal 4':1.0, 'Thal 5': 0.0}
md['numeric_thal']=md['thal']
md['numeric_thal'] = md['numeric_thal'].replace(my_dict)
xgrid, mean, stderr = preprocess_smooth("ptime", "numeric_thal", md)
plt.fill_between(xgrid, mean-2*stderr, mean+2*stderr,  alpha=0.1, color="black")
plt.plot(xgrid, mean, c="black")


# Panel 6: column 5 as points, plus a smoothed curve of the numerically coded
# 'braak' labels ('Braak 0' = 5.0 down to 'Braak VI' = 0.0).
plt.subplot(7, 1, 6)
plt.plot(1-ptime[1, :], md_np[:, 5], '.', c="black")
my_dict = {'Braak 0':5.0, 'Braak II':4.0, 'Braak III':3.0, 
           'Braak IV':2.0, 'Braak V':1.0, 'Braak VI':0.0}
md['numeric_braak']=md['braak']
md['numeric_braak'] = md['numeric_braak'].replace(my_dict)
xgrid, mean, stderr = preprocess_smooth("ptime", "numeric_braak", md)
plt.fill_between(xgrid, mean-2*stderr, mean+2*stderr,  alpha=0.1, color="black")
plt.plot(xgrid, mean, c="black")


# Panel 7: the appended `new_prs` column. The points are drawn first; afterwards
# zeros are replaced by NaN before the smoothed curve is computed.
plt.subplot(7, 1, 7)
plt.plot(1-ptime[1, :], md_np[:, 7], '.', c="black")
# `temp` is a view into md_np, so the in-place NaN assignment below also changes md_np[:, 7].
temp = md_np[:, 7]
temp[temp==0] = np.nan
md['prs'] = temp
xgrid, mean, stderr = preprocess_smooth("ptime", "prs", md)
plt.fill_between(xgrid, mean-2*stderr, mean+2*stderr,  alpha=0.1, color="black")
plt.plot(xgrid, mean, c="black")


plt.savefig('output/Extended Data Figure 4bc_metadata_plots_v2.pdf', bbox_inches='tight')
plt.show()

### Extended Data Figure 4d

In [None]:
plt.rcParams['figure.figsize'] = [5, 4]

# One column per cortical layer, sharing a common measurement prefix.
layer_suffixes = ('Layer1', 'Layer2', 'Layer3', 'Layer4', 'Layer5-6')
sel_names = ['number of pTDP43 positive cells per area_' + layer for layer in layer_suffixes]
layer_colours = sns.color_palette("hls", 5)

# Raw points only for this panel; no smoothed curve is drawn.
for column, colour in zip(sel_names, layer_colours):
    plt.plot(1. - ptime[1, :], qn[column].to_numpy(), '.', c=colour)
plt.title("Cluster 1 Sparse pTDP-43")

plt.savefig("output/Extended Data Figure 4d_pTDP43_per_area.pdf", bbox_inches='tight')

### Extended Data Figure 4e

#### Top Left

In [None]:
plt.rcParams['figure.figsize'] = [5, 4]

# One column per cortical layer, sharing a common measurement prefix.
layer_suffixes = ('Layer1', 'Layer2', 'Layer3', 'Layer4', 'Layer5-6')
sel_names = ['number of Hematoxylin positive nuclei per area_' + layer for layer in layer_suffixes]
layer_colours = sns.color_palette("hls", 5)

# Per layer: raw points, a faint band of mean +/- 2 * stderr, and the smoothed mean.
for column, colour in zip(sel_names, layer_colours):
    plt.plot(1. - ptime[1, :], qn[column].to_numpy(), '.', c=colour)
    xgrid, mean, stderr = preprocess_smooth("ptime", column, qn)
    plt.fill_between(xgrid, mean-2*stderr, mean+2*stderr,  alpha=0.01, color="black")
    plt.plot(xgrid, mean, c=colour)
plt.title('number of Hematoxylin positive nuclei')

plt.savefig("output/Extended Data Figure 4e_TopLeftA_NumberHematoxilin.pdf", bbox_inches='tight')

In [None]:
plt.rcParams['figure.figsize'] = [5, 4]

# One column per cortical layer, sharing a common measurement prefix.
layer_suffixes = ('Layer1', 'Layer2', 'Layer3', 'Layer4', 'Layer5-6')
sel_names = ['average Iba1 positive process length per cell_' + layer for layer in layer_suffixes]
layer_colours = sns.color_palette("hls", 5)

# Per layer: raw points, a faint band of mean +/- 2 * stderr, and the smoothed mean.
for column, colour in zip(sel_names, layer_colours):
    plt.plot(1. - ptime[1, :], qn[column].to_numpy(), '.', c=colour)
    xgrid, mean, stderr = preprocess_smooth("ptime", column, qn)
    plt.fill_between(xgrid, mean-2*stderr, mean+2*stderr,  alpha=0.01, color="black")
    plt.plot(xgrid, mean, c=colour)
plt.title('average Iba1 positive process length per cell')

plt.savefig("output/Extended Data Figure 4e_TopLeftB_AvgIBA1processlength.pdf", bbox_inches='tight')

In [None]:
plt.rcParams['figure.figsize'] = [5, 4]

# One column per cortical layer, sharing a common measurement prefix.
layer_suffixes = ('Layer1', 'Layer2', 'Layer3', 'Layer4', 'Layer5-6')
sel_names = ['number of inactivated Iba1 positive cells per area_' + layer for layer in layer_suffixes]
layer_colours = sns.color_palette("hls", 5)

# Per layer: raw points, a faint band of mean +/- 2 * stderr, and the smoothed mean.
for column, colour in zip(sel_names, layer_colours):
    plt.plot(1. - ptime[1, :], qn[column].to_numpy(), '.', c=colour)
    xgrid, mean, stderr = preprocess_smooth("ptime", column, qn)
    plt.fill_between(xgrid, mean-2*stderr, mean+2*stderr,  alpha=0.01, color="black")
    plt.plot(xgrid, mean, c=colour)
plt.title('number of inactivated Iba1-ir cells per area')

plt.savefig("output/Extended Data Figure 4e_TopLeftC_numberInactivatedIBA1.pdf", bbox_inches='tight')

In [None]:
plt.rcParams['figure.figsize'] = [5, 4]
sel_names = ['percent Iba1 positive area_Layer1', 'percent Iba1 positive area_Layer2', 
            'percent Iba1 positive area_Layer3', 'percent Iba1 positive area_Layer4', 
            'percent Iba1 positive area_Layer5-6', ]

# Per layer: raw points, a faint band of mean +/- 2 * stderr, and the smoothed mean.
for i in np.arange(5):
    plt.plot(1. - ptime[1, :], qn[sel_names[i]].to_numpy(), '.', c=sns.color_palette("hls", 5)[i])
    xgrid, mean, stderr = preprocess_smooth("ptime", sel_names[i], qn)
    plt.fill_between(xgrid, mean-2*stderr, mean+2*stderr,  alpha=0.01, color="black")
    plt.plot(xgrid, mean, c=sns.color_palette("hls", 5)[i])

# The panel overlays all five layers, so the title names the measurement only.
# Titling with the per-layer column name would leave the last layer's suffix
# ("_Layer5-6") on a figure that shows every layer.
plt.title('percent Iba1 positive area')

plt.savefig("output/Extended Data Figure 4e_TopLeftD_percentIBA1.pdf", bbox_inches='tight')

#### Bottom Left

In [None]:
plt.rcParams['figure.figsize'] = [5, 4]

# One column per cortical layer, sharing a common measurement prefix.
layer_suffixes = ('Layer1', 'Layer2', 'Layer3', 'Layer4', 'Layer5-6')
sel_names = ['number of positive aSyn cells per area_' + layer for layer in layer_suffixes]
layer_colours = sns.color_palette("hls", 5)

# Raw points only for this panel; no smoothed curve is drawn.
for column, colour in zip(sel_names, layer_colours):
    plt.plot(1. - ptime[1, :], qn[column].to_numpy(), '.', c=colour)
plt.title('number of positive aSyn cells per area')

plt.savefig("output/Extended Data Figure 4e_BottomLeft_aSyn.pdf", bbox_inches='tight')

#### Top Right

In [None]:
plt.rcParams['figure.figsize'] = [5, 4]

# One column per cortical layer, sharing a common measurement prefix.
layer_suffixes = ('Layer1', 'Layer2', 'Layer3', 'Layer4', 'Layer5-6')
sel_names = ['percent GFAP positive area_' + layer for layer in layer_suffixes]
layer_colours = sns.color_palette("hls", 5)

# Per layer: raw points, a faint band of mean +/- 2 * stderr, and the smoothed mean.
for column, colour in zip(sel_names, layer_colours):
    plt.plot(1. - ptime[1, :], qn[column].to_numpy(), '.', c=colour)
    xgrid, mean, stderr = preprocess_smooth("ptime", column, qn)
    plt.fill_between(xgrid, mean-2*stderr, mean+2*stderr,  alpha=0.01, color="black")
    plt.plot(xgrid, mean, c=colour)
plt.title('percent GFAP positive area')

plt.savefig("output/Extended Data Figure 4e_TopRIght_percentGFAP.pdf", bbox_inches='tight')

#### Bottom Right

In [None]:
plt.rcParams['figure.figsize'] = [5, 4]

# One column per cortical layer, sharing a common measurement prefix.
layer_suffixes = ('Layer1', 'Layer2', 'Layer3', 'Layer4', 'Layer5-6')
sel_names = ['average Hematoxylin positive nucleus area_' + layer for layer in layer_suffixes]
layer_colours = sns.color_palette("hls", 5)

# Per layer: raw points, a faint band of mean +/- 2 * stderr, and the smoothed mean.
for column, colour in zip(sel_names, layer_colours):
    plt.plot(1. - ptime[1, :], qn[column].to_numpy(), '.', c=colour)
    xgrid, mean, stderr = preprocess_smooth("ptime", column, qn)
    plt.fill_between(xgrid, mean-2*stderr, mean+2*stderr,  alpha=0.01, color="black")
    plt.plot(xgrid, mean, c=colour)
plt.title('average Hematoxylin positive nucleus area')

plt.savefig("output/Extended Data Figure 4e_BottomRight_avgHemNucArea.pdf", bbox_inches='tight')