# prepare workspace

Code adapted from: https://github.com/BayraktarLab/CountCorrect

In [None]:
# Import relevant packages
import numpy as np
import pandas as pd
from matplotlib import rcParams
import os
import scanpy as sc
import anndata as ad
import gseapy as gp

import matplotlib as mpl
import matplotlib.pyplot as plt
import matplotlib.cm as cm

# For nice color schemes
import cmocean

# For barplots
import seaborn as sns
from statannot import add_stat_annotation

import glob
import torch
import anndata

In [None]:
# NOTE: countcorrect only imports cleanly with older, pinned versions of its
# dependencies: downgrade to numpy 1.21.4 and numba 0.53.0 before importing.
import countcorrect as cc

In [None]:
# Let pandas print dataframes in full (up to 100 columns and 100 rows)
pd.options.display.max_columns = 100
pd.options.display.max_rows = 100

In [None]:
# Use a 15 pt default font for all matplotlib figures
plt.rcParams['font.size'] = 15

In [None]:
# Work from the GeoMx project folder so the relative CSV / h5ad paths used
# later in this notebook resolve against it
os.chdir("/hpc/group/goldsteinlab/Python/ONB/GeoMx")

# read data

Note: these raw count matrices were produced from the "demoData" object, which was created by reading the raw DCC, PKC, and annotation files with the readNanoStringGeoMxSet() function in R. See the R notebook "9_GeoMx_pre-processing" for more details.

In [None]:
# Load the three tables exported by the Sanger GeoMx pipeline in R

# Target (gene) counts, negative probes excluded
counts = pd.read_csv("ONB_TargetCountMatrix_no_negatives.csv")

# Counts of the negative probes
neg_counts = pd.read_csv("ONB_negative_probe_count.csv")

# Per-segment metadata
meta = pd.read_csv("ONB_Segment_metadata.csv")

In [None]:
# Call the first column 'Gene' in counts as well, so both tables can be
# concatenated on the same column name
counts = counts.rename(columns={"TargetName": "Gene"})

In [None]:
# Stack the gene counts and the negative probe counts into one table,
# then use the 'Gene' column as the row index
counts_comb = pd.concat([counts, neg_counts], ignore_index=True).set_index("Gene")

In [None]:
# AnnData expects observations as rows and variables as columns, so flip the table
counts_comb = counts_comb.transpose()

In [None]:
# Plain array of the counts (labels are attached to the AnnData object separately)
data_matrix = counts_comb.to_numpy()

In [None]:
# Build the AnnData object; obs/var labels come from the rows/columns of counts_comb
adata = anndata.AnnData(X=data_matrix)
adata.obs_names = counts_comb.index
adata.var_names = counts_comb.columns

# Copy every metadata column into .obs by position.
# NOTE(review): this assumes the rows of meta are in the same order as the
# rows of counts_comb - confirm against the R export.
for meta_column in meta:
    adata.obs[meta_column] = meta[meta_column].values

In [None]:
# Move the 'Scan_name' annotation to a column called 'slide'
adata.obs['slide'] = adata.obs.pop('Scan_name')

# prep for countcorrect

In [None]:
# The blank control (well A1) has to be dropped for countcorrect to work
remove = ['DSP-1001660018527-A-A01']

# True for every observation that is kept
mask = np.logical_not(adata.obs_names.isin(remove))

adata = adata[mask, :]

In [None]:
# Split the count matrix into gene probes and the negative probe column
is_negative_probe = adata.var_names == 'NegProbe-WTX'
counts_geneProbes = adata.X[:, ~is_negative_probe]
counts_negativeProbes = adata.X[:, is_negative_probe]

# Per-observation annotations that are passed to countcorrect alongside the counts
counts_nuclei = adata.obs['nuclei']
names_slides = adata.obs['slide']

# Print every input once for a quick visual check
for heading, values in (
    ('Gene Probe Counts: \n \n', counts_geneProbes),
    ('Negative Probe Counts: \n \n', counts_negativeProbes),
    ('Nuclei Counts: \n \n', counts_nuclei),
    ('Slide Names: \n \n', names_slides),
):
    print(heading, values, '\n')

# run countcorrect

In [None]:
# Run countcorrect on the gene / negative probe counts, with the nuclei counts
# and the slide label of every observation, for 10000 iterations
results = cc.run_countcorrect(
    counts_geneProbes,
    counts_negativeProbes,
    counts_nuclei,
    slide_id=names_slides,
    total_iterations=10000,
)

In [None]:
# Display the object returned by cc.run_countcorrect for inspection
results

In [None]:
# Pull the corrected raw counts and the normalized counts out of the results
rawCounts_corrected, cpm_normalized_corrected = (
    results['RawCounts'],
    results['NormCounts'],
)

In [None]:
# NegProbe-WTX has to be removed from adata before the corrected count layers
# are added: those matrices were computed from the gene probes only.
var_names_to_remove = ['NegProbe-WTX']

# .copy() makes adata_f an independent AnnData object. Without it adata_f is
# only a view of adata, and adding layers to a view forces anndata to convert
# it into a copy implicitly (emitting a warning) at the first assignment.
adata_f = adata[:, ~adata.var_names.isin(var_names_to_remove)].copy()

In [None]:
# Store both countcorrect outputs as layers next to the uncorrected counts
for layer_name, layer_values in (
    ("raw_corrected", rawCounts_corrected),
    ("cpm_normalized", cpm_normalized_corrected),
):
    adata_f.layers[layer_name] = layer_values

# quantile normalization

In [None]:
# Define function for quantile normalization
def quantile_normalize(df):
    """
    Quantile-normalize the columns of a dataframe.

    Every column is mapped onto one shared reference distribution: the k-th
    smallest value of a column is replaced by the mean of the k-th smallest
    values taken across all columns.

    input: dataframe with numerical columns; each column is normalized as one
           unit and the rows are the entries that get ranked within it
    output: float dataframe with quantile normalized values, carrying the same
            row and column labels (in the same order) as the input

    Tied values within a column all receive the reference value of the lowest
    rank they share (rank method "min"). Missing values stay missing and are
    ignored when the reference means are computed. Duplicated row or column
    labels are allowed.
    """
    # Sort every column independently; np.sort places NaN at the end.
    sorted_values = np.sort(df.values, axis=0)

    # Reference distribution: mean across columns at each sorted position.
    # pandas' mean skips NaN, so a partially missing position still gets a value.
    rank_means = pd.DataFrame(sorted_values).mean(axis=1).to_numpy()

    # 1-based rank of each value within its own column (NaN where the input is NaN).
    ranks = df.rank(method="min").to_numpy()

    # Look the reference value up by position rather than round-tripping through
    # stack()/unstack(), which raises on duplicated labels.
    normalized = np.full(ranks.shape, np.nan)
    observed = ~np.isnan(ranks)
    normalized[observed] = rank_means[ranks[observed].astype(int) - 1]

    return pd.DataFrame(normalized, index=df.index, columns=df.columns)

In [None]:
# Labelled dataframe of the corrected raw counts (rows: obs_names, columns: var_names)
df = pd.DataFrame(
    data=adata_f.layers['raw_corrected'],
    index=adata_f.obs_names,
    columns=adata_f.var_names,
)

In [None]:
# quantile_normalize() equalises the distribution of every *column*, but df is
# laid out as observations (rows) x genes (columns). Passing df directly would
# force every gene onto the same distribution across observations. Transpose so
# each observation is a column and is normalized across its genes, then
# transpose back to the obs x var layout that layers require.
adata_f.layers['quantile_norm'] = quantile_normalize(df.T).T
adata_f.layers['quantile_norm']

In [None]:
# Save the corrected object, including all added layers, as an h5ad file
adata_f.write_h5ad('GeoMx_count_corrected_with_quantile_normalized_layer.h5ad')