## Analyse pre-processed data with scGCO

In [None]:
from math import gcd
import numpy as np
import scipy as sp
import pandas as pd
import matplotlib.pyplot as plt
import matplotlib.image as mpimg
from matplotlib import rcParams
import seaborn as sns
import csv
import dill
from sklearn.metrics.pairwise import cosine_distances
from scGCO import *
import scanpy as sc

In [None]:
# Load the count table from X.csv. The file is read with header=None, so the
# first line is treated as data and columns get integer labels for now.
norm_counts = pd.read_csv("X.csv", header=None)
norm_counts.head()

In [None]:
# Label the rows of the count table using the first column of obs.csv.
# "Unnamed: 0" is the name pandas gives to a header-less first column;
# presumably it holds the spot barcodes -- verify against the export step.
obs_norm_counts = pd.read_csv("obs.csv")
norm_counts_barcodes = obs_norm_counts.loc[:, "Unnamed: 0"]

# Use that column as the row index of norm_counts.
norm_counts.index = norm_counts_barcodes

In [None]:
# Label the columns of the count table using the first column of var.csv.
# "Unnamed: 0" is the name pandas gives to a header-less first column;
# presumably it holds the gene names -- verify against the export step.
var_norm_counts = pd.read_csv('var.csv')
var_genes = list(var_norm_counts["Unnamed: 0"])

# Use these as the column names (the original referenced the undefined
# name `var_gene`, which raised NameError).
norm_counts.columns = var_genes

In [None]:
# Build the spot coordinate matrix `locs` with columns (array_col, array_row).
#
# `ffpe_human_prostate` is not created in any cell of this notebook, so it is
# only available if it happens to be in the session already. When it is not,
# fall back to the obs table loaded from obs.csv, which is also the table that
# supplies the row labels of `norm_counts`.
# NOTE(review): the fallback assumes obs.csv carries "array_row" and
# "array_col" columns (as it would if exported from the same object's .obs)
# -- confirm.
try:
    spot_obs = ffpe_human_prostate.obs
except NameError:
    spot_obs = obs_norm_counts

array_row = list(spot_obs["array_row"])
array_col = list(spot_obs["array_col"])
locs = np.column_stack((array_col, array_row))

In [None]:
# Begin scGCO processing to determine the presence of SVGs.

# Prepare a square, equal-aspect axes for looking at the graph.
fig, ax = plt.subplots(figsize=(5, 5))  # , dpi=300)
ax.set_aspect('equal')

# Create the graph connecting space and spots; the first column of the
# count table is passed as the expression argument.
exp = norm_counts.iloc[:, 0]
cellGraph = create_graph_with_weight(locs, exp)

In [None]:
# Draw the cell graph: every spot as a point and every graph edge as a thin
# line segment between the two spots it connects.
exp = norm_counts.iloc[:, 0].values
cellGraph = create_graph_with_weight(locs, exp)

# Create the figure in the same cell that draws on it. With the inline
# notebook backend a figure is rendered (and closed) at the end of the cell
# that created it, so drawing on axes made in an earlier cell shows nothing.
fig, ax = plt.subplots(1, 1, figsize=(5, 5))
ax.set_aspect('equal')

ax.scatter(locs[:, 0], locs[:, 1], s=1, color='black')
for i in range(cellGraph.shape[0]):
    # The first two columns of each row are used as the indices (into `locs`)
    # of the edge's two end points.
    start = int(cellGraph[i, 0])
    end = int(cellGraph[i, 1])
    x = (locs[start, 0], locs[end, 0])
    y = (locs[start, 1], locs[end, 1])
    ax.plot(x, y, color='black', linewidth=0.5)

# Title the axes explicitly rather than relying on pyplot's "current axes".
ax.set_title('CellGraph')

In [None]:
# Skip to step 2: gene expression processing with Gaussian mixture modelling.
# Imports sit at the top of the cell; `os` was previously never imported
# explicitly and only resolved if the scGCO star import happened to expose it.
import os
import time

# Make sure the output directory exists (no error if it is already there).
output_dir = 'scGCO_outputs/'
os.makedirs(output_dir, exist_ok=True)

# Run multiGMM over the count table and report the wall-clock time taken.
t0 = time.time()
gmmDict = multiGMM(norm_counts)
t1 = time.time()
print(t1-t0, ' second')

# Save the GMM results so they can be reused later.
fileName = os.path.join(output_dir, "ffpe_prostate_scanpy_scgco_gmmDict.pkl")
store_gmm(gmmDict, fileName)

In [None]:
# Run the main scGCO function to identify SVGs, timing the call.
t0 = time.time()
result_df = identify_spatial_genes(locs, norm_counts, cellGraph, gmmDict)
elapsed = time.time() - t0
print('Running time: {} seconds'.format(elapsed))

# Write the results to file.
write_result_to_csv(result_df, 'ffpe_prostate_scGCO_svgs.csv')

In [None]:
# Select genes with an FDR below the 0.05 cut-off, ordered by ascending FDR.
fdr_cutoff = 0.05
significant = result_df.fdr < fdr_cutoff
ranked_df = result_df.sort_values('fdr')
# The boolean mask is built on the unsorted frame; .loc aligns it by index.
fdr_df = ranked_df.loc[significant]

# Save the selected genes to a file.
fdr_df.to_csv('ffpe_prostate_scGCO_sig_svgs.csv')