In [None]:
import pandas as pd
import anndata as ad
import scanpy as sc
from arboreto.utils import load_tf_names
from arboreto.algo import grnboost2
import numpy as np

In [None]:
# Input locations, relative to this notebook. Change the paths as required.
h5ad_path = '../../Processed_Data/NetworkData_HVGs_basal_5ht6ho_without_PTPRC_Adgre1.h5ad'
tf_list_path = '../../Processed_Data/allTFs_mm.txt'

# Read the AnnData object from disk.
data = sc.read_h5ad(h5ad_path)

# Keep only the observations whose 'orig.ident' label is '5Ht'.
is_5ht = data.obs['orig.ident'] == '5Ht'
ht5_data = data[is_5ht]

# Read the list of transcription factor (TF) names.
tfs = load_tf_names(tf_list_path)

In [None]:
# Expression matrix as a DataFrame: rows are obs_names, columns are var_names.
# `to_df()` builds the same frame as
# pd.DataFrame(X, index=obs_names, columns=var_names), but it also converts X
# to a dense array first when X is stored as a sparse matrix, which the plain
# DataFrame constructor does not handle.
counts_df_5ht_basal = ht5_data.to_df()

# Keep only the TFs that are present among the variables of the data.
# Sorting makes the order (and therefore the batch composition used when
# running GRNBoost2 in batches) deterministic across kernel restarts.
tfs = sorted(set(tfs).intersection(ht5_data.var_names))

In [None]:
# Number of TFs left after intersecting with the variables in the data.
len(tfs) # 459

In [None]:
# Save the TFs used in the analysis (if needed)
#with open('../../Results/SCENIC_results/results_step1_5ht_basal/451_tfs.txt','w') as f:
 #  for tf in tfs:
  #      f.write(f'{tf.upper()}\n')

# Run GRNBoost2 for all TFs, in batches of 10 TFs

In [None]:
#Run grnboost2 with SGBM parameters, without a seed - 5ht_basal
#
# The whole inference is repeated N_RUNS times. No seed is passed, so each run
# is an independent stochastic replicate. Within a run the TFs are processed in
# consecutive batches of BATCH_SIZE and the per-batch edge lists are stacked
# into one table that is written to a per-run CSV file.
N_RUNS = 20      # number of independent GRNBoost2 replicates
BATCH_SIZE = 10  # number of TFs passed to grnboost2 per call

if len(tfs) == 0:
    # Fail early with a clear message instead of inside the batch loop.
    raise ValueError('No TFs to run GRNBoost2 on: `tfs` is empty.')

for i in range(N_RUNS):
    print(f'Run: {i}')
    # Edge lists of the individual batches of this run; concatenated once at
    # the end rather than re-copying the growing table after every batch.
    batch_adjacencies = []
    # Stepping through the start offsets yields only non-empty batches: the
    # last batch simply holds the remaining TFs. Manually counting
    # floor(len/BATCH_SIZE) + 1 batches would produce an empty final batch
    # whenever len(tfs) is an exact multiple of BATCH_SIZE, and an empty TF
    # list is not a valid input for grnboost2.
    for start in range(0, len(tfs), BATCH_SIZE):
        curr_tfs = tfs[start:start + BATCH_SIZE]
        curr_adjacencies = grnboost2(counts_df_5ht_basal, tf_names=curr_tfs, verbose=False)
        batch_adjacencies.append(curr_adjacencies)
        # Report the number of TFs actually processed so far (the last batch
        # may be smaller than BATCH_SIZE).
        print(f'Done for {start + len(curr_tfs)} TFs')
        # To checkpoint long runs, pd.concat(batch_adjacencies, axis=0) can be
        # written to disk here every few batches.
    # Stack the batches row-wise, keeping each batch's own index, into the
    # complete edge list of this run.
    adjacencies = pd.concat(batch_adjacencies, axis=0)
    adjacencies.to_csv(f'../../Results/Results_no_ptprc_adgre1/SCENIC_results/results_step1_5ht_basal/5ht_basal_adjacencies_run{i}_allTFs.csv')