# Go Through Each Patient and Create a Cellbender Output Folder

In [1]:
import pandas as pd
import os
import subprocess

In [2]:
# Root folder that receives one Cellbender output sub-folder per patient,
# and the CSV whose 'projid' column lists the patients to process.
root = '/orcd/data/lhtsai/001/om2/mabdel03/files/ACE_Analysis/Data/Tsai/Preprocessing/Preprocessed_Counts/SocIsl'
ACE_df = pd.read_csv('/orcd/data/lhtsai/001/om2/mabdel03/files/ACE_Analysis/Data/Tsai/Preprocessing/FASTQ_Transfer/CSVs/SocIsl_Tsai_To_Openmind.csv')

# set() collapses repeated projid values so each patient is handled once.
projids = set(ACE_df['projid'])

# Create <root>/<projid> for every patient.
# exist_ok=True makes this cell safe to re-run: a folder that already exists is
# left untouched instead of raising FileExistsError (which os.mkdir did on the
# second run of the notebook).
for num in projids:
    os.makedirs(os.path.join(root, str(num)), exist_ok=True)


# Generate Batch Scripts For Cellbender

In [3]:
# Folder the generated sbatch scripts are written to, and the folder holding the
# per-patient count outputs (<root_dir>/<projid>/outs/raw_feature_bc_matrix.h5).
parent_scripts = '/orcd/data/lhtsai/001/om2/mabdel03/files/ACE_Analysis/Data/Tsai/Preprocessing/Cellbender/SocIsl/Batch_Scripts'
root_dir = '/om/scratch/Mon/mabdel03/SocIsl/Counts'

# NOTE(review): the #SBATCH --output / --error paths in the template below live
# under Cellbender/Resilient, while every other path in this notebook is under
# SocIsl. Left unchanged here; confirm whether that is intended.

# Write one '<projid>_cellbender.sh' script per patient.
for num in projids:

    # Input: the raw matrix for this patient; output: a file inside the
    # patient's folder under `root`.
    inp_dir = os.path.join(root_dir, str(num), 'outs')
    inp_file = os.path.join(inp_dir, 'raw_feature_bc_matrix.h5')
    out_dir = os.path.join(root, str(num))
    out_file = os.path.join(out_dir, 'processed_feature_bc_matrix.h5')

    # Full text of the SLURM batch script; only out_dir, inp_file and out_file
    # vary between patients.
    output = f"""#!/bin/bash
#SBATCH -n 32                    # Number of cores requested
#SBATCH -t 47:00:00             # Runtime in minutes or
#SBATCH --output=/orcd/data/lhtsai/001/om2/mabdel03/files/ACE_Analysis/Data/Tsai/Preprocessing/Cellbender/Resilient/Batch_Scripts_Outs/slurm-%j.out
#SBATCH --error=/orcd/data/lhtsai/001/om2/mabdel03/files/ACE_Analysis/Data/Tsai/Preprocessing/Cellbender/Resilient/Batch_Scripts_Err/slurm-%j.err
#SBATCH --gres=gpu:a100:1
#SBATCH --mem=250G
#SBATCH --mail-user=nkhera@college.harvard.edu
#SBATCH --mail-type=FAIL

source /orcd/data/lhtsai/001/om2/mabdel03/miniforge3/etc/profile.d/conda.sh

conda activate /orcd/data/lhtsai/001/om2/mabdel03/conda_envs/Cellbender_env

cd {out_dir}

cellbender remove-background --cuda --input {inp_file} --fpr 0 --output {out_file}
"""
    filename = str(num)+'_cellbender.sh'
    # 'w' (rather than 'x') so re-running the cell regenerates the script
    # instead of raising FileExistsError; the `with` block closes the file even
    # if the write fails.
    with open(os.path.join(parent_scripts, filename), 'w') as f:
        f.write(output)




In [4]:
    # Submit every generated '<projid>_cellbender.sh' script with sbatch.
    # Previously this cell reused the `filename` left over from the last loop
    # iteration of the script-generation cell, so only one script was submitted.
    for num in projids:
        script_path = os.path.join(parent_scripts, str(num)+'_cellbender.sh')
        # Pass an argument list (no .split()) so a path containing whitespace
        # stays a single argument; pipe stderr too so failures are visible.
        process = subprocess.run(
            ['sbatch', script_path],
            stdout=subprocess.PIPE,
            stderr=subprocess.PIPE,
            universal_newlines=True,
        )
        output, error = process.stdout, process.stderr
        if process.returncode != 0:
            # Report and continue so one failed submission does not block the rest.
            print(f'{num}: sbatch failed (exit {process.returncode}): {error.strip()}')
        else:
            print(f'{num}: {output.strip()}')