In [None]:
import pandas as pd
import os
import subprocess
import csv
import shutil

In [None]:
def combine_meta_data(df_rna_path, df_atac_path, sample_column, keys=['rna', 'atac']):
    """Load two tab-delimited metadata tables and join them side by side.

    Args:
        df_rna_path: Path of the first table (labelled with ``keys[0]``).
        df_atac_path: Path of the second table (labelled with ``keys[1]``).
        sample_column: Name of the column used as the row index in both tables.
        keys: Labels for the outer column level of the result, one per table.

    Returns:
        A DataFrame whose columns are grouped under ``keys`` and whose rows
        are aligned on ``sample_column``.
    """
    tables = [
        pd.read_table(table_path, index_col=sample_column)
        for table_path in (df_rna_path, df_atac_path)
    ]
    # axis=1 places the tables next to each other; `keys` adds the outer
    # column level that lets callers select one table via combined[key].
    return pd.concat(tables, axis=1, keys=keys)

In [None]:
# Labels for the two metadata tables; they become the outer column level.
keys = ['rna', 'atac']
# Index labels (values of the 'Description' column) to exclude from meta_data.
mapped_samples = []
meta_data = combine_meta_data(
    '../.data/meta_data/E-MTAB-12916.sdrf.txt',
    '../.data/meta_data/E-MTAB-12919.sdrf.txt',
    'Description',
    keys=keys,
)
# Drop every row whose index label is listed in mapped_samples.
already_mapped = meta_data.index.isin(mapped_samples)
meta_data = meta_data[~already_mapped]

In [None]:
def write_csv(csv_file, data, mode):
    """Write rows to a CSV file.

    Args:
        csv_file: Path of the file to open.
        data: Iterable of rows, each row an iterable of cell values.
        mode: Mode passed to ``open`` (e.g. ``'w'`` to overwrite, ``'a'`` to append).
    """
    # newline='' leaves line-ending handling to the csv module.
    with open(csv_file, mode=mode, newline='') as handle:
        csv.writer(handle).writerows(data)

In [None]:
# Download the FASTQ files of every run, map them with cellranger-arc, move
# the result next to the run's libraries.csv and delete the downloads again.
#
# NOTE(review): parent_dir and cellranger are machine-specific absolute paths;
# adjust them before running on another host.
parent_dir = '/mnt/LaCIE/ceger/Projects/human_heart_mapping/human_heart_mapping/0-raw_data_processing/3-240417-E-MTAB-12916_E-MTAB-12919'
# cellranger-arc writes its output folder into the working directory, so the
# directory is captured once and used for both the run and the later move.
source_dir = os.getcwd()
data_dir = os.path.join(parent_dir, '.data')
runs = meta_data.index.unique()
cellranger = '/home/ceger/CellRanger/cellranger-arc-2.0.2/cellranger-arc'
sample_col = 'Source Name'
file_name_col = 'Scan Name'
url_col = 'Comment[FASTQ_URI]'
output_path = os.path.join(data_dir, 'mapping_py')
cellranger_reference = os.path.join(data_dir, 'cr_arc_index/GRCh38')
# library_type value written to libraries.csv for each metadata key.
library_types = {'rna': 'Gene Expression', 'atac': 'Chromatin Accessibility'}

# Fail before downloading anything instead of writing a blank library_type.
unknown_keys = [key for key in keys if key not in library_types]
if unknown_keys:
    raise ValueError(f'No library type defined for keys: {unknown_keys}')

for run in runs:
    run_dir = os.path.join(output_path, run)
    os.makedirs(run_dir, exist_ok=True)
    download_df = meta_data[meta_data.index == run]
    libraries_filepath = os.path.join(run_dir, 'libraries.csv')
    write_csv(libraries_filepath, [['fastqs', 'sample', 'library_type']], mode='w')

    # Download directory per key, remembered for the cleanup step below.
    sample_dirs = {}
    for key in keys:
        modality_df = download_df[key]
        # .iloc[0] takes the first row by position; plain [0] on a Series with
        # string labels relies on a deprecated positional fallback.
        sample = modality_df[sample_col].iloc[0]
        sample_dir = os.path.join(run_dir, sample)
        os.makedirs(sample_dir, exist_ok=True)
        sample_dirs[key] = sample_dir
        write_csv(libraries_filepath, [[sample_dir, sample, library_types[key]]], mode='a')

        # Map file name -> URL; a repeated file name keeps its last URL.
        file_dict = dict(zip(modality_df[file_name_col], modality_df[url_col]))
        for file_name, url in file_dict.items():
            file_path = os.path.join(sample_dir, file_name)
            subprocess.run(['axel', '-n', '10', '--output', file_path, url],
                           check=True)

    # Running Cellranger
    cellranger_arc_run = [
        cellranger,
        'count',
        '--id=' + run,
        '--reference=' + cellranger_reference,
        '--libraries=' + libraries_filepath,
        '--localcores=20',
        '--localmem=32'
    ]
    # check=True stops here when mapping fails, so the downloaded FASTQs are
    # not deleted after an unsuccessful run.
    subprocess.run(cellranger_arc_run, check=True, cwd=source_dir)

    # Moving output files into mapping directory
    cellranger_run_output_path = os.path.join(source_dir, run)
    shutil.move(cellranger_run_output_path, run_dir)

    # Deleting the downloaded files
    for key, sample_dir in sample_dirs.items():
        try:
            shutil.rmtree(sample_dir)
            print(f'{key} sample folder deleted')
        except OSError as e:
            print(f"Error: {sample_dir} : {e.strerror}")

In [None]:
# One-off cellranger-arc invocation with fixed id 'test' and hard-coded paths.
test_reference = '/mnt/LaCIE/ceger/Projects/human_heart_mapping/human_heart_mapping/0-raw_data_processing/3-240417-E-MTAB-12916_E-MTAB-12919/.data/cr_arc_index/GRCh38'
test_libraries = '/mnt/LaCIE/ceger/Projects/human_heart_mapping/human_heart_mapping/0-raw_data_processing/3-240417-E-MTAB-12916_E-MTAB-12919/.data/mapping_py/HCAHeart9508627_HCAHeart9508819/libraries.csv'
cellranger_arc_run = [
    '/home/ceger/CellRanger/cellranger-arc-2.0.2/cellranger-arc',
    'count',
    '--id=test',
    f'--reference={test_reference}',
    f'--libraries={test_libraries}',
    '--localcores=20',
    '--localmem=32',
]
# The return code is not checked; the CompletedProcess is the cell's output.
subprocess.run(cellranger_arc_run)