In [1]:
import fnmatch
import os
import re
import shlex

import pandas as pd

In [13]:
# Folder that holds the merged FASTQ files
path_data = '/Users/leandro/Desktop/github/NGS-data/dec-OOF/round2/merged'
# Batch settings file, written inside the data folder
path_file = f'{path_data}/crispresso.batch'

In [12]:
#-- Collect paired-end FASTQ files (R1 / R2) and their abbreviated sample names
# Sort the file names once, up front, so the R1 list, the R2 list and the
# sample names all follow the same order. Sorting the names separately can
# pair a name with the wrong file: 'x-1' < 'x-10', but 'x-10_S10' < 'x-1_S1'.
fastq_files = sorted(f for f in os.listdir(path_data) if f.endswith(".fastq.gz"))

list_fastq = [f for f in fastq_files if fnmatch.fnmatch(f, "*R1*")]
# A file counts as R2 only when it did not already match the R1 pattern
list_fastq2 = [
    f for f in fastq_files
    if not fnmatch.fnmatch(f, "*R1*") and fnmatch.fnmatch(f, "*R2*")
]

# Sample name = everything before the "_S<number>" sample index.
# The previous pattern "[_S]" was a character class (split on any "_" OR any
# "S"), which truncated names containing a capital S. If a file has no
# "_S<number>" part, only the ".fastq.gz" extension is stripped.
list_name = [
    re.split(r"_S\d+(?=[_.])|\.fastq\.gz$", f, maxsplit=1)[0] for f in fastq_files
]

#-- Remove duplicate names while keeping the (sorted) file order
names = list(dict.fromkeys(list_name))

In [14]:
#-- Collect merged FASTQ files and their abbreviated sample names
# Sort the file names first and derive the names in that same order, so that
# names[i] always belongs to list_fastq[i]. Sorting the two lists separately
# can misalign them: 'x-1' < 'x-10', but 'x-10_S10' < 'x-1_S1'.
list_fastq = sorted(f for f in os.listdir(path_data) if f.endswith(".fastq.gz"))

# Sample name = everything before the "_S<number>" sample index.
# The previous pattern "[_S]" was a character class (split on any "_" OR any
# "S"), which truncated names containing a capital S. If a file has no
# "_S<number>" part, only the ".fastq.gz" extension is stripped.
list_name = [
    re.split(r"_S\d+(?=[_.])|\.fastq\.gz$", f, maxsplit=1)[0] for f in list_fastq
]

#-- Remove duplicate names while keeping the (sorted) file order
names = list(dict.fromkeys(list_name))

In [15]:
# Display the collected FASTQ file names
list_fastq

['dec-OOF-1_S1_L001.merged.fastq.gz',
 'dec-OOF-2_S2_L001.merged.fastq.gz',
 'dec-OOF-3_S3_L001.merged.fastq.gz',
 'dec-OOF-4_S4_L001.merged.fastq.gz',
 'dec-OOF-5_S5_L001.merged.fastq.gz',
 'dec-OOF-7_S6_L001.merged.fastq.gz',
 'dec-OOF-8_S7_L001.merged.fastq.gz',
 'dec-OOF-9_S8_L001.merged.fastq.gz']

In [17]:
# Display the de-duplicated sample names
names

['dec-OOF-1',
 'dec-OOF-2',
 'dec-OOF-3',
 'dec-OOF-4',
 'dec-OOF-5',
 'dec-OOF-7',
 'dec-OOF-8',
 'dec-OOF-9']

In [18]:
#-- Build the CRISPRessoBatch settings table (one row per sample)
# `names` and `list_fastq` are built separately, so verify that they line up
# before writing: a silent mismatch would label a sample with the wrong file.
if len(names) != len(list_fastq):
    raise ValueError(
        f"Got {len(names)} sample names but {len(list_fastq)} FASTQ files; "
        "expected exactly one file per sample."
    )
mismatched = [(n, f) for n, f in zip(names, list_fastq) if not f.startswith(n)]
if mismatched:
    raise ValueError(f"Sample names and FASTQ files are misaligned: {mismatched}")

# CRISPRessoBatch takes the per-sample label from a column called `name`
# (column headers are CRISPResso parameter names), so use `name`, not `names`.
df = pd.DataFrame({'name': names, 'fastq_r1': list_fastq})

#-- Save as a tab-separated batch settings file
df_final = df
df_final.to_csv(path_file, sep="\t", index=False)

# Show the table that was written
df_final

In [7]:
# generate_crispresso_prime_editing.py

def generate_crispresso_prime_editing_script(output_path, params):
    """
    Generate a bash script that runs CRISPRessoBatch for a Prime Editing experiment.

    The script is a ``#!/bin/bash`` header followed by one ``CRISPRessoBatch``
    command with one ``--key value`` pair per line. Lines are joined with
    backslash continuations; the last option line carries no continuation.

    :param output_path: Path to save the bash script (e.g. "./run_crispresso_prime.sh")
    :param params: Dictionary mapping CRISPRessoBatch option names (without the
        leading ``--``) to their values. Each value is converted with ``str()``
        and shell-quoted, so spaces, quotes, ``$`` and backticks reach the
        program literally instead of being interpreted by the shell.
    :raises OSError: If ``output_path`` cannot be opened for writing.
    """
    command_lines = ["CRISPRessoBatch"]
    for key, value in params.items():
        # shlex.quote leaves plain tokens (sequences, paths, numbers) as-is and
        # single-quotes anything the shell could otherwise expand or split.
        command_lines.append(f"    --{key} {shlex.quote(str(value))}")

    with open(output_path, "w") as f:
        f.write("#!/bin/bash\n\n")
        f.write("# Auto-generated script to run CRISPRessoBatch for Prime Editing\n")
        # Join with " \" + newline so only lines that are followed by another
        # option end in a continuation.
        f.write(" \\\n".join(command_lines) + "\n")

    print(f"Prime Editing Bash script generated at: {output_path}")


# Example usage
if __name__ == "__main__":
    parameters = {
        # Tab-separated batch settings FILE (one row per sample). This must be
        # the file itself, not the folder that contains it.
        "batch_settings": "/Users/leandro/Desktop/github/NGS-data/dec-OOF/round2/merged/crispresso.batch",
        # Passed as --output_folder; presumably the directory for CRISPResso results.
        # NOTE(review): this path omits "round2", unlike batch_settings - confirm it is intended.
        "output_folder": "/Users/leandro/Desktop/github/NGS-data/dec-OOF/merged",
        "amplicon_seq": "GAAACGCCCATGCAATTAGTCTATTTCTGCTGCAAGTAAGCATGCATTTGTAGGCTTGATGCTTTTTTTCTGCTTCTCCAGCCCTGGCCTGGGTCAATCCTTGGGGCCCAGACTGAGCACGTGATGGCAGAGGAAAGGAAGCCCTGCTTCCTCCAGAGGGCGTCGCAGGACAGCTTTTCCTAGACAGGGGCTAGTATGTGCAGCTCCTGCACCGGGATACTGGTTGACAAGTTTGGCTGGGCTGGAAGCCAGCACCTAGGGAGGTCCCTGGAAGGGGCCAGCCTCACCAGGAGAGGAGGGACCTGGCCCTTCAGGGTCGAGCTCA",
        "prime_editing_override_prime_edited_ref_seq": "GAAACGCCCATGCAATTAGTCTATTTCTGCTGCAAGTAAGCATGCATTTGTAGGCTTGATGCTTTTTTTCTGCTTCTCCAGCCCTGGCCTGGGTCAATCCTTGGGGCCCAGACTGAGCACGcttgTGATGGCAGAGGAAAGGAAGCCCTGCTTCCTCCAGAGGGCGTCGCAGGACAGCTTTTCCTAGACAGGGGCTAGTATGTGCAGCTCCTGCACCGGGATACTGGTTGACAAGTTTG", # edited sequence
        "prime_editing_pegRNA_spacer_seq": "GGCCCAGACTGAGCACGTGA", #pegRNA spacer
        "prime_editing_pegRNA_extension_seq": "TCTGCCATCAcaagCGTGCTCAGTCTG",  # pegRNA extension
        "prime_editing_pegRNA_scaffold_seq": "GTTTTAGAGCTAGGCCAACATGAGGATCACCCATGTCTGCAGGGCCTAGCAAGTTAAAATAAGGCTAGTCCGTTATCACGCCGAAAGGCGGGCACCGAGTCGGTGC",  # scaffold
        "quantification_window_center": "-1",  # Often shifted for prime editing
        "quantification_window_size": "5",
    }

    # Write the bash script into the current working directory
    generate_crispresso_prime_editing_script("run_crispresso_prime.sh", parameters)


Prime Editing Bash script generated at: run_crispresso_prime.sh
