In [1]:
import os
import pandas as pd
import fnmatch
import re

In [14]:
# Folder that holds the merged FASTQ files of this run.
path_data = '/Users/leandro/Desktop/github/NGS-data/SeV-ORF1_251003/merged'
# Batch-settings file, written into the same folder. Derived from path_data so
# the two locations cannot drift apart when the run folder changes.
path_file = path_data + '/crispresso.batch'

In [13]:
#-- Loop for creating file names and abbreviations (separate R1 / R2 files)
# NOTE: the "(merged files)" cell rebinds list_fastq, list_name and names;
# when both cells are run, the one executed last wins.
list_fastq = []    # file names matching "*R1*"
list_fastq2 = []   # file names matching "*R2*"
list_name = []     # one abbreviation per recognised fastq.gz file (duplicates included)

for file in os.listdir(path_data):
    # only fastq.gz files
    if not file.endswith(".fastq.gz"):
        continue
    if fnmatch.fnmatch(file, "*R1*"):
        list_fastq.append(file)
    elif fnmatch.fnmatch(file, "*R2*"):
        list_fastq2.append(file)
    # Abbreviation = everything before the "_S<digits>" token.
    # The previous pattern "[_S]" was a character class, i.e. it split at the
    # first "_" OR "S", so any file starting with "S" got an empty abbreviation.
    match = re.search(r'(.+)_S\d+', file)
    if match:
        list_name.append(match.group(1))

#-- Remove duplicates from name list and order all lists
names = sorted(set(list_name))
list_fastq.sort()
list_fastq2.sort()

In [15]:
#-- Loop for creating file names and abbreviations (merged files)
# list_fastq and names are combined column-wise into one table further down,
# so entry i of one list must describe the same file as entry i of the other.
# Both lists are therefore filled in a single pass over the *sorted* directory
# listing. Sorting names and file names independently can reorder them
# differently ("-" sorts before "_"), pairing a sample name with the wrong file.
list_fastq = []
list_name = []

for file in sorted(os.listdir(path_data)):
    # Only process fastq.gz files
    if not file.endswith(".fastq.gz"):
        continue

    # Extract the part before _S<digits> using regex
    match = re.search(r'(.+)_S\d+', file)
    if match is None:
        # No sample name can be derived; keeping the file would leave
        # list_fastq longer than names, so the file is skipped entirely.
        continue

    list_fastq.append(file)
    list_name.append(match.group(1))

# Remove duplicates from list_name, keeping the order of list_fastq.
# With one file per sample (the expected layout) names[i] belongs to list_fastq[i].
names = list(dict.fromkeys(list_name))

In [16]:
# Show the sorted FASTQ file names (they become the fastq_r1 column of the batch table).
list_fastq

['SeV-PEv1-c2-index9_S9_L001.merged.fastq.gz',
 'SeV-PEv1-c3-index10_S10_L001.merged.fastq.gz',
 'Undetermined_S0_L001.merged.fastq.gz',
 'control-index8_S8_L001.merged.fastq.gz']

In [17]:
# Show the de-duplicated sample abbreviations (they become the names column of the batch table).
names

['SeV-PEv1-c2-index9', 'SeV-PEv1-c3-index10', 'Undetermined', 'control-index8']

In [18]:
#-- Batch table: one row per sample (abbreviation + FASTQ file name).
#   names and list_fastq must have the same length, otherwise pandas raises.
#   NOTE(review): CRISPRessoBatch appears to document the sample-name column as
#   "name"; confirm that the "names" header used here is picked up as intended.
df = pd.DataFrame(data={'names': names, 'fastq_r1': list_fastq})

#-- Write the table to path_file: tab-separated, header row kept, no index column.
df_final = df
df_final.to_csv(path_or_buf=path_file, index=False, sep="\t")

In [36]:
# generate_crispresso_prime_editing.py

def generate_crispresso_prime_editing_script(output_path, params):
    """
    Generates a bash script for a CRISPRessoBatch run tailored to Prime Editing experiments.

    The script consists of a shebang line, a comment line and one
    ``CRISPRessoBatch`` command carrying one ``--key value`` option per entry
    of ``params`` (in the dictionary's iteration order), one option per line.

    :param output_path: Path to save the bash script (e.g. "./run_crispresso_prime.sh")
    :param params: Dictionary of CRISPRessoBatch parameters, mapping the option
        name (without the leading ``--``) to its value. Values are converted
        with ``str()`` and shell-quoted.
    :return: None. The script is written to ``output_path`` and a confirmation
        line is printed.
    """
    import shlex  # local import: keeps this definition usable on its own

    # Build the command as a list of lines and join them with " \" afterwards,
    # so that only lines that are really followed by another option end in a
    # line continuation (the last option no longer carries a dangling "\").
    command_lines = ["CRISPRessoBatch"]
    for key, value in params.items():
        # shlex.quote keeps spaces, quotes, "$" and backticks inside a value
        # from being interpreted by the shell; an empty value becomes ''.
        command_lines.append(f"    --{key} {shlex.quote(str(value))}")

    with open(output_path, "w") as f:
        f.write("#!/bin/bash\n\n")
        f.write("# Auto-generated script to run CRISPRessoBatch for Prime Editing\n")
        f.write(" \\\n".join(command_lines) + "\n")

    print(f"Prime Editing Bash script generated at: {output_path}")

# Example usage HEK3  --  DON'T USE LOWER CASE!
#if __name__ == "__main__":
    #parameters = {
        #"batch_settings": "/Users/leandro/Desktop/github/NGS-data/SeV-HEK_251003/merged/crispresso.batch", # Include input-file path
        #"output_folder": "/Users/leandro/Desktop/github/NGS-data/SeV-HEK_251003", # Include output bash file path
        #"amplicon_seq": "",
        #"prime_editing_override_prime_edited_ref_seq": "", # edited sequence
        #"prime_editing_pegRNA_spacer_seq": "GGCCCAGACTGAGCACGTGA", #pegRNA spacer 5-3'
        #"prime_editing_pegRNA_extension_seq": "TCTGCCATCACAAGCGTGCTCAGTCTG",  # pegRNA extension 5-3' 
        #"prime_editing_pegRNA_scaffold_seq": "GTTTTAGAGCTAGGCCAACATGAGGATCACCCATGTCTGCAGGGCCTAGCAAGTTAAAATAAGGCTAGTCCGTTATCACGCCGAAAGGCGGGCACCGAGTCGGTGC",  # scaffold
        #"quantification_window_center": "-1",  # Often shifted for prime editing
        #"quantification_window_size": "5",
    #}

    #generate_crispresso_prime_editing_script("run_crispresso_prime.sh", parameters)


# Example usage ORF1  --  DON'T USE LOWER CASE!

if __name__ == "__main__":
    # CRISPRessoBatch options for the ORF1 run. The keyword order below is the
    # order in which the options are written into the generated script.
    parameters = dict(
        # Input: batch-settings file
        batch_settings="/Users/leandro/Desktop/github/NGS-data/SeV-ORF1_251003/merged/crispresso.batch",
        # Value passed as --output_folder (this is not the path of the bash
        # script; that one is given in the call at the bottom)
        output_folder="/Users/leandro/Desktop/github/NGS-data/SeV-ORF1_251003",
        # Unedited amplicon
        amplicon_seq="TCTACGTCTGATTGGTGTACCTGAAAGTGATGTGGAGAATGGAACCAAGTTGGAAAACACTCTGCAGGATATTATCCAGGAGAACTTCCCCAATCTAGCAAGGCAGGCCAACGTTCAGATTCAGGAAATACAGAGAACGCCACAAAGATACTCCTCGAGAAGAGCAACTCCAAGACACATAATTGTCAGATTCACCAAAGTTGAA",
        # Edited sequence.
        # NOTE(review): ends with one more "A" than amplicon_seq - confirm this is intended.
        prime_editing_override_prime_edited_ref_seq="TCTACGTCTGATTGGTGTACCTGAAAGTGATGTGGAGAATGGAACCAAGTTGGAAAACACTCTGCAGGATATTATCCAGGAGAACTTCCCCAACTTATCTAGCAAGGCAGGCCAACGTTCAGATTCAGGAAATACAGAGAACGCCACAAAGATACTCCTCGAGAAGAGCAACTCCAAGACACATAATTGTCAGATTCACCAAAGTTGAAA",
        # pegRNA spacer, written in the RNA alphabet (contains U); the author
        # notes its orientation as 3-5'.
        # NOTE(review): every other sequence here uses T, and with U replaced by T
        # this spacer occurs verbatim in amplicon_seq - confirm CRISPRessoBatch
        # accepts U and that the stated orientation is right.
        prime_editing_pegRNA_spacer_seq="CAAUCUAGCAAGGCAGGCC",
        # pegRNA extension (author's note: 3-5')
        prime_editing_pegRNA_extension_seq="CTGCCTTGCTAGATAAGTTGGGGAAGTTCTC",
        # pegRNA scaffold
        prime_editing_pegRNA_scaffold_seq="GTTTTAGAGCTAGAAATAGCAAGTTAAAATAAGGCTAGTCCGTTATCAACTTGAAAAAGTGGCACCGAGTCGGTGC",
        # Quantification window; the center is often shifted for prime editing
        quantification_window_center="-1",
        quantification_window_size="5",
    )

    # Writes the script into the current working directory.
    generate_crispresso_prime_editing_script(
        output_path="run_crispresso_prime.sh",
        params=parameters,
    )


Prime Editing Bash script generated at: run_crispresso_prime.sh
