# Create Sequence File
- Enter a DNA or RNA sequence, and this notebook will build a PDB file for it using `tleap`.

In [None]:
import os, subprocess

data_dir = "../../data"

# Remember the directory the kernel started in. Without this, re-running the
# cell would resolve the relative path again from inside the data directory
# and either fail or land in the wrong place.
if "_start_dir" not in globals():
    _start_dir = os.getcwd()
os.chdir(os.path.join(_start_dir, data_dir))

In [None]:
def is_valid_DNA(seq):
    """Return True if *seq* is a non-empty sequence of uppercase A, T, C, G.

    An empty sequence is rejected explicitly: ``all()`` over an empty
    iterable is True, which would otherwise let an empty input pass as DNA.
    """
    return bool(seq) and set(seq) <= set("ATCG")

def is_valid_RNA(seq):
    """Return True if *seq* is a non-empty sequence of uppercase A, U, C, G.

    An empty sequence is rejected explicitly: ``all()`` over an empty
    iterable is True, which would otherwise let an empty input pass as RNA.
    """
    return bool(seq) and set(seq) <= set("AUCG")

# Prompt until the user supplies a usable sequence. On exit, `sequence` holds
# the normalised (stripped, uppercase) sequence and `molecule_type` is either
# "DNA" or "RNA".
while True:
    # Normalise before validating: the validators and the later residue-name
    # translation only recognise uppercase letters, and pasted input often
    # carries surrounding whitespace.
    sequence = input("Enter the DNA/RNA sequence: ").strip().upper()

    # An empty entry must not be treated as a valid sequence.
    if not sequence:
        print("The sequence is neither valid DNA nor RNA. Please try again.")
        continue

    # DNA is tested first, so a sequence containing only A/C/G (valid in both
    # alphabets) is classified as DNA.
    if is_valid_DNA(sequence):
        molecule_type = "DNA"
        print(f"You've entered a valid DNA sequence: {sequence}")
        break
    elif is_valid_RNA(sequence):
        molecule_type = "RNA"
        print(f"You've entered a valid RNA sequence: {sequence}")
        break
    else:
        print("The sequence is neither valid DNA nor RNA. Please try again.")


In [None]:
def write_fasta(seq, filename):
    """Write *seq* to *filename* as a single-record FASTA file.

    The record header is always ``>Aptamer_Sequence``. After the file has
    been written and closed, a confirmation naming the actual output path is
    printed.
    """
    with open(filename, 'w') as f:
        f.write('>Aptamer_Sequence\n')
        f.write(seq + '\n')
    # Report the path actually written rather than a hard-coded name.
    print(f"The sequence is output into {filename} file")

# Replace 'sequence.fasta' with your preferred output filename
write_fasta(sequence, 'sequence.fasta')


In [None]:
def modify_sequence_for_OL15(seq, mol_type):
    """Translate a one-letter nucleotide sequence into tleap residue names.

    The result is a space-separated list suitable for the body of tleap's
    ``sequence { ... }`` command.

    * DNA letters map to ``DA``/``DT``/``DC``/``DG``.
    * RNA letters map to ``A``/``U``/``C``/``G``.
    * The first residue gets the ``5`` suffix and the last the ``3`` suffix
      (e.g. ``DA5 ... DG3``) so the strand is built with proper 5' and 3'
      termini; a single-residue sequence uses the ``N`` (nucleoside) variant.

    Args:
        seq: Nucleotide letters; surrounding whitespace and case are ignored.
        mol_type: ``"DNA"`` or ``"RNA"``.

    Returns:
        The residue names joined by single spaces.

    Raises:
        ValueError: If *mol_type* is not recognised, *seq* is empty, or *seq*
            contains a letter outside the alphabet for *mol_type*.
    """
    if mol_type == "DNA":
        residue_names = {"A": "DA", "T": "DT", "C": "DC", "G": "DG"}
    elif mol_type == "RNA":
        # NOTE(review): current AmberTools RNA libraries name residues
        # A/U/C/G; RA/RU/RC/RG are legacy names - confirm against the
        # leaprc that is sourced for RNA.
        residue_names = {"A": "A", "U": "U", "C": "C", "G": "G"}
    else:
        raise ValueError(f"Unknown molecule type: {mol_type!r}")

    letters = seq.strip().upper()
    if not letters:
        raise ValueError("Sequence is empty")

    try:
        residues = [residue_names[letter] for letter in letters]
    except KeyError as err:
        raise ValueError(
            f"Invalid {mol_type} nucleotide: {err.args[0]!r}"
        ) from None

    # Mark the strand ends with the terminal residue variants.
    if len(residues) == 1:
        residues[0] += "N"
    else:
        residues[0] += "5"
        residues[-1] += "3"
    return " ".join(residues)

# Build the tleap input script that constructs the strand and saves it as
# sequence.pdb.
modified_sequence = modify_sequence_for_OL15(sequence, molecule_type)

sequence_file = "create_sequence.leap"

# OL15 is a DNA force field; the AmberTools RNA counterpart is OL3, so there
# is no leaprc.RNA.OL15 to source. Anything that is not DNA is treated as RNA.
# NOTE(review): the residue names in `modified_sequence` must exist in the
# library loaded by the chosen leaprc (RNA.OL3 names RNA residues A/C/G/U) -
# confirm modify_sequence_for_OL15 emits matching names.
leaprc = "leaprc.DNA.OL15" if molecule_type == "DNA" else "leaprc.RNA.OL3"

with open(sequence_file, "w") as file:
    file.write(f"source {leaprc}\n")
    file.write(f"seq = sequence {{{modified_sequence}}}\n")
    file.write("savepdb seq sequence.pdb\n")
    file.write("quit\n")

print("The LEAP setup file for aptamer is created")

In [None]:
# Remove any PDB left over from a previous run so that a failed build cannot
# be mistaken for a successful one by the cells that follow.
if os.path.exists("sequence.pdb"):
    os.remove("sequence.pdb")

# check=True raises CalledProcessError if tleap exits with a non-zero status.
subprocess.run(["tleap", "-f", "create_sequence.leap"], check=True)

# tleap can finish without writing the structure (for example when the leap
# script contains errors), so verify the expected output exists.
if not os.path.exists("sequence.pdb"):
    raise RuntimeError(
        "tleap did not produce sequence.pdb; check leap.log for errors"
    )

In [None]:
input_filename = "sequence.pdb"
output_filename = "sequence_modified.pdb"

# Residue name stamped onto every ATOM record
residue_name = "APT"

# Text written over the residue name, chain and residue number fields
# (string offsets 17-25) of each ATOM record: every atom ends up in residue
# 'APT', chain 'A', residue number 1.
residue_fields = f"{residue_name:3}" + " A   1"

with open(input_filename, 'r') as infile, open(output_filename, 'w') as outfile:
    # Non-ATOM records are copied through unchanged.
    outfile.writelines(
        record[:17] + residue_fields + record[26:]
        if record.startswith("ATOM")
        else record
        for record in infile
    )

print(f"Modified PDB saved as {output_filename}")
