In [1]:
# 'scwrl4 -i {pdb_filename} -s {new_fasta_seq_file} -o {remodeled_pdb_file} > {workSubfolder}/modeling.log' 

In [2]:
import os
import pandas as pd

In [3]:
# Load Data
df = pd.read_csv('cleaned_data.csv')
df

Unnamed: 0,cath_id,class,architecture,topology,superfamily,resolution_in_angstroms,pdb_id,sequences,cath_indices,architecture_id,label,uniprot_id
0,2w3sB01,3,90,1170,50,2.60,2w3s,SVGKPLPHDSARAHVTGQARYLDDLPCPANTLHLAFGLSTEASAAI...,"[(2, 124)]","(3, 90)",9,O54051
1,3be3A00,2,30,30,320,2.04,3be3,QDFRPGVYRHYKGDHYLALGLARADETDEVVVVYTRLYARAGLPMS...,"[(6, 81)]","(2, 30)",2,A0A0H3LS10
2,3zq4C03,3,10,20,580,3.00,3zq4,DIGNIVLRDRRILSEEGLVIVVVSIDMDDFKISAGPDLISRGFVYM...,"[(449, 555)]","(3, 10)",5,Q45493
3,1peqA03,1,10,1650,20,2.80,1peq,DITFRLAKENAQMALFSPYDIQRRYGKPFGDIAISERYDELIADPH...,"[(294, 346)]","(1, 10)",0,Q08698
4,1bdoA00,2,40,50,100,1.80,1bdo,EISGHIVRSPMVGTFYRTPSPDAKAFIEVGQKVNVGDTLCIVEAMK...,"[(77, 156)]","(2, 40)",3,P0ABD8
...,...,...,...,...,...,...,...,...,...,...,...,...
5909,2yyiA02,2,40,110,10,1.66,2yyi,ATTHALTNPQVNRARPPSGQPDPYIPVGVVKQTEKGIVVRGARMTA...,"[(139, 266)]","(2, 40)",3,Q5SJP8
5910,4mo0A00,3,30,780,10,2.10,4mo0,EQKIKIYVTKRRFGKLMTIIEGFDTSVIDLKELAKKLKDICACGGT...,"[(24, 102)]","(3, 30)",7,Q57902
5911,1vq8X00,3,10,440,10,2.20,1vq8,ERVVTIPLRDARAEPNHKRADKAMILIREHLAKHFSVDEDAVRLDP...,"[(7, 88)]","(3, 10)",5,P18138
5912,1ze3D00,3,10,20,410,1.84,1ze3,DLYFNPRFLADDPQAVADLSRFENGQELPPGTYRVDIYLNNGYMAT...,"[(1, 125)]","(3, 10)",5,P30130


In [4]:
def create_bash_script(df, script_path):
    with open(script_path, 'w') as modelScript:
        # Write the initial file check lines
        fileCheckString = ("#############################################################################################################################\n"
                           "############ Checking for scwrl4 in the workFolder ##########################################################################\n"
                           "#############################################################################################################################\n"
                           "echo \"Checking for scwrl4..\"; if [ ! -f \"../src/scwrl/scwrl4\" ]; then echo \"[ERROR]: scwrl4 not found. Exitting\!\"; exit 1; else echo \"scwrl4 found. Proceeding....\"; fi ; \n")
        modelScript.write(fileCheckString)

        # Loop through the DataFrame
        for index, row in df.iterrows():
            cath_id = row['cath_id']
            sequences = row['sequences']
            
            workSubfolder = f"../data/{cath_id}"
            
            pdb_dir = f"{workSubfolder}/pdb"
            pdb_filename = f"{pdb_dir}/{cath_id}"
            
            seqs_dir = f"{workSubfolder}/seqs"
            new_fasta_seq_file = f"{seqs_dir}/modified.fasta"

            # Ensure the directories exist
            os.makedirs(pdb_dir, exist_ok=True)
            os.makedirs(seqs_dir, exist_ok=True)

            # Write sequences to modified.fasta
            with open(new_fasta_seq_file, 'w') as fasta_file:
                fasta_file.write(sequences)

            # Prepare script lines
            commentString = ("#############################################################################################################################\n"
                             f"###################### BUILDING SCWRL MODEL FOR WORK INDEX {index} (pdb: {cath_id}) ########################################\n"
                             "#############################################################################################################################\n")
            modelScript.write(commentString)

            checkLogsForSkip = f"( if ! {{ [ -f {pdb_dir}/step1-model.log ] && ( grep -q Scwrl4_Execution {pdb_dir}/step1-model.log ) }} ; then   "
            runScwrl = f" ../src/scwrl/scwrl4  -i {pdb_filename} -s {new_fasta_seq_file} -o {pdb_dir}/{cath_id}_remodeled.pdb > {workSubfolder}/modeling.log; "
            addRunToLogs = f" echo \"scwrl4 model built for [{cath_id}] on $(date) \" > {workSubfolder}/modelStatus.log ;  fi; ) &"
            newLine = checkLogsForSkip + runScwrl + addRunToLogs + "\n"
            statusString = f"echo \"[FINISHED]: {index} Modeled PDB: {cath_id}\"\n"

            modelScript.write(newLine)
            modelScript.write(statusString)
            modelScript.write("wait;\n")

In [5]:
# Create the bash script
bash_script = 'scwrl4_remodeling.sh'
create_bash_script(df, bash_script)

In [10]:
# Open the file and read its contents
with open(bash_script, 'r') as file:
    for _ in range(22):
        line = file.readline()
        if not line:
            break
        print(line, end='')

#############################################################################################################################
############ Checking for scwrl4 in the workFolder ##########################################################################
#############################################################################################################################
echo "Checking for scwrl4.."; if [ ! -f "../src/scwrl/scwrl4" ]; then echo "[ERROR]: scwrl4 not found. Exitting\!"; exit 1; else echo "scwrl4 found. Proceeding...."; fi ; 
#############################################################################################################################
###################### BUILDING SCWRL MODEL FOR WORK INDEX 0 (pdb: 2w3sB01) ########################################
#############################################################################################################################
( if ! { [ -f ../data/2w3sB01/pdb/step1-model.log ] && ( grep -q Scwrl4_Execution

In [9]:
# Run Remodeling
!bash scwrl4_remodeling.sh

Checking for scwrl4..
scwrl4 found. Proceeding....
[FINISHED]: 0 Modeled PDB: 2w3sB01
[FINISHED]: 1 Modeled PDB: 3be3A00
[FINISHED]: 2 Modeled PDB: 3zq4C03
[FINISHED]: 3 Modeled PDB: 1peqA03
[FINISHED]: 4 Modeled PDB: 1bdoA00
[FINISHED]: 5 Modeled PDB: 3r0hG01
[FINISHED]: 6 Modeled PDB: 1aqcA00
[FINISHED]: 7 Modeled PDB: 2gnxA01
[FINISHED]: 8 Modeled PDB: 6hjfA02
[FINISHED]: 9 Modeled PDB: 1o1zA00
[FINISHED]: 10 Modeled PDB: 4w8pA02
[FINISHED]: 11 Modeled PDB: 1r7lA00
[FINISHED]: 12 Modeled PDB: 3i9v600
[FINISHED]: 13 Modeled PDB: 3rylA01
[FINISHED]: 14 Modeled PDB: 2i8dA01
[FINISHED]: 15 Modeled PDB: 4fixA01
[FINISHED]: 16 Modeled PDB: 3m92A01
[FINISHED]: 17 Modeled PDB: 3vkgA02
[FINISHED]: 18 Modeled PDB: 1rmvA00
[FINISHED]: 19 Modeled PDB: 4fgmA01
[FINISHED]: 20 Modeled PDB: 1k3eA01
[FINISHED]: 21 Modeled PDB: 1yn3A00
[FINISHED]: 22 Modeled PDB: 1a70A00
[FINISHED]: 23 Modeled PDB: 5bncA02
[FINISHED]: 24 Modeled PDB: 1u5uA00
[FINISHED]: 25 Modeled PDB: 4hj1C03
[FINISHED]: 26 Modeled 