## Scraping log files and calculating RMSDs for quantitative benchmark

### AlphaFold PDBs

In [None]:
# Generating Chimera command scripts
# Each .cmd file holds two Chimera commands: `mmaker #1 #0` followed by a
# `save` into the .py file that the benchmark cells later move into the
# matching run directory.
# The "\n" inside $cmd is a literal backslash-n; printf's %b expands it into
# the line break between the two commands. Quoting "$cmd" keeps the text from
# being word-split or globbed, and printf avoids the non-portable `echo -e`.
cmd='mmaker #1 #0\nsave alignment_PDB_exp-pred.py'
printf '%b\n' "$cmd" > align_PDB_exp-pred.cmd
cmd='mmaker #1 #0\nsave alignment_preds.py'
printf '%b\n' "$cmd" > align_pred.cmd

In [None]:
# Build PDB.exp-pred.alphafold.benchmark: one tab-separated row per file in
# pdbs/, combining values scraped from the matching run's info.log with the
# RMSD line Chimera prints when the relaxed model_1 prediction and the
# experimental structure are loaded together with align_PDB_exp-pred.cmd.

# Print a warning on stderr; never aborts the cell.
_bench_warn() {
  printf 'warning: %s\n' "$*" >&2
}

# Print the value(s) of "<key>=<value>" found on the model_1 line(s) of a log.
#   $1 - path to info.log
#   $2 - key, e.g. plddt or ptm
#   $3 - extended regex the value must match
_bench_metric() {
  grep model_1 < "$1" | grep -Eo "$2=$3" | cut -d '=' -f 2
}

# Print the seconds elapsed between the first and the last line of a log.
# The timestamp of a line is whatever precedes its third '-' (the first two
# dashes are presumably the date separators -- confirm against the log format).
#   $1 - path to info.log
# Returns non-zero when a timestamp is missing or rejected by `date`.
_bench_runtime() {
  local first last t_start t_end
  first=$(head -n 1 < "$1" | cut -d '-' -f -3)
  last=$(tail -n 1 < "$1" | cut -d '-' -f -3)
  # GNU `date -d ''` silently means "today 00:00", so refuse blank stamps.
  [[ "$first" == *[0-9]* && "$last" == *[0-9]* ]] || return 1
  t_start=$(date -d "$first" +%s) || return 1
  t_end=$(date -d "$last" +%s) || return 1
  printf '%s\n' "$(( t_end - t_start ))"
}

# Print every match of an extended regex in a string as a single
# tab-separated line, with padding spaces removed from each match.
#   $1 - text to search
#   $2 - extended regex
_bench_matches() {
  printf '%s\n' "$1" | grep -Eo -- "$2" | tr -d ' ' | paste -s -d '\t' -
}

# Benchmark one predictor against the experimental structures.
#   $1 - predictor name; runs are read from runs/<name>_benchmark and the
#        table is written to PDB.exp-pred.<name>.benchmark
# Entries without exactly locatable inputs are skipped with a warning instead
# of aborting the whole loop.
benchmark_exp_vs_pred() {
  local tool=$1
  local run_root="runs/${tool}_benchmark"
  local out="PDB.exp-pred.${tool}.benchmark"
  local pdb_path file stem name run log
  local plddt ptm runtime chim_out chim_line rmsd pruned
  local -a matches

  printf 'PDB name\tpLDDT\tpTM\tRMSD (pruned)\tRMSD (all)\tPruned number of atom pairs\tTotal number of atom pairs\tRuntime\n' > "$out"

  # Glob instead of parsing `dir` output so odd file names stay intact.
  for pdb_path in pdbs/*; do
    [[ -e "$pdb_path" ]] || continue
    file=${pdb_path##*/}
    stem=${file%.pdb}
    # Run directories are looked up by the upper-cased first four characters.
    name=$(printf '%s' "${stem:0:4}" | tr '[:lower:]' '[:upper:]')

    matches=("$run_root"/*"$name"*)
    if [[ ! -e "${matches[0]:-}" ]]; then
      _bench_warn "no run matching '$name' in $run_root; skipping $file"
      continue
    fi
    if (( ${#matches[@]} > 1 )); then
      _bench_warn "several runs match '$name' in $run_root; using ${matches[0]##*/}"
    fi
    run=${matches[0]##*/}

    log="$run_root/$run/info.log"
    if [[ ! -r "$log" ]]; then
      _bench_warn "cannot read $log; skipping $file"
      continue
    fi

    plddt=$(_bench_metric "$log" plddt '[0-9]{1,3}\.[0-9]{3}')
    ptm=$(_bench_metric "$log" ptm '[0-1]\.[0-9]{3}')
    if ! runtime=$(_bench_runtime "$log"); then
      _bench_warn "could not derive a runtime from $log"
      runtime=""
    fi

    chim_out=$(chimera --nogui \
      "$run_root/$run/${run}_relaxed_model_1.pdb" \
      "pdbs/$file" align_PDB_exp-pred.cmd | grep RMSD)
    # Collapse to one line so matching does not depend on line breaks.
    chim_line=$(printf '%s' "$chim_out" | tr -s '[:space:]' ' ')
    # Three-decimal numbers are the RMSD values, space-delimited integers the
    # atom-pair counts; the header assumes pruned comes before overall.
    rmsd=$(_bench_matches "$chim_line" '[0-9]+\.[0-9]{3}')
    pruned=$(_bench_matches "$chim_line" ' [0-9]+ ')

    # The command script saves this file; keep it next to the run it belongs to.
    if [[ -f alignment_PDB_exp-pred.py ]]; then
      mv -- alignment_PDB_exp-pred.py "$run_root/$run/alignment_PDB_exp-pred.py"
    else
      _bench_warn "Chimera wrote no alignment_PDB_exp-pred.py for $run"
    fi

    printf '%s\t%s\t%s\t%s\t%s\t%s\n' \
      "$name" "$plddt" "$ptm" "$rmsd" "$pruned" "$runtime" >> "$out"
  done
}

benchmark_exp_vs_pred alphafold
cat PDB.exp-pred.alphafold.benchmark

### ColabFold PDBs

In [None]:
# Build PDB.exp-pred.colabfold.benchmark: one tab-separated row per file in
# pdbs/, combining values scraped from the matching run's info.log with the
# RMSD line Chimera prints when the relaxed model_1 prediction and the
# experimental structure are loaded together with align_PDB_exp-pred.cmd.

# Print a warning on stderr; never aborts the cell.
_bench_warn() {
  printf 'warning: %s\n' "$*" >&2
}

# Print the value(s) of "<key>=<value>" found on the model_1 line(s) of a log.
#   $1 - path to info.log
#   $2 - key, e.g. plddt or ptm
#   $3 - extended regex the value must match
_bench_metric() {
  grep model_1 < "$1" | grep -Eo "$2=$3" | cut -d '=' -f 2
}

# Print the seconds elapsed between the first and the last line of a log.
# The timestamp of a line is whatever precedes its third '-' (the first two
# dashes are presumably the date separators -- confirm against the log format).
#   $1 - path to info.log
# Returns non-zero when a timestamp is missing or rejected by `date`.
_bench_runtime() {
  local first last t_start t_end
  first=$(head -n 1 < "$1" | cut -d '-' -f -3)
  last=$(tail -n 1 < "$1" | cut -d '-' -f -3)
  # GNU `date -d ''` silently means "today 00:00", so refuse blank stamps.
  [[ "$first" == *[0-9]* && "$last" == *[0-9]* ]] || return 1
  t_start=$(date -d "$first" +%s) || return 1
  t_end=$(date -d "$last" +%s) || return 1
  printf '%s\n' "$(( t_end - t_start ))"
}

# Print every match of an extended regex in a string as a single
# tab-separated line, with padding spaces removed from each match.
#   $1 - text to search
#   $2 - extended regex
_bench_matches() {
  printf '%s\n' "$1" | grep -Eo -- "$2" | tr -d ' ' | paste -s -d '\t' -
}

# Benchmark one predictor against the experimental structures.
#   $1 - predictor name; runs are read from runs/<name>_benchmark and the
#        table is written to PDB.exp-pred.<name>.benchmark
# Entries without exactly locatable inputs are skipped with a warning instead
# of aborting the whole loop.
benchmark_exp_vs_pred() {
  local tool=$1
  local run_root="runs/${tool}_benchmark"
  local out="PDB.exp-pred.${tool}.benchmark"
  local pdb_path file stem name run log
  local plddt ptm runtime chim_out chim_line rmsd pruned
  local -a matches

  printf 'PDB name\tpLDDT\tpTM\tRMSD (pruned)\tRMSD (all)\tPruned number of atom pairs\tTotal number of atom pairs\tRuntime\n' > "$out"

  # Glob instead of parsing `dir` output so odd file names stay intact.
  for pdb_path in pdbs/*; do
    [[ -e "$pdb_path" ]] || continue
    file=${pdb_path##*/}
    stem=${file%.pdb}
    # Run directories are looked up by the upper-cased first four characters.
    name=$(printf '%s' "${stem:0:4}" | tr '[:lower:]' '[:upper:]')

    matches=("$run_root"/*"$name"*)
    if [[ ! -e "${matches[0]:-}" ]]; then
      _bench_warn "no run matching '$name' in $run_root; skipping $file"
      continue
    fi
    if (( ${#matches[@]} > 1 )); then
      _bench_warn "several runs match '$name' in $run_root; using ${matches[0]##*/}"
    fi
    run=${matches[0]##*/}

    log="$run_root/$run/info.log"
    if [[ ! -r "$log" ]]; then
      _bench_warn "cannot read $log; skipping $file"
      continue
    fi

    plddt=$(_bench_metric "$log" plddt '[0-9]{1,3}\.[0-9]{3}')
    ptm=$(_bench_metric "$log" ptm '[0-1]\.[0-9]{3}')
    if ! runtime=$(_bench_runtime "$log"); then
      _bench_warn "could not derive a runtime from $log"
      runtime=""
    fi

    chim_out=$(chimera --nogui \
      "$run_root/$run/${run}_relaxed_model_1.pdb" \
      "pdbs/$file" align_PDB_exp-pred.cmd | grep RMSD)
    # Collapse to one line so matching does not depend on line breaks.
    chim_line=$(printf '%s' "$chim_out" | tr -s '[:space:]' ' ')
    # Three-decimal numbers are the RMSD values, space-delimited integers the
    # atom-pair counts; the header assumes pruned comes before overall.
    rmsd=$(_bench_matches "$chim_line" '[0-9]+\.[0-9]{3}')
    pruned=$(_bench_matches "$chim_line" ' [0-9]+ ')

    # The command script saves this file; keep it next to the run it belongs to.
    if [[ -f alignment_PDB_exp-pred.py ]]; then
      mv -- alignment_PDB_exp-pred.py "$run_root/$run/alignment_PDB_exp-pred.py"
    else
      _bench_warn "Chimera wrote no alignment_PDB_exp-pred.py for $run"
    fi

    printf '%s\t%s\t%s\t%s\t%s\t%s\n' \
      "$name" "$plddt" "$ptm" "$rmsd" "$pruned" "$runtime" >> "$out"
  done
}

benchmark_exp_vs_pred colabfold
cat PDB.exp-pred.colabfold.benchmark

### AlphaFold & ColabFold comparison

In [None]:
# Enter runs/ so the relative paths used by the comparison cell
# (alphafold_benchmark/..., colabfold_benchmark/..., ../align_pred.cmd) resolve.
cd runs

In [None]:
# Build benchmark.comparison: one tab-separated row per AlphaFold run that has
# a ColabFold counterpart, combining values scraped from both info.log files
# with the RMSD line Chimera prints when the two relaxed model_1 predictions
# are loaded together with ../align_pred.cmd. Expects to run inside runs/.

# Print a warning on stderr; never aborts the cell.
_bench_warn() {
  printf 'warning: %s\n' "$*" >&2
}

# Print the value(s) of "<key>=<value>" found on the model_1 line(s) of a log.
#   $1 - path to info.log
#   $2 - key, e.g. plddt or ptm
#   $3 - extended regex the value must match
_bench_metric() {
  grep model_1 < "$1" | grep -Eo "$2=$3" | cut -d '=' -f 2
}

# Print the seconds elapsed between the first and the last line of a log.
# The timestamp of a line is whatever precedes its third '-' (the first two
# dashes are presumably the date separators -- confirm against the log format).
#   $1 - path to info.log
# Returns non-zero when a timestamp is missing or rejected by `date`.
_bench_runtime() {
  local first last t_start t_end
  first=$(head -n 1 < "$1" | cut -d '-' -f -3)
  last=$(tail -n 1 < "$1" | cut -d '-' -f -3)
  # GNU `date -d ''` silently means "today 00:00", so refuse blank stamps.
  [[ "$first" == *[0-9]* && "$last" == *[0-9]* ]] || return 1
  t_start=$(date -d "$first" +%s) || return 1
  t_end=$(date -d "$last" +%s) || return 1
  printf '%s\n' "$(( t_end - t_start ))"
}

# Print every match of an extended regex in a string as a single
# tab-separated line, with padding spaces removed from each match.
#   $1 - text to search
#   $2 - extended regex
_bench_matches() {
  printf '%s\n' "$1" | grep -Eo -- "$2" | tr -d ' ' | paste -s -d '\t' -
}

# Compare every AlphaFold run with the ColabFold run of the same protein.
# Runs whose counterpart or logs cannot be found are skipped with a warning
# instead of aborting the whole loop.
benchmark_af_vs_cf() {
  local out=benchmark.comparison
  local af_path run name run_c af_log cf_log
  local plddt_a ptm_a runtime_a plddt_c ptm_c runtime_c
  local chim_out chim_line rmsd pruned
  local -a cf_matches

  printf 'Protein\tpLDDT_AF\tpLDDT_CF\tpTM_AF\tpTM_CF\tRMSD (pruned)\tRMSD (all)\tPruned number of atom pairs\tTotal number of atom pairs\tRuntime_AF\tRuntime_CF\n' > "$out"

  # Glob instead of parsing `dir` output so odd directory names stay intact.
  for af_path in alphafold_benchmark/*; do
    [[ -e "$af_path" ]] || continue
    run=${af_path##*/}
    # Protein name: everything after the first '_' of the run directory name
    # (the whole name when it contains no '_').
    name=${run#*_}

    cf_matches=(colabfold_benchmark/*"$name"*)
    if [[ ! -e "${cf_matches[0]:-}" ]]; then
      _bench_warn "no ColabFold run matching '$name'; skipping $run"
      continue
    fi
    if (( ${#cf_matches[@]} > 1 )); then
      _bench_warn "several ColabFold runs match '$name'; using ${cf_matches[0]##*/}"
    fi
    run_c=${cf_matches[0]##*/}

    af_log="alphafold_benchmark/$run/info.log"
    cf_log="colabfold_benchmark/$run_c/info.log"
    if [[ ! -r "$af_log" || ! -r "$cf_log" ]]; then
      _bench_warn "cannot read $af_log and/or $cf_log; skipping $run"
      continue
    fi

    # alphafold
    plddt_a=$(_bench_metric "$af_log" plddt '[0-9]{1,3}\.[0-9]{3}')
    ptm_a=$(_bench_metric "$af_log" ptm '[0-1]\.[0-9]{3}')
    if ! runtime_a=$(_bench_runtime "$af_log"); then
      _bench_warn "could not derive a runtime from $af_log"
      runtime_a=""
    fi

    # colabfold
    plddt_c=$(_bench_metric "$cf_log" plddt '[0-9]{1,3}\.[0-9]{3}')
    ptm_c=$(_bench_metric "$cf_log" ptm '[0-1]\.[0-9]{3}')
    if ! runtime_c=$(_bench_runtime "$cf_log"); then
      _bench_warn "could not derive a runtime from $cf_log"
      runtime_c=""
    fi

    # comparative RMSD
    chim_out=$(chimera --nogui \
      "colabfold_benchmark/$run_c/${run_c}_relaxed_model_1.pdb" \
      "alphafold_benchmark/$run/${run}_relaxed_model_1.pdb" \
      ../align_pred.cmd | grep RMSD)
    # Collapse to one line so matching does not depend on line breaks.
    chim_line=$(printf '%s' "$chim_out" | tr -s '[:space:]' ' ')
    # Three-decimal numbers are the RMSD values, space-delimited integers the
    # atom-pair counts; the header assumes pruned comes before overall.
    rmsd=$(_bench_matches "$chim_line" '[0-9]+\.[0-9]{3}')
    pruned=$(_bench_matches "$chim_line" ' [0-9]+ ')

    # The session file is collected from the parent directory, next to the
    # command script, and stored with the ColabFold run.
    if [[ -f ../alignment_preds.py ]]; then
      mv -- ../alignment_preds.py "./colabfold_benchmark/$run_c/alignment_preds.py"
    else
      _bench_warn "Chimera wrote no ../alignment_preds.py for $run"
    fi

    printf '%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\n' \
      "$name" "$plddt_a" "$plddt_c" "$ptm_a" "$ptm_c" \
      "$rmsd" "$pruned" "$runtime_a" "$runtime_c" >> "$out"
  done
}

benchmark_af_vs_cf
cat benchmark.comparison