In [None]:
from pymol import cmd 
import numpy as np
import sys
import os

# Reference structure; it is handed to SIMalign.run together with the inputs.
ref_structure = "../homologues_plastic/SP3.pdb"
# All structure files to process (the reference is part of this list too).
infilenames = [
    "../homologues_plastic/SP1.pdb",
    "../homologues_plastic/SP2.pdb",
    "../homologues_plastic/SP3.pdb",
    "../homologues_plastic/A0A1B6ZCQ7.cif",
]

#@title Download options
download_pymol = False #@param {type: "boolean"}
#@markdown `download_pymol` lets you download a PyMOL session file of your aligned structures.
outfile_name = "tmp/outfile" #@param {type:"string"}
#@markdown Name of the PyMOL file (the `.pse` extension is added automatically).
download_alignment = False #@param {type: "boolean"}
#@markdown `download_alignment` lets you download an alignment file of your aligned structures in sequence format.
alignment_file_name = "tmp/alignment.aln" #@param {type:"string"}
#@markdown Name of the alignment file.

# Swap every space for an underscore and append the .pse extension.
outfile_name = outfile_name.replace(" ", "_") + ".pse"

#@title Color options
color_mode = "similarity" #@param ["similarity", "hotspot", "none"] {type:"string"}
#@markdown `color_mode` specifies how the structures should be colored.

#@title SIMalign options
max_iterations = 3 #@param {type:"integer"}
#@markdown `max_iterations` is the maximum number of alignment iterations. A high number can lead to a slow runtime.
min_aligned_aa = 100 #@param {type:"integer"}
#@markdown `min_aligned_aa` is the minimum number of amino acids that should be used for the alignment. A low number can lead to overfitting.
max_dist = 6 #@param {type:"integer"}
#@markdown `max_dist` is the maximum distance between two amino acids before it is considered a gap in the alignment. Too low a number can lead to false gaps and too high a number can lead to false positives.
# remove_chain_duplicate = True #@param {type:"boolean"}
# Disabled option: for now only chain A is used.
#  #@markdown If `remove_chain_duplicate` is true, chain duplicates are removed from the structure.

#@title Foldseek options
#@markdown Foldseek doesn't work yet!
foldseek = False #@param {type:"boolean"}
foldseek_database = "Alphafold/UniProt" #@param ["Alphafold/UniProt","Alphafold/UniProt50-minimal","Alphafold/UniProt50","Alphafold/Proteome","Alphafold/Swiss-Prot","ESMAtlas30","PDB","Thermophilic_DB"] {type:"string"}
foldseek_variable_tresshold = "evalue" #@param ["evalue","pident","fident","nident","alnlen","bits","mismatch","qcov","tcov","lddt","qtmscore","ttmscore","alntmscore","rmsd","prob"]
foldseek_value_tresshold = 0.0001  #@param {type:"number"}
foldseek_search_against = "ref_structure" #@param ["ref_structure","all_structures"]

#@title Hotspot finding options
find_hotspots = True #@param {type: "boolean"}
#@markdown If `find_hotspots` is true, the program will look for amino acids in the structure that can be mutated to potentially alter the stability of the protein.
#hotspot_min_size = 2 #@param {type: "integer"}
# Disabled option: for now only single mutations are searched for.

#@title Run prediction
# Pipeline of this cell: validate the inputs, optionally run a Foldseek
# search, align the structures with SIMalign, optionally search for hotspots,
# color the structures, save the PyMOL session and hand out the requested
# downloads.
import SIMalign


# Without Foldseek at least three structure files have to be supplied.
if len(infilenames) < 3 and not foldseek:
    print("ERROR: Import at least 3 files or activate foldseek.")
    sys.exit(1)

# Both output paths default to a "tmp/" folder; create the parent folders up
# front so writing the alignment and saving the session cannot fail on a
# missing directory. A bare file name has no parent folder to create.
for _out_path in (outfile_name, alignment_file_name):
    _out_dir = os.path.dirname(_out_path)
    if _out_dir:
        os.makedirs(_out_dir, exist_ok=True)

if foldseek:
    # Imported lazily: the module is only needed when Foldseek is switched on.
    import foldseek_search
    # NOTE(review): the return value is discarded; presumably the search
    # extends `infilenames` in place - confirm against foldseek_search.run.
    foldseek_search.run(
        foldseek_database,
        foldseek_variable_tresshold,
        foldseek_value_tresshold,
        foldseek_search_against,
        ref_structure,
        infilenames,
    )

len_ref_structure, score_list, structure_list, core_selection = SIMalign.run(
    ref_structure,
    infilenames,
    max_iterations,
    min_aligned_aa,
    max_dist,
    alignment_file_name,
)

if find_hotspots:
    import hotspot_finder
    hotspot_list, exposed_list = hotspot_finder.run(structure_list, alignment_file_name)
    hotspot_finder.print_hotspot(hotspot_list, structure_list)
else:
    # Placeholders so the coloring call below always gets both arguments.
    hotspot_list, exposed_list = None, None


# Hotspot coloring needs the hotspot search; in that case coloring is skipped
# with a message, but the session is still saved below.
if color_mode == "hotspot" and not find_hotspots:
    print("ERROR: unable to color hotspot without finding them!")
else:
    import colors
    colors.run(color_mode, hotspot_list, score_list, structure_list, core_selection, exposed_list)

from pymol import cmd
cmd.save(outfile_name)

# NOTE(review): `files` is not imported in this cell; presumably it is
# `google.colab.files` imported by an earlier cell - confirm.
if download_alignment:
    # Honor the `download_alignment` option, which was previously ignored.
    # NOTE(review): assumes SIMalign.run wrote `alignment_file_name` - confirm.
    files.download(alignment_file_name)
if download_pymol:
    files.download(outfile_name)
else:
    print("Done")