##SIMalign

**How to:**
1.   Install dependencies by running the first cell (takes a few minutes).
2.   When it is done, run the "Importing files" cell.
3.   A button saying "Choose Files" will appear. Press it and choose all the structures that you want to analyse (at least 3 structures).
<!-- 4.   A dropdown menu will appear. Choose the structure that you want to be the reference and Press "RUN". -->
4.   Type which structure you want to be your reference structure.

In [None]:
#@title Install dependencies


import os
# One-time environment setup. The empty marker file "SIMalign_READY" is
# created at the very end of a run, so later executions of this cell only
# report that the setup has already been done.
if os.path.isfile("SIMalign_READY"):
    print("Dependendies already installed.")
else:
    # condacolab is installed with pip first and can only be imported afterwards.
    print("installing condacolab...")
    os.system("pip install -q condacolab")
    import condacolab
    condacolab.install()

    print("installing rdkit...")
    os.system("mamba install -c conda-forge rdkit")

    print("installing pymol...")
    for shell_line in (
        "mamba install -c conda-forge pymol-open-source",
        "wget https://raw.githubusercontent.com/rdkit/rdkit/master/Docs/Book/data/cdk",
        "which pymol",
    ):
        os.system(shell_line)

    # Start a PyMOL process with the -cKRQ flags; the process handle stays
    # available as `cmd`.
    import subprocess
    cmd = subprocess.Popen(["pymol", "-cKRQ"])
    for shell_line in ("ps aux | grep pymol", "pip install py3Dmol"):
        os.system(shell_line)

    # Download the SIMalign module so that later cells can `import SIMalign`
    # (assumes /content is the notebook's working directory -- TODO confirm).
    raw_script = "https://raw.githubusercontent.com/RuneROe/git_color_by_similarity/master/SIMalign.py"
    local_script_path = "/content/SIMalign.py"
    os.system(f"wget {raw_script} -O {local_script_path}")

    # Write the marker last, after all previous steps have been issued.
    os.system("touch SIMalign_READY")
    print("Done")

In [None]:
#@title Importing files

#@markdown Be aware that overfitting can occur if the amino acid sequences are too similar. It is recommended that sequences with more than 80% similarity are removed before importing them into the program.

# import ipywidgets as widgets
# from IPython.display import display
from google.colab import files
import sys
import os



# Removing old uploads
# Names in the working directory that must be kept. This includes the
# SIMalign_READY marker and the SIMalign.py module created by the install cell.
OK_files = {".config", "condacolab_install.log", "__pycache__", "SIMalign_READY", "SIMalign.py", "sample_data"}
for file in os.listdir():
    # Only regular files are deleted: os.remove() raises on directories, so a
    # directory that is not listed in OK_files is left in place instead of
    # aborting the cell.
    if file not in OK_files and os.path.isfile(file):
        os.remove(file)

# os.remove() is synchronous, so no polling of the directory is needed before
# the upload starts.


# Ask the user to upload the structure files. The keys of the returned
# mapping are used as the file names for the rest of the notebook.
infiles = files.upload()
infilenames = list(infiles.keys())

# An empty upload would otherwise fail further down with an IndexError when
# the default reference structure is picked.
if not infilenames:
    print("ERROR: No files were uploaded.")
    sys.exit(1)

# File names containing spaces are rejected.
for file in infilenames:
    if " " in file:
        print("ERROR: No spaces in the names of the uploaded files are allowed.")
        sys.exit(1)

# Default reference structure: the first uploaded file. The prompt further
# down in this cell replaces it with the user's choice.
ref_structure = infilenames[0]
# len_ref_structure = 0
# score_ref_structure = []


# # Create a dropdown widget
# dropdown = widgets.Dropdown(
#     options=infiles,
#     description='Reference structure:'
# )

# # Create a button widget
# proceed_button = widgets.Button(
#     description='RUN',
#     disabled=False,
#     button_style='',  # 'success' or '' (empty) for default
#     tooltip='Click to proceed',
#     icon=''
# )



# Function to handle button click
# def on_button_click(b):
#     proceed_button.disabled = True
#     selected_option = dropdown.index
#     global ref_structure
#     global len_ref_structure
#     global score_ref_structure
#     ref_structure = infilenames[selected_option]
#     print(f'Selected reference structure: {ref_structure}')
#     import SIMalign
#     len_ref_structure, score_ref_structure = SIMalign.run(ref_structure, infilenames, max_iterations, min_aligned_aa, max_dist, remove_chain_duplicate, outfile_name, color=color_by_similarity)
#     files.download(outfile_name)

#     print("Done")


# Attach the button click function to the button's click event
# proceed_button.on_click(on_button_click)

# # Display the widgets
# display(dropdown)
# display(proceed_button)



# Display the list of files to the user
print("Choose a reference structure:")
for file in infilenames:
    print(f"  {file}")

# Prompt the user to choose a file. The answer may be a full file name or any
# prefix that identifies exactly one uploaded file; the loop repeats until
# the choice is unambiguous.
while True:
    choice = input("Reference: ")
    if choice in infilenames:
        # An exact file name always wins, even when it is also the prefix of
        # another uploaded file (e.g. "a.pdb" next to "a.pdb.bak").
        matches = [choice]
    else:
        matches = [file for file in infilenames if file.startswith(choice)]

    if len(matches) == 1:
        ref_structure = matches[0]
        print(f'Selected reference structure: {ref_structure}')
        break
    elif len(matches) > 1:
        print("Not unique choice. Please enter full file name or remove files of similar names.")
    else:
        print("Invalid choice. Please enter a name of a file.")



In [None]:
#@title Run prediction
import SIMalign

# Colab form fields: every `#@param` annotation exposes the assignment on its
# line as an editable field, and the `#@markdown` line below it is the help text.
max_iterations = 3 #@param {type:"integer"}
#@markdown `max_iterations` is the maximum number of alignments. A high number can lead to slow runtime.
min_aligned_aa = 100 #@param {type:"integer"}
#@markdown `min_aligned_aa` is the minimum number of amino acids that should be used for the alignment. A low number can lead to overfitting.
max_dist = 6 #@param {type:"integer"}
#@markdown `max_dist` is the maximum distance between two amino acids before it is considered a gap in the alignment. A value that is too low can lead to false gaps, and a value that is too high can lead to false positives.
remove_chain_duplicate = True #@param {type:"boolean"}
#@markdown If `remove_chain_duplicate` is true, chain duplicates are removed from the structure.
outfile_name = "outfile" #@param {type:"string"}
#@markdown Choose the name of the output file.
color_by_similarity = True #@param {type:"boolean"}
#@markdown If `color_by_similarity` is true, the structures in the resulting PyMOL file are colored based on the similarity computed by the SIMalign algorithm.

# Replace spaces in the output name with underscores and add the .pse extension
outfile_name = outfile_name.replace(" ", "_") + ".pse"

# Run the alignment. The two returned values are kept as notebook globals
# because the "Display reference structure" cell reads them.
len_ref_structure, score_ref_structure = SIMalign.run(ref_structure, infilenames, max_iterations, min_aligned_aa, max_dist, remove_chain_duplicate, outfile_name, color=color_by_similarity)

# Offer the file named by outfile_name as a browser download.
files.download(outfile_name)
print("Done")

In [None]:
#@title Display reference structure
#@markdown Reference structure needs to be a pdb file in order to visualize.

import py3Dmol
import SIMalign

# Build the 3D viewer and load the reference structure as a PDB model.
view = py3Dmol.view(js='https://3dmol.org/build/3Dmol.js',)
# Read the file inside a context manager so that the handle is closed again.
with open(ref_structure, 'r') as pdb_handle:
    view.addModel(pdb_handle.read(), 'pdb')

if color_by_similarity:
    # One CSS "rgb(r,g,b)" string per entry of score_ref_structure.
    color_list = []
    for score in score_ref_structure:
        # Each component returned by color_by_number is multiplied by 255 and
        # truncated to an integer (presumably fractions in [0, 1] -- TODO confirm).
        rgb_components = SIMalign.color_by_number(score)
        channels = ",".join(str(int(component * 255)) for component in rgb_components)
        # The previous version computed `string[:-1] + ")"` without assigning
        # it, which left every entry as "rgb(r,g,b," (trailing comma, no
        # closing parenthesis).
        color_list.append(f"rgb({channels})")

    # Color the cartoon along the residue index using the per-score colors.
    view.setStyle({'cartoon':{'colorscheme':{'prop':'resi',"gradient":"linear",'colors':color_list,'min':1,'max':len_ref_structure}}})
else:
    view.setStyle({'cartoon': {'color':'spectrum'}})


# Last expression of the cell, so its value becomes the cell output.
view.zoomTo()