##Color by similarity

**How to:**
1.   Choose the wanted options below.
2.   Press "Runtime" -> "Run all".
3.   A button saying "Choose Files" below "Run prediction" will appear within a few minutes. Press it and choose all the structures that you want to analyse (at least 3 structures).
4.   A dropdown menu will appear. Choose the structure that you want to be the reference and press "SELECT".

In [None]:
#@title User inputs

max_iterations = 3 #@param {type:"integer"}
#@markdown `max_iterations` is the maximum number of alignments. A high number can lead to slow runtime.
min_aligned_aa = 100 #@param {type:"integer"}
#@markdown `min_aligned_aa` is the minimum number of amino acids that should be used for the alignment. A low number can lead to overfitting.
max_dist = 6 #@param {type:"integer"}
#@markdown `max_dist` is the maximum distance between two amino acids before it is considered a gap in the alignment. A too low number can lead to false gaps and a too high number can lead to false positives.
remove_chain_duplicate = True #@param {type:"boolean"}
#@markdown If `remove_chain_duplicate` is true, duplicate chains are removed from the structure.
outfile_name = "outfile" #@param {type:"string"}
#@markdown Choose name of the outfile.

#@markdown Be aware that overfitting can occur if the amino acid sequences are too similar. It is recommended that sequences of more than 80% similarity are removed before importing to the program.

import ipywidgets as widgets
from IPython.display import display
from google.colab import files
import sys
import os

# Replace spaces in the outfile name and make sure it ends with .pse
# (the extension is only appended when it is not already there).
outfile_name = outfile_name.replace(" ", "_")
if not outfile_name.endswith(".pse"):
    outfile_name += ".pse"

# Removing old uploads: every regular file in the working directory that is
# not listed in OK_files is deleted before new files are uploaded.
OK_files = {".config", "condacolab_install.log", "__pycache__", "PYMOL_READY", "color_by_similarity.py", "sample_data"}
for file in os.listdir():
    # Directories are skipped: os.remove() raises when given a directory.
    if file not in OK_files and os.path.isfile(file):
        os.remove(file)

# Ask the user for the structure files; the returned mapping is keyed by
# file name, and those names are what the rest of the notebook works with.
infiles = files.upload()
infilenames = list(infiles.keys())
for file in infilenames:
    if " " in file:
        print("ERROR: No spaces in the names of the uploaded files are allowed.")
        sys.exit(1)

# Create a dropdown widget listing the uploaded file names.
# The names are passed as options rather than the upload mapping itself:
# the click handler only reads the selected position, so the file contents
# do not need to be attached to the widget.
dropdown = widgets.Dropdown(
    options=infilenames,
    description='Reference structure:',
    # Let the label take the width it needs instead of being cut off.
    style={'description_width': 'initial'},
)

# Create a button widget
proceed_button = widgets.Button(
    description='SELECT',
    disabled=False,
    button_style='',  # 'success' or '' (empty) for default
    tooltip='Click to proceed',
    icon=''
)


# Function to handle button click
def on_button_click(b):
    """Store the structure currently chosen in the dropdown as the reference.

    The choice is written to the module-level name ``ref_structure`` so that
    code outside this callback can read it; a plain local assignment would be
    discarded as soon as the callback returns.

    Args:
        b: The object passed in by the click event; it is not used.
    """
    global ref_structure
    selected_option = dropdown.index
    if selected_option is None:
        # Nothing is selected, so there is nothing to index the name list with.
        print("ERROR: No structure is available to select.")
        return
    ref_structure = infilenames[selected_option]
    print(f'Selected reference structure: {ref_structure}')


# Register the click handler on the SELECT button
proceed_button.on_click(on_button_click)

# Render the widgets in order: dropdown first, then the button
for shown_widget in (dropdown, proceed_button):
    display(shown_widget)

In [None]:
#@title Install dependencies


import os

# The marker file PYMOL_READY records that an earlier run completed the
# setup, so re-running this cell does not install everything again.
if not os.path.isfile("PYMOL_READY"):
    # Names of the steps whose failure makes the prediction cell unusable.
    failed_steps = []

    print("installing condacolab...")
    os.system("pip install -q condacolab")
    import condacolab
    condacolab.install()

    print("installing rdkit...")
    # -y keeps the install non-interactive; a confirmation prompt cannot be
    # answered when the command is started through os.system.
    if os.system("mamba install -y -c conda-forge rdkit") != 0:
        failed_steps.append("rdkit")
    print("installing pymol...")
    if os.system("mamba install -y -c conda-forge pymol-open-source") != 0:
        failed_steps.append("pymol")

    # Best-effort steps: their exit status does not decide whether the setup
    # counts as complete.
    os.system("wget https://raw.githubusercontent.com/rdkit/rdkit/master/Docs/Book/data/cdk")
    os.system("which pymol")
    import subprocess
    try:
        # Starts PyMOL as a background process with the flags -cKRQ.
        # NOTE(review): presumably a headless, quiet server instance - confirm.
        cmd = subprocess.Popen(["pymol", "-cKRQ"])
    except FileNotFoundError:
        # The executable is missing, i.e. the installation above did not work.
        if "pymol" not in failed_steps:
            failed_steps.append("pymol")
    os.system("ps aux | grep pymol")

    raw_script = "https://raw.githubusercontent.com/RuneROe/git_color_by_similarity/master/color_by_similarity.py"
    local_script_path = "/content/color_by_similarity.py"
    if os.system(f"wget {raw_script} -O {local_script_path}") != 0:
        failed_steps.append("color_by_similarity.py")
        # wget -O can leave an empty file behind on failure; remove it so a
        # later import does not pick up an empty module.
        if os.path.exists(local_script_path):
            os.remove(local_script_path)

    if failed_steps:
        # Do not write the marker, so that re-running the cell retries.
        print("ERROR: Setup failed for: " + ", ".join(failed_steps) + ". Re-run this cell to try again.")
    else:
        os.system("touch PYMOL_READY")
        print("Done")
else:
    print("Dependendies already installed.")

In [None]:
#@title Run prediction
import color_by_similarity

# `ref_structure` is meant to be set by pressing SELECT. If it is missing, or
# names a file that is not part of the current upload, fall back to the entry
# currently shown in the dropdown instead of failing with a NameError.
if globals().get("ref_structure") not in infilenames:
    if dropdown.index is None:
        raise RuntimeError("No structures are available: upload the structure files in the 'User inputs' cell first.")
    ref_structure = infilenames[dropdown.index]
    print(f"No reference structure chosen with SELECT; using the current dropdown entry: {ref_structure}")

# Run the analysis with the options from the input form, then offer the
# resulting file for download.
color_by_similarity.run(ref_structure, infilenames, max_iterations, min_aligned_aa, max_dist, remove_chain_duplicate, outfile_name)
files.download(outfile_name)
