
# HHSearch-Python 1.0 Wrapper

## Author: Tim D.

### Date: 12th April 2019


* * *

**This notebook is a wrapper for the created module `hhsearch_module.py`**.

_To avoid the unnecessary use of e.g. threading/asyncio to await button interaction, each step of the walkthrough in this wrapper is handled by its own function, which is called through button interaction instead. 
(Alternatively, one could have created asyncio interactions and monitored button interaction in another thread.)
This gives the user the illusion of progressing through interaction windows, while also giving the option to start over the whole process without re-running the entire code cell._

For information/documentation, please use `help(hhsearch_module)` or have a look into the module's file itself `hhsearch_module.py`!

_Note: Make sure the .hhs and .hhm files are located in the current working directory or in one of its subdirectories._ `os.walk(os.getcwd())` _collects all files below the cwd, and a dict() is built from them so that the functions/widgets can reference the right files. So the files must be at least somewhere in the folder or in a subdir of the folder. Also, duplicate file names should be avoided if the files differ from each other, since the dict() entry will be overwritten by the latest find._

* * *

##### The necessary module can also be installed through pip. 
```pip install hhsearch_python```
##### It can then be imported as a whole with...
```from hhsearch_python import *```

[PyPi](https://pypi.org/project/hhsearch-python/) | [Github](https://github.com/MrRedPandabaer/hhsearch-python)



In [None]:
## Displaying help information about the different functions of the module.. just for convenience.

from hhsearch_python import *
#from hhsearch_module import *
from IPython.display import HTML
from ipywidgets import Layout
import ipywidgets as widgets

def showhelp(x):
    """
    Show the built-in help text of the module function selected by name.

    :param x: key into ``avaiablefunctions_dict``; supplied by the Select widget.
    """
    # help() writes the documentation itself and returns None, so wrapping it
    # in print() only appended a stray "None" line to the output.
    help(avaiablefunctions_dict.get(x))


display(HTML("<code>Select a function to show information about it.</code>"))

# Maps the name shown in the selection list to the module function it documents.
avaiablefunctions_dict = {
    "extract_HHSearch_data": extract_HHSearch_data,
    "cmdPNG": cmdPNG,
    "extract_HHSearch_main": extract_HHSearch_main,
    "get_alignment_term": get_alignment_term,
    "get_full_alignment": get_full_alignment,
    "highlight_hhs_full": highlight_hhs_full,
    "pymol_alignment": pymol_alignment,
    "read_in_frequencies": read_in_frequencies,
    "plot_frequencies": plot_frequencies,
}

# The selectable names are the mapping's keys, in insertion order.
avaiablefunctions = list(avaiablefunctions_dict)

selectfunction = widgets.Select(
    description="Select Help",
    options=avaiablefunctions,
    disabled=False,
    layout=Layout(width="30%"),
)

# Calls showhelp() with the currently selected name whenever the selection changes.
widgets.interact(showhelp, x=selectfunction)



In [None]:
###### Import block ######

from ipywidgets import Output
import ipywidgets as widgets
from ipywidgets import Layout, HBox
%matplotlib inline
from IPython.display import HTML, Image, display, Markdown, clear_output
#from hhsearch_module import *
from hhsearch_python import *
# Countermeasure against caching of images
import random
import imageio

############################

###### Function block ######

def printstart():
    """
    Printing the query line and the column header of the hit list and
    (re)creating the selection widget for all Hits within the .hhs file.

    Sets the globals ``selecthit`` (Select widget over ``build_up``; it is only
    created here, displaying it is left to the caller) and ``__counter__``
    (random number that is appended to image URLs).
    """
    global selecthit, __counter__

    # each new start resets the counter to avoid showing cached images.
    # The bounds have to be ints: random.randint() no longer accepts a float
    # such as 2e12 on current Python versions.
    __counter__ = random.randint(0, 2 * 10**12)

    # widgets.Select doesn't display \t tabs correctly for whatever reason
    display(HTML(f"<pre>{main_data.get('Query')}</pre>"))

    # a little header to explain the different cells
    column_titles = ("No", "Hit", "Descrip", "Prob",
                     "E-Value", "P-Value", "Score", "Cols",
                     "Query HMM", "Template HMM")
    # joined with one uniform separator and a closing </pre> tag
    display(HTML("<pre>" + " // ".join(column_titles) + "</pre>"))

    selecthit = widgets.Select(
                    options=build_up,
                    disabled=False,
                    rows = 20,
                    layout=Layout(width="100%"))
                 
        

def _collect_residue_numbers(alignment_lines, line_prefix):
    """
    Gather the numbers printed on the alignment rows of one sequence.

    :param alignment_lines: lines of the HTML formatted alignment.
    :param line_prefix: only lines starting with this text are scanned.
    :return: list with every integer that stands between two blanks on those lines.
    """
    # imported locally so the helper does not depend on `re` being made
    # available by one of the star imports.
    import re

    number_pattern = re.compile(r" [0-9]+ ")
    numbers = list()
    for line in alignment_lines:
        if line.startswith(line_prefix):
            numbers.extend(int(match.group()) for match in number_pattern.finditer(line))
    return numbers


def printhit(sender):
    """
    Print the chosen hit, showing the colorized alignment created with get_full_alignment(), while also
    showing the RMSD value, as well as the actual alignment created with PyMol.
    Below that the widgets for the barplots (residue span sliders, frequency threshold) are displayed.

    :param sender: linked to the button interaction.
    """

    global main_frequencies, template_frequencies, \
           slider_main, slider_template, \
           frequency_threshold, hit_data, \
           description, rmsd, btnB

    # get the position from the chosen hit.
    value = int(selecthit.value.split()[0])
    # the illusion of a new screen.
    clear_output()

    # get the data for the chosen template, the colorized html formatted full alignment description and show it.
    hit_data = get_alignment_term(dataread, value)
    description = get_full_alignment(hhs_file = hhs_input, no = value)
    display(HTML(description))

    # the numbers on the consensus rows are used to preselect the sliders further down.
    description_lines = description.splitlines()
    q_numbers = _collect_residue_numbers(description_lines, "<pre>Q Consensus")
    t_numbers = _collect_residue_numbers(description_lines, "<pre>T Consensus")

    display(HTML("<pre>The PyMol Alignment should pop up any second now...</pre>"))
    # creating the actual alignment.
    rmsd = pymol_alignment(main_data.get("pdb_id"),
                           hit_data.get("pdb_id"),
                           main_data.get("alignment_term"),
                           hit_data.get("alignment_term"),
                           main_data.get("full_term"),
                           hit_data.get("full_term"),
                           animated.value,
                           frame_quality.value)

    # getting the frequencies as a pandas DataFrame of the query and template.
    main_frequencies = read_in_frequencies(files_dict.get(f'{main_data.get("file_name")}.hhm'))
    template_frequencies = read_in_frequencies(files_dict.get(f'{hit_data.get("file_name")}.hhm'))

    # preselect the span found in the alignment; if no numbers could be parsed,
    # fall back to the whole slider range instead of failing on min()/max() of an empty list.
    if q_numbers:
        q_span = [min(q_numbers), max(q_numbers)]
    else:
        q_span = [1, len(main_frequencies)]

    if t_numbers:
        t_span = [min(t_numbers), max(t_numbers)]
    else:
        t_span = [1, len(template_frequencies)]

    # creating two sliders to chose the desired residue span for query/template.
    slider_main = widgets.IntRangeSlider(value=q_span,
                                        min=1,
                                        max=len(main_frequencies),
                                        step=1,
                                        description='Plot AS span:',
                                        disabled=False,
                                        continuous_update=False,
                                        orientation='horizontal',
                                        readout=True,
                                        readout_format='d',
                                        layout = Layout(width="80%"))

    slider_template = widgets.IntRangeSlider(value=t_span,
                                        min=1,
                                        max=len(template_frequencies),
                                        step=1,
                                        description='Plot AS span:',
                                        disabled=False,
                                        continuous_update=False,
                                        orientation='horizontal',
                                        readout=True,
                                        readout_format='d',
                                        layout = Layout(width="80%"))

    # creating a entry box to enter the desired threshold.
    frequency_threshold = widgets.BoundedFloatText(value=10,
                                                min=0,
                                                max=100.0,
                                                step=0.01,
                                                description='%:',
                                                disabled=False)

    # display the created information and widgets.
    display(Markdown("RMSD: {0:.2f} A over {1} $C_{{\\alpha}}$".format(*rmsd)))
    if animated.value:
        display(Markdown('<img src="lastrun/animation_zoom.gif?%d"></img>' % __counter__))

        def display_single_frame(x : int = 1):
            # wrap the slider position back into the range 1 .. 15 * frame_quality
            frames_per_turn = 15*frame_quality.value
            while x > frames_per_turn:
                x -= frames_per_turn
            display(Markdown(f'<img src="lastrun/animation_frames/{x}-zoom.png?%d"></img>' % __counter__))

        # Creating a slider to "rotate" the alignment.
        frame_slider = widgets.IntSlider(value=1, min=1, max=56*frame_quality.value, step=1, description='Rotate...',
                                   disabled=False,
                                   continuous_update=True,
                                   orientation='horizontal',
                                   readout=False,
                                   readout_format='d',
                                   layout=Layout(width="70%"))

        widgets.interact(display_single_frame, x=frame_slider)

    else:
        display(Markdown('<img src="lastrun/main_zoom.png?%d"></img>' % __counter__))

    display(HTML(f'<pre><b><font color=red>████ - {main_data.get("alignment_term")}</font></b></pre>'))
    display(slider_main)

    display(HTML(f'<pre><b><font color=blue>████ - {hit_data.get("alignment_term")}</font></b></pre>'))
    display(slider_template)
    display(HTML('<pre><font color = "green">Note: The frequency spans of the underlying HMM are preselected!.</font></pre>'))

    print()

    display(HTML("\n<pre>Frequency threshold in %</pre>"))
    display(frequency_threshold)

    # creating buttons for either going back to the hit selection screen or to print the barplots.
    btn_barplot = widgets.Button(description="Show Barplots", tooltip='Create the desired barplots')
    btn_barplot.on_click(plotbarplots)

    btnB = widgets.Button(description='Back', tooltip='Back to the hit selection screen')
    btnB.on_click(startover)

    # show both buttons next to each other.
    button_box = HBox([btn_barplot, btnB])
    display(button_box)
    
    
    
def startover(sender):
    """
    Go back to the hit selection screen; the hhs and hhm file searching is not repeated.

    :param sender: linked to the button interaction.
    """
    clear_output()
    # prints the header lines and creates a fresh `selecthit` widget.
    printstart()

    animation_row = HBox([animated, frame_quality])
    navigation_row = HBox([btnW, btn_b2_hhs_select])
    display(selecthit, animation_notification, animation_row, navigation_row)
                 
                 
def plotbarplots(sender):
    """
    Printing out the Barplots, while making sure the cache isn't loaded.

    Reads the span sliders and the threshold box, creates both barplots with
    plot_frequencies() and displays them, each followed by a caption.

    :param sender: linked to the button interaction.
    """
    global __counter__

    # reset counter, avoiding that cached images are shown.
    # The bounds have to be ints: random.randint() no longer accepts a float
    # such as 2e12 on current Python versions.
    __counter__ = random.randint(0, 2 * 10**12)

    threshold_percent = frequency_threshold.value

    display(HTML(f"<pre><b>New barplot prints</b> | Threshold: {threshold_percent}%</pre>"))
    display(HTML("<code>Note: This can take some time depending on the barplot's size</code>"))

    # one entry per barplot: data, title name, selected span, image file, caption color.
    panels = (
        (main_frequencies, main_data.get("file_name"), slider_main.value, "mainbarplot.png", "red"),
        (template_frequencies, hit_data.get("file_name"), slider_template.value, "templatebarplot.png", "blue"),
    )

    # creating the barplots; plot_frequencies() expects the threshold as a fraction.
    for frequencies, name, (span_start, span_end), filename, _color in panels:
        plot_frequencies(frequencies,
                         name = name,
                         threshold = threshold_percent/100,
                         span_start = span_start,
                         span_end = span_end,
                         filename = filename)

    # displaying of the created barplots with a title printed below it.
    for _frequencies, name, (span_start, span_end), filename, color in panels:
        display(Markdown(f'<img src="lastrun/{filename}?{__counter__}"></img>'))
        # ":g" keeps fractional thresholds (e.g. 0.5) instead of rounding them to a whole number.
        display(HTML(f"<code><font color = {color}>{name} - from pos. {span_start} to {span_end} - "
                     f"frequency threshold {threshold_percent:g}%</font></code>"))

    # line to separate each barplot print.
    display(HTML("<hr>"))

                 
def startwidgetwalkthrough():
    """
    Displays a little widget to show the hhs files a user can chose from.

    Every .hhs file in ``files_dict`` is listed by the text that follows the
    first word on its first line. Sets the globals ``queries_dict`` (that text
    mapped to the file path), ``selecthhs`` and ``btn_continue``.
    """
    global selecthhs, btn_continue, queries_dict

    clear_output()

    queries_dict = dict()

    for hhs_file in files_dict.values():
        if not hhs_file.endswith(".hhs"):
            continue

        # only the first line is needed, so the rest of the file is not read.
        with open(hhs_file, "r") as f:
            first_line = f.readline().splitlines()

        fields = first_line[0].split(maxsplit = 1) if first_line else []
        if len(fields) < 2:
            # empty file or a first line without anything after the first word:
            # there is nothing to list, so the file is skipped.
            continue

        queries_dict[fields[1]] = hhs_file

    # a query text shared by several files is listed once and refers to the latest file.
    queries_list = list(queries_dict)

    selecthhs = widgets.Select(
                    options=queries_list,
                    disabled=False,
                    layout=Layout(width="100%"))

    display(HTML("<code>Select your HHSearch (.hhs) file</code>"))
    display(selecthhs)

    btn_continue = widgets.Button(description='Continue', tooltip='Continue to the list of hits')
    btn_continue.on_click(firststart)
    display(btn_continue)

                 
def HHSearch_widgetinteraction_restart(sender):
    """
    Button callback which goes back to the .hhs file selection screen.

    :param sender: linked to the button interaction (not used).
    """
    startwidgetwalkthrough()
                 
def firststart(sender):
    """
    Prints the screen which lets the user select the hit for the alignment with the query.

    Resolves the selected query to its .hhs file, extracts the query and hit
    data from it, builds the display lines of the hit list and shows the list
    together with the animation options and the Submit/Back buttons.

    :param sender: linked to the button interaction.
    """
    global hhs_input, build_up, \
           main_data, dataread, \
           btnW, btn_b2_hhs_select, animated, \
           frame_quality, animation_notification

    # get the selected hhs file.
    selected_file = queries_dict.get(selecthhs.value)
    if selected_file is None:
        # nothing selectable (e.g. no .hhs file was found): keep the selection
        # screen instead of clearing it and failing on a missing file.
        display(HTML("<code>No HHSearch (.hhs) file selected.</code>"))
        return

    clear_output()
    hhs_input = selected_file

    highlight_hhs_full(hhs_input)

    # extract the query's information.
    main_data = extract_HHSearch_main(hhs_input)

    # get the list of hits.
    dataread = extract_HHSearch_data(hhs_input)

    # the select widget doesn't show \t properly. So we build our own str() list here.
    column_names = ("No", "Hit", "Description", "Prob",
                    "E-Value", "P-Value", "Score", "Cols",
                    "Query HMM", "Template HMM")
    # these two columns are printed in scientific notation.
    scientific_columns = ("E-Value", "P-Value")
    # look every column up once instead of once per row.
    columns = {name: dataread.get(name) for name in column_names}

    build_up = list()
    # NOTE(review): assumes len(dataread) is the number of hits (rows) — TODO confirm
    for x in range(len(dataread)):
        cells = [f"{columns[name][x]:.1e}" if name in scientific_columns else f"{columns[name][x]}"
                 for name in column_names]
        build_up.append("  //  ".join(cells))

    # display's the actual list for selection of the hit.
    # the select widget "selecthit" is created inside printstart().
    printstart()

    # little toggle button to make it a choice if somebody wants an animated y-axis rotating PyMol Gif.
    animated = widgets.ToggleButton(value=False,
                                    description='Animation',
                                    disabled=False,
                                    button_style='success',
                                    tooltip='Press me to enable the animation :)')

    frame_quality = widgets.IntSlider(value=1, min=1, max=4, step=1, description='Frame X',
                                   disabled=False,
                                   continuous_update=True,
                                   orientation='horizontal',
                                   readout=True,
                                   readout_format='d')

    btnW = widgets.Button(description='Submit', tooltip='Submit your chosen hit to be aligned with the query!')
    btnW.on_click(printhit)

    btn_b2_hhs_select = widgets.Button(description="Back", tooltip='Back to the HHS file Selection')
    btn_b2_hhs_select.on_click(HHSearch_widgetinteraction_restart)

    # align the buttons for better looks.
    button_box = HBox([btnW, btn_b2_hhs_select])
    button_box2 = HBox([animated, frame_quality])
    animation_notification = HTML("<pre>Do you want the alignment to be animated?</pre>\n <code><b>Note: This takes much longer! "
                                  "It's recommended to set Frame X to one or max two. Two is already time consuming.</b>\n"
                                  "The Frame X multiplier increases the amount of frames per 360° view. One equals 14 images per 360°, Two 28 images etc.</code>")
    display(selecthit,
            animation_notification,
            button_box2,
            button_box)
                 
    
      
############################
                 
                 
###### Program block ###### 
                 
# collecting the full paths of all the .hhm and .hhs files below the current working directory.
# The dict is keyed by the bare file name, so a later file with the same name replaces an earlier one.
files_dict = dict()
for dir_path, _dir_names, file_names in os.walk(os.getcwd()):
    for file in file_names:
        if file.endswith((".hhs", ".hhm")):
            # join with the platform's separator and only turn that separator into "/"
            # (Windows/nt problems...), so other characters in the path stay untouched.
            files_dict[file] = os.path.join(dir_path, file).replace(os.sep, "/")

# starting the illusion.
startwidgetwalkthrough()
                
