# In [27]: (Jupyter notebook cell prompt left over from export)
"""
Use Bokeh to visualize mod predictions

input files:
- tRNA mod calls tab-delimited text file
- tRNA mod scores text file
- protein prediction file

outputs:
- interactive html files (one per tRNA) built using bokeh

"""

def parseInput(argv = None):
    """Parse command line input and output files.

    Args:
        argv: optional list of argument strings to parse instead of the
            built-in defaults. When None, the hard-coded strepneumo test
            arguments below are parsed (presumably so the code can run
            inside a notebook cell, where sys.argv is not meaningful --
            confirm before switching the default to sys.argv).

    Returns:
        Tuple (callFile, scoreFile, protFile, outFile) holding the values
        of --mod_calls, --mod_scores, --enzyme_predictions and
        --output_file, in that order. --kingdom is validated by the
        parser but is not part of the returned tuple.

    Raises:
        SystemExit: raised by argparse when a required option is missing
            or --kingdom is not one of E, A, B.
    """
    import argparse

    #Arguments parsed when the caller does not supply any
    defaultArgs = '-c ./strepneumo_test/modCalls.txt -s ./strepneumo_test/modScores.txt -e ./strepneumo_test/output.tsv -o ./strepneumo_test'.split()

    #Argparse setup
    argParser = argparse.ArgumentParser(description = 'Visualize tRNA modification predictions as interactive Bokeh html figures')
    argParser.add_argument('-c', '--mod_calls', required = True, help = 'modification calls file')
    #The output value is used as the directory that receives one html file per tRNA
    argParser.add_argument('-o', '--output_file', required = True, help = 'output directory for the per-tRNA html files')
    #A tuple (not a set) keeps the order of the choices in the help text stable
    argParser.add_argument('-k', '--kingdom', required = False, choices = ('E', 'A', 'B'), help = 'Domain of organism')
    argParser.add_argument('-s', '--mod_scores', required = True, help = 'modification scores file')
    argParser.add_argument('-e', '--enzyme_predictions', required = True, help = 'enzyme predictions file')

    clArgs = argParser.parse_args(defaultArgs if argv is None else argv)

    return clArgs.mod_calls, clArgs.mod_scores, clArgs.enzyme_predictions, clArgs.output_file

def readTSV(inputFile):
    """Read a TSV with row and column headers.

    The first line supplies the column headers; every following non-blank
    line is one row whose first field is the row name.

    Args:
        inputFile: path of the tab-delimited text file to read.

    Returns:
        Dict of the form {row name: {column header: cell}}. The first
        column header is skipped because it labels the row names. An
        empty file gives an empty dict. If a row name occurs twice, the
        later row replaces the earlier one. Cells beyond the shorter of
        header and row are dropped.
    """

    #Store cell values: row name: {column header: cell}
    seqsDict = {}

    #The with-block closes the file; no explicit close() is needed
    with open(inputFile, 'r') as inF:

        #TSV columns; an empty file has no header line and therefore no rows
        header = inF.readline()
        if not header:
            return seqsDict

        #NOTE(review): strip() also removes a leading tab, so a header whose
        #first (corner) cell is empty would shift the columns -- confirm the
        #input files always label the first column
        cols = header.strip().split('\t')

        for line in inF:
            stripped = line.strip()

            #Skip blank lines so they do not create a row named ''
            if not stripped:
                continue

            splitLine = stripped.split('\t')

            #Row name (tRNA name) followed by one cell per column header
            seqsDict[splitLine[0]] = dict(zip(cols[1:], splitLine[1:]))

    return seqsDict

def bit2prob(bitScore):
    """Convert bit score to probability, assuming a binary outcome.

    Evaluates 1/(1 + 2**(-bitScore)): a score of 0 gives 0.5, large
    positive scores approach 1 and large negative scores approach 0.

    Args:
        bitScore: numeric score; float('inf') and float('-inf') are
            accepted and give 1.0 and 0.0.

    Returns:
        The probability as a float between 0 and 1.
    """

    try:
        return 1/(1+2**(-bitScore))
    except OverflowError:
        #For very negative finite float scores 2**(-bitScore) does not fit
        #in a float; the expression tends to 0 there, so return that limit
        return 0.0
    


def makeFig(callDict, scoreDict, protDict, outF):
    """Make one interactive cloverleaf figure per tRNA and save it as html.

    For every tRNA in callDict, each position listed in
    tRNAinfo.cloverCoords is drawn as a circle with the call for that
    position as centred text. The circle colour comes from the
    probability derived from the position's score: red shades above 0.5,
    blue shades below 0.5, white at exactly 0.5 and grey when the score
    is '-'. Hovering shows the call, the rounded score and the position.
    Each figure is written to '<outF>/<tRNA>.html'.

    Args:
        callDict: {tRNA: {position: call}}.
        scoreDict: {tRNA: {position: score}}, where a score is either '-'
            or text that float() can convert.
        protDict: accepted but not used by this function.
        outF: directory prefix for the saved html files.

    Each cloverCoords value is indexed as [0] for x, [1] for y (negated
    before plotting) and [2] for a term added to the base circle size.
    """
    import numpy as np
    from bokeh.plotting import figure, show, save
    from bokeh.models import ColumnDataSource, Grid, LinearAxis, Plot, Text, ColorBar
    from tRNAinfo import cloverCoords

    #Goals for this code
    #- Get rid of axes and grid lines
    #- adjust sizes of circles for variable loop
    #- add key for heat map
    #- general fine-tuning

    #Format shared by every hex colour built below
    hexFmt = "#%02x%02x%02x"

    #Construct color key for heat map: blues for probabilities below 0.5,
    #then reds from 0.5 upwards, all stacked at the same x position.
    #keyMap is built here but not yet added to any figure.
    xPos = 18.539
    blueProbs = np.arange(0, 0.5, 0.01)
    redProbs = np.arange(0.5, 1, 0.01)
    keyLabels = [] #ADD LABELS ONCE COLOR STUFF FIGURED OUT!

    keyX = [xPos] * (len(blueProbs) + len(redProbs))
    keyY = [-300 + p*1/0.01 for p in blueProbs]
    keyY += [-300 + p*1/0.01 for p in redProbs]
    colKey = [(255-(200*(0.5-p)), 255-(200*(0.5-p)), 255) for p in blueProbs]
    colKey += [(255, 255-(200*(p-0.5)), 255-(200*(p-0.5))) for p in redProbs]

    keyMap = ColumnDataSource({'x': keyX, 'y': keyY, 'col': colKey})

    #One figure per tRNA
    for tRNA, calls in callDict.items():

        ################################################################################################################
        #Format data vectors (one entry per cloverleaf position)
        X, Y = [], []
        col = []
        scores = []
        bases = []
        probs = []
        positions = []
        sizes = []

        for pos, coord in cloverCoords.items():

            positions.append(pos)
            bases.append(calls[pos])

            #y is negated before plotting
            X.append(coord[0])
            Y.append(-coord[1])

            rawScore = scoreDict[tRNA][pos]

            if rawScore == '-':
                #No score for this position: grey circle, placeholder values
                probs.append('None')
                scores.append('None')
                col.append(hexFmt % (235, 235, 235))

            else:
                prob = bit2prob(float(rawScore))
                probs.append(prob)

                #Handle -inf values which I haven't fixed with laplace pseudocount
                try:
                    scores.append(round(float(rawScore), 1))
                except OverflowError:
                    scores.append(float(rawScore))

                #Color the heat map based on mod score: the further the
                #probability is from 0.5, the more saturated the shade
                if prob > 0.5:
                    fade = int(round(255-(200*(prob-0.5))))
                    rgb = (255, fade, fade)
                elif prob < 0.5:
                    fade = int(round(255-(200*(0.5-prob))))
                    rgb = (fade, fade, 255)
                else:
                    rgb = (255, 255, 255)

                col.append(hexFmt % rgb)

            #Circle size: positions whose name starts with 'e' get a smaller base
            baseSize = 12 if pos[0] == 'e' else 13
            sizes.append(coord[2] + baseSize)

        ##########################################################################################################

        cloverSource = ColumnDataSource({'x': X, 'y': Y, 'col': col,
                                         'bases': bases, 'scores': scores,
                                         'probs': probs, 'positions': positions,
                                         'sizes': sizes})

        #Fields shown in the mouse-over pop up
        Tooltips = [('modification', '@bases'),
                    ('score', '@scores'),
                    ('position', '@positions')]

        #Instantiate cloverleaf structure
        clover = figure(title = '{0}'.format(tRNA), x_axis_label = '', y_axis_label = '',
                        toolbar_location=None, tools = 'hover', tooltips = Tooltips)

        #Circles are drawn from the data source so the mouse-over pop ups work
        clover.circle('x', 'y', size = 'sizes', fill_color = 'col',
                      line_color = 'grey', name = 'positions', source = cloverSource)

        #Call text centred on each circle
        baseText = Text(x = 'x', y = 'y',
                        text = 'bases', text_color = 'black',
                        text_align = 'center', text_baseline = 'middle')
        clover.add_glyph(cloverSource, baseText)

        #Remove excess visual info from default plotting
        for axis in (clover.xaxis, clover.yaxis):
            axis.major_tick_line_color = None
            axis.minor_tick_line_color = None
            axis.major_label_text_font_size = '0pt'
        for grid in (clover.xgrid, clover.ygrid):
            grid.grid_line_color = None

        save(clover, filename = '{0}/{1}.html'.format(outF, tRNA))
        
        

    
    
    

def main():
    """Parse the inputs, read the call and score tables, and draw the figures.

    The calls file and the scores file are each loaded with readTSV. The
    enzyme predictions value returned by parseInput is handed to makeFig
    as-is (it is a file path; nothing in this function reads the file).
    """

    #Receive input commands
    calls, scores, proteins, outFile = parseInput()

    callsDict = readTSV(calls)
    scoresDict = readTSV(scores)

    makeFig(callsDict, scoresDict, proteins, outFile)
    
    
    
    
#Run the pipeline only when executed as a script (or notebook cell), not on import
if __name__ == '__main__':
    main()
    