In [1]:
from Bio import SeqIO
from Bio.SeqUtils.ProtParam import ProteinAnalysis as PA
from Bio.Seq import Seq
from Bio.SeqRecord import SeqRecord
import os
import pandas as pd
import ipywidgets as widgets
from ipywidgets import *
import time
import stat

In [120]:
# Each dictionary below maps a dropdown label to a predicate. A predicate takes
# an analysis object and returns True when the sequence is separated properly
# by the media that the label names.

def _massBetween(lower, upper):
    """Build a predicate that is True when lower < molecular_weight() < upper.

    Both bounds are exclusive. The argument passed to the returned predicate
    only needs to provide a ``molecular_weight()`` method.
    """
    def accepts(protein):
        return bool(lower < protein.molecular_weight() < upper)
    return accepts


def _chargeAtMost(protein):
    """True when the charge at the pH slider's current value is <= -0.01."""
    # pHslider is created in a later cell; it is looked up when the predicate runs.
    return bool(protein.charge_at_pH(pHslider.value) <= -0.01)


def _chargeAtLeast(protein):
    """True when the charge at the pH slider's current value is >= 0.01."""
    # pHslider is created in a later cell; it is looked up when the predicate runs.
    return bool(protein.charge_at_pH(pHslider.value) >= 0.01)


# (label, lower bound, upper bound): the bounds are the label's kDa figures
# multiplied by 1000; "<" media use 0 as their lower bound.
_sizeRanges = (
    ("Bio-P 0.1-1.8 kDa", 100, 1800),
    ("Bio-P 0.8-4.0 kDa", 800, 4000),
    ("Bio-P 1.0-6.0 kDa", 1000, 6000),
    ("Bio-P 1.5-20.0 kDa", 1500, 20000),
    ("Bio-P 2.5-40.0 kDA", 2500, 40000),
    ("Bio-P 3.0-60.0 kDa", 3000, 60000),
    ("Bio-P 5.0-100 kDa", 5000, 100000),
    ("S-X 0.4-14.0 kDa", 400, 14000),
    ("S-X <2.0 kDA", 0, 2000),
    ("S-X <0.4 kDA", 0, 400),
    ("Bio-A 10.0 - 500 kDA", 10000, 500000),
    ("Bio-A 10.0 - 1500 kDA", 10000, 1500000),
)

sizeDict = {label: _massBetween(low, high) for label, low, high in _sizeRanges}

ionDict = {
    "Q Media (Triethylamine +)" : _chargeAtMost,
    "S Media (Sulfite -)" : _chargeAtLeast,
}

# No affinity media are defined yet, so this menu is empty.
affinityDict = {}

# Top-level menu: separation method -> dictionary of that method's media.
methodDict = {
    'Size Exclusion' : sizeDict,
    'Ion Exchange' : ionDict,
    'Affinity Chromatography' : affinityDict,
}



# List to keep track of previous files for convenience (offered as suggestions
# in the input box). Jupyter's checkpoint folder is filtered out rather than
# removed with list.remove(), which raises ValueError when the folder is absent.
previousInputs = [entry for entry in os.listdir('data') if entry != '.ipynb_checkpoints']
style = {'description_width': 'initial'} #shorthand for widget appearance: description width
autoLayout = Layout(width='auto') #shorthand layout with automatic width

In [106]:
def confirmMethod(_):
    """Swap the media dropdown's options to those of the selected method.

    The single argument is supplied by the widget that triggers the callback
    and is ignored.
    """
    mediaForMethod = methodDict[methodSelect.value]
    mediaSelect.options = mediaForMethod.keys()

# Method picker plus a button that re-applies the media list for the chosen method.
methodSelect = widgets.Dropdown(options=methodDict.keys(),description='Method:')
methodButton = widgets.Button(description='Confirm Method')
methodButton.on_click(confirmMethod)

# Media picker, seeded with the media of the initially selected method.
mediaSelect = widgets.Dropdown(options=methodDict[methodSelect.value].keys(),description='Media')

# Keep the media list in step with the method even when the button is not
# clicked; otherwise a changed method paired with a stale media label makes the
# later methodDict[method][media] lookup raise KeyError.
methodSelect.observe(confirmMethod, names='value')

# pH used by the ion-exchange predicates and for the charge column.
pHslider = widgets.FloatSlider(value=7.0,min=0,max=14,step=0.1,description='pH',style=style)

# 2x2 grid: method | confirm button on top, media | pH slider below.
selectDisplay = TwoByTwoLayout(top_left=methodSelect,top_right=methodButton,bottom_left=mediaSelect,bottom_right=pHslider,layout=Layout(width='50%'))
display(selectDisplay)

TwoByTwoLayout(children=(Dropdown(description='Method:', layout=Layout(grid_area='top-left'), options=('Size E…

In [27]:
def confirmInput(_):
    """Accept the typed file name if it is listed in the 'data' folder.

    On success the name is added to ``previousInputs`` (if new), shown as the
    current input, offered as a suggestion, and any error text is cleared.
    Otherwise an error message is written to ``errorText`` and nothing else
    changes. The callback argument is ignored.
    """
    chosen = inputFile.value

    # Guard clause: report unknown names and leave the current selection alone.
    if chosen not in os.listdir('data'):
        errorText.value = f'Error: \"{chosen}\" not found'
        return

    if chosen not in previousInputs:
        previousInputs.append(chosen)
    currentInput.value = chosen
    inputFile.options = previousInputs
    errorText.value = ''

# Shared layout for the two rows of the input panel.
boxLayout = Layout(width='30%')

# Free-text entry with suggestions taken from previousInputs.
inputFile = widgets.Combobox(
    value='',
    placeholder='Enter a file to be separated',
    options=previousInputs,
    description='Unseparated data',
    style = style,
)
inputButton = widgets.Button(description='Confirm File')

# Read-only labels: the accepted file name and, next to it, any error message.
currentInput = widgets.HTML(
    value='No file selected',
    description='Current input:',
    style = style,
)
errorText = widgets.HTML(
    value=None,
    description='\t',
    style={'text_color':'#CC0000','font_size':'16px'},
)

# Only the button confirms the file; submitting with Enter
# (inputFile.on_submit) is currently not wired up.
inputButton.on_click(confirmInput)

# Top row: entry + button. Bottom row: current file + error text.
inputTop = Box(children=[inputFile,inputButton],layout=boxLayout)
inputBottom = Box(children=[currentInput,errorText],layout=boxLayout)
inputDisplay=VBox([inputTop,inputBottom])

display(inputDisplay)




VBox(children=(Box(children=(Combobox(value='', description='Unseparated data', options=('E_coli.faa', 'test.t…

In [83]:
# Output area that hosts the folder-name prompt and the overwrite question.
out = widgets.Output()

def checkFolder(_):
    """Create the output folder for the entered label, or ask before reusing it.

    If 'outputs/<label>' already exists, the overwrite prompt is shown;
    otherwise the folder is created, including a missing 'outputs' parent.
    In both cases the entry box and its button are disabled afterwards.
    An empty or whitespace-only label is ignored. The callback argument is
    ignored.
    """
    label = outputFile.value

    # An empty label would resolve to the 'outputs' directory itself and
    # produce a misleading '"" already exists' prompt, so do nothing.
    if not label.strip():
        return

    target = os.path.join('outputs', label)
    with out:
        if os.path.isdir(target):
            header.description=f'\"{label}\" already exists. Some files may be replaced. Continue?'
            outputFile.disabled = outputButton.disabled = True
            display(overwriteCheck)
        else:
            # makedirs also creates 'outputs' when it is missing, which plain
            # mkdir does not; exist_ok covers a folder created since the
            # isdir check above.
            os.makedirs(target, exist_ok=True)
            outputFile.disabled = outputButton.disabled = True
        

def _redrawOutputPrompt():
    """Clear the ``out`` area and show the folder-name entry row again."""
    out.clear_output()
    display(outputDisplay)


def confirmOverwrite(_):
    """Handle 'Yes': drop the overwrite question and keep the chosen label.

    The entry box and button are left in whatever state they were in.
    """
    with out:
        _redrawOutputPrompt()


def denyOverwrite(_):
    """Handle 'No': drop the overwrite question and let the user pick a new label."""
    with out:
        _redrawOutputPrompt()
        # Empty the entry and re-enable both controls for another attempt.
        outputFile.value = ''
        outputFile.disabled = False
        outputButton.disabled = False

# --- Output-folder entry row -------------------------------------------------
outputFile = widgets.Text(
    placeholder='Enter a label for the output folder.',
    style=style,
    disabled=False,
)
outputButton = widgets.Button(description='Confirm',disabled=False)
outputDisplay = Box(children=[outputFile,outputButton])

# --- Overwrite question (displayed by checkFolder when the folder exists) ----
confirmButton = widgets.Button(description='Yes',layout=Layout(width='auto'))
denyButton = widgets.Button(description='No',layout=Layout(width='auto'))
# Initial header text; checkFolder replaces it before showing the prompt.
header = widgets.HTML(
    description=f'\"{outputFile.value}\" already exists. Continue?',
    layout=Layout(justify_content='center',width='auto'),
    style=style,
)
YesNo = Box(children=[confirmButton, denyButton],layout=Layout())
overwriteCheck = VBox([header,YesNo],layout=Layout(justify_content='center',),)

# --- Wire each button to its callback ----------------------------------------
for _button, _callback in (
    (outputButton, checkFolder),
    (confirmButton, confirmOverwrite),
    (denyButton, denyOverwrite),
):
    _button.on_click(_callback)

# Show the entry row inside the output area, then render the area as the cell result.
with out:
    display(outputDisplay)

out

Output()

In [121]:
# Resolve the current widget selections: input path and the media predicate.
dataFile = os.path.join('data',inputFile.value)
method = methodDict[methodSelect.value][mediaSelect.value]

# Read the slider once so the column label and the computed charges agree.
currentPH = pHslider.value
chargeColumn = f'charge at ph {currentPH}'

# Column-oriented accumulator: one list per output column.
dataDict = {
    'sequence' : [],
    'MW' : [],
    chargeColumn : [],
    'description' : [],
    'ID' : [],
}

with open(dataFile) as protfile:
    for record in SeqIO.parse(protfile,"fasta"):
        sequence = str(record.seq)
        # 'X' is swapped for 'Q' before analysis
        # (presumably a stand-in for unknown residues; TODO confirm).
        protparams = PA(sequence.replace("X","Q"))
        if not method(protparams):
            continue  # not separated by the selected media
        dataDict['ID'].append(record.id)
        dataDict['description'].append(record.description)
        dataDict['sequence'].append(sequence)
        dataDict['MW'].append(protparams.molecular_weight())
        dataDict[chargeColumn].append(round(protparams.charge_at_pH(currentPH),2))

# Highest charge first, with a fresh 0..n-1 index.
df = (
    pd.DataFrame(dataDict)
    .sort_values(by=[chargeColumn], ascending=False)
    .reset_index(drop=True)
)
df

Unnamed: 0,sequence,MW,charge at ph 7.0,description,ID
0,MSTLILPDWPMPARVRACSTTRHGGVSVSPYDSLNLGTHVGDVATH...,26249.8688,-0.02,tr|Q7CK84|Q7CK84_YERPE Putative uncharacterize...,tr|Q7CK84|Q7CK84_YERPE
1,MLQAERHKIICTHVQQQGSALVTELSVLCQVSQETIRRDLTVLQKK...,30240.3375,-0.02,tr|Q7CKA8|Q7CKA8_YERPE Putative deoR-family re...,tr|Q7CKA8|Q7CKA8_YERPE
2,MKIPIPSIGHVALLASAILFASSANAASDEDGIIIYNAQHESFAKS...,36335.8311,-0.03,tr|Q7CJD9|Q7CJD9_YERPE Iron(III)-binding perip...,tr|Q7CJD9|Q7CJD9_YERPE
3,MLKWQATTACEDPAEGEELHRLVADIPIGILQHITLRRQFWRTACA...,28219.2647,-0.03,tr|Q7CK65|Q7CK65_YERPE Putative uncharacterize...,tr|Q7CK65|Q7CK65_YERPE
4,MAQQVQLSATVAESQLGQRLDQALAELFPDYSRSRIKEWILDSRVT...,36705.6883,-0.04,sp|Q8ZBV7|RLUD_YERPE Ribosomal large subunit p...,sp|Q8ZBV7|RLUD_YERPE
...,...,...,...,...,...
2400,MDNLRFSSAPTADSIDASIAQHYPDCEPVAVIGYACHFPESPDGET...,348794.3311,-89.33,tr|Q9Z373|Q9Z373_YERPE HMWP1 nonribosomal pept...,tr|Q9Z373|Q9Z373_YERPE
2401,MRPTMNKNLYRIIFNKVRGMMIVVADIAASGRASSSPSSGLGHTQH...,261177.3958,-106.41,tr|Q0WE34|Q0WE34_YERPE Putative hemolysin OS=Y...,tr|Q0WE34|Q0WE34_YERPE
2402,MLNYFRAILISWKWKLSHHTSRPHDVKEKGHPRKIKVVAWITLFFQ...,308572.7179,-111.91,tr|Q7CFY4|Q7CFY4_YERPE Putative invasin OS=Yer...,tr|Q7CFY4|Q7CFY4_YERPE
2403,MPNGNEMAGFYIDKLSLSQRLSIVSETYDRVNKNNKKEKLKYSYDD...,353076.3328,-119.72,tr|Q7CGR6|Q7CGR6_YERPE Putative virulence dete...,tr|Q7CGR6|Q7CGR6_YERPE
