In [1]:
from Bio import SeqIO
from Bio.SeqUtils.ProtParam import ProteinAnalysis as PA
from Bio.Seq import Seq
from Bio.SeqRecord import SeqRecord
import os
import pandas as pd
import ipywidgets as widgets
from ipywidgets import *
import time
import stat

In [2]:
#These dictionaries are used to link dropdown options to a boolean function that determines whether or not the sequence is separated properly by the media
sizeDict = {
    "Bio-P 0.1-1.8 kDa" : lambda x : bool(100<x.molecular_weight()<1800),
    "Bio-P 0.8-4.0 kDa" : lambda x : bool(800<x.molecular_weight()<4000),
    "Bio-P 1.0-6.0 kDa" : lambda x : bool(1000<x.molecular_weight()<6000),
    "Bio-P 1.5-20.0 kDa" : lambda x : bool(1500<x.molecular_weight()<20000),
    "Bio-P 2.5-40.0 kDA" : lambda x : bool(2500<x.molecular_weight()<40000),
    "Bio-P 3.0-60.0 kDa" : lambda x : bool(3000<x.molecular_weight()<60000),
    "Bio-P 5.0-100 kDa" : lambda x : bool(5000<x.molecular_weight()<100000),
    "S-X 0.4-14.0 kDa" : lambda x : bool(400<x.molecular_weight()<14000),
    "S-X <2.0 kDA" : lambda x : bool(0<x.molecular_weight()<2000),
    "S-X <0.4 kDA" : lambda x : bool(0<x.molecular_weight()<400),
    "Bio-A 10.0 - 500 kDA" : lambda x : bool(10000<x.molecular_weight()<500000),
    "Bio-A 10.0 - 1500 kDA" : lambda x : bool(10000<x.molecular_weight()<1500000),
}
ionDict = {
    "Q Media (Triethylamine +)" : lambda x : bool(x.charge_at_pH(pHslider.value)<=-0.01),
    "S Media (Sulfite -)" : lambda x : bool(x.charge_at_pH(pHslider.value)>=0.01),
}
affinityDict ={}

#This dictionary holds the previous ones for the method selection menu
methodDict = {
    'Size Exclusion' : sizeDict,
    'Ion Exchange' : ionDict,
    'Affinity Chromatography' : affinityDict,
}



previousInputs=os.listdir('data') #List to keep track of available files for conveinience
previousInputs.remove('.ipynb_checkpoints')

style = {'description_width': 'initial'} #shorthands for widget appearances description width
autoLayout = Layout(width='auto')

In [3]:
def confirmMethod(_): #Swaps media selection to match method
    mediaSelect.options = methodDict[methodSelect.value].keys()

methodSelect = widgets.Dropdown(options=methodDict.keys(),description='Method:') #Dropdown menu to select separation method
methodButton = widgets.Button(description='Confirm Method') #Swaps media menu to match selected method
methodButton.on_click(confirmMethod)

mediaSelect = widgets.Dropdown(options=methodDict[methodSelect.value].keys(),description='Media') #Dropdown menu to select separation media
pHslider = widgets.FloatSlider(value=7.0,min=0,max=14,step=0.1,description='pH',style=style) #Slider to determine pH
selectDisplay = TwoByTwoLayout(top_left=methodSelect,top_right=methodButton,bottom_left=mediaSelect,bottom_right=pHslider,layout=Layout(width='50%')) #Arranges previous widgets
display(selectDisplay)

TwoByTwoLayout(children=(Dropdown(description='Method:', layout=Layout(grid_area='top-left'), options=('Size E…

In [4]:
def confirmInput(_): #Validates that the input file is valid
    if inputFile.value in os.listdir('data'):
        if inputFile.value not in previousInputs:
            previousInputs.append(inputFile.value)
        currentInput.value = inputFile.value
        inputFile.options = previousInputs
        errorText.value = ''
    else:
        errorText.value = f'Error: \"{inputFile.value}\" not found'

boxLayout = Layout(width='30%')

inputFile = widgets.Combobox(value='',placeholder='Enter a file to be separated',options=previousInputs,description='Unseparated data',style = style)
inputButton = widgets.Button(description='Confirm File')
currentInput = widgets.HTML(value='No file selected',description='Current input:',style = style)
errorText = widgets.HTML(value=None,description='\t',style={'text_color':'#CC0000','font_size':'16px'})


inputButton.on_click(confirmInput)



#inputDisplay = TwoByTwoLayout(top_left=inputFile,top_right=inputButton,bottom_left=currentInput,layout=Layout(width='50%'))
inputTop = Box(children=[inputFile,inputButton],layout=boxLayout)
inputBottom = Box(children=[currentInput,errorText],layout=boxLayout)
inputDisplay=VBox([inputTop,inputBottom])
#errorText = widgets.Text(value=None, disabled=True, style={'background':'white','text_color':'#CC0000','font_size':'20px'},layout=autoLayout)

display(inputDisplay)




VBox(children=(Box(children=(Combobox(value='', description='Unseparated data', options=('E_coli.faa', 'Yersin…

In [5]:
out = widgets.Output()

def checkFolder(_):
    if os.path.isdir(os.path.join('outputs',outputFile.value)):
        with out:
            header.description=f'\"{outputFile.value}\" already exists. Some files may be replaced. Continue?'
            outputFile.disabled = outputButton.disabled = True
            display(overwriteCheck)
    else:
        with out:
            dataFolder = outputFile.value
            os.mkdir(os.path.join('outputs',dataFolder))
            outputFile.disabled = outputButton.disabled = True
        

def confirmOverwrite(_):
    with out:
        out.clear_output()
        display(outputDisplay)
    
def denyOverwrite(_):
    with out:
        out.clear_output()
        display(outputDisplay)
        outputFile.value = ''
        outputFile.disabled = outputButton.disabled = False

#Generate the output name entry
outputFile = widgets.Text(placeholder='Enter a label for the output folder.', style=style,disabled=False)
outputButton = widgets.Button(description='Confirm',disabled=False)
outputDisplay = Box(children=[outputFile,outputButton])

#Generate the overwrite prompt
confirmButton = widgets.Button(description='Yes',layout=Layout(width='auto'))
denyButton = widgets.Button(description='No',layout=Layout(width='auto'))
header = widgets.HTML(description=f'\"{outputFile.value}\" already exists. Continue?',layout=Layout(justify_content='center',width='auto'),style=style)
YesNo = Box(children=[confirmButton, denyButton],layout=Layout())
overwriteCheck = VBox([header,YesNo],layout=Layout(justify_content='center',),)

#Define button functionality
outputButton.on_click(checkFolder)
confirmButton.on_click(confirmOverwrite)
denyButton.on_click(denyOverwrite)

with out:
    display(outputDisplay)

out

Output()

In [9]:
inFolder = os.path.join('data',currentInput.value)
outFolder = os.path.join('outputs',outputFile.value)
dataName = outputFile.value
method = methodDict[methodSelect.value][mediaSelect.value]
totals = []
wash = []
fractions = []

with open(inFolder) as protfile:
    for record in SeqIO.parse(inFolder,'fasta'):
        totals.append(record)
        sequence = str(record.seq)
        protparams = PA(sequence.replace('X','Q')
        if method(protparams):
            fractions.append(record)
        else:
            wash.append(record)

                        
SeqIO.write(totals,os.path.join(outFolder,dataName+"_total.faa"),"fasta")
SeqIO.write(wash,os.path.join(outFolder,dataName+"_wash.faa"),"fasta")
SeqIO.write(fractions,os.path.join(outFolder,dataName+"_separated.faa"),"fasta")

SyntaxError: invalid syntax (<ipython-input-9-c46322ad0d05>, line 14)

In [None]:
fracSlider = widgets.IntSlider(value=7,min=1,max=100,step=1,description='# of fractions',style=style)

In [None]:
dataDict = {
    'Sequence' : [],
    'Length' : [],
    'Isoelectric_point' : [],
    'Molecular Weight (Da)' : [],
    'Charge at pH {}'.format(pHslider.value) : [],
    'ID' : [],
    'Description' : []
}

#Create pandas dataframe to sort separated data

with open(os.path.join(outFolder,dataName+"_separated.faa")):
    for record in SeqIO.parse(inFolder,'fasta'):
        sequence = str(record.seq)
        protparams = PA(sequence.replace('X','Q')
        dataDict['Sequence'].append(sequence)
        dataDict



In [None]:
nfractions = 7
fraclen = round(len(df)/nfractions)
noise = 0.10
for n in range(nfractions - 1):
    fracrecords = []
    #Generate min/max range values
    fuzzymin = int(round(n*fraclen-noise*fraclen,0))
    fuzzymax = int(round((n+1)*fraclen + noise*fraclen,0))
    if fuzzymin < 0:
        fuzzymin = 0
    if fuzzymax > len(df):
        fuzzymax = len(df)
    tempdf = df.iloc[fuzzymin:fuzzymax]
    for index  in tempdf.index:
        record = SeqRecord(Seq(tempdf['sequence'][index]),id=str(tempdf["ID"][index]),description=tempdf["description"][index])
        fracrecords.append(record)
        if param_of_interest(tempdf['sequence'][index]) == True:
            if n+1 in seqhits:
                seqhits[n+1][0] += 1
                seqhits[n+1][1].append(index)
            else:
                seqhits[n+1] = [1,[index]]
    SeqIO.write(fracrecords,os.path.join(data_folder,data_name+"_fraction"+ str(n+1) + ".faa"),"fasta")

tempdf = df.iloc[int(round((nfractions-1)*fraclen-noise*fraclen,0)):]
remainderrecord = []
for index  in tempdf.index:
        record = SeqRecord(Seq(tempdf['sequence'][index]),id=str(tempdf["ID"][index]),description=tempdf["description"][index])
        remainderrecord.append(record)
        if param_of_interest(tempdf['sequence'][index]) == True:
            if nfractions in seqhits:
                seqhits[nfractions][0] += 1
                seqhits[nfractions][1].append(index)
            else:
                seqhits[nfractions] = [1,[index]]
SeqIO.write(remainderrecord,os.path.join(data_folder,data_name+"_fraction"+ str(nfractions) + ".faa"),"fasta")
tempdf

In [7]:
'Charge at pH {}'.format(pHslider.value)

'Charge at pH 7.0'