In [None]:
from poolData import protein, interactionList, pooledDataset, proteinProteinMatrix
from poolSolver import parallelProteinSolver, subsetSelectionProteinSolvers, bestSubsetSelectionPoolSolver 
import importlib
# Re-execute the project modules below so that edits made to their source
# files are picked up without restarting the kernel.
# NOTE(review): parallelProteinSolver and bestSubsetSelectionPoolSolver are
# imported above but are not reloaded here — confirm that is intentional.
# NOTE(review): if these modules import from one another, the reload order
# matters (dependencies should be reloaded first) — verify against the
# package layout.
importlib.reload(proteinProteinMatrix)
importlib.reload(pooledDataset)
importlib.reload(protein)
importlib.reload(subsetSelectionProteinSolvers)

In [2]:
# Known protein-protein interaction (PPI) annotations.
# To use a different reference set, change the annotation name passed below.
# NOTE(review): the following cell assigns knownPPIs again from
# "2023-Michaelis-YeastPPIs.uniprot", so on a top-to-bottom run the value
# loaded here is replaced before it is used — confirm which set is intended.
knownPPIs = protein.protein.readProteinProteinAnnotationsUsingPipeline(
    "20250813_yeast_literature_interactions_UniProtIDs",
    dropHeader=True,
)

In [None]:

# --- Load the pooled dataset and the PPI annotations ---
print("> Reads the dataset")
rawPooledData = pooledDataset.pooledDataset.readStandardizeDataset(
    '20250512_Pooled_Lysate_IPs',
    interceptOption='means_ones',
    customIntercept=None,
)
# NOTE(review): knownPPIs is also assigned in the previous cell (from
# "20250813_yeast_literature_interactions_UniProtIDs"); this assignment
# replaces it on a top-to-bottom run — confirm which set is intended.
knownPPIs = protein.protein.readProteinProteinAnnotationsUsingPipeline(
    "2023-Michaelis-YeastPPIs.uniprot",
    dropHeader=True,
)

# --- Diagnostic reports on the raw data ---
# Each entry is (message printed before the report, rank argument, type argument).
# NOTE(review): the meaning of the rank argument for type="mean" (15) is
# defined by distributionOfRankedValue and is not visible here — verify.
rankedValueReports = (
    ("  Distribution of maximum value:", 1, "value"),
    ("  Distribution of mean value:", 15, "mean"),
    ("  Distribution of third highest value :", 3, "value"),
)
for reportTitle, reportRank, reportType in rankedValueReports:
    print(reportTitle)
    rawPooledData.distributionOfRankedValue(reportRank, type=reportType)


# --- Filtering and normalization pipeline ---
# Each step is called on the result of the previous one, always starting from
# rawPooledData, so re-running this cell rebuilds processedData from scratch.
print("> Filters and normalizes the dataset.")
processedData = (
    rawPooledData
    .filterMissingValues(minnonZeros=3, verbose=True)
    .normalizePools_L2(1e-5, verbose=True, maxPreys=10)
    .filterByRankedValue(1e7, 3, type="value", verbose=True)
    .normalizeByRankedValue(3, type="mean")
)


In [None]:
#####################
# Plot for figure 3c
# The negative controls / missing baits are represented by grey squares
# in the signal and coefficient matrices.
#####################

# Only solve for the bait proteins.
trainingData = processedData.filterForBaits()

# Per-protein solver, wrapped by the parallel solver that runs the experiment.
nnls_bss = bestSubsetSelectionPoolSolver.NNLSBestSubsetSelectionProteinSolver(
    trainingData,
    stopping_criteria=10,
    model='F_global',
)
pps = parallelProteinSolver.parallelPoolSolver(trainingData, nnls_bss)
ppiMatrix = pps.solveExperiment()

# NOTE(review): this repeats the filterForBaits() call made above; it looks
# redundant unless the solver modifies trainingData in place — confirm before
# removing.
trainingData = processedData.filterForBaits()

# Versions of the data and of the solution that include the missing baits,
# used for plotting.
trainingDataMissingBaits = trainingData.includeMissingBaits()
ppiMatrixMissingBaits = ppiMatrix.includeMissingBaits()

# Plot every row of the signal matrix against every row of the transposed
# mixing matrix; no PPI annotations are passed for this figure.
ppiMatrixMissingBaits.plotCombinedMixSignalBetaPlot(
    trainingData=trainingDataMissingBaits,
    proteinIndices=range(len(trainingDataMissingBaits.signalMatrix)),
    mixingIndices=range(len(trainingDataMissingBaits.mixingMatrix.T)),
    imageScale=0.5,
    xPad=0,
    yPad=0,
    specialOrientation=True,
    ppiAnnotations=[],
    sigThreshold=5,
)

In [None]:
############################
# Plot for figure 3b:
# correlation plots
###########################

# Colours are taken from the most recently solved ppiMatrix, so a cell that
# defines ppiMatrix must have been run before this one.
colors = {
    'mixing': ppiMatrix.cMix,
    'pool': ppiMatrix.cSig,
}

# Baits to plot, in the order they should appear.
# Use the exact string as it appears in the mixing matrix!
toPlotInOrder = ['RRP6', 'SNF5', 'TMA46', 'STM1', 'MRP51', 'RBG1'] #, 'ANAPC5', 'IgG', 'PFDN1', 'PPP2CA;PPP2CB']

# Collect the bait objects in the requested order. Every entry of
# processedData.abProteins whose gene_symbol equals a requested symbol is
# kept, so duplicated symbols yield several entries.
listOfBaits = [
    bait
    for geneSymbol in toPlotInOrder
    for bait in processedData.abProteins
    if bait.gene_symbol == geneSymbol
]

# A misspelled or absent symbol would otherwise be dropped silently and simply
# not appear in the figure; report it so the omission is visible.
missingSymbols = [
    geneSymbol
    for geneSymbol in toPlotInOrder
    if not any(bait.gene_symbol == geneSymbol for bait in listOfBaits)
]
if missingSymbols:
    print("WARNING: no bait found for gene symbol(s): %s" % ", ".join(missingSymbols))

# To plot all baits instead, use this line:
# listOfBaits = processedData.abProteins.copy()

processedData.plotMixingAndSignalForList(colors=colors, baitsToPlot=listOfBaits)

In [None]:
#######################
# figure 3g:
# only RRP6 is chosen
#######################
# NOTE(review): the header says only RRP6 is chosen, but the loop below
# produces one plot per bait — presumably the RRP6 plot is the one used in the
# figure; confirm.
print("Solving all proteins:")

# Solve using the full processed dataset (not only the bait proteins).
trainingData = processedData
nnls_bss = bestSubsetSelectionPoolSolver.NNLSBestSubsetSelectionProteinSolver(
    trainingData,
    stopping_criteria=10,
    model='F_global',
)
# Builds the full solver
pps = parallelProteinSolver.parallelPoolSolver(trainingData, nnls_bss)
# Solves the experiment
ppiMatrix = pps.solveExperiment()

for iBait in range(trainingData.nBaits):
    print("> Top PPIs for %s"%trainingData.abProteins[iBait].gene_symbol)
    baitMixingIndices = [iBait]

    # Column of ppiMatrix.matrix for this bait: bait columns are offset by
    # the number of intercept columns.
    baitColumn = iBait + trainingData.nIntercepts

    # Gather (row index, coefficient) for every strictly positive coefficient.
    positiveEntries = []
    for iProtein in range(len(ppiMatrix.matrix)):
        coefficient = ppiMatrix.matrix[iProtein, baitColumn]
        if coefficient > 0:
            positiveEntries.append((iProtein, coefficient))

    # Largest coefficient first; the sort is stable, so ties keep row order.
    positiveEntries.sort(key=lambda entry: -entry[1])

    # Keep at most the 20 strongest rows.
    plotIndices = [entry[0] for entry in positiveEntries[:20]]

    ppiMatrix.plotCombinedMixSignalBetaPlot(
        trainingData=processedData,
        proteinIndices=plotIndices,
        mixingIndices=baitMixingIndices,
        imageScale=0.5,
        xPad=0,
        yPad=0,
        ppiAnnotations=knownPPIs,
        sigThreshold=5,
    )

