In [None]:
from poolData import protein, interactionList, pooledDataset, proteinProteinMatrix
from poolSolver import parallelProteinSolver, subsetSelectionProteinSolvers, bestSubsetSelectionPoolSolver 
import importlib
#import poolSolver
import matplotlib.pyplot as plt
# Re-import the project modules so that edits made to their source files are
# picked up without restarting the kernel.
# NOTE(review): parallelProteinSolver and bestSubsetSelectionPoolSolver are
# imported above but are not reloaded here — confirm that is intentional.
importlib.reload(proteinProteinMatrix)
importlib.reload(pooledDataset)
importlib.reload(protein)
importlib.reload(subsetSelectionProteinSolvers)

In [25]:
# Known protein-protein interactions to include.
# They are read from the named annotation set and are passed as
# `ppiAnnotations` to the figure 2f / 2g plots further down.
knownPPIs = protein.protein.readProteinProteinAnnotationsUsingPipeline(
    "20250813_human_literature_interactions_UniProtIDs",
    dropHeader=True,
)

In [None]:
# Load the desired dataset.
rawPooledData = pooledDataset.pooledDataset.readStandardizeDataset(
    '20250404_PooledIP_15x30',
    interceptOption='means_ones',
    customIntercept=None,
)

# Investigate the distribution of values in the data: each entry is
# (printed heading, rank argument, `type` argument).
for _summaryLabel, _summaryRank, _summaryType in (
    ("Distribution of maximum value:", 1, "value"),
    ("Distribution of mean value:", 15, "mean"),
    ("Distribution of third highest value :", 3, "value"),
):
    print(_summaryLabel)
    rawPooledData.distributionOfRankedValue(_summaryRank, type=_summaryType)

# Filter and normalize the dataset. Every step is applied to the result of the
# previous one, in this order; rawPooledData itself is not reassigned.
processedData = (
    rawPooledData
    .filterMissingValues(minnonZeros=3, verbose=True)
    .normalizePools_L2(1e-5, verbose=True)
    .filterByRankedValue(2e7, 3, type="value", verbose=True)
    .normalizeByRankedValue(3, type="mean")
)


In [None]:
#####################
# Combined "special" plot for figure 2a.
# The negative controls / missing baits are represented by grey squares
# in the signal and coefficient matrices.
#####################

# Only solve for bait proteins.
trainingData = processedData.filterForBaits()
nnls_bss = bestSubsetSelectionPoolSolver.NNLSBestSubsetSelectionProteinSolver(
    trainingData, stopping_criteria=10, model='F_global'
)
# Builds the full solver
pps = parallelProteinSolver.parallelPoolSolver(trainingData, nnls_bss)
# Solves the experiment
ppiMatrix = pps.solveExperiment()

# Apply includeMissingBaits() to both the bait-filtered data and the solved
# matrix, then plot every protein row against every mixing column.
# The bait-filtered trainingData from the top of the cell is reused rather than
# recomputed.
# NOTE(review): this assumes solveExperiment() does not modify trainingData in
# place — confirm.
trainingDataMissingBaits = trainingData.includeMissingBaits()
ppiMatrixMissingBaits = ppiMatrix.includeMissingBaits()
ppiMatrixMissingBaits.plotCombinedMixSignalBetaPlot(
    trainingData=trainingDataMissingBaits,
    proteinIndices=range(len(trainingDataMissingBaits.signalMatrix)),
    mixingIndices=range(len(trainingDataMissingBaits.mixingMatrix.T)),
    imageScale=0.5,
    xPad=0,
    yPad=0,
    specialOrientation = True,
    ppiAnnotations=[],
    sigThreshold=10,
)

In [None]:
############################
# Plot for figure 2b:
# correlation plots
###########################

# Plot colours, keyed by 'mixing' and 'pool', taken from the current ppiMatrix.
colors = {
    'mixing': ppiMatrix.cMix,
    'pool': ppiMatrix.cSig,
}

# Baits to plot, listed in the order they should appear.
# Use the exact string as it appears in the mixing matrix!
toPlotInOrder = ['EIF3A', 'DYNC1I2', 'EXOSC10', 'SMG1', 'DYNC1H1', 'EXOSC2']

# For each requested symbol in turn, collect every entry of
# processedData.abProteins whose gene_symbol equals it. Symbols with no match
# contribute nothing to the list.
listOfBaits = [
    abProtein
    for geneSymbol in toPlotInOrder
    for abProtein in processedData.abProteins
    if abProtein.gene_symbol == geneSymbol
]

# To plot all baits instead, use this line
#listOfBaits = processedData.abProteins.copy()

processedData.plotMixingAndSignalForList(colors=colors, baitsToPlot=listOfBaits)

In [None]:
#########################
# figure 2f: EIF3A
#########################

# Solve on the full processed dataset (no bait-only filtering in this cell).
trainingData = processedData
nnls_bss = bestSubsetSelectionPoolSolver.NNLSBestSubsetSelectionProteinSolver(
    trainingData, stopping_criteria=10, model='F_global'
)
# Builds the full solver
pps = parallelProteinSolver.parallelPoolSolver(trainingData, nnls_bss)
# Solves the experiment
ppiMatrix = pps.solveExperiment()

# Index of the bait of interest within trainingData.abProteins.
# NOTE(review): per the cell title this should be EIF3A; the print below shows
# the gene symbol actually selected — confirm it matches.
iBait = 16
print("> Top PPIs for %s"%trainingData.abProteins[iBait].gene_symbol)
baitMixingIndices = [iBait]

# Column of ppiMatrix.matrix read for this bait: the bait index shifted by
# nIntercepts (presumably the intercept columns come first — confirm).
baitColumn = iBait + trainingData.nIntercepts
baitCoefficients = [
    ppiMatrix.matrix[i, baitColumn] for i in range(len(ppiMatrix.matrix))
]

# Rows with a strictly positive coefficient for this bait, largest coefficient
# first, capped at the top 20 (slicing is a no-op when there are fewer).
plotIndices = sorted(
    (i for i, coefficient in enumerate(baitCoefficients) if coefficient > 0),
    key=lambda i: -baitCoefficients[i],
)[:20]

# mixingIndices is driven by iBait via baitMixingIndices so that the plotted
# mixing profile always belongs to the bait whose coefficients were ranked.
ppiMatrix.plotCombinedMixSignalBetaPlot(
    trainingData=trainingData,
    proteinIndices=plotIndices,
    mixingIndices=baitMixingIndices,
    imageScale=0.5,
    xPad=0,
    yPad=0,
    ppiAnnotations=knownPPIs,
    sigThreshold=5,
)


In [None]:
#######################
# figure 2g: added mixing profile of both exosome Abs
# EXOSC10 and EXOSC2
#######################

# Indices (over the rows of processedData.signalMatrix) of the prey proteins
# whose gene symbol begins with "EXOSC".
exoProteins = [
    rowIndex
    for rowIndex in range(len(processedData.signalMatrix))
    if processedData.preyProteins[rowIndex].gene_symbol[:5] == "EXOSC"
]

# Restrict the dataset to those rows, then build the solver and solve.
trainingData = processedData.filterDataset(exoProteins)
nnls_bss = bestSubsetSelectionPoolSolver.NNLSBestSubsetSelectionProteinSolver(
    trainingData,
    stopping_criteria=10,
    model='F_global',
)
pps = parallelProteinSolver.parallelPoolSolver(trainingData, nnls_bss)
ppiMatrix = pps.solveExperiment()

# Plot every remaining protein row against two mixing profiles.
# NOTE(review): 11 and 21 are presumably the mixing indices of the EXOSC10 and
# EXOSC2 antibodies named in the cell title — confirm.
ppiMatrix.plotCombinedMixSignalBetaPlot(
    trainingData=trainingData,
    proteinIndices=range(len(trainingData.signalMatrix)),
    mixingIndices=[11, 21],
    imageScale=0.5,
    xPad=0,
    yPad=1,
    ppiAnnotations=knownPPIs,
    sigThreshold=5,
)
