In [None]:
# coding: utf-8
import ast
import os
import re
import subprocess
import sys
from pprint import pprint

import numpy as np
from tqdm import tqdm

from func import normalization as norm

##############################################
# JUPYTER-NOTEBOOK FOR COMPUTING PERFORMANCE #
# OF BFG-PCA SCREENINGS                      #
# Last modified by Daniel Evans-Yamamoto     #
##############################################

# Run configuration shared by every plotting cell below.
PATH       = os.path.abspath(".") # Root directory that holds the data. Default is the current working directory (the BFG-PCA folder).
run_name   = "test"
script_dir = '%s/Rscripts'%(PATH)
plot_dir   = '%s/Data/%s/plots'%(PATH,run_name)

# Create <plot_dir>/csv (and plot_dir itself as an intermediate directory).
# exist_ok=True makes the cell safe to re-run and also creates csv/ when
# plot_dir already exists from an earlier run without it.
os.makedirs("%s/csv"%(plot_dir), exist_ok=True)

In [None]:
# List of plots which will be generated
# For more details, go to the BFG-PCA wiki.
# 1. Distribution of marginal barcode abundance (histogram; representing abundance of haploid strains)
# 2. Distribution of barcode replicates per ORF (histogram)
# 3. Distribution of fused barcode abundance in each condition (histogram and heatmap)  
# 4. Distribution of raw interaction signal in each selection condition (heatmap)
# 5. Rank plot of autoactivity of each strain (scatter plot)
# 6. Correlation plot of raw interaction signals between BC1-BC1 and BC2-BC2 barcode fusion replicates for each fused barcodes (scatter plot)
# 7. Correlation plot of raw interaction signals between diploid replicates for each PPI pair, excluding combinations which share the same haploid strains before mating (scatter plot)
# 8. Correlation plot of raw interaction signals between screening replicates (scatter plot)
# 9. Distribution of number of replicates observed for each protein pair (histogram)
#10. Rank plot of top hits in the screening (Scatter plot + heatmap)

# Input files (and two directories) read by the plotting cells.
# Every path is rooted at <PATH>/Data/<run_name>.
_run_key = (PATH, run_name)

# Outputs of the normalization step
haploid_marginal     = '%s/Data/%s/normalization/Haploid_F.csv' % _run_key
haploid_barcode_reps = '%s/Data/%s/normalization/Haploid_n.csv' % _run_key
diploid_F            = '%s/Data/%s/normalization/heatmap_F' % _run_key  # directory, not a file
diploid_s            = '%s/Data/%s/normalization/heatmap_s' % _run_key  # directory, not a file
autoactivity         = '%s/Data/%s/normalization/autoactivity_level.csv' % _run_key
corr_BFGtype         = '%s/Data/%s/normalization/BFG_correlation.csv' % _run_key
corr_diploids        = '%s/Data/%s/normalization/diploid_correlation.csv' % _run_key
corr_screening       = '%s/Data/%s/normalization/screening_rep_correlation.csv' % _run_key

# Score table used for the rank plot; point this at your best performing condition.
scores               = '%s/Data/%s/MCC/SCORES_BFG-PCA_MTX200_Median_Percentile_45_for_plot.csv' % _run_key

In [None]:
# Plot 1: distribution of marginal barcode abundance
# (histogram; representing abundance of haploid strains).
# Runs Rscripts/strain_histogram_haploid.r on Haploid_F.csv and writes <plot_dir>/Haploid_F.pdf.
"""
Data structure: Haploid_F.csv
Screeening	    Ori	    Strain	            F
PCA Control_1	DHFR3	DHFR3_ScPRS_03-BC1	0.099510531
PCA Control_1	DHFR3	DHFR3_ScPRS_03-BC2	0.034037403
"""

out_file = haploid_marginal.split("/")[-1].split(".")[0]
panel_width  = 2.5 #cm
panel_height = 2.5 #cm
pdf_width    = 8   #cm
pdf_height   = 8   #cm

# Text size is set to 9.5pt throughout the plots

# Arguments are passed as a list (no shell), so paths containing spaces or
# shell metacharacters reach Rscript intact.
rscript_args = [
    "Rscript",
    "%s/strain_histogram_haploid.r"%(script_dir),
    haploid_marginal,
    "%s/%s.pdf"%(plot_dir,out_file),
    "%.2f"%(panel_width),
    "%.2f"%(panel_height),
    "%.2f"%(pdf_width),
    "%.2f"%(pdf_height),
]
command = " ".join(rscript_args)
print("Executing ;\n\n\n%s\n\n"%(command))

# stderr is merged into stdout so R warnings and errors show up in the notebook.
# subprocess.run raises FileNotFoundError if Rscript is not on the PATH.
result = subprocess.run(rscript_args, stdout=subprocess.PIPE, stderr=subprocess.STDOUT, universal_newlines=True)
print(result.stdout)
if result.returncode != 0:
    print("Rscript exited with status %d"%(result.returncode))


In [None]:
# Plot 2: distribution of barcode replicates per ORF (histogram).
# Runs Rscripts/strain_histogram_barcode_n.r on Haploid_n.csv and writes <plot_dir>/Haploid_n.pdf.
"""
Data structure: Haploid_n.csv
Cond	Rep	ori	    n
Control	1	DHFR3	2
Control	1	DHFR3	1
"""

out_file = haploid_barcode_reps.split("/")[-1].split(".")[0]
panel_width  = 2.5 #cm
panel_height = 2.5 #cm
pdf_width    = 8   #cm
pdf_height   = 8   #cm

# Arguments are passed as a list (no shell), so paths containing spaces or
# shell metacharacters reach Rscript intact.
rscript_args = [
    "Rscript",
    "%s/strain_histogram_barcode_n.r"%(script_dir),
    haploid_barcode_reps,
    "%s/%s.pdf"%(plot_dir,out_file),
    "%.2f"%(panel_width),
    "%.2f"%(panel_height),
    "%.2f"%(pdf_width),
    "%.2f"%(pdf_height),
]
command = " ".join(rscript_args)
print("Executing ;\n\n\n%s\n\n"%(command))

# Capture the script output (stdout + stderr) and print it in the cell;
# os.system only returned the exit status and sent the output elsewhere.
# subprocess.run raises FileNotFoundError if Rscript is not on the PATH.
result = subprocess.run(rscript_args, stdout=subprocess.PIPE, stderr=subprocess.STDOUT, universal_newlines=True)
print(result.stdout)
if result.returncode != 0:
    print("Rscript exited with status %d"%(result.returncode))


In [None]:
# Plot 3: distribution of fused barcode abundance in each condition (histogram and heatmap).
# Merges every CSV in the heatmap_F directory into one file, then runs Rscripts/heatmap_F.r on it.
"""
Data structure: F_heatmap_*.csv
Method	Condition	Replicate	bait	bait_ORF	bait_ID	prey	prey_ORF	prey_ID	bait_bc	prey_bc	Raw	F
PCA	Control	1	DHFR3_ScPRS_03	NSP1	1BC1	DHFR12_ScPRS_01	NSP1	1BC1	BC1	BC1	82	0.00462411
PCA	Control	1	DHFR3_ScPRS_03	NSP1	1BC2	DHFR12_ScPRS_01	NSP1	1BC1	BC2	BC1	39	0.002241707"""

# Sorted so the row order of the merged CSV does not depend on the
# arbitrary order in which os.listdir returns the files.
Fs = sorted(name for name in os.listdir(diploid_F) if name.endswith(".csv"))

# One shared header row; the first row of every input file is skipped.
LL = [["Method","Condition","Replicate","bait","bait_ORF","bait_ID","prey","prey_ORF","prey_ID","bait_bc","prey_bc","Raw","F"]]
for csv_name in Fs:
    LL += norm.csv2LL("%s/%s"%(diploid_F,csv_name))[1:]

# Make sure the output folder exists even if plot_dir was created by an earlier run.
os.makedirs("%s/csv"%(plot_dir), exist_ok=True)
f = "%s/csv/F_heatmap_merged.csv"%(plot_dir)
norm.LL2csv(LL,f)

out_file = f.split("/")[-1].split(".")[0]
panel_width  = 4.0 #cm
panel_height = 4.0 #cm
pdf_width    = 30.0#cm
pdf_height   = 20.0#cm

# Arguments are passed as a list (no shell), so paths containing spaces or
# shell metacharacters reach Rscript intact.
rscript_args = [
    "Rscript",
    "%s/heatmap_F.r"%(script_dir),
    f,
    "%s/%s.pdf"%(plot_dir,out_file),
    "%.2f"%(panel_width),
    "%.2f"%(panel_height),
    "%.2f"%(pdf_width),
    "%.2f"%(pdf_height),
]
command = " ".join(rscript_args)
print("Executing ;\n\n\n%s\n\n"%(command))

# Capture the script output (stdout + stderr) and print it in the cell;
# os.system only returned the exit status and sent the output elsewhere.
# subprocess.run raises FileNotFoundError if Rscript is not on the PATH.
result = subprocess.run(rscript_args, stdout=subprocess.PIPE, stderr=subprocess.STDOUT, universal_newlines=True)
print(result.stdout)
if result.returncode != 0:
    print("Rscript exited with status %d"%(result.returncode))


In [None]:
# Plot 4: distribution of raw interaction signal (s) in each selection condition (heatmap).
# Merges every CSV in the heatmap_s directory into one file, then runs Rscripts/heatmap_s.r on it.
"""
Data structure: s_heatmap_*.csv
Method	Condition	Replicate	bait	prey	s
PCA	MTX1	72	1BC1	1BC1	1.601097664
PCA	MTX1	72	1BC2	1BC1	2.461562041
"""

# Sorted so the row order of the merged CSV does not depend on the
# arbitrary order in which os.listdir returns the files.
Fs = sorted(name for name in os.listdir(diploid_s) if name.endswith(".csv"))

# One shared header row; the first row of every input file is skipped.
LL = [["Method","Condition","Replicate","bait","prey","s"]]
for csv_name in Fs:
    LL += norm.csv2LL("%s/%s"%(diploid_s,csv_name))[1:]

# Make sure the output folder exists even if plot_dir was created by an earlier run.
os.makedirs("%s/csv"%(plot_dir), exist_ok=True)
f = "%s/csv/s_heatmap_merged.csv"%(plot_dir)
norm.LL2csv(LL,f)

out_file = f.split("/")[-1].split(".")[0]
panel_width  = 4.0 #cm
panel_height = 4.0 #cm
pdf_width    = 30.0#cm
pdf_height   = 20.0#cm

# Arguments are passed as a list (no shell), so paths containing spaces or
# shell metacharacters reach Rscript intact.
rscript_args = [
    "Rscript",
    "%s/heatmap_s.r"%(script_dir),
    f,
    "%s/%s.pdf"%(plot_dir,out_file),
    "%.2f"%(panel_width),
    "%.2f"%(panel_height),
    "%.2f"%(pdf_width),
    "%.2f"%(pdf_height),
]
command = " ".join(rscript_args)
print("Executing ;\n\n\n%s\n\n"%(command))

# Capture the script output (stdout + stderr) and print it in the cell;
# os.system only returned the exit status and sent the output elsewhere.
# subprocess.run raises FileNotFoundError if Rscript is not on the PATH.
result = subprocess.run(rscript_args, stdout=subprocess.PIPE, stderr=subprocess.STDOUT, universal_newlines=True)
print(result.stdout)
if result.returncode != 0:
    print("Rscript exited with status %d"%(result.returncode))


In [None]:
# Plot 5: rank plot of autoactivity of each strain (scatter plot).
# Runs Rscripts/autoactivity_scatter.r on autoactivity_level.csv and writes <plot_dir>/autoactivity_level.pdf.
"""
Data structure: autoactivity_level.csv
Method	Strain_type	Condition	Replicate	Strain	AA_median	AA_median_rank
PCA	DHFR12	MTX1	72	Prey_DHFR12_ScPRS_13_BC2	2.704718695	1
PCA	DHFR12	MTX1	72	Prey_DHFR12_ScPRS_04_BC2	2.700159548	2
"""

out_file = autoactivity.split("/")[-1].split(".")[0]
panel_width  = 3   #cm
panel_height = 1.5 #cm
pdf_width    = 20  #cm
pdf_height   = 20  #cm

# Arguments are passed as a list (no shell), so paths containing spaces or
# shell metacharacters reach Rscript intact.
rscript_args = [
    "Rscript",
    "%s/autoactivity_scatter.r"%(script_dir),
    autoactivity,
    "%s/%s.pdf"%(plot_dir,out_file),
    "%.2f"%(panel_width),
    "%.2f"%(panel_height),
    "%.2f"%(pdf_width),
    "%.2f"%(pdf_height),
]
command = " ".join(rscript_args)
print("Executing ;\n\n\n%s\n\n"%(command))

# Capture the script output (stdout + stderr) and print it in the cell;
# os.system only returned the exit status and sent the output elsewhere.
# subprocess.run raises FileNotFoundError if Rscript is not on the PATH.
result = subprocess.run(rscript_args, stdout=subprocess.PIPE, stderr=subprocess.STDOUT, universal_newlines=True)
print(result.stdout)
if result.returncode != 0:
    print("Rscript exited with status %d"%(result.returncode))





In [None]:
# Plot 6: correlation of raw interaction signals between BC1-BC1 and BC2-BC2
# barcode fusion replicates for each fused barcode (scatter plot).
# Runs Rscripts/BFG_correlation_scatter.r on BFG_correlation.csv and writes <plot_dir>/BFG_correlation.pdf.
"""
Data structure: BFG_correlation.csv
Method	Conditon	Replicate	score_type	UpUp	DnDn
PCA	MTX1	72	s	1.634691256	1.567504071
PCA	MTX1	72	s	3.052366118	1.870757965
"""

out_file = corr_BFGtype.split("/")[-1].split(".")[0]
panel_width  = 2.5 #cm
panel_height = 2.5 #cm
pdf_width    = 20  #cm
pdf_height   = 20  #cm

# Arguments are passed as a list (no shell), so paths containing spaces or
# shell metacharacters reach Rscript intact.
rscript_args = [
    "Rscript",
    "%s/BFG_correlation_scatter.r"%(script_dir),
    corr_BFGtype,
    "%s/%s.pdf"%(plot_dir,out_file),
    "%.2f"%(panel_width),
    "%.2f"%(panel_height),
    "%.2f"%(pdf_width),
    "%.2f"%(pdf_height),
]
command = " ".join(rscript_args)
print("Executing ;\n\n\n%s\n\n"%(command))

# stderr is merged into stdout so R warnings and errors show up in the notebook,
# and a non-zero exit status is reported instead of being dropped.
# subprocess.run raises FileNotFoundError if Rscript is not on the PATH.
result = subprocess.run(rscript_args, stdout=subprocess.PIPE, stderr=subprocess.STDOUT, universal_newlines=True)
print(result.stdout)
if result.returncode != 0:
    print("Rscript exited with status %d"%(result.returncode))


In [None]:
# Plot 7: correlation of raw interaction signals between diploid replicates for each PPI pair,
# excluding combinations which share the same haploid strains before mating (scatter plot).
# Runs Rscripts/diploid_correlation_scatter.r on diploid_correlation.csv and writes <plot_dir>/diploid_correlation.pdf.
"""
Data structure: diploid_correlation.csv
Method	Condition	Replicate	Diploid1	Diploid2
PCA	MTX1	72	2.120713283	2.557337175
PCA	MTX1	72	2.239626565	1.813868495
"""

out_file = corr_diploids.split("/")[-1].split(".")[0]
panel_width  = 2.5  #cm
panel_height = 2.5  #cm
pdf_width    = 12 #cm
pdf_height   = 7 #cm

# Arguments are passed as a list (no shell), so paths containing spaces or
# shell metacharacters reach Rscript intact.
rscript_args = [
    "Rscript",
    "%s/diploid_correlation_scatter.r"%(script_dir),
    corr_diploids,
    "%s/%s.pdf"%(plot_dir,out_file),
    "%.2f"%(panel_width),
    "%.2f"%(panel_height),
    "%.2f"%(pdf_width),
    "%.2f"%(pdf_height),
]
command = " ".join(rscript_args)
print("Executing ;\n\n\n%s\n\n"%(command))

# stderr is merged into stdout so R warnings and errors show up in the notebook,
# and a non-zero exit status is reported instead of being dropped.
# subprocess.run raises FileNotFoundError if Rscript is not on the PATH.
result = subprocess.run(rscript_args, stdout=subprocess.PIPE, stderr=subprocess.STDOUT, universal_newlines=True)
print(result.stdout)
if result.returncode != 0:
    print("Rscript exited with status %d"%(result.returncode))


In [None]:
# Plot 8: correlation of raw interaction signals between screening replicates (scatter plot).
# Runs Rscripts/rep_correlation_scatter.r on screening_rep_correlation.csv and writes
# <plot_dir>/screening_rep_correlation.pdf.
"""
Data structure: screening_rep_correlation.csv
Method	Conditon	ORF-pair	Replicate1	Replicate2	Rep1	Rep2
PCA	MTX1	NSP1-NSP1	72	96	2.031329852	1.731279687
PCA	MTX1	NSP1-NUP100	72	96	2.861862336	2.192066728
"""

out_file = corr_screening.split("/")[-1].split(".")[0]
panel_width  = 2.5  #cm
panel_height = 2.5  #cm
pdf_width    = 15 #cm
pdf_height   = 5 #cm

# Arguments are passed as a list (no shell), so paths containing spaces or
# shell metacharacters reach Rscript intact.
rscript_args = [
    "Rscript",
    "%s/rep_correlation_scatter.r"%(script_dir),
    corr_screening,
    "%s/%s.pdf"%(plot_dir,out_file),
    "%.2f"%(panel_width),
    "%.2f"%(panel_height),
    "%.2f"%(pdf_width),
    "%.2f"%(pdf_height),
]
command = " ".join(rscript_args)
print("Executing ;\n\n\n%s\n\n"%(command))

# stderr is merged into stdout so R warnings and errors show up in the notebook,
# and a non-zero exit status is reported instead of being dropped.
# subprocess.run raises FileNotFoundError if Rscript is not on the PATH.
result = subprocess.run(rscript_args, stdout=subprocess.PIPE, stderr=subprocess.STDOUT, universal_newlines=True)
print(result.stdout)
if result.returncode != 0:
    print("Rscript exited with status %d"%(result.returncode))


In [None]:
# Plots 9 and 10:
#  - Distribution of number of replicates observed for each protein pair (histogram)
#  - Rank plot of top hits in the screening (scatter plot + heatmap)
# Runs Rscripts/rank_plot.r on the score table and writes <plot_dir>/<score table name>.pdf.

# Score table to plot; change this to your best performing condition.
scores               = '%s/Data/%s/MCC/SCORES_BFG-PCA_MTX200_Median_Percentile_45_for_plot.csv'%(PATH,run_name)

"""
Data structure: SCORES..._for_plot.csv
Method	Selection	ORF_pair	n	Norm_meth	Th_method	s	Rank	Precision	Recall	MCC	1_Two-hybrid	2_PCA	3_Union	4_ALL	score	TH
PCA	MTX200	RPN11 (Sc)_RPN8 (Sc)	8	Median	Percentile_45	104.8826121	1	1	0.025641026	0.140484233	3	1	12	12	45.26098356	0
PCA	MTX200	RPN11 (Sc)_RPN8 (Sc)	8	Median	Percentile_45	104.8826121	1	1	0.025641026	0.140484233	3	1	12	12	48.26555009	0
PCA	MTX200	RPN11 (Sc)_RPN8 (Sc)	8	Median	Percentile_45	104.8826121	1	1	0.025641026	0.140484233	3	1	12	12	99.11961045	0
"""

out_file = scores.split("/")[-1].split(".")[0]
# For this panel, the panel size and plot width are set automatically by the R script
# (per the original note). The two panel values are still passed positionally, so they
# are defined here instead of being inherited from whichever cell ran last; 2.5 is the
# value the preceding cell leaves behind on a top-to-bottom run.
panel_width  = 2.5  #cm
panel_height = 2.5  #cm
pdf_width    = 30   #cm This will be automatically adjusted to the number of PPIs shown. It will be this value when extracting the top 100 pairs.
pdf_height   = 5    #cm

# Arguments are passed as a list (no shell), so paths containing spaces or
# shell metacharacters reach Rscript intact.
rscript_args = [
    "Rscript",
    "%s/rank_plot.r"%(script_dir),
    scores,
    "%s/%s.pdf"%(plot_dir,out_file),
    "%.2f"%(panel_width),
    "%.2f"%(panel_height),
    "%.2f"%(pdf_width),
    "%.2f"%(pdf_height),
]
command = " ".join(rscript_args)
print("Executing ;\n\n\n%s\n\n"%(command))

# stderr is merged into stdout so R warnings and errors show up in the notebook,
# and a non-zero exit status is reported instead of being dropped.
# subprocess.run raises FileNotFoundError if Rscript is not on the PATH.
result = subprocess.run(rscript_args, stdout=subprocess.PIPE, stderr=subprocess.STDOUT, universal_newlines=True)
print(result.stdout)
if result.returncode != 0:
    print("Rscript exited with status %d"%(result.returncode))


