In [41]:
# coding: utf-8
import ast
import os
import re
import subprocess
import sys
from pprint import pprint

import numpy as np
from tqdm import tqdm

from func import normalization as norm

##############################################
# JUPYTER-NOTEBOOK FOR COMPUTING PERFORMANCE #
# OF BFG-PCA SCREENINGS                      #
# Last modified by Daniel Evans-Yamamoto     #
##############################################

# Run configuration.
# PATH is the root directory that contains the Rscripts/ and Data/ folders.
# It is resolved from the current working directory, so either start the
# notebook from the BFG-PCA folder or edit PATH to point elsewhere.
PATH       = os.path.abspath(".")
run_name   = "test"                              # sub-folder of Data/ used for this run
script_dir = '%s/Rscripts'%(PATH)                # folder holding the R plotting scripts
plot_dir   = '%s/Data/%s/plots'%(PATH,run_name)  # folder where generated plots are written

# exist_ok=True creates the folder only when missing, without the
# check-then-create race of a separate os.path.isdir() test.
os.makedirs(plot_dir, exist_ok=True)

In [25]:
# Plots generated by this notebook.
# For more details, see the BFG-PCA wiki.
#  1. Distribution of marginal barcode abundance (histogram; represents the abundance of haploid strains)
#  2. Distribution of barcode replicates per ORF (histogram)
#  3. Distribution of fused barcode abundance in each condition (histogram and heatmap)
#  4. Distribution of raw interaction signal in each selection condition (heatmap)
#  5. Rank plot of autoactivity of each strain (scatter plot)
#  6. Correlation plot of raw interaction signals between BC1-BC1 and BC2-BC2 barcode fusion replicates for each fused barcode (scatter plot)
#  7. Correlation plot of raw interaction signals between diploid replicates for each PPI pair, excluding combinations which share the same haploid strains before mating (scatter plot)
#  8. Correlation plot of raw interaction signals between screening replicates (scatter plot)
#  9. Distribution of the number of replicates observed for each protein pair (histogram)
# 10. Rank plot of top hits in the screening (scatter plot + heatmap)

# Input files for the plots listed above.
# Every template takes the same two substitutions: the root path and the run name.
_run_args = (PATH, run_name)

haploid_marginal     = '%s/Data/%s/normalization/Haploid_F.csv' % _run_args
haploid_barcode_reps = '%s/Data/%s/normalization/Haploid_n.csv' % _run_args
diploid_F            = '%s/Data/%s/normalization/heatmap_F' % _run_args  # this is a directory
diploid_s            = '%s/Data/%s/normalization/heatmap_s' % _run_args  # this is a directory
autoactivity         = '%s/Data/%s/normalization/autoactivity_level.csv' % _run_args
corr_BFGtype         = '%s/Data/%s/normalization/BFG_correlation.csv' % _run_args
corr_diploids        = '%s/Data/%s/normalization/diploid_correlation.csv' % _run_args
corr_screening       = '%s/Data/%s/normalization/screening_rep_correlation.csv' % _run_args
scores               = '%s/Data/%s/MCC/SCORES_BFG-PCA_MTX200_Median_Percentile_45_for_plot.csv' % _run_args

In [13]:
# Distribution of marginal barcode abundance (histogram; represents the abundance of haploid strains)
"""
Data structure: Haploid_F.csv
Screeening	    Ori	    Strain	            F
PCA Control_1	DHFR3	DHFR3_ScPRS_03-BC1	0.099510531
PCA Control_1	DHFR3	DHFR3_ScPRS_03-BC2	0.034037403
"""

# The PDF is named after the input file ("Haploid_F.csv" -> "Haploid_F").
out_file = os.path.splitext(os.path.basename(haploid_marginal))[0]
panel_width  = 2.5 #cm
panel_height = 2.5 #cm
pdf_width    = 8
pdf_height   = 8

# Arguments are passed as a list (no shell), so paths containing spaces or
# shell metacharacters reach Rscript unchanged.
command_args = [
    "Rscript",
    "%s/strain_histogram_haploid.r"%(script_dir),
    haploid_marginal,
    "%s/%s.pdf"%(plot_dir,out_file),
    "%.2f"%(panel_width),
    "%.2f"%(panel_height),
    "%.2f"%(pdf_width),
    "%.2f"%(pdf_height),
]
command = " ".join(command_args)  # printable form of the invocation
print("Executing ;\n\n\n%s\n\n"%(command))

# Capture stdout and stderr together so R messages and errors appear in the
# cell output, then fail loudly instead of silently ignoring a non-zero exit.
proc = subprocess.Popen(command_args, stdout=subprocess.PIPE, stderr=subprocess.STDOUT, universal_newlines=True)
r_output = proc.communicate()[0]
print(r_output)
if proc.returncode != 0:
    raise RuntimeError("Rscript exited with status %d: %s"%(proc.returncode,command))


Executing ;


Rscript /Users/danyamamotoevans/GitHub/BFG-PCA/Rscripts/strain_histogram_haploid.r /Users/danyamamotoevans/GitHub/BFG-PCA/Data/test/normalization/Haploid_F.csv /Users/danyamamotoevans/GitHub/BFG-PCA/Data/test/plots/Haploid_F.pdf 2.50 2.50 8.00 8.00


[1] "ggplot2 is loaded correctly"
[1] "scales is loaded correctly"
[1] "cowplot is loaded correctly"
[1] "grid is loaded correctly"
[1] "gridExtra is loaded correctly"
[1] "stringr is loaded correctly"
[1] "egg is loaded correctly"
[1] "/Users/danyamamotoevans/GitHub/BFG-PCA/Data/test/plots/Haploid_F.pdf"



In [19]:
# Distribution of barcode replicates per ORF (histogram)
"""
Data structure: Haploid_n.csv
Cond	Rep	ori	    n
Control	1	DHFR3	2
Control	1	DHFR3	1
"""

# The PDF is named after the input file ("Haploid_n.csv" -> "Haploid_n").
out_file = os.path.splitext(os.path.basename(haploid_barcode_reps))[0]
panel_width  = 2.5 #cm
panel_height = 2.5 #cm
pdf_width    = 8
pdf_height   = 8

# Arguments are passed as a list (no shell), so paths containing spaces or
# shell metacharacters reach Rscript unchanged.
command_args = [
    "Rscript",
    "%s/strain_histogram_barcode_n.r"%(script_dir),
    haploid_barcode_reps,
    "%s/%s.pdf"%(plot_dir,out_file),
    "%.2f"%(panel_width),
    "%.2f"%(panel_height),
    "%.2f"%(pdf_width),
    "%.2f"%(pdf_height),
]
command = " ".join(command_args)  # printable form of the invocation
print("Executing ;\n\n\n%s\n\n"%(command))

# Capture stdout and stderr together so R messages and errors appear in the
# cell output, then fail loudly instead of only printing the exit code.
proc = subprocess.Popen(command_args, stdout=subprocess.PIPE, stderr=subprocess.STDOUT, universal_newlines=True)
r_output = proc.communicate()[0]
print(r_output)
if proc.returncode != 0:
    raise RuntimeError("Rscript exited with status %d: %s"%(proc.returncode,command))


Executing ;


Rscript /Users/danyamamotoevans/GitHub/BFG-PCA/Rscripts/strain_histogram_barcode_n.r /Users/danyamamotoevans/GitHub/BFG-PCA/Data/test/normalization/haplliod_n.csv /Users/danyamamotoevans/GitHub/BFG-PCA/Data/test/plots/haplliod_n.pdf 2.50 2.50 8.00 8.00


[1] "ggplot2 is loaded correctly"
[1] "scales is loaded correctly"
[1] "cowplot is loaded correctly"
[1] "grid is loaded correctly"
[1] "gridExtra is loaded correctly"
[1] "stringr is loaded correctly"
[1] "egg is loaded correctly"
[1] "/Users/danyamamotoevans/GitHub/BFG-PCA/Data/test/plots/haplliod_n.pdf"



In [72]:
# Distribution of fused barcode abundance in each condition (histogram and heatmap)
"""
Data structure: F_heatmap_*.csv
Method	Condition	Replicate	bait	bait_ORF	bait_ID	prey	prey_ORF	prey_ID	bait_bc	prey_bc	Raw	F
PCA	Control	1	DHFR3_ScPRS_03	NSP1	1BC1	DHFR12_ScPRS_01	NSP1	1BC1	BC1	BC1	82	0.00462411
PCA	Control	1	DHFR3_ScPRS_03	NSP1	1BC2	DHFR12_ScPRS_01	NSP1	1BC1	BC2	BC1	39	0.002241707"""

# os.listdir returns entries in arbitrary order; sorting makes the row order
# of the merged file reproducible between runs and machines.
Fs = sorted(name for name in os.listdir(diploid_F) if name.endswith(".csv"))

# Merge every per-condition CSV under a single header row.
LL = [["Method","Condition","Replicate","bait","bait_ORF","bait_ID","prey","prey_ORF","prey_ID","bait_bc","prey_bc","Raw","F"]]
for csv_name in Fs:
    # [1:] drops the first row of each file (presumably its own header row;
    # confirm against norm.csv2LL).
    LL += norm.csv2LL("%s/%s"%(diploid_F,csv_name))[1:]

# Path of the merged table: written here, then handed to the R script below.
f = "%s/F_heatmap_merged.csv"%(plot_dir)
norm.LL2csv(LL,f)

# The PDF is named after the merged file ("F_heatmap_merged").
out_file = os.path.splitext(os.path.basename(f))[0]
panel_width  = 4.0 #cm
panel_height = 4.0 #cm
pdf_width    = 30.0
pdf_height   = 20.0

# Arguments are passed as a list (no shell), so paths containing spaces or
# shell metacharacters reach Rscript unchanged.
command_args = [
    "Rscript",
    "%s/heatmap_F.r"%(script_dir),
    f,
    "%s/%s.pdf"%(plot_dir,out_file),
    "%.2f"%(panel_width),
    "%.2f"%(panel_height),
    "%.2f"%(pdf_width),
    "%.2f"%(pdf_height),
]
command = " ".join(command_args)  # printable form of the invocation
print("Executing ;\n\n\n%s\n\n"%(command))

# Capture stdout and stderr together so R messages and errors appear in the
# cell output, then fail loudly instead of only printing the exit code.
proc = subprocess.Popen(command_args, stdout=subprocess.PIPE, stderr=subprocess.STDOUT, universal_newlines=True)
r_output = proc.communicate()[0]
print(r_output)
if proc.returncode != 0:
    raise RuntimeError("Rscript exited with status %d: %s"%(proc.returncode,command))


Generated : /Users/danyamamotoevans/GitHub/BFG-PCA/Data/test/plots/F_heatmap_merged.csv
Executing ;


Rscript /Users/danyamamotoevans/GitHub/BFG-PCA/Rscripts/heatmap_F.r /Users/danyamamotoevans/GitHub/BFG-PCA/Data/test/plots/F_heatmap_merged.csv /Users/danyamamotoevans/GitHub/BFG-PCA/Data/test/plots/F_heatmap_merged.pdf 4.00 4.00 30.00 20.00


NULL
NULL
NULL
NULL
NULL
NULL



In [81]:
# Distribution of raw interaction signal (s) in each selection condition (heatmap)
"""
Data structure: s_heatmap_*.csv
Method	Condition	Replicate	bait	prey	s
PCA	MTX1	72	1BC1	1BC1	1.601097664
PCA	MTX1	72	1BC2	1BC1	2.461562041
"""

# os.listdir returns entries in arbitrary order; sorting makes the row order
# of the merged file reproducible between runs and machines.
Fs = sorted(name for name in os.listdir(diploid_s) if name.endswith(".csv"))

# Merge every per-condition CSV under a single header row.
LL = [["Method","Condition","Replicate","bait","prey","s"]]
for csv_name in Fs:
    # [1:] drops the first row of each file (presumably its own header row;
    # confirm against norm.csv2LL).
    LL += norm.csv2LL("%s/%s"%(diploid_s,csv_name))[1:]

# Path of the merged table: written here, then handed to the R script below.
f = "%s/s_heatmap_merged.csv"%(plot_dir)
norm.LL2csv(LL,f)

# The PDF is named after the merged file ("s_heatmap_merged").
out_file = os.path.splitext(os.path.basename(f))[0]
panel_width  = 4.0 #cm
panel_height = 4.0 #cm
pdf_width    = 30.0
pdf_height   = 20.0

# Arguments are passed as a list (no shell), so paths containing spaces or
# shell metacharacters reach Rscript unchanged.
command_args = [
    "Rscript",
    "%s/heatmap_s.r"%(script_dir),
    f,
    "%s/%s.pdf"%(plot_dir,out_file),
    "%.2f"%(panel_width),
    "%.2f"%(panel_height),
    "%.2f"%(pdf_width),
    "%.2f"%(pdf_height),
]
command = " ".join(command_args)  # printable form of the invocation
print("Executing ;\n\n\n%s\n\n"%(command))

# Capture stdout and stderr together so R messages and errors appear in the
# cell output, then fail loudly instead of only printing the exit code.
proc = subprocess.Popen(command_args, stdout=subprocess.PIPE, stderr=subprocess.STDOUT, universal_newlines=True)
r_output = proc.communicate()[0]
print(r_output)
if proc.returncode != 0:
    raise RuntimeError("Rscript exited with status %d: %s"%(proc.returncode,command))


Generated : /Users/danyamamotoevans/GitHub/BFG-PCA/Data/test/plots/s_heatmap_merged.csv
Executing ;


Rscript /Users/danyamamotoevans/GitHub/BFG-PCA/Rscripts/heatmap_s.r /Users/danyamamotoevans/GitHub/BFG-PCA/Data/test/plots/s_heatmap_merged.csv /Users/danyamamotoevans/GitHub/BFG-PCA/Data/test/plots/s_heatmap_merged.pdf 4.00 4.00 30.00 20.00


0


In [87]:
# Rank plot of autoactivity of each strain (scatter plot)
"""
Data structure: autoactivity_level.csv
Method	Strain_type	Condition	Replicate	Strain	AA_median	AA_median_rank
PCA	DHFR12	MTX1	72	Prey_DHFR12_ScPRS_13_BC2	2.704718695	1
PCA	DHFR12	MTX1	72	Prey_DHFR12_ScPRS_04_BC2	2.700159548	2
"""

# The PDF is named after the input file ("autoactivity_level").
out_file = os.path.splitext(os.path.basename(autoactivity))[0]
panel_width  = 3 #cm
panel_height = 1.5 #cm
pdf_width    = 20
pdf_height   = 20

# Arguments are passed as a list (no shell), so paths containing spaces or
# shell metacharacters reach Rscript unchanged.
command_args = [
    "Rscript",
    "%s/autoactivity_scatter.r"%(script_dir),
    autoactivity,
    "%s/%s.pdf"%(plot_dir,out_file),
    "%.2f"%(panel_width),
    "%.2f"%(panel_height),
    "%.2f"%(pdf_width),
    "%.2f"%(pdf_height),
]
command = " ".join(command_args)  # printable form of the invocation
print("Executing ;\n\n\n%s\n\n"%(command))

# Capture stdout and stderr together so R messages and errors appear in the
# cell output, then fail loudly instead of only printing the exit code.
proc = subprocess.Popen(command_args, stdout=subprocess.PIPE, stderr=subprocess.STDOUT, universal_newlines=True)
r_output = proc.communicate()[0]
print(r_output)
if proc.returncode != 0:
    raise RuntimeError("Rscript exited with status %d: %s"%(proc.returncode,command))





Executing ;


Rscript /Users/danyamamotoevans/GitHub/BFG-PCA/Rscripts/autoactivity_scatter.r /Users/danyamamotoevans/GitHub/BFG-PCA/Data/test/normalization/autoactivity_level.csv /Users/danyamamotoevans/GitHub/BFG-PCA/Data/test/plots/autoactivity_level.pdf 3.00 1.50 20.00 20.00


0


In [99]:
#== WORK IN PROGRESS ==

# Correlation plot of raw interaction signals between BC1-BC1 and BC2-BC2 barcode fusion replicates for each fused barcode (scatter plot)
"""
Data structure: BFG_correlation.csv
Method	Conditon	Replicate	score_type	UpUp	DnDn
PCA	MTX1	72	s	1.634691256	1.567504071
PCA	MTX1	72	s	3.052366118	1.870757965
"""

# The PDF is named after the input file ("BFG_correlation").
out_file = os.path.splitext(os.path.basename(corr_BFGtype))[0]
panel_width  = 3 #cm
panel_height = 1.5 #cm
pdf_width    = 20
pdf_height   = 20

# Arguments are passed as a list (no shell), so paths containing spaces or
# shell metacharacters reach Rscript unchanged.
command_args = [
    "Rscript",
    "%s/BFG_correlation_scatter.r"%(script_dir),
    corr_BFGtype,
    "%s/%s.pdf"%(plot_dir,out_file),
    "%.2f"%(panel_width),
    "%.2f"%(panel_height),
    "%.2f"%(pdf_width),
    "%.2f"%(pdf_height),
]
command = " ".join(command_args)  # printable form of the invocation
print("Executing ;\n\n\n%s\n\n"%(command))

# Capture stdout and stderr together so R messages and errors appear in the
# cell output, then fail loudly instead of silently ignoring a non-zero exit.
proc = subprocess.Popen(command_args, stdout=subprocess.PIPE, stderr=subprocess.STDOUT, universal_newlines=True)
r_output = proc.communicate()[0]
print(r_output)
if proc.returncode != 0:
    raise RuntimeError("Rscript exited with status %d: %s"%(proc.returncode,command))


Executing ;


Rscript /Users/danyamamotoevans/GitHub/BFG-PCA/Rscripts/BFG_correlation_scatter.r /Users/danyamamotoevans/GitHub/BFG-PCA/Data/test/normalization/BFG_correlation.csv /Users/danyamamotoevans/GitHub/BFG-PCA/Data/test/plots/BFG_correlation.pdf 3.00 1.50 20.00 20.00


[1] "ggplot2 is loaded correctly"
[1] "scales is loaded correctly"
[1] "cowplot is loaded correctly"
[1] "grid is loaded correctly"
[1] "gridExtra is loaded correctly"
[1] "GGally is loaded correctly"
[1] "plyr is loaded correctly"
[1] "Computing Pearson correlation for each group...."
      group        COR
1   MTX1 72 0.20033554
2   MTX1 96 0.09292022
3  MTX10 72 0.60631591
4  MTX10 96 0.56805311
5 MTX100 72 0.60544418
6 MTX100 96 0.61343594
7 MTX200 72 0.59026961
8 MTX200 96 0.61370399



In [None]:

# Remaining items from the plot list (presumably still to be implemented; item 6 was left without a description here).
#  6.
#  7. Correlation plot of raw interaction signals between diploid replicates for each PPI pair, excluding combinations which share the same haploid strains before mating (scatter plot)
#  8. Correlation plot of raw interaction signals between screening replicates (scatter plot)
#  9. Distribution of the number of replicates observed for each protein pair (histogram)
# 10. Rank plot of top hits in the screening (scatter plot + heatmap)

# Input files for the plots listed above.
# Every template takes the same two substitutions: the root path and the run name.
_run_args = (PATH, run_name)

corr_diploids  = '%s/Data/%s/normalization/diploid_correlation.csv' % _run_args
corr_screening = '%s/Data/%s/normalization/screening_rep_correlation.csv' % _run_args
scores         = '%s/Data/%s/MCC/SCORES_BFG-PCA_MTX200_Median_Percentile_45_for_plot.csv' % _run_args