# In this Jupyter Notebook I will analyse the data from the non-coding RNA screen and the bioneer screen.

In [None]:
#importing the packages used
import numpy as np
import matplotlib.pyplot as plt
import statistics as st
import scipy.stats as stats
import pandas as pd
from skimage.morphology import square
from skimage.measure import label, regionprops
from matplotlib.collections import PatchCollection
import matplotlib.patches as mpatches
from matplotlib.axes import Axes
from skimage.filters import threshold_otsu
import skimage.io
import skimage.filters
import skimage.measure
import os
import sys
sys.path.append('/Users/bencekover/Library/CloudStorage/OneDrive-Personal/MSci Bahler lab/S.-Pombe-MLPs/')
import yeastmlptrial.adhesion as adh
root = "/Users/bencekover/Library/CloudStorage/OneDrive-Personal/MSci Bahler lab/S.-Pombe-MLPs/"



In [None]:
# Image handed to the adhesion helper as its raw-layout reference.
image_for_raw_layout = root + "internal data/filled_out_plate.jpg"


def _load_bioneer_screen(mean_or_median, show_layout_fits):
    """Run the adhesion quantification on every Bioneer plate image pair.

    Plates 1-10 are processed in each of the four colours. For every
    plate/colour pair the before- and after-wash photographs plus the matching
    96-well layout file are passed to ``adh.result_from_of_ims``.

    Parameters
    ----------
    mean_or_median : str
        Forwarded unchanged to ``adh.result_from_of_ims`` ("median" or "mean").
    show_layout_fits : bool
        Forwarded unchanged to ``adh.result_from_of_ims``.

    Returns
    -------
    pandas.DataFrame
        One row per entry returned by the helper, restricted to the columns
        gene, before_wash, plate, color, position and ratio.
    """
    columns = ['gene', "before_wash", 'plate', 'color', 'position', 'ratio']
    image_dir = root + "internal data/Bioneer screen images/images/"
    frames = []
    for num in range(1, 11):
        for color in ["red", "blue", "yellow", "green"]:
            strain_layout_file = root + "external data/bioneer/96_well_layout_plate_" + str(num) + "_" + color + ".csv"
            image_before = image_dir + "20230227_1_biofilm_bioneer_p" + str(num) + "_" + color + ".jpg"
            image_after = image_dir + "20230227_1_biofilm_bioneer_p" + str(num) + "_" + color + "_w.jpg"

            df = adh.result_from_of_ims(
                image_before, image_after, strain_layout_file, image_for_raw_layout,
                show_layout_fits=show_layout_fits, x1=60, x2=60, y1=60, y2=60,
                keep_all=True, mean_or_median=mean_or_median,
            )
            df = df.rename(columns={"rep": "plate", "strain": "gene"})
            # The plate column is overwritten with the loop's plate number.
            df["color"] = color
            df["plate"] = num
            frames.append(df[columns])
    # A single concat replaces repeated DataFrame.append calls (removed in
    # pandas 2.0, and quadratic in the number of frames).
    return pd.concat(frames, ignore_index=True)


# Median-based and mean-based quantification of the same images; rows with any
# NaN are discarded.
results = _load_bioneer_screen("median", show_layout_fits=True).dropna()
results_mean = _load_bioneer_screen("mean", show_layout_fits=False).dropna()

# Untouched snapshots taken before any later filtering.
raw_results = results.copy()
raw_results_mean = results_mean.copy()
        

# Loading in the ncRNA results

In [None]:
# Quantify ncRNA plates 1-2 in all four colours (median-based) and stack the
# per-image tables into results_ncrna.
ncrna_columns = ['gene', "before_wash", 'plate', 'color', 'position', 'ratio']
ncrna_image_dir = root + "internal data/ncRNA screen images/images/"
ncrna_frames = []
for num in range(1, 3):
    for color in ["red", "blue", "yellow", "green"]:
        strain_layout_file = root + "external data/Rodriguez-Lopez et al. ncRNA deletion library/ncRNA_" + color + "_plate_" + str(num) + ".csv"
        image_before = ncrna_image_dir + "20230220_1_biofilm_ncrna_del_emm_plate_" + str(num) + "_" + color + ".jpg"
        image_after = ncrna_image_dir + "20230220_1_biofilm_ncrna_del_emm_plate_" + str(num) + "_" + color + "_w.jpg"

        df = adh.result_from_of_ims(
            image_before, image_after, strain_layout_file, image_for_raw_layout,
            show_layout_fits=True, x1=60, x2=60, y1=60, y2=60,
            keep_all=True, mean_or_median="median",
        )
        df = df.rename(columns={"rep": "plate", "strain": "gene"})
        # The plate column is overwritten with the loop's plate number.
        df["color"] = color
        df["plate"] = num
        ncrna_frames.append(df[ncrna_columns])

# A single concat replaces repeated DataFrame.append calls (removed in pandas 2.0).
results_ncrna = pd.concat(ncrna_frames, ignore_index=True)

# Untouched snapshot taken before any later filtering.
raw_results_ncrna = results_ncrna.copy()

In [None]:
# Order the ncRNA rows from the highest to the lowest ratio.
results_ncrna = results_ncrna.sort_values(by=['ratio'], ascending=False)
# Gene labels are matched as text below, so coerce them to str first.
results_ncrna["gene"] = results_ncrna["gene"].astype(str)
# Drop every row whose gene label contains "972". The .copy() makes the result
# an independent frame, so adding the source column below does not write into a
# slice of the unfiltered table.
results_ncrna = results_ncrna[~results_ncrna["gene"].str.contains("972")].copy()
# Tag each table with its screen of origin, then stack them into one table.
# NOTE: re-running this cell labels every row already in results as "bioneer"
# and stacks the ncRNA rows a second time.
results_ncrna["source"] = "ncRNA"
results["source"] = "bioneer"
results = pd.concat([results, results_ncrna], ignore_index=True)
results

In [None]:

# Histogram of the before-wash intensity: all rows in grey, Bioneer rows in blue
# drawn on top, so the grey still visible belongs to the ncRNA rows.
before_wash = results["before_wash"]
# Both layers share one set of 100 bins spanning the full data range. Letting
# each plt.hist call choose its own bins gives bars of different widths whose
# heights cannot be compared.
bin_edges = np.histogram_bin_edges(before_wash.dropna(), bins=100)
plt.hist(before_wash, bins=bin_edges, color="grey", label="ncRNA")
plt.hist(before_wash[results["source"] == "bioneer"], bins=bin_edges, color="blue", label="Bioneer")

# Report how many rows pass the cutoff; the comparison is strict, matching the
# `> 0.1` filter applied to the table afterwards.
n_above_cutoff = int((before_wash > 0.1).sum())
plt.text(0.15, 150, str(n_above_cutoff) + " out of " + str(len(results)) + " strains had values above 0.1")
plt.xlabel("Pixel intensity before washing", fontsize=15)
plt.ylabel("Number of strains", fontsize=15)
# Legend entries come from the label= arguments above (grey: ncRNA, blue: Bioneer).
plt.legend(loc="upper right", frameon=False)

# Red vertical line marking the 0.1 cutoff.
plt.axvline(x=0.1, color='r', linestyle='-')

plt.show()

In [None]:

# Same histogram as before, with the rows whose position starts with "A" (red)
# and "B" (green) drawn on top.
before_wash = results["before_wash"]
# All four layers share one set of 100 bins spanning the full data range;
# independently chosen bins would make the bar heights incomparable.
bin_edges = np.histogram_bin_edges(before_wash.dropna(), bins=100)
# na=False keeps the masks boolean even if a position is missing.
in_row_a = results["position"].str.startswith("A", na=False)
in_row_b = results["position"].str.startswith("B", na=False)

plt.hist(before_wash, bins=bin_edges, color="grey", label="ncRNA")
plt.hist(before_wash[results["source"] == "bioneer"], bins=bin_edges, color="blue", label="Bioneer")
plt.hist(before_wash[in_row_a], bins=bin_edges, color="red", label="Position A*")
plt.hist(before_wash[in_row_b], bins=bin_edges, color="green", label="Position B*")

# Report how many rows pass the cutoff; the comparison is strict, matching the
# `> 0.1` filter applied to the table afterwards.
n_above_cutoff = int((before_wash > 0.1).sum())
plt.text(0.15, 150, str(n_above_cutoff) + " out of " + str(len(results)) + " strains had values above 0.1")
plt.xlabel("Pixel intensity before washing", fontsize=15)
plt.ylabel("Number of strains", fontsize=15)
# Every layer is named in the legend via its label= argument.
plt.legend(loc="upper right", frameon=False)

# Red vertical line marking the 0.1 cutoff.
plt.axvline(x=0.1, color='r', linestyle='-')

plt.show()

In [None]:
# Keep, in both tables, only the rows whose before_wash value is strictly
# greater than 0.1.
results = results[results["before_wash"].gt(0.1)]
results_mean = results_mean[results_mean["before_wash"].gt(0.1)]


In [None]:

# Row counts after the intensity filter: first results, then results_mean.
for kept_table in (results, results_mean):
    print("Number of entries kept: ", len(kept_table))

In [None]:
# Discard the ncRNA rows and keep only the Bioneer screen. The .copy() gives an
# independent frame so that columns can be added to results afterwards without
# writing into a slice of the combined table.
results = results[results["source"] == "bioneer"].copy()

In [None]:
# Rank the rows from the highest ratio down and renumber them 0..n-1.
results = results.sort_values(by=['ratio'], ascending=False).reset_index(drop=True)
# With the table in descending order, row k gets ecdf = 1 - k/n, so the highest
# ratio receives 1.0 and the lowest receives 1/n.
n_rows = len(results)
results["ecdf"] = 1 - np.arange(n_rows) / n_rows
results.head(20)


In [None]:
# Scatter of ECDF against ratio, with the 0.95 level and the 95th-percentile
# ratio marked by red lines.
ax = plt.gca()
ax.scatter(results["ratio"], results["ecdf"], s=25, alpha=0.2, color="blue")
# Thick horizontal line at ECDF = 0.95.
ax.axhline(y=0.95, color='r', linestyle='-', lw=4)
ax.set_xlabel("Ratio of pixel intensity after washing to before washing", fontsize=15)
ax.set_ylabel("ECDF", fontsize=15)
ax.set_title("ECDF of adhesion values", fontsize=12)

# Thinner vertical line at the 95th percentile of the ratio values.
percentile = np.percentile(results["ratio"], 95)
ax.axvline(x=percentile, color='r', linestyle='-', lw=2, alpha=0.5)

# Annotate the percentile value and the number of rows strictly above it.
n_above_percentile = len(results[results["ratio"] > percentile])
ax.text(0.12, 0.9, "95th percentile: " + str(round(percentile, 3)), fontsize=9, color="black")
ax.text(0.12, 0.85, "Number of entries above 95th percentile: " + str(n_above_percentile), fontsize=9, color="black")

# Ticks every 0.1 on both axes, with larger tick labels.
tick_positions = np.arange(0, 1.1, 0.1)
ax.set_xticks(tick_positions)
ax.set_yticks(tick_positions)
ax.tick_params(axis='both', which='major', labelsize=12)
plt.show()

In [None]:
# Display the results_mean rows whose gene label is "empty".
# Author's note: both of these can be traced back to direct (from another plate)
# and indirect (by a neighbouring sample) contamination.
results_mean[results_mean["gene"].eq("empty")]


In [None]:
# Order results from the highest ratio down; later steps rely on this order.
results = results.sort_values(by=['ratio'], ascending=False)
# The 60 highest-ratio rows become the candidate list, tagged as coming from the
# median-based table.
final = results.iloc[:60].reset_index(drop=True).assign(median_or_mean="median")

In [None]:
# Add the last three rows of results to the candidate list as controls.
# pd.concat replaces DataFrame.append, which was removed in pandas 2.0;
# ignore_index renumbers the combined rows 0..n-1.
final = pd.concat([final, results.tail(3)], ignore_index=True)
# The three rows just added are labelled "control" instead of median/mean.
final.loc[final.index[-3:], "median_or_mean"] = "control"
final


In [None]:
# Take the 70 highest-ratio rows of the mean-based table.
results_mean.sort_values(by=['ratio'], inplace=True, ascending=False)
mean = results_mean.head(70).reset_index(drop=True)
mean["median_or_mean"] = "mean"
# Add only the genes that are not already in the candidate list.
# pd.concat replaces DataFrame.append, which was removed in pandas 2.0.
not_yet_listed = ~mean["gene"].isin(final["gene"])
final = pd.concat([final, mean[not_yet_listed]], ignore_index=True)


In [None]:
# For every candidate, look the gene up in the 384-well layout of its plate and
# record the 1-based row and column of each matching cell.
# Each layout file is read once per plate rather than once per candidate.
plate_layouts = {}
rows_384 = []
cols_384 = []
for plate, gene in zip(final["plate"], final["gene"]):
    if plate not in plate_layouts:
        layout_path = root + "external data/bioneer/384_well_layout_plate_" + str(plate) + ".csv"
        plate_layouts[plate] = pd.read_csv(layout_path, header=None).to_numpy()
    # np.where returns one index array per axis for the cells equal to the gene.
    hit_rows, hit_cols = np.where(plate_layouts[plate] == gene)
    rows_384.append(hit_rows + 1)
    cols_384.append(hit_cols + 1)

# Whole columns are assigned at once. The previous chained form
# final[col][i] = ... writes through an intermediate object and is not
# guaranteed to reach the frame. Each cell still holds the array of matches.
final["384_row"] = pd.Series(rows_384, index=final.index, dtype=object)
final["384_col"] = pd.Series(cols_384, index=final.index, dtype=object)

In [None]:
# Build a "<plate>_<row>_<col>" identifier from the plate number and the
# 384-well row/column entries, each rendered as text.
coord_parts = [final[part].astype(str) for part in ("plate", "384_row", "384_col")]
final["coord"] = coord_parts[0] + "_" + coord_parts[1] + "_" + coord_parts[2]

In [None]:
# Discard the rows labelled "empty" and renumber the remaining rows from 0.
final = final.loc[final["gene"].ne("empty")].reset_index(drop=True)
final


In [None]:
# Read the header-less, tab-separated gene annotation table and keep columns
# 0, 2 and 4 under the names gene, name and function.
annotation_path = root + "external data/Pombase files/gene_IDs_names_products.tsv"
gene_names = pd.read_csv(annotation_path, sep="\t", header=None)[[0, 2, 4]]
gene_names = gene_names.rename(columns={0: "gene", 2: "name", 4: "function"})
# Attach the annotation to the candidate list. This is pandas' default inner
# join, so candidates without a matching annotation row are dropped.
final = final.merge(gene_names, on="gene")
final

In [None]:
# Gene column of every candidate that is not a control.
angeli = final.loc[final["median_or_mean"] != "control", "gene"]
# Gene column of the whole results table, used as the background set.
angeli_background = results["gene"].copy()
# Write both gene lists to text files without the index column.
angeli_dir = root + "Bence folder/Angeli/"
angeli.to_csv(angeli_dir + "bioneer_angeli_recent.txt", index=False)
angeli_background.to_csv(angeli_dir + "bioneer_angeli_background_recent.txt", index=False)


In [None]:
# Genes whose ratio lies strictly above the 95th percentile of all ratios.
ratio_cutoff = results["ratio"].quantile(0.95)
results_above_0_05 = results.loc[results["ratio"] > ratio_cutoff, "gene"]
# Write that list, and the full gene list, to text files without the index.
results_above_0_05.to_csv(root + "Bence folder/Angeli/bioneer_angeli_95thpercentile_recent.txt", index=False)
results["gene"].to_csv(root + "Bence folder/Angeli/bioneer_angeli_all_100percent_recent.txt", index=False)

In [None]:
# Rows with before_wash > 0.1, copied so that the after_wash column can be added
# without writing into a slice of results.
plot_bef_aft = results[results["before_wash"] > 0.1].copy()
# after_wash is reconstructed as before_wash * ratio.
plot_bef_aft["after_wash"] = plot_bef_aft["before_wash"] * plot_bef_aft["ratio"]
# 95th percentile of the ratio, computed once and reused below.
ratio_cutoff = plot_bef_aft["ratio"].quantile(0.95)

fig, ax = plt.subplots()
ax.scatter(plot_bef_aft["before_wash"], plot_bef_aft["after_wash"], s=25, alpha=0.2, color="blue")
# Red line through the origin whose slope is the 95th-percentile ratio.
ax.plot([0, 1], [0, ratio_cutoff], color="red")
ax.set_xlabel("Normalised pixel intensity before wash", fontsize=15)
ax.set_ylabel("Normalised pixel intensity after wash", fontsize=15)
ax.set_title("Pixel intensity before and after wash")

# Before-wash intensities of the rows strictly above and strictly below the
# ratio cutoff; rows exactly at the cutoff belong to neither group.
above_vals = plot_bef_aft[plot_bef_aft["ratio"] > ratio_cutoff]["before_wash"]
below_vals = plot_bef_aft[plot_bef_aft["ratio"] < ratio_cutoff]["before_wash"]

plt.text(0.05, 0.85, "Mean before wash intensity above cutoff: " + str(round(above_vals.mean(), 4)), transform=ax.transAxes, fontsize=10)
plt.text(0.05, 0.8, "Mean before wash intensity below cutoff: " + str(round(below_vals.mean(), 4)), transform=ax.transAxes, fontsize=10)
plt.text(0.35, 0.25, "95th percentile of after/before ratio: " + str(round(ratio_cutoff, 4)), transform=ax.transAxes, color="red", fontsize=10)

# Two-sample t-test comparing the before-wash intensities of the two groups.
p_val = stats.ttest_ind(above_vals, below_vals)[1]
# Round relative to the p-value's magnitude. log10 is undefined for 0 and NaN
# (for example when a group is empty), so those values are shown as they are.
if np.isfinite(p_val) and p_val > 0:
    digits = -int(np.floor(np.log10(p_val))) + 5
    p_val = round(p_val, digits)
plt.text(0.05, 0.75, "P-value: " + str(p_val), transform=ax.transAxes, fontsize=10)

In [None]:
# Load the two CSV files from the Documents folder; printed1 is the file dated
# 20230306 and printed2 the file dated 20230227.
printed1, printed2 = (
    pd.read_csv(csv_path)
    for csv_path in (
        "/Users/bencekover/Documents/20230306_1_biofilm_bioneer_final_printed_second.csv",
        "/Users/bencekover/Documents/20230227_1_biofilm_bioneer_final.csv",
    )
)

In [None]:
# Display the current results table as the cell output.
results


In [None]:
# more_print holds the results rows whose gene appears in neither printed1 nor
# printed2, with ratio > 0.1 and before_wash > 0.2.
already_printed = results["gene"].isin(printed1["gene"]) | results["gene"].isin(printed2["gene"])
passes_thresholds = (results["ratio"] > 0.1) & (results["before_wash"] > 0.2)
more_print = results[~already_printed & passes_thresholds]
more_print

# Following up on the hits 

In [None]:
# Layout spreadsheets for the two follow-up ("sticky") plates.
sticky1_layout = root + "internal data/Screen_BK_plate_1.xlsx"
sticky2_layout = root + "internal data/Screen_BK_plate_2.xlsx"

# Both plates have before/after image folders for EMM and for YES. They are
# quantified and charted in the order plate one EMM, plate one YES, plate two
# EMM, plate two YES.
sticky_dir = root + "internal data/confirming deletion hits/Sticky plate "
sticky_tables = {}
for plate_word, plate_layout in (("one", sticky1_layout), ("two", sticky2_layout)):
    for medium in ("EMM", "YES"):
        folder1 = sticky_dir + plate_word + "_" + medium + "_before"
        folder2 = sticky_dir + plate_word + "_" + medium + "_after"
        plate_table = adh.result_from_folders_of_ims(folder1, folder2, plate_layout, image_for_raw_layout)
        if medium == "EMM":
            # Only the EMM tables are restricted to rows with n > 4.
            plate_table = plate_table[plate_table['n'] > 4]
        adh.barchart_from_res(plate_table)
        sticky_tables[plate_word, medium] = plate_table

sticky_1_emm = sticky_tables["one", "EMM"]
sticky_1_yes = sticky_tables["one", "YES"]
sticky_2_emm = sticky_tables["two", "EMM"]
sticky_2_yes = sticky_tables["two", "YES"]








# Merging YES data

In [None]:

# Stack the YES tables of both follow-up plates and renumber the rows.
final_results_yes_screen = pd.concat([sticky_1_yes, sticky_2_yes]).reset_index(drop=True)

# Each of these strains can occupy several rows; collapse them into one pooled
# row per strain.
for pooled_strain in ["SPNCRNA.781", "SPNCRNA.900", "SPNCRNA.1234"]:
    # regex=False matches the name literally (the "." would otherwise match any
    # character); na=False keeps the mask boolean if a strain name is missing.
    is_replicate = final_results_yes_screen['strain'].str.contains(pooled_strain, regex=False, na=False)
    subset = final_results_yes_screen[is_replicate]
    # Pool the individual ratio values of all matching rows.
    ratios = [value for row_vals in subset['filtered_all_ratio_vals'] for value in row_vals]
    pooled_row = pd.DataFrame([{
        'strain': pooled_strain,
        'ratio': np.mean(ratios),
        # Standard error computed from np.std with its default ddof=0.
        'sem': np.std(ratios) / np.sqrt(len(ratios)),
        'before_wash': np.mean(subset['before_wash']),
        'after_wash': np.mean(subset['after_wash']),
        'n': np.sum(subset['n']),
        'filtered_all_ratio_vals': ratios,
    }])
    # Swap the individual rows for the pooled one. pd.concat replaces
    # DataFrame.append, which was removed in pandas 2.0.
    final_results_yes_screen = pd.concat([final_results_yes_screen[~is_replicate], pooled_row], ignore_index=True)

# Remove every strain whose name contains "JB", then renumber the rows.
has_jb = final_results_yes_screen['strain'].str.contains("JB", na=False)
final_results_yes_screen = final_results_yes_screen[~has_jb].reset_index(drop=True)
final_results_yes_screen

# Merging EMM data


In [None]:

# Stack the EMM tables of both follow-up plates and renumber the rows.
final_results_emm_screen = pd.concat([sticky_1_emm, sticky_2_emm]).reset_index(drop=True)

# Each of these strains can occupy several rows; collapse them into one pooled
# row per strain.
for pooled_strain in ["SPNCRNA.781", "SPNCRNA.900", "SPNCRNA.1234"]:
    # regex=False matches the name literally (the "." would otherwise match any
    # character); na=False keeps the mask boolean if a strain name is missing.
    is_replicate = final_results_emm_screen['strain'].str.contains(pooled_strain, regex=False, na=False)
    subset = final_results_emm_screen[is_replicate]
    # Pool the individual ratio values of all matching rows.
    ratios = [value for row_vals in subset['filtered_all_ratio_vals'] for value in row_vals]
    pooled_row = pd.DataFrame([{
        'strain': pooled_strain,
        'ratio': np.mean(ratios),
        # Standard error computed from np.std with its default ddof=0.
        'sem': np.std(ratios) / np.sqrt(len(ratios)),
        'before_wash': np.mean(subset['before_wash']),
        'after_wash': np.mean(subset['after_wash']),
        'n': np.sum(subset['n']),
        'filtered_all_ratio_vals': ratios,
    }])
    # Swap the individual rows for the pooled one. pd.concat replaces
    # DataFrame.append, which was removed in pandas 2.0.
    final_results_emm_screen = pd.concat([final_results_emm_screen[~is_replicate], pooled_row], ignore_index=True)

# Remove every strain whose name contains "JB", then renumber the rows.
has_jb = final_results_emm_screen['strain'].str.contains("JB", na=False)
final_results_emm_screen = final_results_emm_screen[~has_jb].reset_index(drop=True)

In [None]:
# Display the EMM rows whose strain name contains "rpl3702".
is_rpl3702 = final_results_emm_screen['strain'].str.contains("rpl3702")
final_results_emm_screen[is_rpl3702]

In [None]:
# Screen hits: EMM rows whose strain name matches none of
# srb / med13 / nut / pmc / rox and whose after_wash value exceeds 0.03,
# ordered from the highest ratio down and renumbered.
emm_strain_names = final_results_emm_screen['strain']
is_excluded = emm_strain_names.str.contains("srb|med13|nut|pmc|rox")
final_results_screen = final_results_emm_screen[~is_excluded]
final_results_screen = final_results_screen[final_results_screen['after_wash'] > 0.03]
final_results_screen = final_results_screen.sort_values(by=['ratio'], ascending=False).reset_index(drop=True)

# Mediator table: EMM rows whose strain name matches any of
# med / pmc / nut / srb / rox, in the same ordering.
is_mediator = emm_strain_names.str.contains("med|pmc|nut|srb|rox")
final_results_mediator = final_results_emm_screen[is_mediator]
final_results_mediator = final_results_mediator.sort_values(by=['ratio'], ascending=False).reset_index(drop=True)


In [None]:
# Display the filtered EMM screen table as the cell output.
final_results_screen

In [None]:
# Mediator table for YES: rows whose strain name matches any of
# med / pmc / nut / srb / rox, ordered from the highest ratio down and renumbered.
is_mediator_yes = final_results_yes_screen['strain'].str.contains("med|pmc|nut|srb|rox")
final_results_mediator_yes = (
    final_results_yes_screen[is_mediator_yes]
    .sort_values(by=['ratio'], ascending=False)
    .reset_index(drop=True)
)


In [None]:
import bokeh.io
import bokeh.layouts
import bokeh.models
import iqplot
from bokeh.io import export_png
bokeh.io.output_notebook()

# One small ECDF panel per screen hit, comparing its EMM ratios with the YES
# ratios of the same strain.
plots = []
n_draws = 10000
for strain, emm_vals in zip(final_results_screen['strain'], final_results_screen['filtered_all_ratio_vals']):
    ratios1 = np.array(emm_vals)

    # Look the strain up in the YES table. A strain without a YES row gets an
    # EMM-only panel instead of aborting the whole grid with an IndexError.
    yes_rows = final_results_yes_screen[final_results_yes_screen['strain'] == strain]
    if len(yes_rows) > 0:
        ratios2 = np.array(yes_rows['filtered_all_ratio_vals'].iloc[0])
    else:
        ratios2 = np.array([])

    ratios = np.append(ratios1, ratios2)
    strains = np.repeat(strain, len(ratios))
    media = np.append(np.repeat(["EMM"], len(ratios1)), np.repeat(["YES"], len(ratios2)))
    plot_df = pd.DataFrame({'ratio': ratios, 'strain': strains, 'media': media})

    p = iqplot.ecdf(plot_df, "ratio", "media", conf_int=True, title=strain,
        ptiles=[2.5, 97.5],
        n_bs_reps=10000, height=150, width=250)

    # Bootstrap the mean of the EMM ratios: resample with replacement n_draws
    # times and take the 2.5th and 97.5th percentiles of the resampled means.
    samples = np.random.choice(ratios1, size=(n_draws, len(ratios1)))
    means = samples.mean(axis=1)
    mean = np.mean(ratios1)
    conf_int = np.percentile(means, [2.5, 97.5])
    p.title.text = p.title.text + ',  mean = {:.2f} [{:.2f},{:.2f}]'.format(mean, conf_int[0], conf_int[1])

    # Small fonts so that the panels fit into a grid.
    p.title.text_font_size = '8pt'
    p.xaxis.axis_label_text_font_size = '7pt'
    p.yaxis.axis_label_text_font_size = '7pt'
    p.xaxis.major_label_text_font_size = '7pt'
    p.yaxis.major_label_text_font_size = '7pt'
    # The ratio is plotted on a fixed 0-1 range, i.e. it is the after-wash value
    # relative to the before-wash value, so the label reads after/before.
    p.xaxis.axis_label = 'Adhesion ratio (after/before)'
    p.x_range = bokeh.models.Range1d(0, 1)
    p.y_range = bokeh.models.Range1d(0, 1)
    # Compact legend.
    p.legend.label_text_font_size = '5pt'
    p.legend.glyph_height = 5
    p.legend.glyph_width = 5
    p.legend.spacing = 0
    plots.append(p)

# Arrange the panels four per row and show the grid.
grid = bokeh.layouts.gridplot(plots, ncols=4)
bokeh.io.show(grid)

# To save the grid as an image:
#export_png(grid,filename="adhesion_ratio.png")
        

In [None]:
# Table to export: strain plus its ratio column, copied so that new columns are
# written to an independent frame rather than to a slice of final_results_screen.
# NOTE(review): the pooled rows built when merging the follow-up data use the
# key 'ratio'; confirm that a 'ratio_mean' column exists in final_results_screen.
final_results_screen_to_export = final_results_screen[["strain", "ratio_mean"]].copy()

# systematic_name starts as the strain label. Wherever that label equals an
# entry of gene_names['name'], it is replaced by the 'gene' value of the first
# matching annotation row.
name_to_gene = gene_names.dropna(subset=["name"]).drop_duplicates("name").set_index("name")["gene"]
strain_labels = final_results_screen_to_export["strain"]
has_annotation = strain_labels.isin(name_to_gene.index)
final_results_screen_to_export["systematic_name"] = strain_labels.where(~has_annotation, strain_labels.map(name_to_gene))

# Export to CSV in the working directory.
final_results_screen_to_export.to_csv("adhesion_ratio.csv", index=False)

# Gene and ratio of the mean-based screen table; here the gene column is reused
# as the systematic name.
results_mean_before_concat = results_mean[["gene", "ratio"]].copy()
results_mean_before_concat["systematic_name"] = results_mean_before_concat["gene"]




In [None]:
# Permutation t-tests (1,000,000 permutations) between rows of the mediator
# table, addressed by row label: 0 vs 1, then 2 vs 4. The printed strain names
# assume that those rows hold the named strains.
mediator_ratio_lists = final_results_mediator['ratio_list']
srb_test = stats.ttest_ind(mediator_ratio_lists[0], mediator_ratio_lists[1], permutations=1000000)
print("srb11 vs srb10, P={}".format(srb_test))
med_test = stats.ttest_ind(mediator_ratio_lists[2], mediator_ratio_lists[4], permutations=1000000)
print("med13 vs med18, P={}".format(med_test))

In [None]:
# ECDF of the individual ratio values for the mediator strains on EMM.
# At most the first seven rows are plotted; head(7) also copes with a table that
# has fewer than seven rows, where a fixed range(7) would raise.
top_mediator = final_results_mediator.head(7)
ratio_chunks = [np.asarray(vals, dtype=float) for vals in top_mediator['filtered_all_ratio_vals']]
chunk_sizes = np.array([len(chunk) for chunk in ratio_chunks], dtype=int)
ratios = np.concatenate(ratio_chunks) if ratio_chunks else np.array([])
# Repeat each strain name once per ratio value so that both columns line up.
strains = np.repeat(top_mediator['strain'].to_numpy(), chunk_sizes)

plot_df = pd.DataFrame({'ratio': ratios, 'strain': strains})
p = iqplot.ecdf(plot_df, "ratio", "strain", conf_int=True, title="Adhesion of Mediator gene deletions on EMM",
    ptiles=[2.5, 97.5],
    n_bs_reps=10000, x_range=[0, 1])
# The ratio is plotted on a 0-1 range, i.e. after-wash relative to before-wash.
p.xaxis.axis_label = 'Adhesion ratio (after/before)'

bokeh.io.show(p)



In [None]:
# ECDF of the individual ratio values for the mediator strains on YES.
# At most the first seven rows are plotted; head(7) also copes with a table that
# has fewer than seven rows, where a fixed range(7) would raise.
top_mediator_yes = final_results_mediator_yes.head(7)
ratio_chunks = [np.asarray(vals, dtype=float) for vals in top_mediator_yes['filtered_all_ratio_vals']]
chunk_sizes = np.array([len(chunk) for chunk in ratio_chunks], dtype=int)
ratios = np.concatenate(ratio_chunks) if ratio_chunks else np.array([])
# Repeat each strain name once per ratio value so that both columns line up.
strains = np.repeat(top_mediator_yes['strain'].to_numpy(), chunk_sizes)

plot_df = pd.DataFrame({'ratio': ratios, 'strain': strains})
p = iqplot.ecdf(plot_df, "ratio", "strain", conf_int=True, title="Adhesion of Mediator gene deletions on YES",
    ptiles=[2.5, 97.5],
    n_bs_reps=10000, x_range=[0, 1])

# The ratio is plotted on a 0-1 range, i.e. after-wash relative to before-wash.
p.xaxis.axis_label = 'Adhesion ratio (after/before)'

bokeh.io.show(p)

# Quantifying the mbx2OE strain 

In [None]:
# Follow-up plate two together with the mbx2OE strain, on EMM: quantify the
# before/after image folders against the dedicated layout and chart the result.
sticky2_mbx2oe_layout = root + "internal data/Screen_BK_plate_2mbx2OE.xlsx"

mbx2oe_folder_stem = root + "internal data/confirming deletion hits/Sticky plate two and mbx2OE_EMM_"
folder1 = mbx2oe_folder_stem + "before"
folder2 = mbx2oe_folder_stem + "after"
sticky_2_mbx2oe_emm = adh.result_from_folders_of_ims(folder1, folder2, sticky2_mbx2oe_layout, image_for_raw_layout)
adh.barchart_from_res(sticky_2_mbx2oe_emm)


In [None]:
# ECDF of the ratio values stored for the strain labelled "MBX2".
# NOTE(review): results_df_mbx2 is not created anywhere in the visible cells;
# confirm where it comes from before running this cell.
mbx2_rows = results_df_mbx2[results_df_mbx2['strain'] == 'MBX2']
ratios = mbx2_rows['ratio_list'].tolist()[0]
plot_df = pd.DataFrame({'ratio': ratios, 'strain': ['mbx2 OE'] * len(ratios)})

# Bootstrap the mean: n_draws resamples with replacement, each as long as the
# data, then the 2.5th and 97.5th percentiles of the resampled means.
n_draws = 10000
samples = np.sort(np.random.choice(ratios, size=(n_draws, len(ratios))))
means = [np.mean(resample) for resample in samples]
mean = np.mean(ratios)
conf_int = np.percentile(means, [2.5, 97.5])

# The interval bounds go into the title, rounded to three decimals.
ci_low = str(round(conf_int[0], 3))
ci_high = str(round(conf_int[1], 3))
ecdf_title = 'ECDF of adhesion ratios of mbx2 overexpression strain. 95% CI of the mean: ' + ci_low + '-' + ci_high
p = iqplot.ecdf(data=plot_df, q='ratio', cats='strain', title=ecdf_title, conf_int=True, height=400, width=700, show_legend = False)

# Fix the x axis to 0-1.05.
from bokeh.models import Range1d
p.x_range = Range1d(0, 1.05)
bokeh.io.show(p)

In [None]:
# NOTE(review): neither plotfour nor the four mbx2oe_emm_* arguments are defined
# in the visible cells; confirm where they come from before running this cell.
plotfour(mbx2oe_emm_1,mbx2oe_emm_2,mbx2oe_emm_1_w,mbx2oe_emm_2_w)