# This script calculates cAMP ratios acquired using the PlacP1 plasmid, recorded by flow cytometry and data exported to a csv file

* It does essentially the same as the script cAMP_PaQa.ipynb
* However, it uses a csv file as input containing fluorescent readings of YFP and mKate2 exported from flow cytometry measurements
* The input csv files contain two columns named "yfp" and "mKate2" (this can be different but then the script must be modified accordingly as specified in the relevant cell)
  * each row represents one measured cell
  * one csv file per sample
  * !!! Important !!! Format of the names of the csv files must be:
    * date_strain-number_Data_strain-name_lacP1_condition_replicate.csv
    * no underscores within the strain name or other items
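    * the condition must contain "liq" or "sol": the script labels a sample as "Solid" when it finds "sol" in the name and as "Liquid" otherwise
    * the replicate must be written as "rep" followed by its number (e.g. rep1), because the script reads the number that follows "rep"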
    * example: 20200908_413_Data_WT_lacP1_liq_rep1.csv
* To plot cAMP ratios, first change all directories in this script according to your system
* Then, run this script, enter reference strain and strains to plot in function "plottingData" (last cell)

In [1]:
import os
import glob
import pandas as pd

import numpy as np
import bokeh.io
import bokeh.plotting
import bokeh.palettes
from bokeh.transform import jitter
import seaborn as sns
import matplotlib
from bokeh.models import HoverTool
from bokeh.models import Range1d
from scipy import stats
import pandas as pd
import math

from bokeh.layouts import row

bokeh.io.output_notebook()

In [2]:
# defines how cAMP ratios are calculated and plotted
# normally, no need to modify anything

def plottingAllData(df, means, labels, indexes, param, colors):
    """Draw every selected group of cells as a horizontal jittered strip plot.

    Parameters
    ----------
    df : pandas.DataFrame
        One row per cell. Must contain the columns `param` and 'Labels';
        the hover tool additionally reads 'Strain', 'Growth' and 'Bio_Rep'.
    means : pandas.DataFrame
        Table drawn on top of the cells as white circles; must contain
        `param` and 'Labels'.
    labels : list of str
        Categorical factors used for the y axis.
    indexes : iterable
        Row selectors accepted by ``df.loc``; one glyph is drawn per entry.
    param : str
        Name of the column plotted on the x axis.
    colors : sequence
        Colors, indexed by the position of the entry in `indexes`.

    Returns
    -------
    bokeh figure
    """
    p = bokeh.plotting.figure(
        width=600,
        height=600,
        y_range=labels,
        x_axis_type='linear',
        x_axis_label='Fluorescence intensity',
        title="cAMP "+param+" on solid vs liquid environement"
    )
    # one glyph per group so that every group gets its own color
    for i, index in enumerate(indexes):
        p.circle(
            source=df.loc[index, :],
            x=param,
            y=jitter('Labels', width=0.3, range=p.y_range),
            color=colors[i],
            alpha=0.3,
        )
    # `legend_label` is the keyword used by the other plotting functions of this
    # file; the older `legend` keyword is not accepted by current bokeh releases
    p.circle(
        source=means,
        x=param,
        y='Labels',
        size=10,
        line_color='black',
        fill_color='white',
        legend_label="medians"
    )
    p.add_tools(HoverTool(
        tooltips=[
            ('Strain', '@{Strain}'),
            ('Growth condition', '@{Growth}'),
            (param, '@{'+param+'}'),
            ('Biological replicate', '@{Bio_Rep}')
        ],
    ))
    return p

def plottingData(df, means, wt, order, strain_to_remove, growth, param, colors, scale, GraphTitle):
    """Plot single-cell values and per-replicate medians for selected strains.

    The reference strain `wt` is always drawn first, as "<wt> Liquid" and
    "<wt> Solid", followed by the strains of `order`.

    * `growth` is a list: every strain of `order` is drawn once per entry of
      `growth`, values are left untouched. The title reads ``growth[0]`` and
      ``growth[1]``.
    * `growth` is a single string: every strain of `order` is drawn for that
      condition only, and `param` is divided by the mean of the reference
      strain's Liquid medians (fold change). The raw medians and the fold
      changes are printed.

    Parameters
    ----------
    df : pandas.DataFrame
        One row per cell, with the columns 'Strain', 'Growth', 'Bio_Rep'
        and `param`.
    means : pandas.DataFrame
        Medians table with the same columns (as built by ``getDFStats``).
    wt : str
        Name of the reference strain.
    order : list of str
        Strains to plot, in plotting order.
    strain_to_remove : list of str
        Strains removed from both tables before plotting. Rows of `means`
        whose strain is neither `wt`, in `order`, nor listed here keep the
        'Labels' value they arrived with and are still drawn by the medians
        glyph, so every unused strain should be listed.
    growth : list of str or str
        Growth condition(s), see above.
    param : str
        Column to plot on the y axis.
    colors : sequence
        Colors, indexed by group position.
    scale : str
        Passed to bokeh as ``y_axis_type``.
    GraphTitle : str
        Prefix of the figure title.

    Returns
    -------
    (figure, pandas.DataFrame)
        The bokeh figure and the (relabelled, possibly normalised) copy of
        `means`. `df` and `means` themselves are not modified.
    """

    def rows_of(frame, strain, condition):
        # Boolean mask: exact strain name, growth condition matched with
        # str.match (i.e. `condition` is a regex anchored at the start).
        return (frame['Strain'] == strain) & frame['Growth'].str.match(condition)

    # Work on filtered copies; both are always defined, even for an empty `order`.
    unwanted = list(strain_to_remove)
    df3 = df[~df['Strain'].isin(unwanted)].copy()
    means3 = means[~means['Strain'].isin(unwanted)].copy()

    labels = []
    indexes = []

    if isinstance(growth, list):
        groups = [(wt, 'Liquid'), (wt, 'Solid')]
        groups += [(strain, condition) for strain in order for condition in growth]
        for slot, (strain, condition) in enumerate(groups):
            position = slot + 0.5  # centre of the slot-th category on the x axis
            cell_rows = rows_of(df3, strain, condition)
            df3.loc[cell_rows, 'Labels'] = position
            means3.loc[rows_of(means3, strain, condition), 'Labels'] = position
            labels.append(strain + ' ' + condition)
            indexes.append(cell_rows)
        figure_size = dict(width=1000)
        y_label = 'Fluorescence intensity'
        title = GraphTitle+" "+param+" on "+growth[0]+" vs. "+growth[1]+" in "+scale+" scale"
        medians_legend = True
    else:
        print('Not a list')
        ungrowth = 'Solid' if growth == 'Liquid' else 'Liquid'
        # Drop the other growth condition for ALL strains of `order`, in both
        # tables (the reference strain keeps both conditions).
        plotted = list(order)
        df3 = df3[~(df3['Strain'].isin(plotted) & (df3['Growth'] == ungrowth))].copy()
        means3 = means3[~(means3['Strain'].isin(plotted) & (means3['Growth'] == ungrowth))].copy()

        # Normalisation reference: mean of the reference strain's Liquid medians.
        WT_mean = np.array(means3.loc[rows_of(means3, wt, 'Liquid'), param])
        reference = np.mean(WT_mean)

        def place_and_normalise(strain, condition, position):
            # Label one strain/condition group and divide `param` by the
            # reference in both tables; returns (cell mask, raw medians).
            cell_rows = rows_of(df3, strain, condition)
            median_rows = rows_of(means3, strain, condition)
            df3.loc[cell_rows, 'Labels'] = position
            means3.loc[median_rows, 'Labels'] = position
            raw = np.array(means3.loc[median_rows, param])
            means3.loc[median_rows, param] = means3.loc[median_rows, param] / reference
            df3.loc[cell_rows, param] = df3.loc[cell_rows, param] / reference
            return cell_rows, raw

        cell_rows, _ = place_and_normalise(wt, 'Liquid', 0.5)
        labels.append(wt+' '+'Liquid')
        indexes.append(cell_rows)
        print(wt+' Liquid mean='+str(WT_mean))

        # Same scalar reference as everywhere else, so medians and single
        # cells stay on the same scale even if replicate counts differ.
        cell_rows, Strain_mean = place_and_normalise(wt, 'Solid', 1.5)
        labels.append(wt+' '+'Solid')
        indexes.append(cell_rows)
        print(wt+' Solid mean='+str(Strain_mean))
        print(wt+' Solid fold increase='+str(Strain_mean/reference))

        for o, ordering in enumerate(order):
            cell_rows, Strain_mean = place_and_normalise(ordering, growth, o + 2.5)
            print(ordering+' mean='+str(Strain_mean))
            print(ordering+' fold increase='+str(Strain_mean/reference))
            labels.append(ordering+' '+growth)
            indexes.append(cell_rows)

        figure_size = dict(width=600, height=600)
        y_label = 'Fold increase'
        title = GraphTitle+" "+param+" on "+growth
        medians_legend = False

    p = bokeh.plotting.figure(
        x_range=labels,
        y_axis_type=scale,
        y_axis_label=y_label,
        title=title,
        **figure_size
    )
    p.xaxis.major_label_orientation = math.pi/4
    p.xaxis.major_label_text_font_size = "9pt"

    # single cells: one glyph per group, jittered along x (hence the x range)
    for i, index in enumerate(indexes):
        p.circle(
            source=df3.loc[index, :],
            x=jitter('Labels', width=0.3, range=p.x_range),
            y=param,
            color=colors[i],
            alpha=0.3,
        )

    # per-replicate medians on top
    median_glyph = dict(
        source=means3,
        x='Labels',
        y=param,
        size=10,
        line_color='black',
        fill_color='white',
    )
    if medians_legend:
        median_glyph['legend_label'] = "medians"
    p.circle(**median_glyph)

    p.add_tools(HoverTool(
        tooltips=[
            ('Strain', '@{Strain}'),
            ('Growth condition', '@{Growth}'),
            (param, '@{'+param+'}'),
            ('Biological replicate', '@{Bio_Rep}')
        ],
    ))
    return p, means3

def plottingDataUnique(df, means, wt, order, strain_to_remove, growth, param, colors, scale, GraphTitle):
    """Variant of ``plottingData`` that returns only the figure.

    * `growth` is a list: the reference strain `wt` is drawn as
      "<wt> Liquid" and "<wt> Solid", then every strain of `order` once per
      entry of `growth`; values are left untouched. The title reads
      ``growth[0]`` and ``growth[1]``.
    * `growth` is a single string: only "<wt> Liquid" is drawn for the
      reference strain, followed by the strains of `order` in that condition.
      `param` is divided by the mean of the reference strain's Liquid medians
      (equal to the single median when there is exactly one replicate). The
      mean of every plotted group and the medians table are printed.

    Parameters
    ----------
    df : pandas.DataFrame
        One row per cell, with the columns 'Strain', 'Growth', 'Bio_Rep'
        and `param`.
    means : pandas.DataFrame
        Medians table with the same columns.
    wt : str
        Name of the reference strain.
    order : list of str
        Strains to plot, in plotting order.
    strain_to_remove : list of str
        Strains removed from both tables before plotting.
    growth : list of str or str
        Growth condition(s), see above.
    param : str
        Column to plot on the y axis.
    colors : sequence
        Colors, indexed by group position.
    scale : str
        Passed to bokeh as ``y_axis_type``.
    GraphTitle : str
        Prefix of the figure title.

    Returns
    -------
    bokeh figure
        `df` and `means` themselves are not modified.
    """

    def rows_of(frame, strain, condition):
        # Boolean mask: exact strain name, growth condition matched with
        # str.match (i.e. `condition` is a regex anchored at the start).
        return (frame['Strain'] == strain) & frame['Growth'].str.match(condition)

    # Work on filtered copies; both are always defined, even for an empty `order`.
    unwanted = list(strain_to_remove)
    df3 = df[~df['Strain'].isin(unwanted)].copy()
    means3 = means[~means['Strain'].isin(unwanted)].copy()

    labels = []
    indexes = []

    if isinstance(growth, list):
        groups = [(wt, 'Liquid'), (wt, 'Solid')]
        groups += [(strain, condition) for strain in order for condition in growth]
        for slot, (strain, condition) in enumerate(groups):
            position = slot + 0.5  # centre of the slot-th category on the x axis
            cell_rows = rows_of(df3, strain, condition)
            df3.loc[cell_rows, 'Labels'] = position
            means3.loc[rows_of(means3, strain, condition), 'Labels'] = position
            labels.append(strain + ' ' + condition)
            indexes.append(cell_rows)
        title = GraphTitle+" "+param+" on "+growth[0]+" vs. "+growth[1]+" in "+scale+" scale"
        report = False
    else:
        ungrowth = 'Solid' if growth == 'Liquid' else 'Liquid'
        # Drop the other growth condition for ALL strains of `order`, in both tables.
        plotted = list(order)
        df3 = df3[~(df3['Strain'].isin(plotted) & (df3['Growth'] == ungrowth))].copy()
        means3 = means3[~(means3['Strain'].isin(plotted) & (means3['Growth'] == ungrowth))].copy()

        # Scalar normalisation reference (a plain list cannot be divided).
        reference = float(np.mean(means3.loc[rows_of(means3, wt, 'Liquid'), param]))

        groups = [(wt, 'Liquid')] + [(strain, growth) for strain in order]
        for slot, (strain, condition) in enumerate(groups):
            position = slot + 0.5
            cell_rows = rows_of(df3, strain, condition)
            median_rows = rows_of(means3, strain, condition)
            df3.loc[cell_rows, 'Labels'] = position
            means3.loc[median_rows, 'Labels'] = position
            means3.loc[median_rows, param] = means3.loc[median_rows, param] / reference
            df3.loc[cell_rows, param] = df3.loc[cell_rows, param] / reference
            labels.append(strain + ' ' + condition)
            indexes.append(cell_rows)
        title = GraphTitle+" "+param+" on "+growth
        report = True
        # NOTE(review): rows of means3 that were not relabelled above (e.g. the
        # reference strain's Solid rows) keep the 'Labels' value they arrived
        # with and are still drawn by the medians glyph below.

    p = bokeh.plotting.figure(
        width=600,
        height=600,
        x_range=labels,
        y_axis_type=scale,
        y_axis_label='Fluorescence intensity',
        title=title
    )
    p.xaxis.major_label_orientation = math.pi/4
    p.xaxis.major_label_text_font_size = "9pt"

    # single cells: one glyph per group, jittered along x (hence the x range)
    for i, index in enumerate(indexes):
        if report:
            print(df3.loc[index, param].mean())
        p.circle(
            source=df3.loc[index, :],
            x=jitter('Labels', width=0.3, range=p.x_range),
            y=param,
            color=colors[i],
            alpha=0.3,
        )
    if report:
        print(means3)

    # per-replicate medians on top
    p.circle(
        source=means3,
        x='Labels',
        y=param,
        size=10,
        line_color='black',
        fill_color='white',
        legend_label="medians"
    )
    p.add_tools(HoverTool(
        tooltips=[
            ('Strain', '@{Strain}'),
            ('Growth condition', '@{Growth}'),
            (param, '@{'+param+'}'),
            ('Biological replicate', '@{Bio_Rep}')
        ],
    ))
    return p

def getDFStats(df, param):
    """Label every strain/growth group and compute per-replicate statistics.

    A float column 'Labels' is written into `df` itself (the caller's table
    is modified): the n-th strain/growth combination, taken in order of first
    appearance of the strains and then of the growth conditions, gets
    ``n + 0.5``.

    Parameters
    ----------
    df : pandas.DataFrame
        One row per cell, with the columns 'Strain', 'Growth', 'Bio_Rep'
        and `param`.
    param : str
        Name of the column to summarise.

    Returns
    -------
    means : pandas.DataFrame
        Median of `param` (stored under the name `param`) and number of rows
        ('N') per Strain / Growth / Labels / Bio_Rep.
    stds : pandas.DataFrame
        Standard deviation of `param` for the same grouping.
    indexes : list of pandas.Series
        Boolean row mask for every strain x growth x replicate combination
        (including combinations without any row).
    labels : list of str
        "<strain> <growth>" for every strain x growth combination.
    """
    Strains = list(df.Strain.unique())
    Growth = list(df.Growth.unique())
    Reps = list(df.Bio_Rep.unique())

    groups = [(strain, growth) for strain in Strains for growth in Growth]
    labels = [strain+' '+growth for strain, growth in groups]
    indexes = [
        (df['Strain'] == strain) & (df['Growth'] == growth) & (df['Bio_Rep'] == rep)
        for strain, growth in groups
        for rep in Reps
    ]

    # Create the column as float right away: the positions are x.5 values and
    # writing them into an integer column is an incompatible-dtype assignment.
    df['Labels'] = 0.0
    for slot, (strain, growth) in enumerate(groups):
        df.loc[(df['Strain'] == strain) & (df['Growth'] == growth), 'Labels'] = slot + 0.5

    grouped = df.groupby(['Strain', 'Growth', 'Labels', 'Bio_Rep'])[param]
    means = grouped.agg(['median', 'count']).rename(columns={'median': param, 'count': 'N'}).reset_index()
    stds = grouped.std().to_frame().reset_index()
    return means, stds, indexes, labels

In [5]:
data_path = 'H:/directory/FlowCytAnalysis/' # this is where the csv files exported from flow cytometry are stored, make that folder before running!
working_path = "H:/directory/PythonLacP1Analysis/" # this is where this script saves enhanced csv files, make that folder before running!

os.chdir(working_path)
new_dir = "date\\" # enter date or other name for your project, this folder is created inside working_path
working_data_path = working_path + new_dir
os.makedirs(new_dir, exist_ok=True)

os.chdir(data_path)
extension = 'csv'
list_csv = []
for root, dirs, files in os.walk(data_path, topdown=False):
    for name in files:
        # test the real file extension, not "csv" anywhere in the name
        if name.lower().endswith('.' + extension):
            list_csv.append(os.path.join(root, name))
#print(list_csv)

# this script plots individual datapoints, but there are too many in the flow cytometry readings --> reduce data points randomly
do_reduce = 1 # randomizes and reduces the data of the csv files
reduce_to = 1000 # maximum number of cells kept per csv file

for file in list_csv:
    df_temp = pd.read_csv(file, sep=',', na_values='*')
    # everything below is parsed from the file name only, so that folder
    # names containing "sol" or "Data_" cannot be mistaken for sample info
    file_name = os.path.basename(file)
    if do_reduce:
        len_full = len(df_temp)
        # never ask for more rows than the file holds (sample() would raise)
        df_temp = df_temp.sample(n=min(reduce_to, len_full))
        len_red = len(df_temp)
        print("Randomized reduction from",len_full,"to",len_red,"-",file_name)

    # part of the name after 'Data_', split at each '_': [strain, ..., condition, 'repN.csv']
    split_name = file_name.split('Data_', 1)[1].split('_')
    df_temp['Strain'] = split_name[0]
    # look for "sol" in the items after the strain name; anything else counts as liquid
    if any('sol' in item for item in split_name[1:]):
        df_temp['Growth'] = 'Solid'
    else:
        df_temp['Growth'] = 'Liquid'
    # last item is 'rep<number>.<extension>'
    replicate = os.path.splitext(split_name[-1])[0]
    df_temp['Bio_Rep'] = int(replicate.split('rep')[-1])
    df_temp.to_csv(working_data_path + 'enhanced_' + file_name, index=None, header=True)

# collect the enhanced files (defined even when no input file was found)
list_enhanced_csv = []
for root, dirs, files in os.walk(working_data_path, topdown=False):
    for name in files:
        if name.lower().endswith('.' + extension):
            list_enhanced_csv.append(os.path.join(root, name))

combined_csv = pd.concat([pd.read_csv(f) for f in list_enhanced_csv ], sort=False)
# the following cells read and write relative to working_path
os.chdir(working_path)
combined_csv.to_csv( "Summary_lacP1.csv", index=False, encoding='utf-8-sig')

Randomized reduction from 51901 to 1000 - 20200908_413_Data_WT_lacP1_liq_rep1.csv
Randomized reduction from 53293 to 1000 - 20200908_413_Data_WT_lacP1_sol_rep1.csv
Randomized reduction from 53399 to 1000 - 20200908_421_Data_pilG-_lacP1_liq_rep1.csv
Randomized reduction from 50521 to 1000 - 20200908_421_Data_pilG-_lacP1_sol_rep1.csv
Randomized reduction from 57224 to 1000 - 20200908_422_Data_pilH-_lacP1_liq_rep1.csv
Randomized reduction from 53186 to 1000 - 20200908_422_Data_pilH-_lacP1_sol_rep1.csv
Randomized reduction from 54145 to 1000 - 20200908_438_Data_Flag-PilH_lacP1_liq_rep1.csv
Randomized reduction from 55781 to 1000 - 20200908_438_Data_Flag-PilH_lacP1_sol_rep1.csv
Randomized reduction from 55527 to 1000 - 20200908_439_Data_Flag-PilH-DA_lacP1_liq_rep1.csv
Randomized reduction from 50161 to 1000 - 20200908_439_Data_Flag-PilH-DA_lacP1_sol_rep1.csv
Randomized reduction from 53685 to 1000 - 20200908_440_Data_Flag-PilH-DE_lacP1_liq_rep1.csv
Randomized reduction from 57325 to 1000 - 

Randomized reduction from 53029 to 1000 - 20220324_611_Data_Flag-PilG-PilH-DE_lacP1_sol_rep5.csv
Randomized reduction from 56193 to 1000 - 20220324_621_Data_Flag-PilG-pilH-cyaB-_lacP1_liq_rep5.csv
Randomized reduction from 67316 to 1000 - 20220324_621_Data_Flag-PilG-pilH-cyaB-_lacP1_sol_rep5.csv
Randomized reduction from 56410 to 1000 - 20220324_623_Data_Flag-PilG-PilH-DA-cyaB-_lacP1_liq_rep5.csv
Randomized reduction from 66668 to 1000 - 20220324_623_Data_Flag-PilG-PilH-DA-cyaB-_lacP1_sol_rep5.csv
Randomized reduction from 57043 to 1000 - 20220324_626_Data_Flag-PilG-PilH-DE-cyaB-_lacP1_liq_rep5.csv
Randomized reduction from 64379 to 1000 - 20220324_626_Data_Flag-PilG-PilH-DE-cyaB-_lacP1_sol_rep5.csv
Randomized reduction from 58073 to 1000 - 20220324_628_Data_Flag-PilG-cyaB-_lacP1_liq_rep5.csv
Randomized reduction from 55281 to 1000 - 20220324_628_Data_Flag-PilG-cyaB-_lacP1_sol_rep5.csv
Randomized reduction from 58289 to 1000 - 20220324_678_Data_Flag-PilG-PilH-DE-cpdA-_lacP1_liq_rep5.cs

In [6]:
# The two fluorescence channels are expected under the column names "yfp" and
# "mKate2"; adapt the two names below if your exported csv files use others.

df_long = pd.read_csv('Summary_lacP1.csv', sep=',', na_values='*').assign(
    lacP1_ratio=lambda table: table['yfp'] / table['mKate2']
)

print(f"Length of dataframe to plot: {len(df_long)}")
df_long.head()

Length of dataframe to plot: 110000


Unnamed: 0,yfp,mKate2,Strain,Growth,Bio_Rep,lacP1_ratio
0,1049.88,1722.87,WT,Liquid,1,0.609379
1,1035.84,2230.74,WT,Liquid,1,0.464348
2,1058.46,2851.2,WT,Liquid,1,0.371233
3,1045.2,2521.53,WT,Liquid,1,0.41451
4,1701.96,7581.6,WT,Liquid,1,0.224486


In [9]:
# Column that is summarised and plotted; any other numeric column of df_long
# can be used instead.
param = 'lacP1_ratio'

means, stds, indexes, labels = getDFStats(df_long, param)

# keep a copy of the per-replicate medians on disk, then show them
means.to_csv("Means_lacP1.csv", encoding='utf-8-sig', index=False)
means.head(100)

Unnamed: 0,Strain,Growth,Labels,Bio_Rep,lacP1_ratio,N
0,Flag-PilG,Liquid,12.5,1,0.252754,1000
1,Flag-PilG,Liquid,12.5,2,0.281497,1000
2,Flag-PilG,Liquid,12.5,3,0.330050,1000
3,Flag-PilG,Liquid,12.5,5,0.295376,1000
4,Flag-PilG,Solid,13.5,1,3.077868,1000
...,...,...,...,...,...,...
94,pilH-,Solid,5.5,3,20.476844,1000
95,pilH-,Solid,5.5,4,20.071104,1000
96,pilH-,Solid,5.5,5,16.463190,1000
97,pilJ-,Liquid,20.5,3,0.274497,1000


In [22]:
# This shows the whole dataframe

# N_sum = number of cells of one strain in one growth condition, summed over
# its biological replicates. The total is written on the first row of each
# strain/condition group and 0 on the remaining rows. Grouping makes this
# independent of the row order of `means`.
cells_per_group = means.groupby(["Strain", "Growth"])["N"]
first_row_of_group = cells_per_group.cumcount() == 0
means["N_sum"] = cells_per_group.transform("sum").where(first_row_of_group, 0).astype('int')

with pd.option_context('display.max_rows', None,
                      'display.precision', 3,
                      ):
    print(means)

                     Strain  Growth  Labels  Bio_Rep  lacP1_ratio     N  N_sum
0                 Flag-PilG  Liquid    12.5        1        0.253  1000   4000
1                 Flag-PilG  Liquid    12.5        2        0.281  1000      0
2                 Flag-PilG  Liquid    12.5        3        0.330  1000      0
3                 Flag-PilG  Liquid    12.5        5        0.295  1000      0
4                 Flag-PilG   Solid    13.5        1        3.078  1000   5000
5                 Flag-PilG   Solid    13.5        2        2.746  1000      0
6                 Flag-PilG   Solid    13.5        3        3.906  1000      0
7                 Flag-PilG   Solid    13.5        4        3.378  1000      0
8                 Flag-PilG   Solid    13.5        5        3.073  1000      0
9              Flag-PilG-DA  Liquid    14.5        1        0.188  1000   3000
10             Flag-PilG-DA  Liquid    14.5        2        0.215  1000      0
11             Flag-PilG-DA  Liquid    14.5        3

In [46]:
# Final plot: every cell is drawn as an individual point (slow when there are many).

growth = ['Liquid', 'Solid']
colors = 3 * bokeh.palettes.d3['Category20'][20]

# Arguments of plottingData, in order: cells, medians, reference strain,
# strains to plot, strains to ignore, growth, param, colors, axis scale, title prefix.
reference_strain = 'WT'
strains_to_plot = ['pilG-', 'Flag-PilG', 'Flag-PilG-DA', 'Flag-PilG-DE']
strains_to_ignore = [
    'Flag-PilG-PilH-DA-cyaB-',
    'Flag-PilG-PilH-DE-cpdA-',
    'Flag-PilG-cpda-',
    'Flag-PilG-cyaB-',
    'Flag-PilG-pilH-',
    'Flag-PilG-pilH-cyaB-',
    'Flag-PilH',
    'Flag-PilH-DA',
    'Flag-PilH-DE',
    'cpdA-',
    'cyaB-',
    'pilH-',
    'pilJ-',
]
p1, me1 = plottingData(
    df_long, means, reference_strain, strains_to_plot, strains_to_ignore,
    growth, param, colors, 'linear', '',
)

# cosmetic settings: no grid lines, no minor ticks, fixed y axis, svg export
for grid in (p1.xgrid, p1.ygrid):
    grid.grid_line_color = None
for axis in (p1.xaxis, p1.yaxis):
    axis.minor_tick_line_color = None
p1.y_range = Range1d(0, 50) # set y-axis
p1.output_backend = 'svg'
bokeh.io.show(p1)