In [121]:
from os import makedirs
from os import mkdir
from os.path import isdir

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from scipy import stats
from scipy.signal import savgol_filter

In [3]:
# Convert the space-separated legend file into a two-column CSV:
# the first token of every line becomes column one, all remaining non-empty
# tokens are re-joined with single spaces and become column two.
# The input is opened first so that a missing legend.tsf does not leave a
# freshly truncated legend.csv behind.
# NOTE(review): a description that itself contains a comma would add extra
# CSV columns — confirm the legend never contains commas.
with open("../data/clariostar/legend.tsf", "r") as legendfile, \
        open("../data/clariostar/legend.csv", "w") as csv_legend:
    for raw_line in legendfile:
        tokens = raw_line.rstrip().split(" ")
        if tokens == [""]:
            # Blank line: there is no record to write.
            continue
        description = " ".join(token for token in tokens[1:] if token != "")
        # Always terminate the record. Previously a line consisting of a
        # single token was written without "\n" and merged with the next one.
        csv_legend.write(tokens[0] + "," + description + "\n")

In [4]:
# Load the legend CSV into a DataFrame; the following cell reads its
# "M_Number" and "BMM9 variant" columns.
legend = pd.read_csv("../data/clariostar/legend.csv")

In [71]:
# Map every media identifier ("M_Number") to its "BMM9 variant" description.
# A media identifier has to resolve to exactly one description, so duplicated
# identifiers are rejected explicitly instead of failing inside `.item()`.
if legend["M_Number"].duplicated().any():
    raise ValueError("[-] ERROR duplicated M_Number entries in legend: {}".format(
        legend.loc[legend["M_Number"].duplicated(), "M_Number"].unique().tolist()))
# Single pass over both columns instead of re-filtering the frame per row.
legend_dict = dict(zip(legend["M_Number"], legend["BMM9 variant"]))

In [5]:
def read_clariostar_table(dataframe_path:str,separator:str,lines_to_skip:int)->pd.DataFrame:
    '''read_clariostar_table

    Function for reading a csv file into a DataFrame via pd.read_csv.

    :param dataframe_path: location of the file, forwarded to pd.read_csv
        :type str
    :param separator: column delimiter, forwarded as ``sep``
        :type str
    :param lines_to_skip: forwarded as ``skiprows``
        :type int
    :returns: the parsed table
        :rtype pd.DataFrame
    :raises Exception: if pd.read_csv fails; the original error is attached
        as ``__cause__``

    '''
    try:
        return pd.read_csv(dataframe_path,sep=separator,skiprows=lines_to_skip)
    except Exception as e:
        # Chain explicitly so the underlying pandas/OS error stays inspectable.
        raise Exception("[-] ERROR reading dataframe with exception: {}".format(e)) from e

In [29]:
def read_layout(layout:pd.DataFrame)->tuple:
    '''read_layout

    This function reads the layout dataframe and returns a dictionary with
    content information for each well and a dictionary with media and blank association.

    Well identifiers are built from the row label and the 1-based column
    position, zero-padded to two digits ("A01" ... "A09", "A10", ...).
    Cells that do not hold a string (e.g. NaN for an empty well) are skipped.
    A content name containing "M" counts as media; otherwise a name containing
    "B" counts as blank. Media and blank are paired when the integer after the
    last "M" equals the integer after the last "B".

    :param layout: plate layout, one row per plate row, one column per plate column
        :type pd.DataFrame
    :returns: (layout_dict, media_blank_dict) -- well id -> content name and
        media name -> blank name
        :rtype tuple
    :raises Exception: on any failure, including a media without matching blank

    '''
    try:
        layout_dict = {}
        blank_vals = []
        media_vals = []
        for row_label in layout.index:
            for position, well_content in enumerate(layout.loc[row_label,:], start=1):
                if not isinstance(well_content, str):
                    # Empty well: nothing to register for this position.
                    continue
                # Two-digit padding; a fixed "0" prefix produced "A010" for column 10.
                well_id = "{}{:02d}".format(row_label, position)
                layout_dict[well_id] = well_content

                if "M" in well_content:
                    if well_content not in media_vals:
                        media_vals.append(well_content)
                elif "B" in well_content and well_content not in blank_vals:
                    blank_vals.append(well_content)

        media_blank_dict = {}
        for media in media_vals:
            media_number = int(media.split("M")[-1])
            # First blank (in order of appearance) carrying the same number.
            blank = next((candidate for candidate in blank_vals
                          if int(candidate.split("B")[-1]) == media_number), None)
            if blank is None:
                raise Exception("[-] ERROR there is no blank for media: {}".format(media))
            media_blank_dict[media] = blank
        return layout_dict, media_blank_dict
    except Exception as e:
        raise Exception("[-] ERROR during creation of layout_dict with exception: {}".format(e)) from e

In [191]:
def _find_single_row(row_labels:pd.Series,row_name:str):
    '''Return the index label of the only row whose first-column value is row_name.'''
    matches = row_labels.index[row_labels == row_name].to_list()
    if len(matches) != 1:
        raise Exception("[-] Expected exactly one {} row in the dataframe, found {}!".format(row_name, len(matches)))
    return matches[0]


def _blank_level(blank_values,window:int)->float:
    '''Mean of the first `window` blank readings; replicate blank columns are averaged per row first.'''
    if isinstance(blank_values, pd.DataFrame):
        blank_values = blank_values.mean(axis=1)
    return blank_values.iloc[:window].mean()


def create_od_gfp_dataframes(dataframe:pd.DataFrame,layout_dict:dict,blank_dict:dict,blank_window:int=50)->tuple:
    '''create_od_gfp_dataframes

    This function takes the raw dataframe and the layout_dict dictionary to
    create new columns that can be used to assess the data for OD600 and GFP.
    This data is within the rows: "Raw Data (600 1)" and "Raw Data (470-15 2)",
    identified by the value in the first column of the raw dataframe.
    The function also calculates BLANK values for each experimental unit
    (mean over the first `blank_window` readings of the associated blank) and
    finally produces blank corrected datasets for each experiment, which are
    returned in form of dictionaries. The returned tables are re-indexed 1..n.

    :param dataframe: raw table; must contain exactly one "Content", one
        "Group" and one "Well" row in its first column
        :type pd.DataFrame
    :param layout_dict: well id -> content name
        :type dict
    :param blank_dict: media name -> blank name
        :type dict
    :param blank_window: number of leading readings used for the BLANK value
        :type int
    :returns: (od_dict, gfp_dict), both keyed by media name
        :rtype tuple
    :raises Exception: on any failure during processing

    '''
    try:
        print("[*] Creating OD/GFP dataframe dictionaries")
        data = dataframe.copy()
        # Row labels are taken positionally from the first column so the
        # function does not depend on that column being named "Unnamed: 0".
        row_labels = data.iloc[:,0]

        # the table is only considered valid with exactly one row of each kind
        _find_single_row(row_labels, "Content")
        _find_single_row(row_labels, "Group")
        well_row = _find_single_row(row_labels, "Well")

        # rename columns based on well row and layout dict; columns whose
        # well value is not part of the layout keep their original name
        new_columns = [layout_dict.get(well_value, col)
                       for col, well_value in zip(data.columns, data.loc[well_row])]
        print("[*] Setting up new columns")
        data.columns = new_columns

        # get OD and GFP data
        od_data = data[row_labels == "Raw Data (600 1)"]
        gfp_data = data[row_labels == 'Raw Data (470-15 2)']

        # get a list of unique media entries (order of first appearance)
        print("[*] Creating list of unique experiments")
        media_entries = []
        for media in layout_dict.values():
            if "M" in media and media not in media_entries:
                media_entries.append(media)

        # loop over each media data and create blank corrected dataframes
        od_dict = {}
        gfp_dict = {}
        print("[*] Looping over unique media entries (experiments)")
        for media in media_entries:
            blank = blank_dict[media]
            blank_od = _blank_level(od_data[blank].astype(float), blank_window)
            blank_gfp = _blank_level(gfp_data[blank].astype(float), blank_window)

            print("\t[*] BLANK value OD600 for media {} is {}".format(media, blank_od))
            print("\t[*] BLANK value GFP is {}".format(blank_gfp))
            corrected_od = od_data[media].astype(float) - blank_od
            corrected_gfp = gfp_data[media].astype(float) - blank_gfp

            # replace the raw row labels with a 1-based reading counter
            corrected_od.index = list(range(1,corrected_od.index.size+1,1))
            corrected_gfp.index = list(range(1,corrected_gfp.index.size+1,1))

            od_dict[media] = corrected_od
            gfp_dict[media] = corrected_gfp

        return od_dict, gfp_dict
    except Exception as e:
        raise Exception("[-] ERROR creating od/gfp dataframe with exception: {}".format(e)) from e

In [188]:
def _save_curve_plot(values,title:str,ylabel:str,plot_path:str,color:str,alpha=None,ylim=None,with_mean:bool=False,tick_step:int=40)->None:
    '''Draw one blank corrected time course, save it to plot_path and close the figure.'''
    if isinstance(values, pd.Series):
        # A media with a single replicate arrives as Series; treat it as a
        # one-column table so the row-wise statistics below stay valid.
        values = values.to_frame()
    x_index = values.index.to_numpy()
    # reading counter -> hours; presumably one reading every 10 minutes — TODO confirm
    hours = ((x_index * 10)/60).round(2)

    fig, ax = plt.subplots(figsize=(8,6))
    try:
        ax.plot(values, color=color, alpha=alpha)
        if with_mean and values.shape[1] > 1:
            # mean +/- standard deviation across replicates at every tick position
            ax.errorbar(x_index[::tick_step],
                        values.mean(axis=1).iloc[::tick_step],
                        yerr=values.std(axis=1).iloc[::tick_step],
                        ecolor="red",capsize=5, color="red")
        ax.set_xticks(x_index[::tick_step])
        ax.set_xticklabels(hours[::tick_step])
        ax.set_title(title, fontsize=10)
        ax.set_xlabel("time in hours [h]")
        ax.set_ylabel(ylabel)
        if ylim is not None:
            ax.set_ylim(*ylim)
        ax.grid()
        fig.savefig(plot_path,dpi=300)
    finally:
        # close even when plotting/saving fails, otherwise figures pile up
        plt.close(fig)


def plot_od_gfp(od_data_dict:dict,gfp_data_dict:dict,date:str,media_legend:dict=None)->int:
    '''plot_od_gfp

    This function takes the od_dict and gfp_dict produced by the
    create_od_gfp_dataframes function and creates basic overview plots.
    The save path is composed of the media and date value:
    ../results/figures/clariostar/<date>/<media>_od_data.svg and
    ../results/figures/clariostar/<date>/<media>_gfp_data.svg.
    Missing directories (including parents) are created. Media without data
    only produce a warning on stdout.

    :param od_data_dict: media name -> blank corrected OD600 table
        :type dict
    :param gfp_data_dict: media name -> blank corrected GFP table
        :type dict
    :param date: name of the output sub directory
        :type str
    :param media_legend: media name -> description used in the plot title;
        defaults to the notebook level legend_dict. Media without an entry
        are titled with their name only.
        :type dict
    :returns: 1 on success
        :rtype int
    :raises Exception: on any failure during plotting

    '''
    try:
        if media_legend is None:
            media_legend = legend_dict
        print("[*] Saving plots in ../results/figures/clariostar/{}/*".format(date))
        savep = "../results/figures/clariostar/{}/".format(date)
        if not isdir(savep):
            print("\t[*] Directory does not yet exist, creating directory: {}".format(savep))
            # makedirs also creates missing parent directories
            makedirs(savep, exist_ok=True)

        print("[*] Looping over keys and plotting data ...")
        for media, od_data in od_data_dict.items():
            gfp_data = gfp_data_dict[media]
            if media in media_legend:
                title = media + ": " + str(media_legend[media])
            else:
                title = media

            if not od_data.empty:
                _save_curve_plot(od_data, title, "blank corrected OD600",
                                 savep + media + "_od_data.svg",
                                 color="blue", alpha=0.5, ylim=(-0.2,3.0), with_mean=True)
            else:
                print("\t[WARNING] There is no OD data available for {}".format(media))

            if not gfp_data.empty:
                _save_curve_plot(gfp_data, title, "blank corrected GFP RFU",
                                 savep + media + "_gfp_data.svg",
                                 color="green")
            else:
                print("\t[WARNING] There is no GFP data available for {}".format(media))

        print("[+] DONE")
        return 1
    except Exception as e:
        raise Exception("[-] ERROR creating basic OD/GFP plots with exception: {}".format(e)) from e

In [186]:
# NOTE(review): ad-hoc check that relies on `gfp_dict` already existing in the
# kernel (it is assigned by the per-date processing loop further down) —
# presumably leftover debugging; fails on Restart & Run All.
gfp_dict["M6"].empty

True

In [167]:
from os import listdir

In [196]:
# Discover the measurement runs stored in data_path. The paths built below
# assume every run consists of the two files "<date>_clariostar_data.CSV" and
# "<date>_clariostar_layout.csv".
data_path = "../data/clariostar/data_files/"
run_marker = "_clariostar_"
# Only file names carrying the marker describe a run; other directory entries
# (hidden files, checkpoints, ...) would otherwise yield bogus paths.
# Sorting makes the processing order independent of the listdir() order.
dates = sorted({
    file_name.split(run_marker)[0]
    for file_name in listdir(data_path)
    if run_marker in file_name
})
# date -> (path to raw data file, path to layout file)
data_layout_dict = {
    date: (data_path + date + "_clariostar_data.CSV", data_path + date + "_clariostar_layout.csv")
    for date in dates
}

In [198]:
# Process every measurement run: load the raw table and the plate layout,
# derive the blank corrected OD/GFP tables and write the overview plots.
for date, (path_to_data, path_to_layout) in data_layout_dict.items():
    print("[*] WORKING ON DATE: {}".format(date))

    data = read_clariostar_table(dataframe_path=path_to_data, separator=";", lines_to_skip=5)
    layout = pd.read_csv(path_to_layout, sep=";", index_col=0)

    layout_dict, media_blank_dict = read_layout(layout=layout)
    od_dict, gfp_dict = create_od_gfp_dataframes(
        dataframe=data,
        layout_dict=layout_dict,
        blank_dict=media_blank_dict,
    )
    plot_od_gfp(od_data_dict=od_dict, gfp_data_dict=gfp_dict, date=date)

[*] WORKING ON DATE: 24_02_02
[*] Creating OD/GFP dataframe dictionaries
[*] Setting up new columns
[*] Creating list of unique experiments
[*] Looping over unique media entries (experiments)
	[*] BLANK value OD600 for media M1 is 0.15159
	[*] BLANK value GFP is 1296.0
	[*] BLANK value OD600 for media M2 is 0.13072
	[*] BLANK value GFP is 1474.61
	[*] BLANK value OD600 for media M3 is 0.1323
	[*] BLANK value GFP is 1302.68
	[*] BLANK value OD600 for media M4 is 0.15354000000000004
	[*] BLANK value GFP is 1092.32
	[*] BLANK value OD600 for media M5 is 0.14190000000000003
	[*] BLANK value GFP is 1176.18
[*] Saving plots in ../results/figures/clariostar/24_02_02/*
[*] Looping over keys and plotting data ...
[+] DONE
[*] WORKING ON DATE: 24_02_05
[*] Creating OD/GFP dataframe dictionaries
[*] Setting up new columns
[*] Creating list of unique experiments
[*] Looping over unique media entries (experiments)
	[*] BLANK value OD600 for media M1 is 0.22973
	[*] BLANK value GFP is 1289.99
	[*] B