In [1]:
import os
import pandas as pd
path_to_incubations = "../../lab_work/sessions/incubations"


def load_incubation_data(path_to_incubations):
    dfs = []
    for file_name in os.listdir(path_to_incubations):
        path_to_file = os.path.join(path_to_incubations, file_name)
        print(path_to_file)
        dfs.append(pd.read_csv(path_to_file))

    result = pd.concat(dfs, ignore_index=True)

    return result
    # print(path_to_file)
    
def tidy_incubation_data(df):
    df['sample_name'] = df['sample_id']
    df['sample_id'] = df.apply(lambda row: set_sample_id(row), axis=1)
    df = df.drop(['sample_name'], axis=1)
    return df
    



def set_sample_id(row):
    sample_name = row['sample_name']
    if sample_name.startswith('40ml'):
        return f"40mL_{sample_name[-3:]}"
    else:
        return f"2mL_{sample_name}"
        
incubation_df = load_incubation_data(path_to_incubations=path_to_incubations)
incubation_df = tidy_incubation_data(incubation_df)
incubation_df

../../lab_work/sessions/incubations/2mLWetSpirullina.csv
../../lab_work/sessions/incubations/40mLDrySpirullina.csv
../../lab_work/sessions/incubations/2mLDrySpirullina.csv
../../lab_work/sessions/incubations/40mLWetSpirullina.csv


Unnamed: 0,sample_id,ratio,intended weight (mg),weight (mg),notes,incubation_start_date,biomass (mg),salt (mg),water (mg),innoculum added
0,2mL_2.1.0,01:00,109,129.0,,7/12/23,,,,
1,2mL_2.1.1,01:00,109,119.0,,7/12/23,,,,
2,2mL_2.1.2,01:00,109,165.0,,7/12/23,,,,
3,2mL_2.2.0,01:01,215,350.0,,7/12/23,,,,
4,2mL_2.2.1,01:01,215,248.0,,7/12/23,,,,
5,2mL_2.2.2,01:01,215,241.0,,7/12/23,,,,
6,2mL_2.3.0,16:01,115,118.0,,7/12/23,,,,
7,2mL_2.3.1,16:01,115,121.0,,7/12/23,,,,
8,2mL_2.3.2,16:01,115,124.0,,7/12/23,,,,
9,2mL_2.4.0,01:05,635,707.0,,7/12/23,,,,


In [20]:
import numpy as np
data_df = pd.read_csv('../../GCTCD_PDF_READER/GCPDFReader/csvs/tidied_1693276585.csv')
data_df

Unnamed: 0.2,Unnamed: 0.1,Unnamed: 0,Sample_Name,Sample_Date,Instrument,Peak,Time,Type,Area,Height,Width,Start,End,pdf_file_name,sample_id,peak_compound,is_std,known_conc,calculated_conc
0,0,0.0,40ML1_10,8/16/2023,GCTCD,1,3.151,BP,494087.00000,188091.00000,0.0386,3.107,3.267,../../lab_work/sessions/GCTCD/20230816/40mL_1....,40mL_1.1,,False,,
1,1,1.0,40ML1_10,8/16/2023,GCTCD,2,4.693,PP,1595.22729,652.50348,0.0375,4.642,4.791,../../lab_work/sessions/GCTCD/20230816/40mL_1....,40mL_1.1,CO2,False,,2766.147757
2,2,2.0,40ML1_10,8/16/2023,GCTCD,3,5.270,BP,124.49847,48.04475,0.0332,5.238,5.311,../../lab_work/sessions/GCTCD/20230816/40mL_1....,40mL_1.1,,False,,
3,3,3.0,40ML1_10,8/16/2023,GCTCD,4,5.853,PV,188.87746,73.72960,0.0339,5.817,5.888,../../lab_work/sessions/GCTCD/20230816/40mL_1....,40mL_1.1,,False,,
4,4,4.0,40ML1_10,8/16/2023,GCTCD,5,5.961,VV,529.92957,109.93177,0.0602,5.888,6.011,../../lab_work/sessions/GCTCD/20230816/40mL_1....,40mL_1.1,,False,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
429,429,,40ML1_10,8/16/2023,GCFID,-1,,,-1.00000,,,,,../../lab_work/sessions/GCFID/20230816/2023081...,40mL_1.1,CH4,False,,1.000000
430,430,,40ML1_20,8/16/2023,GCFID,-1,,,-1.00000,,,,,../../lab_work/sessions/GCFID/20230816/2023081...,40mL_1.2,CH4,False,,1.000000
431,431,,40ML1_30,8/16/2023,GCFID,-1,,,-1.00000,,,,,../../lab_work/sessions/GCFID/20230816/2023081...,40mL_1.3,CH4,False,,1.000000
432,432,,40ML1_40,8/16/2023,GCFID,-1,,,-1.00000,,,,,../../lab_work/sessions/GCFID/20230816/2023081...,40mL_1.4,CH4,False,,1.000000


In [21]:
new_df = pd.merge(data_df, incubation_df, on='sample_id', how='left')
# new_df[new_df['Sample_Name']=='2ML_2_10']['sample_id']
new_df['ratio']

0      01:00
1      01:00
2      01:00
3      01:00
4      01:00
       ...  
429    01:00
430    01:01
431    16:01
432    01:05
433    01:05
Name: ratio, Length: 434, dtype: object

In [24]:

from datetime import datetime

def set_incubation_length(row):
    try:
        sample_date  = datetime.strptime(row['Sample_Date'], '%m/%d/%Y')
        incubation_start_date  = datetime.strptime(row['incubation_start_date'], '%m/%d/%y')
        return (sample_date - incubation_start_date).days

    except Exception as e:
        # print(e, row['Sample_Date'], row['incubation_start_date'])
        return None
    
def set_salt_to_biomass(row):
    if row['is_std']:
        return None
    if row['sample_id'] == 'DROP_ME':
        return None
    if row['ratio'] == "01:00":
        return "1:0"
    if row['ratio'] == "01:01":
        return "1:1"
    if row['ratio'] == "16:01":
        return "16:1"
    if row['ratio'] == "01:05":
        return "1:5"



def set_treatment_type(row):
    sample_id = row['sample_id']
    treatments = {"1": "Dry", "2": "Wet"}
    if sample_id.startswith('40mL'):
        return treatments[sample_id[5]]
    if sample_id.startswith('2mL'):
        return treatments[sample_id[4]]
    return None
        

new_df['incubation_length'] = new_df.apply(lambda row: set_incubation_length(row), axis=1)
new_df['salt_ratio'] = new_df.apply(lambda row: set_salt_to_biomass(row), axis=1)
new_df['str_ratio'] = new_df.apply(lambda row: str(row['ratio']), axis=1)
new_df['treatment'] = new_df.apply(lambda row: set_treatment_type(row), axis=1)


new_df

Unnamed: 0.2,Unnamed: 0.1,Unnamed: 0,Sample_Name,Sample_Date,Instrument,Peak,Time,Type,Area,Height,...,notes,incubation_start_date,biomass (mg),salt (mg),water (mg),innoculum added,incubation_length,salt_ratio,str_ratio,treatment
0,0,0.0,40ML1_10,8/16/2023,GCTCD,1,3.151,BP,494087.00000,188091.00000,...,,6/26/23,1750.0,0.0,92.105263,~100 mg sand from death valley,51.0,1:0,01:00,Dry
1,1,1.0,40ML1_10,8/16/2023,GCTCD,2,4.693,PP,1595.22729,652.50348,...,,6/26/23,1750.0,0.0,92.105263,~100 mg sand from death valley,51.0,1:0,01:00,Dry
2,2,2.0,40ML1_10,8/16/2023,GCTCD,3,5.270,BP,124.49847,48.04475,...,,6/26/23,1750.0,0.0,92.105263,~100 mg sand from death valley,51.0,1:0,01:00,Dry
3,3,3.0,40ML1_10,8/16/2023,GCTCD,4,5.853,PV,188.87746,73.72960,...,,6/26/23,1750.0,0.0,92.105263,~100 mg sand from death valley,51.0,1:0,01:00,Dry
4,4,4.0,40ML1_10,8/16/2023,GCTCD,5,5.961,VV,529.92957,109.93177,...,,6/26/23,1750.0,0.0,92.105263,~100 mg sand from death valley,51.0,1:0,01:00,Dry
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
429,429,,40ML1_10,8/16/2023,GCFID,-1,,,-1.00000,,...,,6/26/23,1750.0,0.0,92.105263,~100 mg sand from death valley,51.0,1:0,01:00,Dry
430,430,,40ML1_20,8/16/2023,GCFID,-1,,,-1.00000,,...,,6/26/23,1750.0,1750.0,184.210526,~100 mg sand from death valley,51.0,1:1,01:01,Dry
431,431,,40ML1_30,8/16/2023,GCFID,-1,,,-1.00000,,...,,6/26/23,1750.0,110.0,97.894737,~100 mg sand from death valley,51.0,16:1,16:01,Dry
432,432,,40ML1_40,8/16/2023,GCFID,-1,,,-1.00000,,...,,6/26/23,1750.0,8750.0,552.631579,~100 mg sand from death valley,51.0,1:5,01:05,Dry


In [37]:
relevant_df  = new_df[(new_df['peak_compound'].notnull()) & (new_df['sample_id']!="DROP_ME")]
methane_df = relevant_df[(relevant_df['peak_compound'] == 'CH4') & (relevant_df['is_std'] == False)]
CO2_df = relevant_df[(relevant_df['peak_compound'] == 'CO2') & (relevant_df['is_std'] == False)]

methane_df[['calculated_conc', 'peak_compound', 'treatment', 'incubation_length', 'salt_ratio']]

Unnamed: 0,calculated_conc,peak_compound,treatment,incubation_length,salt_ratio
400,92.689336,CH4,Wet,35.0,1:0
402,86.148035,CH4,Wet,35.0,1:0
404,33.246407,CH4,Wet,35.0,1:1
406,30.79874,CH4,Wet,35.0,1:1
408,52.335823,CH4,Wet,35.0,16:1
410,52.439167,CH4,Wet,35.0,16:1
429,1.0,CH4,Dry,51.0,1:0
430,1.0,CH4,Dry,51.0,1:1
431,1.0,CH4,Dry,51.0,16:1
432,1.0,CH4,Dry,51.0,1:5


In [43]:
from bokeh.models import Range1d
import pandas as pd
import numpy as np
import bokeh.io
from bokeh.models import  ColumnDataSource,Range1d, LabelSet, Label
from bokeh.palettes import Spectral6

from bokeh.io import export_png
from bokeh import plotting
from bokeh.models import LinearColorMapper
from bokeh.models import ColorBar
from bokeh.transform import jitter
from bokeh. palettes import Inferno256
bokeh.io.output_notebook()





def create_chart(title, df):
    wet_df = ColumnDataSource(df[df["treatment"]=="Wet"])
    dry_df = ColumnDataSource(df[df["treatment"]=="Dry"])
    ratios = ["1:0", "16:1", "1:1", "1:5"]

    TOOLTIPS = [
    ("sample_id", "@sample_id"),
    ("ratio", "@salt_ratio")
        ]
    exp_cmap = LinearColorMapper(palette=Inferno256, low=60, high=0)
    p = plotting.figure(
        title=title,
        # frame_height=500,
        # frame_width=800,
        x_axis_label = "Salt:Organic Matter",
        y_axis_label = "Calculated Conc (ppm)",
        x_range=ratios,
        y_axis_type="log",
        y_range=[10e-2, 10e5],
        toolbar_location='above',
        tooltips = TOOLTIPS,
        sizing_mode="stretch_width"
    )
   
    p.triangle(
        source=wet_df,
        x = jitter("salt_ratio", width=0.3, range=p.x_range),
        y = "calculated_conc",
        color={"field":"incubation_length", "transform":exp_cmap},
        size=10,
        legend_label="Wet"
    )
    p.plus(
        source=dry_df,
        x = jitter("salt_ratio", width=0.3, range=p.x_range),
        y = "calculated_conc",
        color={"field":"incubation_length", "transform":exp_cmap},
        size=10,
        legend_label="Dry"
    )
    p.x_range.range_padding = 0

    p.add_layout(bokeh.models.Legend(), "right")
    p.legend.click_policy = "hide"
    bar = ColorBar(color_mapper=exp_cmap, location=(0,0))
    p.add_layout(bar, "right")
    return p

hold = create_chart("CO2 conc", CO2_df)
bokeh.io.show(hold)



In [44]:
hold = create_chart("Methane conc", methane_df)
bokeh.io.show(hold)
# methane_df[['calculated_conc', 'peak_compound', 'treatment', 'incubation_length', 'salt_ratio']]

Salt ratio v  conc 
- color by incubation length

In [30]:
import bokeh.io
from bokeh.models import  ColumnDataSource
from bokeh import plotting
from bokeh.models import LinearColorMapper
from bokeh.models import ColorBar
from bokeh.transform import factor_cmap
from bokeh.transform import jitter

bokeh.io.output_notebook()





def salt_ratio_v_conc(title, df):
    df = ColumnDataSource(df)
    ratios = ["1:0", f"16:1", "1:1", "1:5"]

    TOOLTIPS = [("sample_id", "@sample_id")]
    exp_cmap = LinearColorMapper(palette=Inferno256, low=60, high=0)
    p = plotting.figure(
        title=title,
        x_axis_label = "Salt:Organic Matter",
        y_axis_label = "Calculated Conc (ppm)",
        x_range=ratios,
        y_axis_type="log",
        y_range=[10e0, 10e5],
        toolbar_location='above',
        tooltips = TOOLTIPS,
        sizing_mode="stretch_width"
    )
    p.circle(
        source=df,
        x = jitter("salt_ratio", width=0.6, range=p.x_range),
        # x = "calculated_conc",
        y = "calculated_conc",
        color={"field":"incubation_length", "transform":exp_cmap},
        size=10,
        )
    bar = ColorBar(color_mapper=exp_cmap, location=(0,0))
    p.add_layout(bar, "right")
    return p

hold = salt_ratio_v_conc("CO2 conc v salt ratio", CO2_df)
bokeh.io.show(hold)



In [31]:
hold = salt_ratio_v_conc("Methane conc v salt ratio", methane_df)
bokeh.io.show(hold)