In [10]:
import numpy as np

import pandas as pd

import bokeh
import bokeh.plotting
from bokeh.plotting import ColumnDataSource
from bokeh.models import LabelSet
from bokeh.models import FuncTickFormatter

import bokeh.io
bokeh.io.output_notebook()

In [11]:
# Load the peak-area table; lines in the CSV that start with '#' are skipped as comments.
df = pd.read_csv(
    '200110_Deuterium_Transfer_Peak_Areas.csv',
    comment='#',
)

In [12]:
# Preview the first five rows of the loaded table.
df.head(n=5)

Unnamed: 0,Species,WellNumber,Peak ID,Ret Time,Start Tm,End Tm,m/z,Area,Area Percent,Height,Height Percent,A/H,Type,Outlier
0,L,1,C18,7.419,7.39,7.445,TIC,621717,34.61,845926,41.66,0.73,L,
1,L,1,D50C24,9.479,9.445,9.52,TIC,805713,44.85,983079,48.42,0.82,L,
2,L,1,D62C30,12.313,12.265,12.365,TIC,368857,20.53,201493,9.92,1.83,L,
3,L,2,C18,7.418,7.385,7.455,TIC,11062360,26.57,9426060,31.2,1.17,L,
4,L,2,D50C24,9.477,9.435,9.525,TIC,16309329,39.17,13409275,44.39,1.22,L,


In [13]:
#function to normalize the peak area of each unique peak using the area of the C18 peak for each sample
def normalize_area(data, peaks_per_sample=3, standard_peak='C18', standard_ng=25):
    """Normalize each peak area by the internal-standard peak of its sample.

    The rows of `data` are expected to be laid out sample by sample: every
    sample contributes exactly `peaks_per_sample` consecutive rows, one of
    which has ``'Peak ID' == standard_peak``. Each row's 'Area' is divided by
    the standard-peak area of the sample it belongs to.

    Two columns are added to `data` in place:

    * 'Normalized Area'    -- Area / standard-peak Area of the same sample
    * 'Hydrocarbon amount' -- 'Normalized Area' * `standard_ng`

    If both columns are already present the frame is returned untouched, so
    calling the function repeatedly is harmless.

    Parameters
    ----------
    data : pandas.DataFrame
        Must contain the columns 'Peak ID' and 'Area'.
    peaks_per_sample : int, optional
        Number of rows (peaks) recorded for every sample. Default 3.
    standard_peak : str, optional
        'Peak ID' value of the internal standard. Default 'C18'.
    standard_ng : float, optional
        Amount of internal standard injected per sample, in ng. Default 25
        (1 microliter of a 25 ng/microliter solution of c18 in hexane).

    Returns
    -------
    pandas.DataFrame
        The same object that was passed in.

    Raises
    ------
    ValueError
        If the number of rows is not `peaks_per_sample` times the number of
        standard-peak rows, i.e. the positional layout assumption is violated.
    """
    #check to make sure that the dataframe hasn't already been normalized
    if not {'Normalized Area', 'Hydrocarbon amount'}.issubset(data.columns):
        #Standard-peak areas, one per sample, in row order
        standard_areas = data.loc[data['Peak ID'] == standard_peak, 'Area'].values

        #The positional matching below is only meaningful when every sample has
        #exactly `peaks_per_sample` rows; fail loudly instead of producing NaNs.
        if len(standard_areas) * peaks_per_sample != len(data):
            raise ValueError(
                "Expected %d rows per sample (one '%s' row each), but found %d rows "
                "and %d '%s' rows" % (peaks_per_sample, standard_peak, len(data),
                                      len(standard_areas), standard_peak))

        #Repeat each standard area once per row of its sample
        per_row_standard = np.repeat(standard_areas, peaks_per_sample)

        #Divide positionally on plain arrays: dividing two Series would align on
        #index labels and give wrong/NaN values whenever `data` does not carry a
        #default 0..N-1 index (e.g. after filtering).
        data['Normalized Area'] = data['Area'].values / per_row_standard

        #Calculate the hydrocarbon amount, in ng, from the known injected amount
        #of the internal standard.
        data['Hydrocarbon amount'] = data['Normalized Area'] * standard_ng

    return data

In [14]:
# Add the 'Normalized Area' and 'Hydrocarbon amount' columns to the peak table.
df = normalize_area(data=df)

In [15]:
def box_and_whisker(data,plt,x_vals,y_vals):
    """Draw a box-and-whisker glyph set for `y_vals`, one box per `x_vals` category.

    For every category the quartiles of `y_vals` are computed. The box spans
    Q1..Q3 (drawn as two stacked bars meeting at the median) and the stems run
    from the box to Q1 - 1.5*IQR / Q3 + 1.5*IQR, clamped so they never extend
    past the smallest / largest value of the category.

    Individual outlier points are not drawn by this function.

    Parameters
    ----------
    data : pandas.DataFrame
        Table containing the columns named by `x_vals` and `y_vals`.
    plt : object
        Plot to draw on; must provide ``segment`` and ``vbar`` (in this
        notebook a bokeh figure is passed).
    x_vals : str
        Name of the categorical column that defines the boxes.
    y_vals : str
        Name of the numeric column whose distribution is drawn.

    Returns
    -------
    The `plt` object that was passed in, with the glyphs added.
    """
    # Group only the plotted column: computing quantiles over the whole frame
    # fails when it also holds non-numeric columns.
    groups = data.groupby(x_vals)[y_vals]
    q1 = groups.quantile(q=0.25)
    q2 = groups.quantile(q=0.5)
    q3 = groups.quantile(q=0.75)
    iqr = q3 - q1

    # Stem ends: 1.5*IQR beyond the box, but never past the observed extremes
    qmin = groups.quantile(q=0.00)
    qmax = groups.quantile(q=1.00)
    upper = np.minimum(q3 + 1.5*iqr, qmax)
    lower = np.maximum(q1 - 1.5*iqr, qmin)

    # Category labels, in the same (sorted) order as the grouped statistics
    cats = list(q3.index)

    # stems
    plt.segment(cats, upper, cats, q3, line_color="black")
    plt.segment(cats, lower, cats, q1, line_color="black")

    #boxes
    plt.vbar(cats, 0.7, q2, q3, fill_color=None, line_color="black")
    plt.vbar(cats, 0.7, q1, q2, fill_color=None, line_color="black")

    # whiskers (zero-height bars, simpler than segments)
    plt.vbar(cats, top=lower, bottom=lower, width=0.2, line_color="black")
    plt.vbar(cats, top=upper, bottom=upper, width=0.2, line_color="black")

    return plt

In [21]:
# Box-and-whisker plot of the D50C24 hydrocarbon amount per Type, with the
# individual measurements of every well overlaid as horizontally jittered points.
#
# NOTE(review): `Identifiers` and `Labels` are not defined in any cell visible
# here; they must exist in the kernel before this cell runs -- confirm where
# they come from and define them above so the notebook survives Restart & Run All.
plotting_df = df.loc[df['Peak ID'] == 'D50C24']

# Fixed seed so the random jitter offsets are identical on every run
np.random.seed(666)

p = bokeh.plotting.figure(plot_width=800,
                          plot_height=600,
                          title='C24',
                          x_range=['L', 'U'],
                          y_axis_label='ng D50C24',
                          y_axis_type='linear')

colors = ['#494949', '#5F56FF', '#494949', '#5F56FF',
          '#494949', '#C42F2F', '#494949', '#C42F2F',
          '#494949', '#832161', '#494949', '#832161']

p = box_and_whisker(plotting_df, p, 'Type', 'Hydrocarbon amount')

# One glyph per well. Iterate the well numbers directly: wrapping them in
# enumerate() only to discard the index rebinds `_`, which IPython uses for
# the last cell output.
for i in np.unique(plotting_df['WellNumber']):

    # Rows belonging to this well (selected once and reused for both columns)
    well_rows = plotting_df.loc[plotting_df['WellNumber'] == i]

    data = {'Type': well_rows['Type'].values,
            'hydrocarbon amount': well_rows['Hydrocarbon amount'].values}

    source = ColumnDataSource(data=data)

    # Random horizontal offset in [-0.25, 0.25) shared by all points of this well
    offsetVal = (np.random.rand(1)[0] - 0.5) * 0.5

    p.circle(bokeh.transform.dodge('Type', offsetVal, range=p.x_range),
             'hydrocarbon amount',
             source=source,
             color=bokeh.transform.factor_cmap('Type', colors, Identifiers),
             alpha=0.6,
             size=7)

p.xgrid.visible = False
p.ygrid.visible = False

# Add custom axis: tick text is looked up in `Labels`, which is interpolated
# into the JavaScript snippet below

p.xaxis.formatter = FuncTickFormatter(code="""
var labels = %s;
return labels[tick];
""" %Labels)

bokeh.io.show(p)

In [22]:
# Box-and-whisker plot of the D62C30 hydrocarbon amount per Type, with the
# individual measurements of every well overlaid as horizontally jittered points.
#
# NOTE(review): `Identifiers` and `Labels` are not defined in any cell visible
# here; they must exist in the kernel before this cell runs -- confirm where
# they come from and define them above so the notebook survives Restart & Run All.
plotting_df = df.loc[df['Peak ID'] == 'D62C30']

# Fixed seed so the random jitter offsets are identical on every run
np.random.seed(666)

# Title and y-axis label name the peak actually plotted in this cell (D62C30);
# they previously still carried the C24 / D50C24 text of the preceding figure.
p = bokeh.plotting.figure(plot_width=800,
                          plot_height=600,
                          title='C30',
                          x_range=['L', 'U'],
                          y_axis_label='ng D62C30',
                          y_axis_type='linear')

colors = ['#494949', '#5F56FF', '#494949', '#5F56FF',
          '#494949', '#C42F2F', '#494949', '#C42F2F',
          '#494949', '#832161', '#494949', '#832161']

p = box_and_whisker(plotting_df, p, 'Type', 'Hydrocarbon amount')

# One glyph per well. Iterate the well numbers directly: wrapping them in
# enumerate() only to discard the index rebinds `_`, which IPython uses for
# the last cell output.
for i in np.unique(plotting_df['WellNumber']):

    # Rows belonging to this well (selected once and reused for both columns)
    well_rows = plotting_df.loc[plotting_df['WellNumber'] == i]

    data = {'Type': well_rows['Type'].values,
            'hydrocarbon amount': well_rows['Hydrocarbon amount'].values}

    source = ColumnDataSource(data=data)

    # Random horizontal offset in [-0.25, 0.25) shared by all points of this well
    offsetVal = (np.random.rand(1)[0] - 0.5) * 0.5

    p.circle(bokeh.transform.dodge('Type', offsetVal, range=p.x_range),
             'hydrocarbon amount',
             source=source,
             color=bokeh.transform.factor_cmap('Type', colors, Identifiers),
             alpha=0.6,
             size=7)

p.xgrid.visible = False
p.ygrid.visible = False

# Add custom axis: tick text is looked up in `Labels`, which is interpolated
# into the JavaScript snippet below

p.xaxis.formatter = FuncTickFormatter(code="""
var labels = %s;
return labels[tick];
""" %Labels)

bokeh.io.show(p)