In [1]:
## INTERACTIVE DATA VISUALIZATION OF ELECTRONIC NOSE: Bokeh part
    # Data from:       [1] R. Huerta et al., Chemom. Intell. Lab. Syst. 157, 169-176 (2016).
    # Downloaded from: [2] UCI Machine Learning Repository:
    #                      https://archive.ics.uci.edu/ml/datasets/Gas+sensors+for+home+activity+monitoring 

#------------------------------------------------------------------ 
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import math

from bokeh.io import output_notebook, show
from bokeh.layouts import row, column
from bokeh.plotting import figure, curdoc
from bokeh.models.widgets import MultiSelect, Div, RadioButtonGroup
from bokeh.models import ColumnDataSource, AdaptiveTicker, LinearAxis
from bokeh.models.glyphs import MultiLine
from bokeh.palettes import Category20_20, viridis

#------------------------------------------------------------------ 

# Loading metadata and data files created in 1st part
metadata = pd.read_pickle('metadata_sub.pkl')
display(metadata.head())

dataset_pd = pd.read_pickle('dataset_pd.pkl')
display(dataset_pd.head())

#------------------------------------------------------------------ 

# Creating dataframe with beta values used for
    # "Simplified model for filtering the relative humidity and 
    # temperature information from MOX sensors based on an energy band model." (see Table 2 and eq.5,6 in [1])

beta_values =  {'beta_1' : [-0.0044, -0.0110, -0.0110, -0.0110, -0.0056, -0.0039, -0.0070, -0.0057],
               'beta_2': [0.00014, 0.00034, 0.00034, 0.00033, 0.00018, 0.00012, 0.00022, 0.00020],
               'beta_3': [0.0110, 0.0240, 0.0230, 0.0230, 0.0086, 0.0071, 0.0095, 0.0029]}

  
sensors = dataset_pd.columns[2:10]
    # extracting sensors' names from column names of dataset_pd
    
bv = pd.DataFrame(beta_values, index=sensors) 
    # indexing dataframe with the sensors' names
    
display(bv)

#------------------------------------------------------------------ 

output_notebook()
%autosave 60

Unnamed: 0,class,t0,dt [h]
0,banana,13.49,1.64
1,wine,19.61,0.54
2,wine,19.99,0.66
3,banana,6.49,0.72
4,wine,20.07,0.53


Unnamed: 0,id,time,R1,R2,R3,R4,R5,R6,R7,R8,Temp.,Humidity
0,0,0,12.810492,10.3664,10.453442,11.674575,13.494583,13.275242,8.305328,9.045543,26.422667,59.470167
1,0,1,12.354258,9.678809,8.329226,8.973556,7.940278,10.751592,4.836595,5.932837,26.524783,60.481617
2,0,2,12.393392,9.731926,8.536201,9.042302,8.126959,10.784767,4.80277,5.906617,26.525275,60.077433
3,0,3,12.432925,9.77114,8.39775,9.08899,7.429536,10.764858,4.788543,5.896372,26.506642,59.916733
4,0,4,12.460583,9.778931,7.932641,9.022253,6.724887,10.728892,4.773467,5.890961,26.545908,59.960725


Unnamed: 0,beta_1,beta_2,beta_3
R1,-0.0044,0.00014,0.011
R2,-0.011,0.00034,0.024
R3,-0.011,0.00034,0.023
R4,-0.011,0.00033,0.023
R5,-0.0056,0.00018,0.0086
R6,-0.0039,0.00012,0.0071
R7,-0.007,0.00022,0.0095
R8,-0.0057,0.0002,0.0029


Autosaving every 60 seconds


In [2]:
# Decorrelation of sensors' signals from temperature and humidity
# Sensor signals R1 - R8 from the dataset_pd dataframe are decorrelated using eq. 6 from [1].
#
# For every induction (rows of dataset_pd sharing one id) and every pair of
# consecutive samples j, j+1 the following is computed per sensor:
#     dT = T[j+1] - T[j]                      (change in temperature)
#     dH = H[j+1] - H[j]                      (change in humidity)
#     decorrelated[j] = R[j+1] - R[j] * exp(beta_1*dH + beta_2*dH*dH + beta_3*dH*dT)
# Each induction therefore becomes one sample shorter; the temperature and
# humidity stored next to decorrelated[j] are the original T[j] and H[j].

decorr_frames = []
    # one decorrelated dataframe per induction; concatenated once after the loop
    # (growing a dataframe inside the loop is quadratic, and Series.append is not
    # available in pandas >= 2.0)

for ind in range(metadata.shape[0]):
    induction = dataset_pd[dataset_pd.id == ind]

    temperature = induction['Temp.'].to_numpy()
    humidity = induction['Humidity'].to_numpy()

    dT = np.diff(temperature)
        # change in temperature: dT = T[j+1] - T[j]
    dH = np.diff(humidity)
        # change in humidity: dH = H[j+1] - H[j]

    decorr_signals = {}

    for sensor in sensors:
        beta_1, beta_2, beta_3 = bv.loc[sensor, ['beta_1', 'beta_2', 'beta_3']]
        resistance = induction[sensor].to_numpy()

        decorr_signals[sensor] = resistance[1:] - resistance[:-1] * np.exp(
                                            beta_1 * dH
                                            + beta_2 * dH * dH
                                            + beta_3 * dH * dT)
            # implementation of eq. 6 from [1], evaluated for all samples at once

    induction_decorr = pd.DataFrame(decorr_signals)
    induction_decorr.insert(0, 'id', ind)
    induction_decorr.insert(1, 'time', np.arange(0, induction_decorr.shape[0], 1))
        # inserting columns with id and time

    induction_decorr['Temp.'] = temperature[:-1]
    induction_decorr['Humidity'] = humidity[:-1]
        # temperature and humidity from the original dataset shortened by the last value

    decorr_frames.append(induction_decorr)

if decorr_frames:
    dataset_pd_decorr = pd.concat(decorr_frames, axis=0, ignore_index=True)
else:
    # no inductions listed in metadata: keep an empty frame with the expected columns
    dataset_pd_decorr = pd.DataFrame(columns=['id', 'time', *sensors, 'Temp.', 'Humidity'])

Temperature = dataset_pd_decorr['Temp.'].copy()
Humidity = dataset_pd_decorr['Humidity'].copy()
    # the shortened temperature and humidity series, kept available under these names

display(dataset_pd_decorr.head())
display(dataset_pd_decorr.tail())

dataset_pd_decorr.to_pickle('dataset_pd_decorr.pkl')

Unnamed: 0,id,time,R1,R2,R3,R4,R5,R6,R7,R8,Temp.,Humidity
0,0,0,-0.415675,-0.601913,-2.036748,-2.603216,-5.492484,-2.482712,-3.420092,-3.065242,26.422667,59.470167
1,0,1,0.016887,0.009495,0.169434,0.028317,0.168468,0.016018,-0.047694,-0.040095,26.524783,60.481617
2,0,2,0.030314,0.02121,-0.154218,0.029988,-0.704988,-0.026932,-0.019796,-0.01574,26.525275,60.077433
3,0,3,0.029825,0.012107,-0.461385,-0.062706,-0.702932,-0.034254,-0.013683,-0.003964,26.506642,59.916733
4,0,4,0.019108,0.020937,-0.453938,-0.179333,0.004578,-0.139843,-0.024165,0.00061,26.545908,59.960725


Unnamed: 0,id,time,R1,R2,R3,R4,R5,R6,R7,R8,Temp.,Humidity
3299,96,24,-0.046297,0.024492,0.001652,0.018279,-0.004611,0.002616,-0.004933,-0.004246,27.531992,55.094883
3300,96,25,-0.012336,-0.001213,0.00079,-8.6e-05,0.007856,-0.013437,-0.009389,-0.007236,27.516567,55.202433
3301,96,26,-0.017008,-0.019738,-0.017351,-0.024559,-0.017386,-0.001672,-0.014101,-0.012695,27.511092,55.095692
3302,96,27,0.014584,0.063417,0.066397,0.08555,0.035703,0.036479,0.024124,0.017281,27.588042,54.975267
3303,96,28,-0.007345,-0.02816,-0.02169,-0.035707,-0.011291,-0.021882,-0.016404,-0.023231,27.41545,55.3992


In [7]:
def modify_doc(doc):
    '''
    Builds the interactive visualization and adds it as a root to the given Bokeh document.

    The layout consists of a MultiSelect (which inductions to show), a RadioButtonGroup
    (original signals from dataset_pd or decorrelated signals from dataset_pd_decorr),
    ten linked plots (sensors R1 - R8, relative humidity, temperature) with a legend,
    and a Div with the data reference.

    doc: Bokeh document the layout is added to.
    '''

    # Data column drawn in each plot, listed in the same order in which make_plot
    # creates the plots. Columns are addressed by name: in the dataframes 'Temp.'
    # comes before 'Humidity', while the humidity plot comes before the temperature plot.
    signal_columns = [f'R{k}' for k in range(1, 9)] + ['Humidity', 'Temp.']

    #------------------------------------------------------------------
    def induction_label(ind):
        '''
        Function returning the text "<id>: <first metadata column>" describing one induction
        '''
        return str(ind) + ': ' + metadata.iloc[ind, 0]

    #------------------------------------------------------------------
    def make_data(induction_list, df):
        '''
        Function returning a plain dictionary with, for every column of df except the id column,
        a list of lists (one inner list per selected induction), plus a 'color' entry
        holding one color per selected induction
        '''
        # each induction is filtered once and reused for all columns
        selected = [df[df.id == i] for i in induction_list]

        # list of lists is needed for MultiLine glyph
        data = {column: [induction[column].values.tolist() for induction in selected]
                for column in df.columns[1:]}  # all columns except of id column

        data['color'] = make_palette(induction_list)

        return data

    #------------------------------------------------------------------
    def make_dataset(induction_list, df=dataset_pd):
        '''
        Function returning ColumnDataSource containing all signals of inductions chosen in the multiselect tool,
        original or decorrelated depending on the dataframe passed as df
        '''
        # ColumnDataSource is necessary for interactive visualization
        return ColumnDataSource(data=make_data(induction_list, df))

    #------------------------------------------------------------------

    def make_plot(source, induction_list):
        '''
        Function returning a row layout with two columns of plots (all signals) and the legend
        '''

        #------------------------------------------------------------------
        # Creating list with all the plots

        titles = ['Sensor 1: Methane',
                  'Sensor 2: Methane, Propane, Butane',
                  'Sensor 3: Propane',
                  'Sensor 4: Hydrogen, Carbon Monoxide',
                  'Sensor 5: Ammonia, H\N{SUBSCRIPT TWO}S, VOC',
                  'Sensor 6: Ammonia, H\N{SUBSCRIPT TWO}S, VOC',
                  'Sensor 7: Carbon Monoxide, combustible gases, VOC',
                  'Sensor 8: Carbon Monoxide, combustible gases, VOC',
                  'RH',
                  'Temperature']

        y_axis_labels = ['R\N{SUBSCRIPT ONE} (k\u03A9)',
                         'R\N{SUBSCRIPT TWO} (k\u03A9)',
                         'R\N{SUBSCRIPT THREE} (k\u03A9)',
                         'R\N{SUBSCRIPT FOUR} (k\u03A9)',
                         'R\N{SUBSCRIPT FIVE} (k\u03A9)',
                         'R\N{SUBSCRIPT SIX} (k\u03A9)',
                         'R\N{SUBSCRIPT SEVEN} (k\u03A9)',
                         'R\N{SUBSCRIPT EIGHT} (k\u03A9)',
                         'H (%)',
                         'T (\u2103)']

        first_bottom = len(titles) - 2
            # the last two plots sit at the bottom of the two layout columns

        plots = []

        for i, title in enumerate(titles):
            options = dict(title=title, width=400, height=150, y_axis_type=None, toolbar_location=None)

            if i >= first_bottom:
                # bottom plots are taller because they carry the visible x axis and its label
                options.update(height=170, x_axis_label='Time (min)')

            if i == 0:
                # only the first plot has a toolbar
                options.update(toolbar_location='above', tools='box_zoom, save, pan, reset')
            else:
                # all other plots share the x range of the first one, so zoom/pan is linked
                options.update(x_range=plots[0].x_range)

            plots.append(figure(**options))

        yticker = AdaptiveTicker(min_interval=1, num_minor_ticks=5)

        #------------------------------------------------------------------
        # Adding MultiLine glyph to each plot

        for i, (each, column_name, axis_label) in enumerate(zip(plots, signal_columns, y_axis_labels)):
            each.multi_line(xs='time', ys=column_name, source=source,
                            line_color='color', line_width=1.5)

            if i < first_bottom:
                each.xaxis.visible = False

            # figures are created without a y axis (y_axis_type=None), it is added here
            yaxis = LinearAxis(ticker=yticker, axis_label=axis_label)
            each.add_layout(yaxis, 'left')

        #------------------------------------------------------------------
        global_legend = make_legend(induction_list)

        #------------------------------------------------------------------
        # Adding plots and legend to layout
        plot_layout = row(
            column(*plots[:4], plots[8]),
            column(*plots[4:8], plots[9]),
            global_legend)
        #------------------------------------------------------------------
        return plot_layout


    #------------------------------------------------------------------
    def make_palette(induction_list):
        '''
        Function returning palette (list of colors) with one color for each selected induction
        '''
        base = list(Category20_20)
        count = len(induction_list)

        if count <= len(base):
            return base[:count]

        # more inductions than Category20 colors: the remaining ones get viridis colors
        return base + list(viridis(count - len(base)))


    #------------------------------------------------------------------
    def make_legend_html(induction_list):
        '''
        Function returning the HTML text of the legend: one colored line per selected induction
        '''
        palette = make_palette(induction_list)

        legend = ""

        for i, each in enumerate(induction_list):
            label = induction_label(each)
            legend += f'<font color={palette[i]}> {label} </font>'
            legend += '<br>'

        return legend


    #------------------------------------------------------------------
    def make_legend(induction_list):
        '''
        Function returning the legend in form of Div widget with HTML text
        '''
        return Div(text=make_legend_html(induction_list), width=100, height=100)


    #------------------------------------------------------------------
    def update_plots(attr, old, new):
        '''
        Function updating plots depending on the state of multiselect tool and radio button group
        '''
        inductions_to_plot = [int(i) for i in induction_selection.value]

        df_to_plot = dataset_pd_decorr if radio_button.active else dataset_pd

        # all columns are replaced in one assignment of a plain dictionary
        source.data = make_data(inductions_to_plot, df_to_plot)

        # the layout created by the make_plot function is not rebuilt on click
        # (only the source is updated), so the text of the existing legend Div,
        # the third child of the plot row, is refreshed here
        plot.children[2].text = make_legend_html(inductions_to_plot)


    #------------------------------------------------------------------
    # Defining widgets: RadioButtonGroup & MultiSelect

    radio_button = RadioButtonGroup(labels=['Original signals', 'Signals decorrelated from RH & Temperature'], active=0)
    radio_button.on_change('active', update_plots)

    options = [(str(ind), induction_label(ind)) for ind in range(metadata.shape[0])]

    induction_selection = MultiSelect(title='Inductions', value = ['0','1'], options=options)
    induction_selection.on_change('value', update_plots)

    #------------------------------------------------------------------
    # Reading the value of MultiSelect, creating the ColumnDataSource and plotting accordingly

    initial_inductions = [int(i) for i in induction_selection.value]
    source = make_dataset(initial_inductions)
    plot = make_plot(source, initial_inductions)

    #------------------------------------------------------------------
    # Adding Div widget with references

    reference = Div(text="""Gas sensor data taken from: 
                    R. Huerta <i>et al., Chemom. Intell. Lab. Syst.</i> <b>157</b>, 169-176 (2016).
                    <br> Interactive visualization by Sylwia Nowakowska""")

    #------------------------------------------------------------------
    # Creating final layout and adding to document

    layout = column(induction_selection, radio_button, plot, reference)

    doc.add_root(layout)

    #------------------------------------------------------------------
    
# The function modify_doc itself (not a figure) is handed to show(); presumably Bokeh
# runs it as an embedded server application so the widget callbacks execute in this kernel
show(modify_doc)  