[Return to Homepage](https://seabounduk.github.io/SeaboundData/)

### Land Prototype - Data Analysis Tool<br />
**Authors:** Ryan McLaughlin and Roujia Wen<br /><br />
**Version History**<br />
01 - Dec 09, 2022 - General functionality stable (Ryan)<br />
02 - Dec 09, 2022 - Added stability for different datasets (Ryan)<br />
03 - Dec 15, 2022 - Refactor code and break code down to separate files (Roujia)<br />
04 - Jan 05, 2023 - New output formatting (Ryan)<br />
05 - Jan 12, 2023 - Fixed mass tracking utility function (Ryan)<br />

Import Libraries

In [None]:
import os
from bokeh.plotting import figure, reset_output
from bokeh.io import output_notebook, show, export_png 

from bokeh.models import ColumnDataSource, DatetimeTickFormatter, Select, CustomJS, DatePicker, Dropdown
from bokeh.models import CrosshairTool, HoverTool, CustomJSHover, Span, Range1d, FixedTicker, LinearAxis 
from datetime import datetime
from math import radians # rotate axis ticks
import numpy as np
import pandas as pd
import json
import csv

from scipy.signal import butter,filtfilt

# Our custom libraries
from io_utils import get_test_dates, check_test_date, config_to_dict
from data_processing import add_calculated_columns, coarse_data

# Clear any Bokeh output mode configured earlier in this session, then send
# every subsequent show() call to inline notebook output.
reset_output()
output_notebook()

### Step 1: Select Test Date

The available test dates are shown below. To view data from a different date, change the parameter `test_date`. The format is `"YYYY-MM-DD"`.

In [None]:
# Ask the io_utils helper for the test dates that are available; the result is
# passed to check_test_date() further down together with the chosen date.
test_dates = get_test_dates()

In [None]:
# Date of the test run to analyse, written as "YYYY-MM-DD".
test_date = "2023-12-14" #<<< Pick from one of the available dates and enter here
print(f"Selected test date: {test_date}")

In [None]:
# check_test_date() hands back the four file paths used by the rest of the
# notebook for the selected date: raw sensor log, processed-data cache,
# event log and configuration file.
data_log_path, processed_data_path, event_log_path, config_file_path = check_test_date(
    test_date, test_dates
)

# Configuration for the selected date, as a dictionary.
CONFIG = config_to_dict(test_date)

print("Configuration:")
# Leaving the bare name as the last expression makes the notebook display it.
CONFIG

### Step 2: Process Data

In [None]:
# strptime layout of the timestamps in the raw sensor log (from the test config).
INPUT_DATETIME_FORMAT = CONFIG["time_format"]
# The event log may declare its own layout; otherwise it shares the sensor-log one.
EVENT_INPUT_DATETIME_FORMAT = CONFIG.get("event_time_format", INPUT_DATETIME_FORMAT)
# Layout of the timestamp text shown in hover tooltips.
PLOT_TOOLTIP_DATETIME_FORMAT = "%Y-%m-%d %H:%M:%S"
# Layout of the x-axis tick labels (date and time on separate lines).
PLOT_XAXIS_DATETIME_FORMAT = "%Y-%m-%d\n%H:%M:%S"

# without microseconds
def str_to_datetime(s, from_=INPUT_DATETIME_FORMAT):
    """Parse the timestamp string `s`, laid out as `from_`, into a datetime.

    `from_` is a strptime format string; it defaults to the sensor-log layout
    that was in effect when this function was defined.
    """
    parsed = datetime.strptime(s, from_)
    return parsed

def str1_to_str2(s, from_=INPUT_DATETIME_FORMAT):
    """Re-format the timestamp string `s` from `from_` to PLOT_TOOLTIP_DATETIME_FORMAT.

    `from_` is a strptime format string; it defaults to the sensor-log layout
    that was in effect when this function was defined.
    """
    moment = datetime.strptime(s, from_)
    return moment.strftime(PLOT_TOOLTIP_DATETIME_FORMAT)

def format_df_to_source(df):
    """Turn a Pandas dataframe into the column-name -> list mapping Bokeh takes as a source.

    Only `df.items()` is used, so every (column name, column values) pair it
    yields becomes one entry whose values are copied into a plain list.
    """
    source_data = {}
    for column_name, column_values in df.items():
        source_data[column_name] = list(column_values)
    return source_data


Import and process sensor log data

In [None]:
# Load the sensor log for the selected test date into `main_df`.
# A processed CSV left by an earlier run is reused when it exists; otherwise the
# raw log is processed from scratch and the result is cached for the next run.
if os.path.isfile(processed_data_path):
    print("Found existing processed data. Importing...")
    main_df = pd.read_csv(processed_data_path, index_col=0)
    # The cached timestamps may or may not carry fractional seconds. Only a
    # layout mismatch (ValueError from strptime) should trigger the fallback;
    # any other failure is a real error and must not be silently swallowed.
    try:
        main_df["datetime"] = main_df["datetime"].map(
            lambda x: datetime.strptime(x, "%Y-%m-%d %H:%M:%S.%f"))
    except ValueError:
        main_df["datetime"] = main_df["datetime"].map(
            lambda x: datetime.strptime(x, "%Y-%m-%d %H:%M:%S"))
    print("Done!")

else:
    print("Reading and processing raw data...")
    main_df = pd.read_csv(data_log_path)
    main_df["datetime_tooltip"] = main_df["datetime"].map(str1_to_str2)
    # main_df["datetime"] = main_df["datetime"].map(str_to_datetime)

    # These columns are reserved for tooltip text. These default values will not be displayed.
    main_df["value_display"] = ""
    main_df["event_display"] = "none"

    # Add calculated data columns
    main_df = add_calculated_columns(main_df, CONFIG)
    main_df = coarse_data(main_df, 5)
    # reset_index() returns a new frame, so its result has to be kept;
    # drop=True renumbers the rows without adding an extra "index" column.
    main_df = main_df.reset_index(drop=True)
    main_df.to_csv(processed_data_path)
    print(f"Done! Saved processed data to {processed_data_path}")

# Column-oriented copy of the frame that every sensor-data line glyph reads from.
source = ColumnDataSource(format_df_to_source(main_df))
# print()
# print("Available data columns:")
# print(", ".join(main_df.columns))

# Show example rows
# main_df.head(3)

Import and process event log data

In [None]:

# Load the event log and give it the same helper columns as the sensor data so
# both sources can be rendered with the one shared tooltip template.
event_log_df = pd.read_csv(event_log_path)
event_times = event_log_df["Time"]

# Tooltip text and plottable datetime, both derived from the raw "Time" strings.
event_log_df["datetime_tooltip"] = event_times.map(
    lambda raw: str1_to_str2(raw, from_=EVENT_INPUT_DATETIME_FORMAT))
event_log_df["datetime"] = event_times.map(
    lambda raw: str_to_datetime(raw, from_=EVENT_INPUT_DATETIME_FORMAT))

for column_name, constant in (
    # Reserved for tooltip text; these values feed the CSS `display` property
    # in the tooltip template and are never shown themselves.
    ("value_display", "none"),
    ("event_display", ""),
    # For scatter plot purposes; y coordinate of the event dots on the plots.
    ("y", 0),
):
    event_log_df[column_name] = constant

# Column-oriented copy of the frame that the event markers read from.
event_source = ColumnDataSource(format_df_to_source(event_log_df))

# Show example rows
# event_log_df.head(3)

### Step 3: Plot Data

Plot Structure

In [None]:
# Figure size (in pixels) used when a plot entry does not override it.
DEFAULT_PLOT_HEIGHT = 500

DEFAULT_PLOT_WIDTH = 1000

DEFAULT_LEGEND_LOCATION = "top_right"

# Define the number of plots and which data lines are included within each plot.
# Keys understood by the plotting cell:
#   plot_name        - figure title
#   metrics_included - data columns drawn as lines, coloured in list order
#   yaxis_label      - label of the y axis
#   height           - optional figure height, defaults to DEFAULT_PLOT_HEIGHT
#   state_plot       - optional flag; gives the figure a fixed 0-4 y axis
PLOT_COMPOSITION = [
    {
        "plot_name": "State",
        "metrics_included": ["state"],
        "height": 170,
        "state_plot": True,
        "yaxis_label": "State",
    },
    {
        "plot_name": "Temperature",
        "metrics_included": ["tw1", "tw2", "tw3", "tc1", "tc2", "tc3",
                             "tg", "ti", "to", "tv", "tcool", "thop"],
        "yaxis_label": "Temperature [ C ]",
    },
    {
        "plot_name": "Absolute Pressure",
        "metrics_included": ["pg", "po", "pv", "pi",
                             "pg_lowpass", "po_lowpass", "pv_lowpass", "pi_lowpass"],
        "yaxis_label": "Pressure [ kPag ]",
    },
    {
        "plot_name": "Differential Pressure",
        "metrics_included": ["dpo", "dpf", "dpv",
                             "dpo_lowpass", "dpf_lowpass", "dpv_lowpass"],
        "yaxis_label": "Pressure [ kPa ]",
    },
    {
        "plot_name": "Mass Flow",
        "metrics_included": ["mdoti", "mdoto", "mdoti_lowpass", "mdoto_lowpass"],
        "yaxis_label": "Mass Flow [ kg/s ]",
    },
    {
        "plot_name": "CO2 Concentration",
        "metrics_included": ["gai", "gao"],
        # Was "%C02" (digit zero); now matches the "%CO2" spelling used elsewhere.
        "yaxis_label": "Volumetric Concentration [ %CO2 ]",
    },
    {
        "plot_name": "Mass in Carbonator",
        "metrics_included": ["ms"],
        "yaxis_label": "Mass [ kg ]",
    },
]

Plot Tools

In [None]:
# Plot colors
# Twelve hex colours; the plotting cells assign them to lines by position.
my_palette = ['#e6194B','#3cb44b','#ffe119','#4363d8',
              '#f58231','#911eb4','#42d4f4','#f032e6',
              '#bfef45', '#fabed4', '#469990', '#9A6324'
              ]

# Select tools to include
# Comma-separated Bokeh tool names, passed as `tools=` to every figure.
TOOLS = "pan,wheel_zoom,box_zoom,save,reset"

# For CROSSHAIR
# Span overlays meant to be shared through CrosshairTool(overlay=[...]).
# NOTE(review): the add_tools(CrosshairTool(...)) calls in the plotting cells
# shown here are commented out, so these two objects appear to be unused.
cursor_x = Span(dimension="width", line_dash="dotted", line_width=2)
cursor_y = Span(dimension="height", line_dash="dashed", line_width=2)


# For HOVER TOOLTIP
# This formatter is used for the time that's shown on the hover tooltip
# It turns the hovered x value (a millisecond timestamp) into an H:MM:SS string
# using the browser's local time.
# NOTE(review): nothing in the cells shown here attaches this formatter to a
# hover tool; the tooltip template below reads a pre-formatted column instead.
time_custom_formatter = CustomJSHover(code="""
    const x = special_vars.x
    // Create a new JavaScript Date object based on the timestamp
    // the argument is in milliseconds
    var date = new Date(x);
    // Hours part from the timestamp
    var hours = date.getHours();
    // Minutes part from the timestamp
    var minutes = "0" + date.getMinutes();
    // Seconds part from the timestamp
    var seconds = "0" + date.getSeconds();
    // Will display time in 10:30:23 format
    var formattedTime = hours + ':' + minutes.substr(-2) + ':' + seconds.substr(-2);
    return formattedTime
""")

# HTML template passed as `tooltips=` to every figure.
# `@name` fields are filled from the hovered glyph's data source:
#   @datetime_tooltip - pre-formatted timestamp text
#   @value_display / @event_display - used as the CSS `display` value, so a
#       source sets one of them to "none" to hide the row that does not apply
#   @Description - presumably a column of the event log CSV; verify against the data
# `$snap_y` is the y value of the data point the hover snapped to.
TOOLTIPS = """

    <div>
        <div>
            <span style="font-size: 12px;"> <b> time: </b> @datetime_tooltip </span>
        </div>
        <div>
            <span style="font-size: 12px; display: @value_display;"> <b> value: </b> $snap_y</span>
        </div>
        <div>
            <span style="font-size: 12px; display: @event_display;"> <b> event: </b> @Description</span>
        </div>
    </div>
"""

#### Raw Data

In [None]:
# Draw one figure per PLOT_COMPOSITION entry, skipping entries that have no
# usable data column. Every figure reuses the x range of the first one drawn so
# that panning and zooming stay linked:
# https://docs.bokeh.org/en/latest/docs/user_guide/interaction/linking.html#linked-panning

# Every tick scale uses the same label pattern, so labels keep their layout when zooming.
XAXIS_TICK_SCALES = ("seconds", "minsec", "minutes", "hourmin",
                     "hours", "days", "months", "years")


def _metric_has_data(metric_name):
    """Return True when `metric_name` is a column of main_df whose mean is not zero."""
    return (metric_name in main_df.columns) and (main_df[metric_name].mean() != 0)


first_x_range = None    # x range of the first figure drawn; None until one exists
for each_plot in PLOT_COMPOSITION:
    # Pair each plottable metric with a palette colour chosen by its position in
    # "metrics_included". The index wraps around, so a plot listing more metrics
    # than the palette has colours reuses colours instead of dropping lines.
    lines_to_draw = [
        (metric_name, my_palette[position % len(my_palette)])
        for position, metric_name in enumerate(each_plot["metrics_included"])
        if _metric_has_data(metric_name)
    ]
    if not lines_to_draw:
        continue    # none of the related columns hold data: skip this plot

    # Create figure
    figure_options = dict(
        x_axis_type="datetime",
        width=DEFAULT_PLOT_WIDTH,
        height=each_plot.get("height", DEFAULT_PLOT_HEIGHT),
        tools=TOOLS,
        tooltips=TOOLTIPS,
    )
    if first_x_range is not None:
        figure_options["x_range"] = first_x_range
    p = figure(**figure_options)
    if first_x_range is None:
        first_x_range = p.x_range

    # Add crosshair tool - bug for now
    # p.add_tools(CrosshairTool(overlay=[cursor_x, cursor_y])) # https://docs.bokeh.org/en/latest/docs/user_guide/interaction/linking.html#linked-crosshair
    # Autohide toolbar
    p.toolbar.autohide = True

    # Plot the data lines
    for metric_name, color in lines_to_draw:
        p.line(x="datetime", y=metric_name, source=source, color=color, legend_label=metric_name)

    # Event markers. scatter() draws screen-sized circle markers by default;
    # circle() without a size is radius-based in recent Bokeh releases.
    p.scatter(x="datetime", y="y", source=event_source, color="black", legend_label="events")

    p.xaxis.formatter = DatetimeTickFormatter(
        **{scale: PLOT_XAXIS_DATETIME_FORMAT for scale in XAXIS_TICK_SCALES}
    )
    p.title.text = each_plot["plot_name"]
    p.xaxis.major_label_orientation = radians(80)

    p.yaxis.axis_label = each_plot["yaxis_label"]

    p.legend.click_policy = "hide" # https://docs.bokeh.org/en/latest/docs/user_guide/interaction/legends.html#hiding-glyphs
    p.legend.location = DEFAULT_LEGEND_LOCATION

    # Honour the flag's value, not just the key's presence, so that
    # "state_plot": False behaves like leaving the key out.
    if each_plot.get("state_plot"):
        p.y_range = Range1d(0, 4)
        p.yaxis.ticker = FixedTicker(ticks=[0, 1, 2, 3, 4])

    show(p)

#### Pressure vs Mass, Temperature, CO2 Concentration

In [None]:
# One figure that overlays pressure with temperature, mass, CO2 concentration
# and fill height, each on its own y axis. An extra axis is only added when at
# least one of its lines can be drawn, so no axis is left pointing at an
# undefined range.

def _padded_range(columns, pad=0.05):
    """Return a Range1d spanning the listed main_df columns, widened by `pad`.

    Each bound is pushed away from the data by `pad` times its own magnitude.
    For non-negative bounds this equals `min * (1 - pad)` / `max * (1 + pad)`;
    negative bounds are pushed outwards as well instead of being clipped.
    """
    low = np.min([main_df[name].min() for name in columns])
    high = np.max([main_df[name].max() for name in columns])
    start = low * (1 - pad) if low >= 0 else low * (1 + pad)
    end = high * (1 + pad) if high >= 0 else high * (1 - pad)
    return Range1d(start=start, end=end)


p = figure(x_axis_type="datetime",
           width=DEFAULT_PLOT_WIDTH,
           height=DEFAULT_PLOT_HEIGHT,
           tools=TOOLS,
           tooltips=TOOLTIPS
           )

# Add crosshair tool
# p.add_tools(CrosshairTool(overlay=[cursor_x, cursor_y])) # https://docs.bokeh.org/en/latest/docs/user_guide/interaction/linking.html#linked-crosshair
# Autohide toolbar
p.toolbar.autohide = True

# Same label pattern on every tick scale.
p.xaxis.formatter = DatetimeTickFormatter(**{
    scale: PLOT_XAXIS_DATETIME_FORMAT
    for scale in ("seconds", "minsec", "minutes", "hourmin",
                  "hours", "days", "months", "years")
})
p.xaxis.major_label_orientation = radians(80)

y1 = "pg"
color_index = 1     # palette slot 0 is taken by the pressure line

# 1ST AXIS - PRESSURE
# Set the label before any extra axis exists so it only applies to the left axis.
p.yaxis.axis_label = "Pressure [ kPa ]"
p.line(x="datetime", y=(y1 + "_lowpass"), source=source, legend_label=y1, color=my_palette[0])
# The filtered signal is drawn, but the range is taken from the raw column.
p.y_range = _padded_range([y1])

# 2ND AXIS - TEMPERATURE
temp_dictionary = next(item for item in PLOT_COMPOSITION if item["plot_name"] == "Temperature")
temperature_metrics = [
    metric_name for metric_name in temp_dictionary["metrics_included"]
    if metric_name in ("ti", "tc1", "tc2", "tc3", "to")
    and metric_name in main_df.columns
    and main_df[metric_name].mean() != 0
]
if temperature_metrics:
    # The range follows tc2/ti when those columns exist; otherwise it falls
    # back to whatever temperature lines are actually drawn.
    range_columns = [name for name in ("tc2", "ti") if name in main_df.columns] or temperature_metrics
    p.extra_y_ranges["y2_range"] = _padded_range(range_columns)
    p.add_layout(LinearAxis(y_range_name="y2_range", axis_label="Temperature [ C ]"), "right")
    for metric_name in temperature_metrics:
        p.line(x="datetime", y=metric_name, source=source, legend_label=metric_name,
               color=my_palette[color_index], y_range_name="y2_range")
        color_index = color_index + 1

# 3RD AXIS - MASS
if ('ms' in main_df.columns) and (main_df['ms'].mean() != 0):
    p.extra_y_ranges["y3_range"] = Range1d(start=0, end=2100)   # fixed range
    p.add_layout(LinearAxis(y_range_name="y3_range", axis_label="Mass [ kg ]"), "right")
    p.line(x="datetime", y='ms', source=source, legend_label='ms',
           color=my_palette[color_index], y_range_name="y3_range")
    color_index = color_index + 1

# 4TH AXIS - GAS CONCENTRATION
gas_dictionary = next(item for item in PLOT_COMPOSITION if item["plot_name"] == "CO2 Concentration")
gas_metrics = [
    metric_name for metric_name in gas_dictionary["metrics_included"]
    if metric_name in main_df.columns
]
# The 2022-11-25 test is excluded explicitly; other datasets are included
# whenever they actually contain gas analyser columns.
if (test_date != "2022-11-25") and gas_metrics:
    p.extra_y_ranges["y4_range"] = _padded_range(gas_metrics)
    p.add_layout(LinearAxis(y_range_name="y4_range", axis_label="Gas Concentration [ %CO2 ]"), "right")
    for metric_name in gas_metrics:
        p.line(x="datetime", y=metric_name, source=source, legend_label=metric_name,
               color=my_palette[color_index], y_range_name="y4_range")
        color_index = color_index + 1

# 5TH AXIS - FILL HEIGHT
# A configuration without an "exclude_metrics" entry excludes nothing.
excluded_metrics = CONFIG.get("exclude_metrics") or []
if ("fill_height" not in excluded_metrics) and ("fill_height" in main_df):
    p.extra_y_ranges["y5_range"] = _padded_range(["fill_height"])
    p.add_layout(LinearAxis(y_range_name="y5_range", axis_label="Height [ m ]"), "right")
    p.line(x="datetime", y='fill_height', source=source, legend_label='fill_height',
           color=my_palette[color_index], y_range_name="y5_range")
    color_index = color_index + 1

p.legend.location = DEFAULT_LEGEND_LOCATION
p.legend.click_policy = "hide" # https://docs.bokeh.org/en/latest/docs/user_guide/interaction/legends.html#hiding-glyphs

show(p)

#### Effective Particle Size Analysis

In [None]:
# p = figure(x_axis_type="datetime", 
#                 width=DEFAULT_PLOT_WIDTH, 
#                 height=DEFAULT_PLOT_HEIGHT, 
#                 tools=TOOLS, 
#                 tooltips=TOOLTIPS
#             ) 

# # Add crosshair tool
# # p.add_tools(CrosshairTool(overlay=[cursor_x, cursor_y])) # https://docs.bokeh.org/en/latest/docs/user_guide/interaction/linking.html#linked-crosshair
# # Autohide toolbar
# p.toolbar.autohide = True

# date_pattern = "%Y-%m-%d\n%H:%M:%S"
# p.xaxis.formatter = DatetimeTickFormatter(
#     seconds = date_pattern,
#     minsec  = date_pattern,
#     minutes  = date_pattern,
#     hourmin  = date_pattern,
#     hours  = date_pattern,
#     days  = date_pattern,
#     months  = date_pattern,
#     years  = date_pattern
# )

# p.xaxis.major_label_orientation=radians(80)
# p.yaxis.axis_label = "Time "


# y1 = "pg"
# color_index = 1

# # 1ST AXIS - PRESSURE
# p.yaxis.axis_label = "Pressure [ kPa ]"
# p.line(x="datetime",y=(y1+"_lowpass"),source=source,legend_label=y1,color=my_palette[0])
# p.y_range = Range1d(
#     # start = 10,
#     start = main_df[y1].min() * (1-.05),
#     end = main_df[y1].max() * (1+.05)
# )

# # add more axes
# p.extra_y_ranges = {
#     "y2_range": Range1d(
#         start = np.minimum(main_df["tc2"].min(),main_df["ti"].min()) * (1-.05),
#         end = np.maximum(main_df["tc2"].max(),main_df["ti"].max()) * (1+.05)
#         # start = 200,
#         # end = 350
#     ),
#     "y3_range": Range1d(
#         start = 0,
#         end = 1500
#     ),
#     "y4_range": Range1d(
#         start = np.minimum(main_df["gai"].min(),main_df["gao"].min())* (1-.05),
#         end = np.maximum(main_df["gai"].max(),main_df["gao"].max())* (1+.05)
#     ),
#     "y5_range": Range1d(
#         start = main_df["mdoti_lowpass"].min()* (1-.05),
#         end = main_df["mdoti_lowpass"].max()* (1+.05)
#     ),
#     "y6_range": Range1d(
#         start = main_df["fill_height"].min()* (1-.05),
#         end = main_df["fill_height"].max()* (1+.05)
#     )
# }

# #2ND AXIS - TEMPERATURE

# p.add_layout(LinearAxis(y_range_name="y2_range",axis_label="Temperature [ C ]"),"right")
# temp_dictionary = next(item for item in PLOT_COMPOSITION if item["plot_name"] == "Temperature")
# for metric_name in temp_dictionary["metrics_included"]:
#     if (metric_name in ["ti","tc1","tc2"]) and (main_df[metric_name].mean() != 0):
#         p.line(x="datetime",y=metric_name,source=source,legend_label=metric_name,color=my_palette[color_index],y_range_name="y2_range")
#         color_index = color_index+1

# #3RD AXIS - MASS 
# if 'ms' in main_df.columns:
#     if main_df['ms'].mean() != 0:
#         p.add_layout(LinearAxis(y_range_name="y3_range",axis_label="Mass [ kg ]"),"right")
#         p.line(x="datetime",y='ms',source=source,legend_label='ms',color=my_palette[color_index],y_range_name="y3_range")
#         color_index = color_index+1
#         # 6TH AXIS - FILL HEIGHT
#         p.add_layout(LinearAxis(y_range_name="y6_range",axis_label="Height [ m ]"),"right")
#         p.line(x="datetime",y='fill_height',source=source,legend_label='fill_height',color=my_palette[color_index],y_range_name="y6_range")
#         color_index = color_index+1

# #4TH AXIS - GAS CONCENTRATION
# if test_date != "2022-11-25":
#     p.add_layout(LinearAxis(y_range_name="y4_range",axis_label="Gas Concentration [ %CO2 ]"),"right")
#     metric_name = "gai"
#     p.line(x="datetime",y=metric_name,source=source,legend_label=metric_name,color=my_palette[color_index],y_range_name="y4_range")
#     color_index = color_index+1

# #5TH AXIS - GAS MASS FLOW
# if test_date != "2022-11-25": # need this?
#     p.add_layout(LinearAxis(y_range_name="y5_range",axis_label="Gas Flow Rate [ kg/s ]"),"right")
#     for metric_name in ["mdoti_lowpass"]:
#         p.line(x="datetime",y=metric_name,source=source,legend_label=metric_name,color=my_palette[color_index],y_range_name="y5_range")
#         color_index = color_index+1

# p.legend.location = DEFAULT_LEGEND_LOCATION
# p.legend.click_policy="hide" # https://docs.bokeh.org/en/latest/docs/user_guide/interaction/legends.html#hiding-glyphs

# show(p)

[Return to Homepage](https://seabounduk.github.io/SeaboundData/)