In [13]:
from bokeh.plotting import figure, show, output_notebook
from bokeh.models import ColumnDataSource, Range1d, BoxAnnotation, DatetimeTickFormatter
import pandas as pd
import numpy as np
import random

# Step 1: Create two sample dataframes (dev_df and prod_df)
def create_dataframes(seed=None):
    """Build two synthetic volume DataFrames, one for dev and one for prod.

    Both frames share the same 120 timestamps (30-minute spacing starting at
    2023-01-01 00:00) and have the columns ``timestamp``, ``volume`` and
    ``component``. ``volume`` is a random integer in [50, 500); ``component``
    is a random pick from five component names, or ``None`` for roughly 20%
    of the rows.

    Args:
        seed (int | None): When given, the data is generated from private,
            seeded generators so repeated calls return identical frames.
            When ``None`` (default) the global ``np.random`` / ``random``
            state is used, exactly as before.

    Returns:
        tuple[pd.DataFrame, pd.DataFrame]: ``(dev_df, prod_df)``.
    """
    # "30min" is the non-deprecated spelling of the old "30T" offset alias.
    timestamps = pd.date_range(start="2023-01-01 00:00", periods=120, freq="30min")
    n_rows = len(timestamps)

    components = ['ComponentA', 'ComponentB', 'ComponentC', 'ComponentD', 'ComponentE']

    if seed is None:
        # Fall back to the module-level generators so callers that seed them
        # globally (or rely on unseeded randomness) see no change.
        np_rng, py_rng = np.random, random
    else:
        np_rng, py_rng = np.random.RandomState(seed), random.Random(seed)

    def _random_components():
        # ~80% of rows get a component; the rest are left unassigned (None).
        return [
            py_rng.choice(components) if py_rng.random() > 0.2 else None
            for _ in range(n_rows)
        ]

    # Draw order (dev volume, dev components, prod volume, prod components)
    # is kept stable so a given seed always maps to the same data.
    dev_volume = np_rng.randint(50, 500, size=n_rows)
    dev_components = _random_components()
    prod_volume = np_rng.randint(50, 500, size=n_rows)
    prod_components = _random_components()

    dev_df = pd.DataFrame({"timestamp": timestamps, "volume": dev_volume, "component": dev_components})
    prod_df = pd.DataFrame({"timestamp": timestamps, "volume": prod_volume, "component": prod_components})

    return dev_df, prod_df

# Step 2: Plot filtered data with anomaly highlights
def _add_volume_series(plot, frame, label, line_color, highlight_color, anomaly_threshold):
    """Draw one volume series (line + markers) on ``plot`` and box its anomalies."""
    source = ColumnDataSource(frame)

    plot.line(
        x="timestamp", y="volume", source=source,
        line_width=2, color=line_color, legend_label=label
    )
    # scatter() replaces circle(size=...), which newer Bokeh 3.x releases deprecate.
    plot.scatter(
        x="timestamp", y="volume", source=source,
        size=8, color=line_color, legend_label=label
    )

    # Select the anomalous rows up front instead of testing every row in a loop.
    half_width = pd.Timedelta(minutes=15)
    anomalies = frame.loc[frame["volume"] > anomaly_threshold, ["timestamp", "volume"]]
    for timestamp, volume in anomalies.itertuples(index=False, name=None):
        plot.add_layout(BoxAnnotation(
            left=timestamp - half_width,
            right=timestamp + half_width,
            top=volume + 20,
            bottom=volume - 20,
            fill_alpha=0.3, fill_color=highlight_color
        ))


def plot_filtered_components(dev_df, prod_df, component_name, anomaly_threshold=400):
    """Plot dev vs. prod volume over time for a single component.

    Rows whose ``component`` equals ``component_name`` are selected from each
    frame and drawn as a line with markers (dev in blue, prod in green). Every
    point whose volume exceeds ``anomaly_threshold`` is highlighted with a
    shaded box (red for dev, orange for prod) spanning +/-15 minutes and
    +/-20 volume units around the point. The figure is shown in the notebook.

    Args:
        dev_df (DataFrame): Dev data with ``timestamp``, ``volume`` and
            ``component`` columns.
        prod_df (DataFrame): Prod data with the same columns.
        component_name (str): Component to plot.
        anomaly_threshold (int | float): Volumes strictly above this value
            are highlighted. Defaults to 400.

    Returns:
        None
    """
    # Filter DataFrames by component name
    dev_filtered = dev_df[dev_df['component'] == component_name]
    prod_filtered = prod_df[prod_df['component'] == component_name]

    # Output in the notebook
    output_notebook()

    # Create Bokeh figure
    p = figure(
        title=f"Volume Comparison for Component: {component_name}",
        x_axis_label="Timestamp",
        y_axis_label="Volume",
        x_axis_type="datetime",
        width=1000, height=500
    )

    # Dev first, then prod, so the glyph/annotation order matches the legend order.
    _add_volume_series(p, dev_filtered, "Dev Volume", "blue", "red", anomaly_threshold)
    _add_volume_series(p, prod_filtered, "Prod Volume", "green", "orange", anomaly_threshold)

    # Format the x-axis: use the same full timestamp at every tick scale,
    # including the minute-level scales reached when zooming into 30-minute data.
    tick_format = "%Y-%m-%d %H:%M"
    p.xaxis.formatter = DatetimeTickFormatter(
        minutes=tick_format,
        hourmin=tick_format,
        hours=tick_format,
        days=tick_format,
        months=tick_format,
        years=tick_format
    )
    # Orientation is in radians; pi/4 is the intended 45-degree tilt
    # (the previous value of 0.5 rad was only about 29 degrees).
    p.xaxis.major_label_orientation = np.pi / 4

    # Customize legend
    p.legend.title = "Legend"
    p.legend.label_text_font_size = "10pt"
    p.legend.location = "top_left"

    # Show the plot
    show(p)




In [14]:
def generate_visualizations(dev_df, prod_df, components):
    """
    Render one dev-vs-prod volume plot per requested component.

    For each entry of ``components``, in order, a progress line is printed
    and ``plot_filtered_components`` is called with both DataFrames and
    that entry.

    Args:
    - dev_df (DataFrame): The development volume DataFrame.
    - prod_df (DataFrame): The production volume DataFrame.
    - components (list): Component names to visualize, one plot each.
    """
    def _render(component):
        # Announce the component first so the message precedes its figure.
        print(f"Generating visualization for component: {component}")
        plot_filtered_components(dev_df, prod_df, component)

    for requested in components:
        _render(requested)


In [15]:
# Step 0: Seed the global RNGs so a fresh "Restart & Run All" reproduces the
# same synthetic data. create_dataframes() draws from both np.random and random.
SEED = 42
random.seed(SEED)
np.random.seed(SEED)

# Step 1: Generate the DataFrames
dev_df, prod_df = create_dataframes()
# Step 2: Define the list of components
components_list = ['ComponentA', 'ComponentB', 'ComponentC', 'ComponentD', 'ComponentE']
# Step 3: Generate visualizations for each component
generate_visualizations(dev_df, prod_df, components_list)


Generating visualization for component: ComponentA




Generating visualization for component: ComponentB




Generating visualization for component: ComponentC




Generating visualization for component: ComponentD




Generating visualization for component: ComponentE


