In [5]:
from bokeh.plotting import output_notebook 
# Must run before any plotting cell: later show() calls rely on this output mode.
output_notebook()  # Display Bokeh plots inline in Jupyter Notebook

In [27]:
from bokeh.plotting import figure, show
import numpy as np

def bar_chart(x_values: list, top_values: list,
              width: int = None, height: int = None, bar_width: float = None, color: str = "#CAB2D6"):
    """
    Create and show a simple vertical bar chart whose layout is auto-scaled from the data.

    Args:
        x_values (list): x-coordinates for the bars.
        top_values (list): heights for the bars.
        width (int, optional): width of the figure. Defaults to len(x_values)*50 or 300, whichever is larger.
        height (int, optional): height of the figure. Defaults to max(top_values)*1.2 or 300, whichever is
            larger (300 when top_values is empty).
        bar_width (float, optional): width of each bar. Defaults to 80% of the average gap between sorted
            x-values, or 0.5 when no usable gap exists (fewer than two bars, or all x-values identical).
        color (str): color for the bars.

    Returns:
        None. The figure is rendered through show().
    """
    fallback_bar_width = 0.5  # used whenever a gap-based width cannot be derived

    # Auto-calculate width if not provided:
    if width is None:
        width = max(300, len(x_values) * 50)

    # Auto-calculate height if not provided; `default=0` keeps empty input from raising.
    if height is None:
        tallest = max(top_values, default=0)
        height = max(300, int(tallest * 1.2))

    # Auto-calculate bar_width if not provided:
    if bar_width is None:
        gaps = np.diff(sorted(x_values)) if len(x_values) > 1 else np.array([])
        if np.any(gaps):
            bar_width = np.mean(gaps) * 0.8
        else:
            # Single bar, no bars, or every x identical: an average gap of 0 would
            # produce invisible zero-width bars, so use the fixed fallback instead.
            bar_width = fallback_bar_width

    p = figure(width=width, height=height)
    p.vbar(x=x_values, width=bar_width, bottom=0, top=top_values, color=color)
    show(p)

# Example usage: four bars at x = 1..4; figure size and bar width are left to the
# function's auto-scaling defaults.
bar_chart([1, 2, 3, 4], [5, 6, 7, 8])


In [28]:
from math import pi

import pandas as pd

from bokeh.models import BasicTicker, PrintfTickFormatter
from bokeh.plotting import figure, show
from bokeh.sampledata.unemployment1948 import data
from bokeh.transform import linear_cmap

def create_heatmap(df, x_col, y_col, value_col, title, colors=None,
                   tooltips=None, width=900, height=400):
    """Create, show and return a rectangular (categorical) heatmap.

    Args:
        df: long-format DataFrame with one row per heatmap cell. It is not modified.
        x_col: column whose distinct values become the x-axis categories.
        y_col: column whose distinct values become the y-axis categories.
        value_col: numeric column that drives the cell color.
        title: plot title.
        colors: sequence of colors for the color ramp. Defaults to a 9-step palette.
        tooltips: hover tooltips. Defaults to "<y> <x>" plus the value followed by "%".
        width: figure width in pixels.
        height: figure height in pixels.

    Returns:
        The figure that was passed to show(), so callers can keep customising it.
    """
    if colors is None:
        # Built per call instead of living in the signature as a shared mutable default.
        colors = [
            "#75968f",
            "#a5bab7",
            "#c9d9d3",
            "#e2e2e2",
            "#dfccce",
            "#ddb7b1",
            "#cc7878",
            "#933b41",
            "#550b1d",
        ]

    # Both axes are categorical, so the factors must be strings. The plotted columns
    # are cast the same way; otherwise numeric columns would never match the factors.
    x_range = sorted(df[x_col].unique().astype(str))
    y_range = [str(value) for value in sorted(df[y_col].unique(), reverse=True)]
    plot_df = df.assign(**{x_col: df[x_col].astype(str), y_col: df[y_col].astype(str)})

    # "@{name}" (braced) stays valid even when a column name contains spaces.
    default_tooltips = [
        (f"{y_col} {x_col}", f"@{{{y_col}}} @{{{x_col}}}"),
        (f"{value_col}", f"@{{{value_col}}}%"),
    ]

    p = figure(
        title=title,
        x_range=x_range,
        y_range=y_range,
        x_axis_location="above",
        width=width,
        height=height,
        tools="hover,save,pan,box_zoom,reset,wheel_zoom",
        toolbar_location="below",
        tooltips=tooltips or default_tooltips,
    )

    # Strip grid, axis lines and ticks so only the colored cells and labels remain.
    p.grid.grid_line_color = p.axis.axis_line_color = None
    p.axis.major_tick_line_color = None
    p.axis.major_label_text_font_size = "7px"
    p.xaxis.major_label_orientation = pi/3

    mapper = linear_cmap(value_col, colors,
                         low=df[value_col].min(),
                         high=df[value_col].max())

    # One unit-sized rectangle per row of the DataFrame.
    r = p.rect(x=x_col, y=y_col, width=1, height=1,
               source=plot_df, fill_color=mapper, line_color=None)

    color_bar = r.construct_color_bar(
        major_label_text_font_size="7px",
        ticker=BasicTicker(desired_num_ticks=len(colors)),
        formatter=PrintfTickFormatter(format="%d%%"),
        label_standoff=6, border_line_color=None, padding=5
    )
    p.add_layout(color_bar, 'right')

    show(p)
    return p

# Prepare the dataset: string years as the index, the yearly summary column removed,
# and the remaining columns axis named "Month". Built as one chain that returns a new
# frame, so the DataFrame object owned by bokeh.sampledata is never modified in place.
data = (
    data.assign(Year=data["Year"].astype(str))
    .set_index("Year")
    .drop(columns="Annual")
    .rename_axis(columns="Month")
)

# Reshape the data to a long format: one row per (Year, Month) pair with its rate
df = pd.DataFrame(data.stack(), columns=["rate"]).reset_index()

# Call the function with appropriate arguments
p = create_heatmap(
    df=df,
    x_col="Year",
    y_col="Month",
    value_col="rate",
    title=f"US Unemployment ({df['Year'].min()} - {df['Year'].max()})"
)

In [39]:
import pandas as pd
from squarify import normalize_sizes, squarify

from bokeh.plotting import figure, show
from bokeh.sampledata.sample_superstore import data
from bokeh.transform import factor_cmap
from bokeh.palettes import Category10

def create_treemap(data, group_cols, value_col, color_palette=Category10[4],
                   title=None, width=800, height=450):
    """Create, show and return a hierarchical treemap.

    Args:
        data: DataFrame holding the grouping columns and the value column.
        group_cols: grouping columns from outermost to innermost level (at least two).
        value_col: numeric column; rectangle areas are proportional to its group sums.
        color_palette: colors assigned to the distinct values of group_cols[0].
        title: optional plot title.
        width: figure width in pixels (also the treemap layout width).
        height: figure height in pixels (also the treemap layout height).

    Returns:
        The figure that was passed to show().

    Raises:
        ValueError: if fewer than two group columns are given.
    """
    if len(group_cols) < 2:
        raise ValueError("At least two group columns are required.")

    grouped = data.groupby(group_cols)[value_col].sum().reset_index()

    def treemap(df, col, x, y, dx, dy, N=100):
        """Lay out the N largest rows of `df` (by `col`) inside the given rectangle."""
        subset = df.nlargest(N, col)
        sizes = normalize_sizes(subset[col], dx, dy)
        rects = squarify(sizes, x, y, dx, dy)
        rects_df = pd.DataFrame(rects, index=subset.index)
        # Join the original subset to preserve columns like 'Region'
        return subset.join(rects_df)

    def recursive_treemap(df, group_levels, x, y, dx, dy):
        """Nest the layout of each deeper level inside the rectangle of its parent."""
        if len(group_levels) == 1:  # Base case: Last level (smallest categories)
            return treemap(df, value_col, x, y, dx, dy, N=10)

        current_level = group_levels[0]
        next_level = group_levels[1:]

        # Sum only the value column: a bare .sum() would also aggregate every other
        # column (including the string-valued inner grouping columns).
        level_totals = df.groupby(current_level)[value_col].sum().reset_index()
        top_blocks = treemap(level_totals, value_col, x, y, dx, dy)

        all_blocks = []
        for _, row in top_blocks.iterrows():
            sub_df = df[df[current_level] == row[current_level]]
            sub_blocks = recursive_treemap(sub_df, next_level, row.x, row.y, row.dx, row.dy)
            all_blocks.append(sub_blocks)

        return pd.concat(all_blocks)

    blocks = recursive_treemap(grouped, group_cols, 0, 0, width, height)
    # Label anchor (top edge of each rectangle); computed before any glyph uses `blocks`.
    blocks['ytop'] = blocks.y + blocks.dy

    # "@{name}" (braced) stays valid even when the column name contains spaces.
    p = figure(width=width, height=height, toolbar_location=None, title=title,
               x_axis_location=None, y_axis_location=None,
               tooltips=f"@{{{group_cols[-1]}}}")
    p.grid.grid_line_color = None

    # Color based on top-level category
    regions = data[group_cols[0]].unique()
    p.block('x', 'y', 'dx', 'dy', source=blocks,
            line_color='white', line_width=1,
            fill_color=factor_cmap(group_cols[0], color_palette, regions))

    # Add labels
    p.text(x='x', y='ytop', text=group_cols[-1], source=blocks,
           text_font_size='6pt', text_color='white',
           x_offset=2, y_offset=2, text_baseline='top')

    show(p)
    return p


# Call the function: "Region" is the outer (colored) level and "City" the inner level;
# rectangle areas come from the summed "Sales" of each group.
p = create_treemap(
    data=data,
    group_cols=["Region", "City"],
    value_col="Sales",
    title="Treemap of Sales by Region and City",
)

In [43]:
import numpy as np

from bokeh.core.enums import MarkerType
from bokeh.io import curdoc, show
from bokeh.models import ColumnDataSource, Grid, LinearAxis, Plot, Scatter

def create_marker_plot(x, y, markers=None, size=20, color="#74add1",
                      title=None, width=300, height=300):
    """Show a scatter plot in which each point can use its own marker shape.

    Args:
        x, y: point coordinates (same length).
        markers: one marker name per point; every point is a "circle" when omitted.
        size: marker size.
        color: marker fill color.
        title: optional plot title.
        width, height: plot size in pixels.
    """
    marker_column = ["circle"] * len(x) if markers is None else markers
    data_source = ColumnDataSource(dict(x=x, y=y, markers=marker_column))

    canvas = Plot(title=title, width=width, height=height,
                  min_border=0, toolbar_location=None)
    canvas.add_glyph(
        data_source,
        Scatter(x="x", y="y", size=size, fill_color=color, marker="markers"),
    )

    # Axes first, then one grid per dimension that reuses the matching axis ticker.
    bottom_axis, left_axis = LinearAxis(), LinearAxis()
    canvas.add_layout(bottom_axis, "below")
    canvas.add_layout(left_axis, "left")
    for dimension, axis in enumerate((bottom_axis, left_axis)):
        canvas.add_layout(Grid(dimension=dimension, ticker=axis.ticker))

    show(canvas)

# Create the data: one point per entry of MarkerType, placed on the parabola y = x**2
N = len(MarkerType)
x = np.linspace(-2, 2, N)
y = x**2
# NOTE(review): the per-point marker list below is disabled and `markers` is not passed
# to the call, so every point uses the function's default marker — confirm this is intended.
# markers = list(MarkerType)

# Now call the function using the generated data
create_marker_plot(x, y, size=20, color="#74add1",
                   title="Marker Plot", width=300, height=300)

In [44]:

import numpy as np
import pandas as pd

from bokeh.models import ColumnDataSource
from bokeh.palettes import tol
from bokeh.plotting import figure, show

# Generate random data: 15 rows x N series with integer values in [10, 100).
# A fixed seed makes the figure identical on every Restart & Run All.
N = 10
SEED = 42
df = pd.DataFrame(
    np.random.default_rng(SEED).integers(10, 100, size=(15, N))
).add_prefix("y")

# Add index column for x values
df["index"] = df.index

# Define stackers (column names to be stacked)
stackers = [f"y{i}" for i in range(N)]

def create_stacked_area(
        df,
        stackers,
        palette=None,
        x_range=None,
        y_range=None,
        title=None,
        width=600,
        height=400
    ):
    """Create and show a stacked area chart.

    Args:
        df: DataFrame with an "index" column (x values) and one column per stacker.
        stackers: column names to stack, bottom to top.
        palette: either a sequence with one color per stacker, or a dict keyed by
            palette size (such as tol["Sunset"]), from which the entry matching
            len(stackers) is taken. Defaults to tol["Sunset"].
        x_range: (start, end) of the x axis. Defaults to (0, len(df) - 1).
        y_range: (start, end) of the y axis. Defaults to 0 .. 110% of the tallest stack.
        title: optional plot title.
        width, height: figure size in pixels.

    Raises:
        ValueError: if a size-keyed palette has no entry for len(stackers).
    """
    if palette is None:
        palette = tol["Sunset"]
    if isinstance(palette, dict):
        # A size-keyed palette family is not itself a list of colors; pick the
        # variant that provides exactly one color per stacked series.
        if len(stackers) not in palette:
            raise ValueError(
                f"No palette with {len(stackers)} colors available "
                f"(sizes: {sorted(palette)}); pass an explicit list of colors."
            )
        palette = palette[len(stackers)]

    p = figure(x_range=x_range or (0, len(df)-1),
               y_range=y_range or (0, df[stackers].sum(axis=1).max()*1.1),
               width=width, height=height, title=title)
    p.grid.minor_grid_line_color = "#eeeeee"

    p.varea_stack(stackers=stackers, x='index', color=palette,
                  legend_label=stackers, source=ColumnDataSource(df))

    p.legend.update(
        orientation="horizontal",
        background_fill_color="#fafafa",
        location="top_center"
    )
    show(p)

# tol["Sunset"] is indexed by palette size; [N] selects the variant with one color per stacker.
create_stacked_area(df, stackers, palette=tol["Sunset"][N], title="Stacked Area Chart")

In [45]:
import colorcet as cc
from numpy import linspace
from scipy.stats import gaussian_kde

from bokeh.models import ColumnDataSource, FixedTicker, PrintfTickFormatter
from bokeh.plotting import figure, show
from bokeh.sampledata.perceptions import probly
        
def plot_ridge(data, categories, palette, scale=20, x_range=(-5,105), width=900):
    """
    Show a ridgeline plot: one kernel-density outline per category, stacked vertically.

    Expects:
      - data: mapping from category name to a 1d array of numeric samples
      - categories: list of category names in the desired vertical order
      - palette: list of colors with at least as many elements as there are categories
      - scale: multiplier applied to every density value before plotting
      - x_range: visible extent of the x axis
      - width: figure width in pixels
    """
    # Densities are evaluated on a fixed grid that is wider than the default x_range.
    grid = linspace(-20, 110, 500)
    source = ColumnDataSource(data=dict(x=grid))
    p = figure(y_range=categories, width=width, x_range=x_range, toolbar_location=None)

    # Walk the categories back to front; the color index follows that reversed order.
    for color_idx, cat in enumerate(reversed(categories)):
        density = gaussian_kde(data[cat])(grid)
        # Each y value is a (category, offset) pair, with the scaled density as offset.
        outline = list(zip([cat] * len(density), scale * density))
        source.add(outline, cat)
        p.patch("x", cat, color=palette[color_idx], alpha=0.6, line_color="black", source=source)

    # Frame and background
    p.background_fill_color = "#efefef"
    p.outline_line_color = None

    # Percent ticks every 10 units on x, with the vertical grid lines on the same ticks
    p.xaxis.ticker = FixedTicker(ticks=list(range(0, 101, 10)))
    p.xaxis.formatter = PrintfTickFormatter(format="%d%%")
    p.xgrid.ticker = p.xaxis.ticker
    p.xgrid.grid_line_color = "#dddddd"
    p.ygrid.grid_line_color = None

    # No axis lines or tick marks
    p.axis.axis_line_color = None
    p.axis.major_tick_line_color = None
    p.axis.minor_tick_line_color = None

    p.y_range.range_padding = 0.12

    show(p)

# Sample data shipped with Bokeh; it is indexed by category name below.
# NOTE(review): presumably each entry holds the numeric samples for that category — confirm.
data = probly

# Vertical order of the ridges: the keys of `probly`, back to front
categories = list(probly.keys())[::-1]

# 17 colors taken from colorcet's rainbow map, sampling every 15th entry
palette = [cc.rainbow[15 * step] for step in range(17)]

# Draw the ridgeline plot with the default scale, x range and width
plot_ridge(data, categories, palette)

In [46]:
import numpy as np
from bokeh.plotting import show, figure

def plot_histogram(data, bins=np.linspace(-3, 3, 40), width=670, height=400,
                   title="Normal (Gaussian) Distribution", mu=0.0, sigma=1.0):
    """
    Show a density histogram of `data` with a normal PDF curve drawn on top.

    Expects:
      - data: 1d numpy array of random samples
      - bins: anything np.histogram accepts (edge array, bin count, or rule name)
      - width, height: figure size in pixels
      - title: plot title
      - mu, sigma: mean and standard deviation of the reference normal PDF
        (defaults give the standard normal)
    """
    p = figure(width=width, height=height, toolbar_location=None, title=title)
    # Histogram
    hist, edges = np.histogram(data, density=True, bins=bins)
    p.quad(top=hist, bottom=0, left=edges[:-1], right=edges[1:],
           fill_color="skyblue", line_color="white", legend_label="Samples")
    # Reference PDF, evaluated across the histogram's actual extent. The computed
    # edges are used (not `bins`) so that a bin count or rule name also works.
    x = np.linspace(edges[0], edges[-1], 100)
    pdf = np.exp(-0.5 * ((x - mu) / sigma) ** 2) / (sigma * np.sqrt(2.0 * np.pi))
    p.line(x, pdf, line_width=2, line_color="navy", legend_label="PDF")
    p.y_range.start = 0
    p.xaxis.axis_label = "x"
    p.yaxis.axis_label = "PDF(x)"

    show(p)

# Supply the data by generating 1000 random samples from a normal distribution.
# The generator is seeded so the histogram is identical on every Restart & Run All.
SEED = 42
rng = np.random.default_rng(SEED)
data_samples = rng.normal(loc=0, scale=1, size=1000)

# Call the histogram plotting function with the generated data
plot_histogram(data_samples)

In [47]:
from math import pi
import pandas as pd
from bokeh.palettes import Category20c
from bokeh.plotting import figure, show
from bokeh.transform import cumsum

def plot_pie(data, height=350, title="Pie Chart"):
    """
    Show a pie chart with one wedge per category.

    Expects:
      - data: dict mapping category (e.g. country) to numeric values.
      - height: figure height in pixels.
      - title: plot title.

    Wedge angles are proportional to each value's share of the total. Colors come
    from Category20c; any number of categories is supported (see the palette note
    in the body).
    """
    # Transform the data into a DataFrame
    df = pd.Series(data).reset_index(name="value").rename(columns={"index": "category"})
    df["angle"] = df["value"] / df["value"].sum() * 2 * pi

    # Category20c is keyed by palette size and does not provide every size.
    # Small inputs take a prefix of the 3-color palette; inputs larger than the
    # biggest palette cycle through it, so the lookup can no longer raise KeyError.
    n_categories = len(df)
    largest = Category20c[20]
    if n_categories <= len(largest):
        palette = Category20c[max(n_categories, 3)][:n_categories]
    else:
        palette = [largest[i % len(largest)] for i in range(n_categories)]
    df["color"] = list(palette)

    p = figure(height=height, title=title, toolbar_location=None,
               tools="hover", tooltips="@category: @value", x_range=(-0.5, 1.0))
    # cumsum turns the per-row angles into consecutive start/end angles.
    p.wedge(x=0, y=1, radius=0.4,
            start_angle=cumsum("angle", include_zero=True),
            end_angle=cumsum("angle"),
            line_color="white", fill_color="color",
            legend_field="category", source=df)
    p.axis.axis_label = None
    p.axis.visible = False
    p.grid.grid_line_color = None
    show(p)

# Supply the data as a dictionary mapping country names to values.
# Twelve entries, so plot_pie looks up a twelve-color palette.
data = {
    "United States": 157,
    "United Kingdom": 93,
    "Japan": 89,
    "China": 63,
    "Germany": 44,
    "India": 42,
    "Italy": 40,
    "Australia": 35,
    "Brazil": 32,
    "France": 31,
    "Taiwan": 31,
    "Spain": 29,
}

# Call the function with the provided data (default height and title)
plot_pie(data)

In [48]:
from math import pi
from bokeh.io import show
from bokeh.models import (
    AnnularWedge,
    ColumnDataSource,
    Legend,
    LegendItem,
    Range1d,
)
from bokeh.plotting import figure
from bokeh.sampledata.browsers import browsers_nov_2013 as df

def plot_annular(data, colors, title="Annular Wedge Plot"):
    """
    Show a donut chart of browser market share.

    Expects:
      - data: DataFrame with a 'Browser' column and a 'Share' column (percentage)
      - colors: dict mapping browser names to colors; it must contain every browser
        whose summed share is at least 1, plus the key "Other"
      - title: plot title

    Browsers whose summed share is below 1 are merged into a single "Other" wedge.

    Raises:
        KeyError: if a plotted browser has no entry in `colors`.
    """
    # Aggregate data
    aggregated = data.groupby("Browser").sum(numeric_only=True)
    selected = aggregated[aggregated.Share >= 1].copy()
    selected.loc["Other"] = aggregated[aggregated.Share < 1].sum()
    browsers = selected.index.tolist()

    # Cumulative end angle of every wedge; each wedge starts where the previous ends.
    angles = selected.Share.map(lambda x: 2 * pi * (x / 100)).cumsum().tolist()
    source = ColumnDataSource(dict(
        start=[0] + angles[:-1],
        end=angles,
        colors=[colors[browser] for browser in browsers],
    ))

    # Create plot with fixed ranges
    xdr = Range1d(start=-2, end=2)
    ydr = Range1d(start=-2, end=2)
    p = figure(x_range=xdr, y_range=ydr, title=title, toolbar_location=None)

    glyph = AnnularWedge(x=0, y=0, inner_radius=0.9, outer_radius=1.8,
                         start_angle="start", end_angle="end", line_color="white",
                         line_width=3, fill_color="colors")
    renderer = p.add_glyph(source, glyph)

    legend = Legend(location="center")
    # `index` addresses a row of `source`, so the labels must follow the row order
    # (`browsers`). Iterating the `colors` dict instead would pair each label with
    # whichever wedge happens to share its position.
    for row_index, browser in enumerate(browsers):
        legend.items.append(LegendItem(label=browser, renderers=[renderer], index=row_index))
    p.add_layout(legend, "center")
    show(p)

# Fill color for each wedge, keyed by browser name. "Other" is the wedge that
# plot_annular builds from all browsers with a summed share below 1.
colors = {
    "Chrome": "seagreen",
    "Firefox": "tomato",
    "Safari": "orchid",
    "Opera": "firebrick",
    "IE": "skyblue",
    "Other": "lightgray",
}

# Call the function with the Bokeh sample data and the colors mapping.
plot_annular(df, colors, title="Web browser market share (November 2013)")

In [50]:
import pandas as pd
from bokeh.models import ColumnDataSource, Whisker
from bokeh.plotting import figure, show
from bokeh.transform import factor_cmap
from bokeh.palettes import Category10
from bokeh.sampledata.autompg2 import autompg2

def plot_box(data, x_col="kind", y_col="hwy", title="Box Plot", background_fill_color="#eaefef",
             y_axis_label="MPG"):
    """
    Show a box plot of a numeric column grouped by a categorical column.

    Expects:
      - data: DataFrame with at least two columns, one categorical (x_col) and one numeric (y_col)
      - x_col, y_col: names of those columns
      - title: plot title
      - background_fill_color: plot background color
      - y_axis_label: label of the value axis

    Per category the function computes the quartiles, places the whiskers at
    q1 - 1.5*IQR and q3 + 1.5*IQR, and marks every point outside them as an outlier.
    The input DataFrame is not modified.
    """
    # Work on a copy with fixed internal column names.
    df = data.copy()
    df = df.rename(columns={x_col: "kind", y_col: "hwy"})
    kinds = df.kind.unique()

    # Compute quantiles for each category
    qs = df.groupby("kind").hwy.quantile([0.25, 0.5, 0.75]).unstack().reset_index()
    qs.columns = ["kind", "q1", "q2", "q3"]

    # Compute interquartile range (IQR) and outlier bounds
    iqr = qs.q3 - qs.q1
    qs["upper"] = qs.q3 + 1.5 * iqr
    qs["lower"] = qs.q1 - 1.5 * iqr

    # Merge quantile stats back into the main DataFrame
    df = pd.merge(df, qs, on="kind", how="left")

    source = ColumnDataSource(qs)
    p = figure(x_range=kinds, toolbar_location=None, title=title,
               background_fill_color=background_fill_color, y_axis_label=y_axis_label)

    # Add whiskers for outlier bounds
    whisker = Whisker(base="kind", upper="upper", lower="lower", source=source)
    whisker.upper_head.size = whisker.lower_head.size = 20
    p.add_layout(whisker)

    # Exactly one color per category. Category10 is keyed by palette size and may not
    # provide very small sizes, so those take a prefix of the 3-color palette; more
    # than ten categories cycle through the largest palette instead of running short.
    n_kinds = len(kinds)
    largest = Category10[10]
    if n_kinds <= len(largest):
        palette = Category10[max(n_kinds, 3)][:n_kinds]
    else:
        palette = [largest[i % len(largest)] for i in range(n_kinds)]
    cmap = factor_cmap("kind", palette=palette, factors=list(kinds))

    # Plot the box (middle 50%) split into two vertical bars
    p.vbar("kind", 0.7, "q2", "q3", source=source, color=cmap, line_color="black")
    p.vbar("kind", 0.7, "q1", "q2", source=source, color=cmap, line_color="black")

    # Plot outliers that fall outside the computed whiskers
    outliers = df[~df.hwy.between(df.lower, df.upper)]
    p.scatter("kind", "hwy", source=ColumnDataSource(outliers), size=6, color="black", alpha=0.3)

    p.xgrid.grid_line_color = None
    p.axis.major_label_text_font_size = "14px"
    p.axis.axis_label_text_font_size = "12px"

    show(p)

# Supply the data: keep only the "class" and "hwy" columns and rename "class" to "kind",
# the column name plot_box uses by default.
data_box = autompg2[["class", "hwy"]].rename(columns={"class": "kind"})

# Create and display the box plot.
plot_box(data_box, x_col="kind", y_col="hwy", title="Highway MPG distribution by vehicle class")
