In [1]:
# Bokeh setup (paste once)
from bokeh.io import output_notebook, show
from bokeh.plotting import figure
from bokeh.models import ColumnDataSource, HoverTool, LinearColorMapper, ColorBar
from bokeh.transform import dodge
from bokeh.palettes import Viridis256, RdBu11
import pandas as pd
import numpy as np

output_notebook()

# Configuration. The path is the Colab-style location used by this notebook;
# change DATA_PATH when running elsewhere.
DATA_PATH = "/content/cleaned_air_quality.csv"
MAX_POINTS = 2000  # upper bound on rows sent to the browser, for interactive speed

df = pd.read_csv(DATA_PATH)

# Use the first column whose name mentions "date" or "time" as the time axis.
# str() keeps the check safe for non-string column labels.
date_cols = [c for c in df.columns if "date" in str(c).lower() or "time" in str(c).lower()]
date_col = date_cols[0] if date_cols else None
if date_col is not None:
    # Unparseable entries become NaT instead of raising.
    df[date_col] = pd.to_datetime(df[date_col], errors="coerce")
else:
    # No time-like column: fall back to a synthetic 0..n-1 position column.
    df["_index_time"] = pd.RangeIndex(len(df))
    date_col = "_index_time"

# Numeric columns available for plotting. The synthetic "_index_time" helper is
# itself numeric, so it is excluded explicitly; otherwise it could be picked as
# col2 or show up in the correlation heatmap.
num_cols = [c for c in df.select_dtypes(include=["number"]).columns if c != "_index_time"]
if not num_cols:
    # Every plot below needs at least one numeric column; fail here with a clear
    # message rather than later with col1 = None.
    raise ValueError(f"No numeric columns found in {DATA_PATH!r}; nothing to plot.")
col1 = num_cols[0]
col2 = num_cols[1] if len(num_cols) > 1 else col1

# Down-sample large frames (fixed seed for reproducibility); small frames are
# copied whole. Note that a sampled frame is in random row order.
sample_df = df.sample(n=MAX_POINTS, random_state=1) if len(df) > MAX_POINTS else df.copy()
source = ColumnDataSource(sample_df)


In [2]:
#1. Line Plot
# A line glyph joins points in source order, and sample_df may be in random
# (sampled) order. Build a dedicated source sorted along the x column so the
# line runs left to right; rows without an x value cannot be placed and are dropped.
line_df = sample_df.dropna(subset=[date_col]).sort_values(date_col)
line_src = ColumnDataSource(line_df)

has_dates = date_col != "_index_time"
p = figure(title=f"1. Line Plot — {col1}",
           x_axis_type="datetime" if has_dates else "auto",
           x_axis_label="Time" if has_dates else "Index",
           y_axis_label=col1, height=350)
p.line(x=date_col, y=col1, source=line_src, line_width=2)
# Tooltip fields use the @{name} form: a bare @name stops at the first
# non-word character, which breaks for columns such as "PM2.5" or "CO(GT)".
p.add_tools(HoverTool(tooltips=[("index", "$index"), (col1, f"@{{{col1}}}")]))
show(p)

In [3]:
#2. Scatter Plot
p = figure(title=f"2. Scatter Plot — {col2} vs {col1}", height=350,
           x_axis_label=col1, y_axis_label=col2)
# scatter() draws circle markers by default; circle() with a screen-space
# `size` is deprecated in recent Bokeh releases.
p.scatter(x=col1, y=col2, source=source, size=6, alpha=0.6)
# Tooltip fields use the @{name} form so column names containing dots,
# parentheses or spaces resolve correctly.
p.add_tools(HoverTool(tooltips=[(col1, f"@{{{col1}}}"), (col2, f"@{{{col2}}}")]))
show(p)



In [4]:
#3. Bar Chart
bar_df = sample_df.reset_index().head(20)
# Categorical axes need string factors; compute them once and share them
# between the axis range and the glyph.
bar_labels = [str(i) for i in bar_df['index'].tolist()]
# Fixed key names ("label"/"value") keep the source independent of whatever
# the plotted column happens to be called.
bar_src = ColumnDataSource(dict(label=bar_labels, value=bar_df[col1].values))

p = figure(title=f"3. Bar Chart — first 20 rows ({col1})", x_range=bar_labels, height=350)
p.vbar(x="label", top="value", width=0.8, source=bar_src)
p.add_tools(HoverTool(tooltips=[("index", "@label"), (col1, "@value")]))
p.xaxis.axis_label = "Index"
p.yaxis.axis_label = col1
show(p)

In [12]:
#4. Pie Chart
# Each wedge spans an angle proportional to its share of the total, which only
# makes sense for strictly positive values; missing and non-positive entries
# are skipped before taking the first five.
vals = sample_df[col1].dropna()
vals = vals[vals > 0].head(5)
if vals.empty:
    raise ValueError(f"Pie chart needs at least one positive value in {col1!r}.")

labels = [f"row_{i}" for i in vals.index]
angles = (vals / vals.sum()).to_numpy() * 2 * np.pi
ends = np.cumsum(angles)
# Every wedge starts where the previous one ended; the first starts at 0.
starts = np.concatenate([[0.0], ends[:-1]])

# Evenly spaced colours from the palette, one per wedge.
step = max(1, len(Viridis256) // len(labels))
colors = list(Viridis256[::step][:len(labels)])

# Colours live in the source and are referenced by column name.
pie_src = ColumnDataSource(dict(
    start=starts,
    end=ends,
    label=labels,
    value=vals.to_numpy(),
    color=colors,
))

p = figure(title=f"4. Pie Chart — first 5 {col1} values", height=350, x_range=(-1, 1), y_range=(-1, 1))
p.wedge(x=0, y=0, radius=0.8,
        start_angle='start', end_angle='end',
        line_color="white", fill_color='color', source=pie_src)
p.add_tools(HoverTool(tooltips=[("label", "@label"), (col1, "@value")]))
# The x/y coordinates carry no meaning on a pie, so hide axes and grid.
p.axis.visible = False
p.grid.grid_line_color = None
show(p)


In [6]:
#5. Histogram
vals = sample_df[col1].dropna()
# dropna() keeps +/-inf, and np.histogram raises when the data range is not
# finite, so restrict to finite values before binning.
vals = vals[np.isfinite(vals)]
hist, edges = np.histogram(vals, bins=30)

p = figure(title=f"5. Histogram — {col1}", height=350)
# One quad per bin: edges[i]..edges[i+1] wide, hist[i] tall.
p.quad(top=hist, bottom=0, left=edges[:-1], right=edges[1:], fill_alpha=0.7)
p.xaxis.axis_label = col1
p.yaxis.axis_label = "Count"
show(p)

In [7]:
#6. Box Plot
# Quartiles and whiskers of col1 over the sampled rows.
series = sample_df[col1].dropna()
if series.empty:
    raise ValueError(f"Box plot needs at least one non-missing value in {col1!r}.")

q1, q2, q3 = np.percentile(series, [25, 50, 75])
iqr = q3 - q1
# Whiskers reach the most extreme observations still within 1.5 * IQR of the box.
low = series[series >= (q1 - 1.5 * iqr)].min()
high = series[series <= (q3 + 1.5 * iqr)].max()

box_src = ColumnDataSource(dict(category=["All"], q1=[q1], q2=[q2], q3=[q3], low=[low], high=[high]))

p = figure(x_range=["All"], title=f"6. Box Plot — {col1}", height=350)
# The IQR box is drawn as two outlined bars that meet at the median, so their
# shared edge is the median line. (A segment from a point to itself has zero
# length and would draw nothing.)
p.vbar(x="category", width=0.7, top="q3", bottom="q2", source=box_src,
       fill_color="#E08E79", line_color="black")
p.vbar(x="category", width=0.7, top="q2", bottom="q1", source=box_src,
       fill_color="#E08E79", line_color="black")
# whiskers
p.segment("category", "high", "category", "q3", source=box_src, line_width=2, color="black")
p.segment("category", "low", "category", "q1", source=box_src, line_width=2, color="black")
# whisker caps: near-zero-height rects, visible through their outline
p.rect(x="category", y="high", width=0.2, height=0.0001, source=box_src, color="black")
p.rect(x="category", y="low", width=0.2, height=0.0001, source=box_src, color="black")

p.yaxis.axis_label = col1
show(p)

In [8]:
#7. Sine Wave
# 400 evenly spaced samples of sin(x) on [0, 20].
x = np.linspace(0, 20, 400)
y = np.sin(x)
sine_src = ColumnDataSource(dict(x=x, y=y))
p = figure(title="7. Sine Wave (sin(x))", height=350, x_axis_label="x", y_axis_label="sin(x)")
p.line(x="x", y="y", source=sine_src, line_width=2)
# Faint markers at the sample points. scatter() draws circles by default;
# circle() with a screen-space `size` is deprecated in recent Bokeh releases.
p.scatter(x="x", y="y", source=sine_src, size=2, alpha=0.3)
show(p)



In [9]:
#8. Area Plot
# Take the first 50 rows that have a value, then order them by their original
# row label. sample_df may be in random (sampled) order, and both varea and
# line need x to increase monotonically or the shape folds back on itself.
area_df = (
    sample_df
    .dropna(subset=[col1])
    .head(50)
    .sort_index()
    .reset_index()
)
area_src = ColumnDataSource(area_df)
p = figure(title=f"8. Area Plot — first 50 {col1} values", height=350, x_axis_label="Index", y_axis_label=col1)
p.varea(x='index', y1=0, y2=col1, source=area_src, fill_alpha=0.5)
p.line(x='index', y=col1, source=area_src, line_width=2)
show(p)

In [10]:
#9. Horizontal Bar Chart
# Ten largest col1 values, largest first, with the original row label kept
# in the 'index' column.
top10 = sample_df[[col1]].dropna().sort_values(col1, ascending=False).head(10).reset_index()
# Categorical axes need string factors; compute once and reuse.
top10_labels = [str(i) for i in top10['index'].tolist()]
# Fixed key names ("label"/"value") keep the source independent of whatever
# the plotted column happens to be called.
h_src = ColumnDataSource(dict(label=top10_labels, value=top10[col1].values))

# The first factor of a categorical y-range sits at the bottom of the axis, so
# the range is reversed to put the largest value at the top.
p = figure(y_range=top10_labels[::-1], title=f"9. Horizontal Bar Chart — Top 10 by {col1}", height=350)
p.hbar(y="label", right="value", height=0.6, source=h_src)
p.add_tools(HoverTool(tooltips=[("row", "@label"), (col1, "@value")]))
p.xaxis.axis_label = col1
p.yaxis.axis_label = "Row index"
show(p)

In [11]:
#10. Correlation Heatmap
# Limit to the first 20 numeric columns for readability; the synthetic
# "_index_time" helper column (if present) is not a measurement and is skipped.
corr_cols = [c for c in num_cols if c != "_index_time"][:20]
# Pairs with undefined correlation (e.g. a constant column) are shown as 0.
corr = df[corr_cols].corr().fillna(0)
corr_array = corr.to_numpy()

# Categorical ranges need string factors.
names = [str(c) for c in corr.columns]
n = len(names)

# One rect per matrix cell, flattened row by row: y is the row name repeated
# across the row, x cycles through the column names.
rect_src = ColumnDataSource(dict(
    x=names * n,
    y=[row for row in names for _ in range(n)],
    corr=corr_array.ravel(),
))

# Fixed [-1, 1] scale so colours are comparable across datasets.
mapper = LinearColorMapper(palette="RdBu11", low=-1, high=1)

# Tooltips read the column names from the source (@x / @y); $x / $y would
# report raw plot coordinates instead.
p = figure(title="10. Correlation Heatmap (rects)",
           x_range=names, y_range=names[::-1],
           x_axis_location="above", height=500,
           tooltips=[("x", "@x"), ("y", "@y"), ("corr", "@corr{0.00}")])
p.rect(x="x", y="y", width=1, height=1, source=rect_src,
       fill_color={'field': 'corr', 'transform': mapper}, line_color=None)
color_bar = ColorBar(color_mapper=mapper, location=(0, 0))
p.add_layout(color_bar, 'right')
p.xaxis.major_label_orientation = 1.0
show(p)
