In [31]:
import pandas as pd
from bokeh.plotting import figure, show, output_notebook
from bokeh.models import ColumnDataSource, HoverTool, ColorBar, LinearColorMapper
from bokeh.transform import factor_cmap
from bokeh.layouts import layout
from bokeh.palettes import Category10, Viridis256
import numpy as np

# Read the zipped daily air-quality CSV and convert the Date column to
# datetimes in the same step.
data = pd.read_csv("/content/city_day.csv.zip", compression='zip').assign(
    Date=lambda frame: pd.to_datetime(frame['Date'])
)

# Rows for the single city used by the trend chart; .copy() gives the subset
# its own storage, separate from `data`.
city_name = 'Delhi'
city_data = data.loc[data['City'] == city_name].copy()
source = ColumnDataSource(data=city_data)

In [5]:
# Time series of PM2.5 for the selected city, read from the shared `source`.
p1 = figure(
    title=f"PM2.5 Trend in {city_name}",
    x_axis_type='datetime',
    x_axis_label='Date',
    y_axis_label='PM2.5',
    width=600,
    height=350,
)
p1.line(x='Date', y='PM2.5', source=source, line_width=2, color='green')


In [6]:
# Scatter of PM2.5 against PM10 over every record in the dataset.
# Only the two plotted columns are handed to Bokeh; rows missing either value
# cannot be drawn, so they are dropped before serialization.
pm_pairs = data[['PM2.5', 'PM10']].dropna()
p2 = figure(title="PM2.5 vs PM10 by City", width=600, height=350)
# scatter() is used instead of circle(size=...), which recent Bokeh releases
# deprecate; the default scatter marker is the same circle.
p2.scatter('PM2.5', 'PM10', source=ColumnDataSource(pm_pairs), color='navy', fill_alpha=0.4, size=7)
p2.xaxis.axis_label = 'PM2.5'
p2.yaxis.axis_label = 'PM10'



In [7]:
# Histogram of all non-missing PM2.5 readings, binned into 40 equal-width bins.
pm25_values = data['PM2.5'].dropna()
hist, edges = np.histogram(pm25_values, bins=40)

p3 = figure(title="Distribution of PM2.5", width=600, height=350)
# One rectangle per bin: consecutive edges give the left/right sides.
p3.quad(
    left=edges[:-1],
    right=edges[1:],
    bottom=0,
    top=hist,
    fill_color='orange',
    line_color='white',
)


In [8]:
def _box_stats(values):
    """Return (lower whisker, Q1, median, Q3, upper whisker) for a 1-D sample.

    Whiskers sit 1.5 * IQR beyond the quartiles and are pulled in to the
    observed minimum/maximum. An empty sample yields NaN for every statistic.
    """
    if len(values) == 0:
        return (np.nan,) * 5
    first, median, third = np.percentile(values, [25, 50, 75])
    reach = 1.5 * (third - first)
    lower = max(values.min(), first - reach)
    upper = min(values.max(), third + reach)
    return lower, first, median, third, upper


# Box plot of PM2.5 per city: one box (Q1..Q3, split at the median) plus
# whisker stems for every city on a categorical x axis.
cities = data['City'].unique().tolist()
p4 = figure(x_range=cities, title="City-wise PM2.5 Levels", width=600, height=350)
box_data = [data[data['City']==c]['PM2.5'].dropna() for c in cities]
box_stats = [_box_stats(x) for x in box_data]
whisker_low = [s[0] for s in box_stats]
q1 = [s[1] for s in box_stats]
q2 = [s[2] for s in box_stats]
q3 = [s[3] for s in box_stats]
whisker_high = [s[4] for s in box_stats]

# Stems run from each quartile outward to its whisker end. A single Q3->Q1
# segment would lie entirely behind the boxes and show nothing.
p4.segment(cities, whisker_high, cities, q3, color="black")
p4.segment(cities, whisker_low, cities, q1, color="black")
# Upper half (median..Q3) and lower half (Q1..median) of each box.
p4.vbar(cities, 0.7, q2, q3, fill_color="lightblue", line_color="black")
p4.vbar(cities, 0.7, q1, q2, fill_color="lightblue", line_color="black")
p4.xaxis.major_label_orientation = "vertical"


In [9]:
# Mean AQI per city, shown as one bar per city on a categorical axis.
avg_aqi = data.groupby('City')['AQI'].mean().reset_index()
source5 = ColumnDataSource(data=avg_aqi)

p5 = figure(
    x_range=avg_aqi['City'],
    title="Average AQI by City",
    width=600,
    height=350,
)
p5.xaxis.major_label_orientation = "vertical"
p5.vbar(x='City', top='AQI', width=0.6, color='teal', source=source5)


In [10]:
# Pairwise correlation between pollutant columns, drawn as a colored grid.
pollutants = ['PM2.5','PM10','NO','NO2','NOx','NH3','CO','SO2','O3']
corr = data[pollutants].corr().rename_axis(index='x', columns='y')

# Long format: one row per (x, y) cell with its correlation in "value".
corr_data = corr.stack().reset_index(name="value")

# Color scale spans exactly the observed correlation range.
mapper = LinearColorMapper(
    palette=Viridis256,
    low=corr_data['value'].min(),
    high=corr_data['value'].max(),
)

p6 = figure(
    title="Pollutant Correlation Heatmap",
    x_range=list(corr.columns),
    y_range=list(corr.index[::-1]),  # reversed so the first pollutant is at the top
    width=600,
    height=500,
    tools="hover",
    tooltips=[("corr", "@value")],
)
p6.rect(
    x="x",
    y="y",
    width=1,
    height=1,
    source=corr_data,
    line_color=None,
    fill_color={'field': 'value', 'transform': mapper},
)

color_bar = ColorBar(color_mapper=mapper, label_standoff=12, location=(0,0))
p6.add_layout(color_bar, 'right')

In [12]:
from math import pi
from bokeh.models import ColumnDataSource

# Pie chart of how many records fall in each AQI bucket.
aqi_counts = data['AQI_Bucket'].value_counts().reset_index()
aqi_counts.columns = ['AQI_Bucket', 'Count']
# Each bucket's share of the full circle, in radians.
aqi_counts['angle'] = aqi_counts['Count']/aqi_counts['Count'].sum() * 2*pi

# Category10 is only keyed for 3..10 colors, so indexing it by the bucket
# count raises KeyError outside that range. Take colors from the 10-color
# palette instead, cycling if there are more than ten buckets.
base_palette = Category10[10]
aqi_counts['color'] = [base_palette[i % len(base_palette)] for i in range(len(aqi_counts))]

# A wedge starts where the previous one ended: the running total of angles,
# shifted down one row with 0 for the first wedge.
aqi_counts['start_angle'] = aqi_counts['angle'].cumsum().shift(fill_value=0)
aqi_counts['end_angle'] = aqi_counts['angle'].cumsum()

# Create a ColumnDataSource from the DataFrame
source7 = ColumnDataSource(aqi_counts)

p7 = figure(title="AQI Bucket Distribution", width=500, height=400, tools="hover", tooltips="@AQI_Bucket: @Count")

# Reference the column names from the source for angles and color
p7.wedge(x=0, y=0, radius=0.4, start_angle='start_angle', end_angle='end_angle',
         color='color', legend_field='AQI_Bucket', source=source7)

In [13]:
# Mean PM10 across all cities for each date, drawn as a filled area from zero.
pm10_trend = data.groupby('Date')['PM10'].mean().reset_index()

p8 = figure(
    title="Average PM10 Levels Over Time",
    x_axis_type='datetime',
    width=600,
    height=350,
)
p8.varea(
    x=pm10_trend['Date'],
    y1=0,
    y2=pm10_trend['PM10'],
    fill_color='lightgreen',
    alpha=0.6,
)

In [19]:
from bokeh.transform import jitter
from bokeh.palettes import Category20

# Strip plot: every non-missing NO2 reading plotted above its city.
p9 = figure(x_range=cities, title="NO2 Distribution Across Cities", width=600, height=350)
# Use Category20 which has more colors, and cycle through it if more than 20 cities
colors = Category20[20] * (len(cities) // 20 + 1)
for i, c in enumerate(cities):
    # Select this city's readings once and reuse them for both coordinates.
    no2_values = data.loc[data['City'] == c, 'NO2'].dropna()
    # scatter() is used instead of circle(size=...), which recent Bokeh
    # releases deprecate; the default scatter marker is the same circle.
    p9.scatter(x=[c] * len(no2_values), y=no2_values, size=4, alpha=0.4, color=colors[i])
p9.xaxis.major_label_orientation = "vertical"



In [17]:
# Keep only rows where all three plotted pollutants are present, and derive
# the marker size (twice the CO value) as its own column.
bubble_data = (
    data.dropna(subset=['PM2.5','NO2','CO'])
    .assign(bubble_size=lambda frame: frame['CO']*2)
)
source10 = ColumnDataSource(data=bubble_data)

p10 = figure(
    title="PM2.5 vs NO2 (Bubble Size = CO)",
    width=600,
    height=350,
    x_axis_label='PM2.5',
    y_axis_label='NO2',
)
# Marker size is read per-row from the 'bubble_size' column of the source.
p10.scatter(
    x='PM2.5',
    y='NO2',
    size='bubble_size',
    color='purple',
    fill_alpha=0.5,
    source=source10,
)

In [39]:
# Reset Bokeh's output configuration and re-enable notebook rendering before
# the final grid is shown. The three calls are order-dependent: notebook output
# is switched on only after the previous state has been cleared.
# NOTE(review): presumably added to recover from a stale output state earlier
# in the session — confirm it is still needed on a fresh kernel.
from bokeh.io import reset_output, output_notebook, curdoc
reset_output()  # discard any previously configured output mode
curdoc().clear()  # empty the current Bokeh document
output_notebook()  # have later show() calls render inline in the notebook


In [41]:
from bokeh.plotting import figure, show
from bokeh.layouts import gridplot
import numpy as np


# Ten placeholder figures over a shared sample grid, shown two per row.
# NOTE: the names p1..p10 are rebound here, so after this cell runs they no
# longer refer to the air-quality figures assigned to those names earlier.
x = np.linspace(0, 10, 100)
y = np.sin(x)

p1 = figure(title="Plot 1")
p1.line(x, y)

p2 = figure(title="Plot 2")
p2.vbar(x=[1,2,3], top=[4,5,6], width=0.4)

p3 = figure(title="Plot 3")
p3.scatter(x, y, size=6)

p4 = figure(title="Plot 4")
p4.line(x, np.cos(x), color="green")

p5 = figure(title="Plot 5")
p5.line(x, y**2)

p6 = figure(title="Plot 6")
p6.scatter(x, np.sqrt(y+1), color="red", size=6)

p7 = figure(title="Plot 7")
p7.line(x, np.log(x+1))

p8 = figure(title="Plot 8")
p8.line(x, np.exp(y))

p9 = figure(title="Plot 9")
p9.scatter(x, np.abs(y), size=8)

p10 = figure(title="Plot 10")
p10.line(x, np.tan(y)/5)

# Pair the figures up in order: [[p1, p2], [p3, p4], ...].
panels = [p1, p2, p3, p4, p5, p6, p7, p8, p9, p10]
grid = gridplot([panels[i:i + 2] for i in range(0, len(panels), 2)])

show(grid)