# In [8]:
from bokeh.plotting import figure, show, output_file
from bokeh.transform import linear_cmap
from bokeh.models import ColumnDataSource, ColorBar, HoverTool
from bokeh.palettes import plasma
import pandas as pd
import numpy as np

# Build an interactive Bokeh histogram of the 'pH (Max)' readings found in the
# water-quality CSV, write it to pHLevelHistogram.html and open it.

# NOTE(review): absolute, machine-specific path -- the script only runs where
# the CSV exists at exactly this location.
csv_file = "C:/Users/tanis/Documents/Fall2023_4th_Sem/CS2613/Python/ExplorationActivity1/explorationactivity1-goldengomi/Water_pond_tanks_2021.csv"

# Read the CSV file into a DataFrame
inputdf = pd.read_csv(csv_file, encoding='cp1252')

# Select the pH (Max) column from the data
pHMaxColumn = inputdf['pH (Max)']

# np.histogram cannot auto-detect a bin range when the data contains NaN (it
# raises ValueError), so only rows that actually carry a reading are binned.
ph_values = pHMaxColumn.dropna()

# Count the readings that fall into each of 20 equal-width bins
hist, edges = np.histogram(ph_values, bins=20)

# One row per bin. Note that the 'pH (Max)' column of this frame holds the
# bin COUNT (bar height), while 'left'/'right' hold the bin edges.
hist_df = pd.DataFrame({'pH (Max)': hist, 'left': edges[:-1], 'right': edges[1:]})

# Largest reading inside each bin, shown by the hover tool.
# The membership test mirrors np.histogram: every bin is half-open
# [left, right) except the last one, which also includes its right edge.
# Using <= on every bin would attribute a reading that sits exactly on an
# edge to the bin below the one it was counted in.
# Empty bins yield NaN.
last_bin = len(hist) - 1
max_values = []
for bin_index, (left, right) in enumerate(zip(hist_df['left'], hist_df['right'])):
    if bin_index == last_bin:
        under_right_edge = ph_values <= right
    else:
        under_right_edge = ph_values < right
    in_bin = (ph_values >= left) & under_right_edge
    max_values.append(ph_values[in_bin].max())
hist_df['max_value'] = max_values

# Create a ColumnDataSource from the histogram DataFrame
source = ColumnDataSource(hist_df)

# Create a Bokeh figure
fig = figure(height=350, title="Max pH Level of Indian Water Bodies")

# Colour each bar by its count, spanning the palette from the emptiest bin
# to the fullest one
colorMap = linear_cmap(field_name='pH (Max)', palette=plasma(20), low=hist.min(), high=hist.max())

# Add a quad glyph to represent the histogram bars
fig.quad(top='pH (Max)', bottom=0, left='left', right='right', source=source,
         fill_color=colorMap, line_color='white', legend_label='pH (Max)')

# Hover tool: per-bin maximum reading and the bin's edges
hover = HoverTool()
hover.tooltips = [("Max Value", "@max_value{0.00}"), ("Interval", "(@left, @right)")]
fig.add_tools(hover)

# Colour bar sharing the mapper used for the bars, so it reads as a count scale
colorBar = ColorBar(color_mapper=colorMap['transform'], width=8, location=(0,0))
fig.add_layout(colorBar, 'right')

# Axis labels: x is the pH reading, y is the number of readings per bin
fig.xaxis.axis_label = "pH (Max)"
fig.yaxis.axis_label = "Frequency"

# Write the standalone HTML file and open it
output_file("pHLevelHistogram.html")
show(fig)
