## Finding the correlation coefficient between natural gas consumption and the Air Quality Index (AQI)

In [35]:
import sqlite3
import pandas as pd
from bokeh.plotting import figure, show, output_notebook
from bokeh.models import ColumnDataSource, HoverTool, ColorBar, BasicTicker, PrintfTickFormatter
from bokeh.transform import linear_cmap
from bokeh.layouts import column
from bokeh.palettes import Viridis256


import numpy as np





# Ensure Bokeh plots are displayed in the notebook
output_notebook()

# Load the columns used by the analysis from the local SQLite database.
# NOTE(review): 'total' is presumably the natural gas consumption figure named
# in the notebook title -- confirm against the code that builds combined_df.
query = "SELECT AQI, total, month, state_name FROM combined_df"

# sqlite3's connection context manager only commits or rolls back; it does not
# close the connection. Close explicitly so the database file handle is
# released even when the query raises.
conn = sqlite3.connect('new_aqi_ngsDB.sqlite')
try:
    # Execute the query and load the data into a DataFrame
    df = pd.read_sql_query(query, conn)
finally:
    conn.close()

# Display the first few rows of the DataFrame
df.head()




Unnamed: 0,AQI,total,month,state_name
0,37,133551.0,1,Alabama
1,38,114933.0,2,Alabama
2,43,103717.0,3,Alabama
3,47,96171.0,4,Alabama
4,44,101999.0,5,Alabama


In [26]:
# Pearson correlation between the AQI and total columns of df
# (Series.corr defaults to method='pearson'; it is spelled out here).
correlation = df['AQI'].corr(other=df['total'], method='pearson')
print(f"Correlation coefficient between AQI and total: {correlation}")


Correlation coefficient between AQI and total: 0.09918364845115557


In [39]:


# Interactive scatter plot of AQI vs total with a least-squares trend line.
# Relies on `df` and `correlation` from the cells above; output_notebook() is
# already called in the first cell, so it is not repeated here.

# Prepare data for Bokeh
source = ColumnDataSource(df)

# Fit the trend line on rows where both columns are present. np.polyfit does
# not tolerate NaN, whereas Series.corr (used for `correlation`) skips missing
# pairs, so this keeps the fit consistent with the coefficient in the title.
fit_df = df[['AQI', 'total']].dropna()
slope, intercept = np.polyfit(fit_df['AQI'], fit_df['total'], 1)

# Fitted value for every row; kept for any later cell that uses it.
regression_line = slope * df['AQI'] + intercept

# Create a scatter plot with additional interactive tools
p = figure(title=f'Scatter Plot of AQI vs Total (Correlation: {correlation:.2f})',
           x_axis_label='AQI', y_axis_label='Total', width=700, height=400,
           tools="pan,wheel_zoom,box_zoom,reset,save,hover", active_drag="box_zoom", active_scroll="wheel_zoom")

# Map the AQI values to colors using a linear color mapper
mapper = linear_cmap(field_name='AQI', palette=Viridis256, low=df['AQI'].min(), high=df['AQI'].max())

# Add scatter glyphs; keep the renderer so the hover tool can be limited to it
points = p.scatter(x='AQI', y='total', size=8, color=mapper, source=source, line_alpha=0.6, fill_alpha=0.6,
                   selection_color="firebrick", nonselection_fill_alpha=0.2, nonselection_line_alpha=0.2)

# Draw the regression line from its two end points. A straight line needs only
# two vertices, and passing every (unsorted) data point would draw one
# overlapping segment per row.
x_ends = [float(fit_df['AQI'].min()), float(fit_df['AQI'].max())]
y_ends = [float(slope * x + intercept) for x in x_ends]
p.line(x_ends, y_ends, line_width=2, color='red', legend_label='Linear Regression')

# Add a color bar to show the AQI color scale
color_bar = ColorBar(color_mapper=mapper['transform'], width=8, location=(0,0),
                     ticker=BasicTicker(desired_num_ticks=10),
                     formatter=PrintfTickFormatter(format="%d"))

p.add_layout(color_bar, 'right')

# Add tooltips to show information on hover
p.hover.tooltips = [
    ("State", "@state_name"),
    ("Month", "@month"),
    ("AQI", "@AQI"),
    ("Total", "@total"),
]
# By default the hover tool inspects every renderer; the regression line's data
# source has none of the columns above, so hovering it would show "???".
p.hover.renderers = [points]

# Show the plot
show(p)
