## Impact of Seasonal Natural Gas Consumption on Air Quality
- Analyze how seasonal variations in natural gas consumption influence air quality, focusing on the comparison between winter and summer months.

In [61]:
# Importing dependencies
#Python SQL Toolkit and ORM
import sqlalchemy
from sqlalchemy.ext.automap import automap_base
from sqlalchemy.orm import Session
from sqlalchemy import create_engine, inspect, text

#Pandas and Bokeh libraries
import pandas as pd
from bokeh.plotting import figure, show
from bokeh.models import ColumnDataSource, FactorRange
from bokeh.io import output_file

#### Fetching data from our database and converting it to pandas DataFrame

In [62]:
#Create Database Connection
engine = create_engine("sqlite:///../Resources/new_aqi_ngsDB.sqlite", echo=False)

In [63]:
#Refelect Database into ORM Classes
Base = automap_base()
Base.prepare(autoload_with=engine)
Base.classes.keys()

['combined_df']

In [64]:
#map the combined_df class

combined_table_aqi_ng = Base.classes.combined_df

#create session
session = Session(engine)

In [65]:
#get a list of column names and types
inspector = inspect(engine)
columns = inspector.get_columns('combined_df')

for column in columns:
    print(column['name'], column['type'])

id INTEGER
state_name TEXT
state_id TEXT
month INTEGER
AQI INTEGER
lat REAL
lng REAL
density INTEGER
population INTEGER
Category TEXT
Commercial Consumption REAL
Delivered to Consumers REAL
Electric Power Consumption REAL
Industrial Consumption REAL
Residential Consumption REAL
Vehicle Fuel Consumption REAL
total REAL


In [66]:
#Query the data from our table
data_query = session.query(combined_table_aqi_ng.id,combined_table_aqi_ng.state_id,combined_table_aqi_ng.month,combined_table_aqi_ng.AQI,
                           combined_table_aqi_ng.lat,combined_table_aqi_ng.lng,combined_table_aqi_ng.Category,combined_table_aqi_ng.total ).all()

for item in data_query:
    print(item)

(0, 'AL', 1, 37, 33.3173, -86.8356, 'Good', 133551.0)
(1, 'AL', 2, 38, 33.2297, -86.8105, 'Good', 114933.0)
(2, 'AL', 3, 43, 33.1279, -86.8814, 'Good', 103717.0)
(3, 'AL', 4, 47, 33.1279, -86.8814, 'Good', 96171.0)
(4, 'AL', 5, 44, 33.1231, -86.8856, 'Good', 101999.0)
(5, 'AL', 6, 38, 33.1279, -86.8814, 'Good', 115579.0)
(6, 'AL', 7, 39, 33.1369, -86.8778, 'Good', 119120.0)
(7, 'AL', 8, 42, 33.1135, -86.8943, 'Good', 124022.0)
(8, 'AL', 9, 37, 33.1279, -86.8814, 'Good', 109530.0)
(9, 'AL', 10, 37, 33.146, -86.8742, 'Good', 109041.0)
(10, 'AL', 11, 38, 33.2614, -86.8268, 'Good', 115678.0)
(11, 'AL', 12, 34, 33.3429, -86.7456, 'Good', 119300.0)
(12, 'AK', 1, 58, 61.4802, -143.6454, 'Moderate', 15243.0)
(13, 'AK', 2, 42, 61.4802, -143.6454, 'Good', 15370.0)
(14, 'AK', 3, 29, 61.4802, -143.6454, 'Good', 15715.0)
(15, 'AK', 4, 31, 61.6964, -144.322, 'Good', 11956.0)
(16, 'AK', 5, 24, 61.4802, -143.6454, 'Good', 9288.0)
(17, 'AK', 6, 21, 61.4802, -143.6454, 'Good', 7352.0)
(18, 'AK', 7, 29, 

In [67]:
#converting the query result into a pandas DataFrame
query_list = []

for row in data_query:
    query_dics={
        'id':row.id,
        'state':row.state_id,
        'month':row.month,
        'AQI':row.AQI,
        'lat':row.lat,
        'lng':row.lng,
        'category':row.Category,
        'natural_gas_consumption':row.total
        }
    query_list.append(query_dics)

df = pd.DataFrame(query_list)
df.head()

Unnamed: 0,id,state,month,AQI,lat,lng,category,natural_gas_consumption
0,0,AL,1,37,33.3173,-86.8356,Good,133551.0
1,1,AL,2,38,33.2297,-86.8105,Good,114933.0
2,2,AL,3,43,33.1279,-86.8814,Good,103717.0
3,3,AL,4,47,33.1279,-86.8814,Good,96171.0
4,4,AL,5,44,33.1231,-86.8856,Good,101999.0


## Perform the seasonal Analysis
- Focusing on Summer and Winter months 

In [68]:
#Define a function for mapping months to seasons

def seasons(month):
    if month in [12,1,2]:
        return 'Winter'
    elif month in [6,7,8]:
        return 'Summer'
    else:
        return 'Other'
    
#applying the functino to create a new season column in our dataframe for each months numeric value
df['season'] = df['month'].apply(seasons)

df.head(10)

Unnamed: 0,id,state,month,AQI,lat,lng,category,natural_gas_consumption,season
0,0,AL,1,37,33.3173,-86.8356,Good,133551.0,Winter
1,1,AL,2,38,33.2297,-86.8105,Good,114933.0,Winter
2,2,AL,3,43,33.1279,-86.8814,Good,103717.0,Other
3,3,AL,4,47,33.1279,-86.8814,Good,96171.0,Other
4,4,AL,5,44,33.1231,-86.8856,Good,101999.0,Other
5,5,AL,6,38,33.1279,-86.8814,Good,115579.0,Summer
6,6,AL,7,39,33.1369,-86.8778,Good,119120.0,Summer
7,7,AL,8,42,33.1135,-86.8943,Good,124022.0,Summer
8,8,AL,9,37,33.1279,-86.8814,Good,109530.0,Other
9,9,AL,10,37,33.146,-86.8742,Good,109041.0,Other


In [56]:
#Define a function for mapping months to seasons

def seasons(month):
    if month in [12,1,2]:
        return 'Winter'
    elif month in [6,7,8]:
        return 'Summer'
    else:
        return 'Other'
    
#applying the functino to create a new season column in our dataframe for each months numeric value
df['season'] = df['month'].apply(seasons)

df.head(10)

Unnamed: 0,id,state,month,AQI,lat,lng,category,natural_gas_consumption,season
0,0,AK,1,58,61.4802,-143.6454,Moderate,15243.0,Winter
1,1,AK,2,42,61.4802,-143.6454,Good,15370.0,Winter
2,2,AK,3,29,61.4802,-143.6454,Good,15715.0,Other
3,3,AK,4,31,61.6964,-144.322,Good,11956.0,Other
4,4,AK,5,24,61.4802,-143.6454,Good,9288.0,Other
5,5,AK,6,21,61.4802,-143.6454,Good,7352.0,Summer
6,6,AK,7,29,61.4802,-143.6454,Good,7429.0,Summer
7,7,AK,8,19,61.5467,-143.8536,Good,8279.0,Summer
8,8,AK,9,18,61.4802,-143.6454,Good,8563.0,Other
9,9,AK,10,26,61.4802,-143.6454,Good,12262.0,Other


In [69]:
df.count()

id                         609
state                      609
month                      609
AQI                        609
lat                        609
lng                        609
category                   609
natural_gas_consumption    609
season                     609
dtype: int64

In [70]:
#filtering our dataframe based on season =winter and summer values
seasonal_df = df[df['season'].isin(['Winter','Summer'])]

seasonal_df.count()


id                         305
state                      305
month                      305
AQI                        305
lat                        305
lng                        305
category                   305
natural_gas_consumption    305
season                     305
dtype: int64

In [71]:
#Group by state and season to aggregate the data
agg_df = seasonal_df.groupby(['state','season']).agg({
    'AQI':'mean',
    'natural_gas_consumption':'mean'
}).reset_index()

agg_df.head()

Unnamed: 0,state,season,AQI,natural_gas_consumption
0,AK,Summer,23.0,7686.666667
1,AK,Winter,49.666667,20438.333333
2,AL,Summer,39.666667,119573.666667
3,AL,Winter,36.333333,122594.666667
4,AR,Summer,43.666667,54133.333333
