In [1]:
# Dependencies
import numpy as np

# Python SQL toolkit and Object Relational Mapper
import sqlalchemy
from sqlalchemy.ext.automap import automap_base
from sqlalchemy.orm import Session
from sqlalchemy import create_engine, func

In [2]:
engine = create_engine("sqlite:///hawaii.sqlite")

# reflect an existing database into a new model
Base = automap_base()
# reflect the tables
Base.prepare(engine, reflect=True)

# Save references to each table
Measurement = Base.classes.measurement
Station = Base.classes.station

In [3]:
# Create our session (link) from Python to the DB
session = Session(engine)

## D1: Determine the Summary Statistics for June

In [4]:
# 1. Import the sqlalchemy extract function.
from sqlalchemy import extract

# 2. Write a query that filters the Measurement table to retrieve the temperatures for the month of June. 
import datetime as dt


June_months = session.query(Measurement.date, Measurement.tobs).\
    filter(extract('month', Measurement.date) == 6).all()
June_months

[('2010-06-01', 78.0),
 ('2010-06-02', 76.0),
 ('2010-06-03', 78.0),
 ('2010-06-04', 76.0),
 ('2010-06-05', 77.0),
 ('2010-06-06', 78.0),
 ('2010-06-07', 77.0),
 ('2010-06-08', 78.0),
 ('2010-06-09', 78.0),
 ('2010-06-10', 79.0),
 ('2010-06-11', 78.0),
 ('2010-06-12', 78.0),
 ('2010-06-13', 78.0),
 ('2010-06-14', 77.0),
 ('2010-06-15', 78.0),
 ('2010-06-16', 78.0),
 ('2010-06-17', 77.0),
 ('2010-06-18', 77.0),
 ('2010-06-19', 82.0),
 ('2010-06-20', 78.0),
 ('2010-06-21', 78.0),
 ('2010-06-22', 78.0),
 ('2010-06-23', 78.0),
 ('2010-06-24', 78.0),
 ('2010-06-25', 77.0),
 ('2010-06-26', 76.0),
 ('2010-06-27', 78.0),
 ('2010-06-28', 78.0),
 ('2010-06-29', 78.0),
 ('2010-06-30', 78.0),
 ('2011-06-01', 77.0),
 ('2011-06-02', 78.0),
 ('2011-06-03', 73.0),
 ('2011-06-04', 70.0),
 ('2011-06-05', 76.0),
 ('2011-06-06', 77.0),
 ('2011-06-07', 77.0),
 ('2011-06-08', 77.0),
 ('2011-06-09', 77.0),
 ('2011-06-10', 78.0),
 ('2011-06-11', 77.0),
 ('2011-06-12', 77.0),
 ('2011-06-13', 78.0),
 ('2011-06-

In [5]:
Base.metadata.tables.values()

dict_values([Table('measurement', MetaData(bind=None), Column('id', INTEGER(), table=<measurement>, primary_key=True, nullable=False), Column('station', TEXT(), table=<measurement>), Column('date', TEXT(), table=<measurement>), Column('prcp', FLOAT(), table=<measurement>), Column('tobs', FLOAT(), table=<measurement>), schema=None), Table('station', MetaData(bind=None), Column('id', INTEGER(), table=<station>, primary_key=True, nullable=False), Column('station', TEXT(), table=<station>), Column('name', TEXT(), table=<station>), Column('latitude', FLOAT(), table=<station>), Column('longitude', FLOAT(), table=<station>), Column('elevation', FLOAT(), table=<station>), schema=None)])

In [6]:
session.query(Measurement).all()

[<sqlalchemy.ext.automap.measurement at 0x11be1d910>,
 <sqlalchemy.ext.automap.measurement at 0x11be1d4f0>,
 <sqlalchemy.ext.automap.measurement at 0x11c5636d0>,
 <sqlalchemy.ext.automap.measurement at 0x11c563730>,
 <sqlalchemy.ext.automap.measurement at 0x11c563790>,
 <sqlalchemy.ext.automap.measurement at 0x11c5637f0>,
 <sqlalchemy.ext.automap.measurement at 0x11c563850>,
 <sqlalchemy.ext.automap.measurement at 0x11c5638b0>,
 <sqlalchemy.ext.automap.measurement at 0x11c563910>,
 <sqlalchemy.ext.automap.measurement at 0x11c563970>,
 <sqlalchemy.ext.automap.measurement at 0x11c5639a0>,
 <sqlalchemy.ext.automap.measurement at 0x11c563a30>,
 <sqlalchemy.ext.automap.measurement at 0x11c563ac0>,
 <sqlalchemy.ext.automap.measurement at 0x11c563b50>,
 <sqlalchemy.ext.automap.measurement at 0x11c563be0>,
 <sqlalchemy.ext.automap.measurement at 0x11c563c70>,
 <sqlalchemy.ext.automap.measurement at 0x11c563d00>,
 <sqlalchemy.ext.automap.measurement at 0x11c563d90>,
 <sqlalchemy.ext.automap.mea

In [7]:
# Earliest Date
session.query(Measurement.date).order_by(Measurement.date).first()

('2010-01-01')

In [8]:
# Latest Date
session.query(Measurement.date).order_by(Measurement.date.desc()).first()

('2017-08-23')

In [9]:
#  3. Convert the June temperatures to a list.
June_temps = list(June_months)
June_temps

[('2010-06-01', 78.0),
 ('2010-06-02', 76.0),
 ('2010-06-03', 78.0),
 ('2010-06-04', 76.0),
 ('2010-06-05', 77.0),
 ('2010-06-06', 78.0),
 ('2010-06-07', 77.0),
 ('2010-06-08', 78.0),
 ('2010-06-09', 78.0),
 ('2010-06-10', 79.0),
 ('2010-06-11', 78.0),
 ('2010-06-12', 78.0),
 ('2010-06-13', 78.0),
 ('2010-06-14', 77.0),
 ('2010-06-15', 78.0),
 ('2010-06-16', 78.0),
 ('2010-06-17', 77.0),
 ('2010-06-18', 77.0),
 ('2010-06-19', 82.0),
 ('2010-06-20', 78.0),
 ('2010-06-21', 78.0),
 ('2010-06-22', 78.0),
 ('2010-06-23', 78.0),
 ('2010-06-24', 78.0),
 ('2010-06-25', 77.0),
 ('2010-06-26', 76.0),
 ('2010-06-27', 78.0),
 ('2010-06-28', 78.0),
 ('2010-06-29', 78.0),
 ('2010-06-30', 78.0),
 ('2011-06-01', 77.0),
 ('2011-06-02', 78.0),
 ('2011-06-03', 73.0),
 ('2011-06-04', 70.0),
 ('2011-06-05', 76.0),
 ('2011-06-06', 77.0),
 ('2011-06-07', 77.0),
 ('2011-06-08', 77.0),
 ('2011-06-09', 77.0),
 ('2011-06-10', 78.0),
 ('2011-06-11', 77.0),
 ('2011-06-12', 77.0),
 ('2011-06-13', 78.0),
 ('2011-06-

In [11]:
# 4. Create a DataFrame from the list of temperatures for the month of June. 
import pandas as pd

June_df = pd.DataFrame(June_temps, columns=['date','temperature'])
June_df

Unnamed: 0,date,temperature
0,2010-06-01,78.0
1,2010-06-02,76.0
2,2010-06-03,78.0
3,2010-06-04,76.0
4,2010-06-05,77.0
...,...,...
1695,2017-06-26,79.0
1696,2017-06-27,74.0
1697,2017-06-28,74.0
1698,2017-06-29,76.0


In [None]:
# 5. Calculate and print out the summary statistics for the June temperature DataFrame.
June_df.describe()

## D2: Determine the Summary Statistics for December

In [None]:
# 6. Write a query that filters the Measurement table to retrieve the temperatures for the month of December.
December_months = session.query(Measurement.date, Measurement.tobs).\
    filter(extract('month', Measurement.date) == 12).all()
December_months

In [None]:
# 7. Convert the December temperatures to a list.
Dec_temps = list(December_months)
Dec_temps

In [None]:
# 8. Create a DataFrame from the list of temperatures for the month of December. 
Dec_df = pd.DataFrame(Dec_temps, columns=['date','temperature'])
Dec_df

In [None]:
# 9. Calculate and print out the summary statistics for the Decemeber temperature DataFrame.
Dec_df.describe()

In [None]:
# Conclusion. First of two additional queries - December
December_one = session.query(Measurement.date, Measurement.tobs, Station.elevation).\
    filter(extract('month', Measurement.date) == 12).all()

In [None]:
December_one_df = pd.DataFrame(December_one, columns=['date','temperature','elevation'])
December_one_df

In [None]:
# Conclusion. Second of two additional queries - December
December_two = session.query(Measurement.date, Measurement.tobs, Station.latitude, Station.longitude).\
    filter(extract('month', Measurement.date) == 12).all()

In [None]:
December_two_df = pd.DataFrame(December_two, columns=['date','temperature','latitude','longitude'])
December_two_df