In [35]:
import pandas as pd
import sqlalchemy
from sqlalchemy.ext.automap import automap_base
from sqlalchemy.orm import Session
from sqlalchemy import create_engine
from sqlalchemy import func
import matplotlib.pyplot as plt
import datetime
from dateutil.relativedelta import relativedelta

In [2]:
engine = create_engine("sqlite:///hawaii_weather.sqlite")

In [3]:
Base = automap_base()

In [4]:
Base.prepare(engine, reflect=True)

In [5]:
print(engine)

Engine(sqlite:///hawaii_weather.sqlite)


In [6]:
Base.classes.keys()

['measurement', 'station']

In [7]:
Measurements = Base.classes.measurement
Stations = Base.classes.station

In [8]:
session = Session(engine)
test = session.query(Measurements).first().__dict__
test

  'storage.' % (dialect.name, dialect.driver))


{'_sa_instance_state': <sqlalchemy.orm.state.InstanceState at 0x10dca3160>,
 'date': datetime.date(2010, 1, 1),
 'id': 1,
 'precipitation': '0.08',
 'station_name': 'USC00519397',
 'temperature': Decimal('65.0000000000')}

In [9]:
precipitation = session.query(Measurements.date,\
                              Measurements.precipitation\
                             )\
                                .filter(Measurements.precipitation.isnot(None))\
                                .filter(Measurements.date > '2016-10-22').order_by(Measurements.date)
precipitation.__dict__     

{'_criterion': <sqlalchemy.sql.elements.BooleanClauseList object at 0x10dca34a8>,
 '_entities': [<sqlalchemy.orm.query._ColumnEntity at 0x10dca3278>,
  <sqlalchemy.orm.query._ColumnEntity at 0x10dca32e8>],
 '_has_mapper_entities': False,
 '_mapper_adapter_map': {<Mapper at 0x104bc1780; measurement>: (<Mapper at 0x104bc1780; measurement>,
   None)},
 '_order_by': [Column('date', DATE(), table=<measurement>)],
 '_polymorphic_adapters': {},
 '_primary_entity': None,
 'session': <sqlalchemy.orm.session.Session at 0x10dc53240>}

In [10]:
station_count = session.query(Stations.station_name).count()
station_count
print('The number of weather stations in our SQLite DB is %r.' % station_count)

The number of weather stations in our SQLite DB is 9.


In [71]:
# The most active stations by number of temperature observations.
most_active_sations_tmp = session.query(Measurements.station_name, func.count(Measurements.id).label('tmp_total'))\
    .group_by(Measurements.station_name).order_by(func.count(Measurements.id).desc()).all()
most_active_sations_tmp

[('USC00519281', 2772),
 ('USC00519397', 2724),
 ('USC00513117', 2709),
 ('USC00519523', 2669),
 ('USC00516128', 2612),
 ('USC00514830', 2202),
 ('USC00511918', 1979),
 ('USC00517948', 1372),
 ('USC00518838', 511)]

In [12]:
# The most active stations by number of precipitation observations, distinct from the above query because
# some of the recordings that contain temperature data have null values for precipitation, which are here filtered out.
most_active_sations_prcp = session.query(Measurements.station_name, func.count(Measurements.id).label('tmp_total'))\
    .filter(Measurements.precipitation.isnot(None))\
    .group_by(Measurements.station_name).order_by(func.count(Measurements.id).desc).all()
most_active_sations_prcp

  util.ellipses_string(element.element))


[('USC00519281', 2772),
 ('USC00513117', 2696),
 ('USC00519397', 2685),
 ('USC00519523', 2572),
 ('USC00516128', 2484),
 ('USC00514830', 1937),
 ('USC00511918', 1932),
 ('USC00517948', 683),
 ('USC00518838', 342)]

In [13]:
# Find the most active station in the last 12 months. 
most_active_station_last_12_mos = session.query(Measurements.station_name, func.count(Measurements.id).label('tmp_total'))\
    .filter(Measurements.date >'2016-08-23').group_by(Measurements.station_name).order_by('tmp_total DESC').first()
print('The station with the largest number of temperature readings over the last 12 months of \
data is %r.' % most_active_station_last_12_mos[0])

The station with the largest number of temperature readings over the last 12 months of data is 'USC00519397'.


  util.ellipses_string(element.element))


In [14]:
# Retrieve the last 12 months of temperature readings from the most active sation.
temp_measures_most_active_last_12_mos = session.query(Measurements.date, Measurements.temperature)\
    .filter(Measurements.station_name.contains("USC00519397")).filter(Measurements.date > '2016-08-23').all()


In [15]:
def convert_date(year, month, date):
    orig_date = datetime.datetime(year, month, date)
    orig_date = str(orig_date)
    d = datetime.datetime.strptime(orig_date, '%Y-%m-%d %H:%M:%S')
    d = d.strftime('%Y-%m-%d')
    return d

convert_date(2016,6,12)

'2016-06-12'

In [37]:
trip_start = datetime.datetime(2016,9,1)
trip_return = datetime.datetime(2016,9,15)

trip_start_str = trip_start.strftime('%Y-%m-%d')
trip_return_str = trip_return.strftime('%Y-%m-%d')

trip_start_prior_year_str = (trip_start - relativedelta(years=1)).strftime('%Y-%m-%d')
trip_return_prior_year_str = (trip_return - relativedelta(years=1)).strftime('%Y-%m-%d')

'2015-09-01'

In [38]:
def calc_temps(start_date, end_date):
    labels = ['min', 'max', 'avg']
    temperatures = session.query(func.min(Measurements.temperature),
                                 func.max(Measurements.temperature),
                                 func.avg(Measurements.temperature)
                                )\
                                .filter(Measurements.date >= start_date, Measurements.date <= end_date).all()
    temperatures = [float(x) for x in temperatures[0]]
    d = dict((key, value) for (key, value) in zip(labels,temperatures))

    return d

In [39]:
calc_temps(trip_start, trip_return)

{'avg': 77.40229885057471, 'max': 84.0, 'min': 71.0}

In [68]:
def calc_normals(start_date, end_date):
    labels = ['date','min', 'max', 'avg']

    normals = session.query(
        Measurements.date,
        func.min(Measurements.temperature),
        func.max(Measurements.temperature),
        func.avg(Measurements.temperature)
                                )\
                                .group_by(Measurements.date)\
                                .filter(Measurements.date >= start_date, Measurements.date <= end_date).all()
    l = [[x[0].strftime('%Y-%m-%d'), float(x[1]), float(x[2]), float(x[3])] for x in normals]
    d = [dict((k,v) for (k,v) in zip(labels,x)) for x in l]
    return d

In [69]:
calc_normals(trip_start, trip_return)

[{'avg': 79.42857142857143, 'date': '2016-09-02', 'max': 81.0, 'min': 75.0},
 {'avg': 77.4, 'date': '2016-09-03', 'max': 79.0, 'min': 75.0},
 {'avg': 77.0, 'date': '2016-09-04', 'max': 84.0, 'min': 73.0},
 {'avg': 79.5, 'date': '2016-09-05', 'max': 84.0, 'min': 76.0},
 {'avg': 75.66666666666667, 'date': '2016-09-06', 'max': 80.0, 'min': 73.0},
 {'avg': 75.83333333333333, 'date': '2016-09-07', 'max': 80.0, 'min': 74.0},
 {'avg': 78.71428571428571, 'date': '2016-09-08', 'max': 81.0, 'min': 74.0},
 {'avg': 77.42857142857143, 'date': '2016-09-09', 'max': 79.0, 'min': 75.0},
 {'avg': 76.8, 'date': '2016-09-10', 'max': 78.0, 'min': 75.0},
 {'avg': 77.2, 'date': '2016-09-11', 'max': 83.0, 'min': 71.0},
 {'avg': 76.85714285714286, 'date': '2016-09-12', 'max': 79.0, 'min': 72.0},
 {'avg': 77.57142857142857, 'date': '2016-09-13', 'max': 79.0, 'min': 75.0},
 {'avg': 75.85714285714286, 'date': '2016-09-14', 'max': 79.0, 'min': 73.0},
 {'avg': 77.85714285714286, 'date': '2016-09-15', 'max': 81.0, '