In [2]:
import pandas as pd
import sqlalchemy
from sqlalchemy.ext.automap import automap_base
from sqlalchemy.orm import Session
from sqlalchemy import create_engine
from sqlalchemy import func
import matplotlib.pyplot as plt
import datetime
from dateutil.relativedelta import relativedelta

In [3]:
engine = create_engine("sqlite:///hawaii_weather.sqlite")

In [4]:
Base = automap_base()

In [5]:
Base.prepare(engine, reflect=True)

In [6]:
print(engine)

Engine(sqlite:///hawaii_weather.sqlite)


In [7]:
Base.classes.keys()

['measurement', 'station']

In [8]:
Measurements = Base.classes.measurement
Stations = Base.classes.station

In [9]:
session = Session(engine)

In [10]:
Stations.__table__

Table('station', MetaData(bind=None), Column('station_name', VARCHAR(length=255), table=<station>, primary_key=True, nullable=False), Column('location_name', VARCHAR(length=255), table=<station>), Column('latitude', NUMERIC(), table=<station>), Column('longitude', NUMERIC(), table=<station>), Column('elevation', NUMERIC(), table=<station>), schema=None)

In [11]:
Measurements.__table__

Table('measurement', MetaData(bind=None), Column('id', INTEGER(), table=<measurement>, primary_key=True, nullable=False), Column('station_name', VARCHAR(length=255), ForeignKey('station.station_name'), table=<measurement>), Column('date', DATE(), table=<measurement>), Column('precipitation', VARCHAR(length=255), table=<measurement>), Column('temperature', NUMERIC(), table=<measurement>), schema=None)

In [12]:
# Find the latest observation, useful for doing queries on the last 12 months of data.
latest_observation = session.query(Measurements.date).order_by(Measurements.date.desc()).first()
latest_obseration_str = latest_observation[0].strftime('%Y-%m-%d')
latest_obseration_str

'2017-08-23'

In [13]:
# Return the last 12 months of precipitation observations in the data set.
precipitation_last_12 = session.query(Stations.station_name,\
                                      Stations.location_name\
                                     )\
    .join(Measurements, Stations.station_name==Measurements.station_name)\
    .add_columns(Measurements.precipitation, Measurements.date)\
    .filter(Measurements.precipitation.isnot(None))\
    .filter(Measurements.date > (latest_observation[0] - relativedelta(years=1)).strftime('%Y-%m-%d'))\
    .order_by(Measurements.date).all()
precipitation_last_12_dict = [
   dict( (k,v) for (k,v) in zip(('station_id', 'station_name', 'prcp', 'date'), (x[0], x[1], float(x[2]), x[3].strftime('%Y-%m-%d')) ) )
for x in precipitation_last_12
]
precipitation_last_12_dict[0]

{'date': '2016-08-24',
 'prcp': 0.08,
 'station_id': 'USC00519397',
 'station_name': 'WAIKIKI 717.2, HI US'}

In [14]:
# Return a count of the stations in the data set.
station_count = session.query(Stations.station_name).count()
station_count
print('The number of weather stations in our SQLite DB is %r.' % station_count)

The number of weather stations in our SQLite DB is 9.


In [15]:
# The most active stations by number of temperature observations.
most_active_sations_tmp = session.query(Stations.station_name,\
                                      Stations.location_name\
                                     )\
    .join(Measurements, Stations.station_name==Measurements.station_name)\
    .add_columns(func.count(Measurements.id).label('tmp_total'))\
    .filter(Measurements.temperature.isnot(None))\
    .group_by(Measurements.station_name).order_by(func.count(Measurements.id).desc()).all()

most_active_sations_tmp_dict = [
   dict( (k,v) for (k,v) in zip(('station_id', 'station_name', 'temperature_observations'), (str(x[0]), str(x[1]), int(x[2])) ) )
for x in most_active_sations_tmp
]

most_active_sations_tmp_dict[-1]

{'station_id': 'USC00518838',
 'station_name': 'UPPER WAHIAWA 874.3, HI US',
 'temperature_observations': 511}

In [68]:
# The most active stations by number of precipitation observations, distinct from the above query because
# some of the recordings that contain temperature data have null values for precipitation, which are here filtered out.
most_active_sations_prcp = session.query(Stations.station_name,\
                                      Stations.location_name\
                                     )\
    .join(Measurements, Stations.station_name==Measurements.station_name)\
    .add_columns(func.count(Measurements.id).label('tmp_total'))\
    .filter(Measurements.precipitation.isnot(None))\
    .group_by(Measurements.station_name).order_by(func.count(Measurements.id).desc()).all()

most_active_sations_prcp_dict = [
   dict( (k,v) for (k,v) in zip(('station_id', 'station_name', 'precipitation_observations'), (str(x[0]), str(x[1]), int(x[2])) ) )
for x in most_active_sations_prcp
]

most_active_sations_prcp_dict[-1]

{'precipitation_observations': 342,
 'station_id': 'USC00518838',
 'station_name': 'UPPER WAHIAWA 874.3, HI US'}

In [1]:
# Retrieve just the station names and details.
station_info = session.query(Stations.station_name,\
                                         Stations.location_name,\
                                         Stations.latitude,\
                                         Stations.longitude,\
                                         Stations.elevation,\
                                        ).all()

stations_labels = ('station_id', 'station_name', 'latitude', 'longitude', 'elevation')

station_info_dict = [
   dict( (k,v) for (k,v) in zip(stations_labels, (str(x[0]), str(x[1]), float(x[2]), float(x[3]), float(x[4])) ) )
for x in station_info
]

NameError: name 'session' is not defined

In [14]:
# Find the most active station in the last 12 months. 
most_active_station_last_12_mos = session.query(Measurements.station_name, func.count(Measurements.id).label('tmp_total'))\
    .filter(Measurements.date >'2016-08-23').group_by(Measurements.station_name).order_by(func.count(Measurements.id).desc()).first()
print('The station with the largest number of temperature readings over the last 12 months of \
data is %r.' % most_active_station_last_12_mos[0])

The station with the largest number of temperature readings over the last 12 months of data is 'USC00519397'.


In [15]:
# Retrieve the last 12 months of temperature readings from the most active sation.
temp_measures_most_active_last_12_mos = session.query(Measurements.date, Measurements.temperature)\
    .filter(Measurements.station_name.contains("USC00519397")).filter(Measurements.date > '2016-08-23').all()


  'storage.' % (dialect.name, dialect.driver))


In [16]:
def convert_date(year, month, date):
    orig_date = datetime.datetime(year, month, date)
    orig_date = str(orig_date)
    d = datetime.datetime.strptime(orig_date, '%Y-%m-%d %H:%M:%S')
    d = d.strftime('%Y-%m-%d')
    return d

convert_date(2016,6,12)

'2016-06-12'

In [17]:
trip_start = datetime.datetime(2016,9,1)
trip_return = datetime.datetime(2016,9,15)

trip_start_str = trip_start.strftime('%Y-%m-%d')
trip_return_str = trip_return.strftime('%Y-%m-%d')

trip_start_prior_year_str = (trip_start - relativedelta(years=1)).strftime('%Y-%m-%d')
trip_return_prior_year_str = (trip_return - relativedelta(years=1)).strftime('%Y-%m-%d')

In [18]:
def calc_temps(start_date, end_date):
    labels = ['min', 'max', 'avg']
    temperatures = session.query(func.min(Measurements.temperature),
                                 func.max(Measurements.temperature),
                                 func.avg(Measurements.temperature)
                                )\
                                .filter(Measurements.date >= start_date, Measurements.date <= end_date).all()
    temperatures = [float(x) for x in temperatures[0]]
    d = dict((key, value) for (key, value) in zip(labels,temperatures))

    return d

In [19]:
calc_temps(trip_start, trip_return)

{'avg': 77.40229885057471, 'max': 84.0, 'min': 71.0}

In [20]:
def calc_normals(start_date, end_date=None):
    if end_date == None:
        end_date = datetime.datetime.now().strftime("%Y-%m-%d")
    else:
        end_date
    
    labels = ['date','min', 'max', 'avg']

    normals = session.query(
        Measurements.date,
        func.min(Measurements.temperature),
        func.max(Measurements.temperature),
        func.avg(Measurements.temperature)
                                )\
                                .group_by(Measurements.date)\
                                .filter(Measurements.date >= start_date, Measurements.date <= end_date).all()
    l = [[x[0].strftime('%Y-%m-%d'), float(x[1]), float(x[2]), float(x[3])] for x in normals]
    d = [dict((k,v) for (k,v) in zip(labels,x)) for x in l]
    return d

In [21]:
datetime.datetime.now().strftime("%Y-%m-%d")

'2017-10-23'

In [22]:
calc_normals(trip_start)

[{'avg': 79.42857142857143, 'date': '2016-09-02', 'max': 81.0, 'min': 75.0},
 {'avg': 77.4, 'date': '2016-09-03', 'max': 79.0, 'min': 75.0},
 {'avg': 77.0, 'date': '2016-09-04', 'max': 84.0, 'min': 73.0},
 {'avg': 79.5, 'date': '2016-09-05', 'max': 84.0, 'min': 76.0},
 {'avg': 75.66666666666667, 'date': '2016-09-06', 'max': 80.0, 'min': 73.0},
 {'avg': 75.83333333333333, 'date': '2016-09-07', 'max': 80.0, 'min': 74.0},
 {'avg': 78.71428571428571, 'date': '2016-09-08', 'max': 81.0, 'min': 74.0},
 {'avg': 77.42857142857143, 'date': '2016-09-09', 'max': 79.0, 'min': 75.0},
 {'avg': 76.8, 'date': '2016-09-10', 'max': 78.0, 'min': 75.0},
 {'avg': 77.2, 'date': '2016-09-11', 'max': 83.0, 'min': 71.0},
 {'avg': 76.85714285714286, 'date': '2016-09-12', 'max': 79.0, 'min': 72.0},
 {'avg': 77.57142857142857, 'date': '2016-09-13', 'max': 79.0, 'min': 75.0},
 {'avg': 75.85714285714286, 'date': '2016-09-14', 'max': 79.0, 'min': 73.0},
 {'avg': 77.85714285714286, 'date': '2016-09-15', 'max': 81.0, '

In [60]:
date1 = '2012-01-01'
date2 = '2012-01-01'
error = 0

In [61]:
if date2 is None:
    try:
        datetime.datetime.strptime(date1, '%Y-%m-%d')
    except ValueError:
        print('Invalid date!')
        error += 1
else:
    try:
        datetime.datetime.strptime(date1, '%Y-%m-%d')
        datetime.datetime.strptime(date2, '%Y-%m-%d')

    except ValueError:
        print('Invalid date!')
        error += 1



In [62]:
datetime.datetime.strptime(date1, '%Y-%m-%d')

datetime.datetime(2012, 1, 1, 0, 0)

In [63]:
error

0