In [1]:
%matplotlib notebook
from matplotlib import style
style.use('fivethirtyeight')
import matplotlib.pyplot as plt

In [2]:
import numpy as np
import pandas as pd

In [3]:
import datetime as dt

In [4]:
# Python SQL toolkit and Object Relational Mapper
import sqlalchemy
from sqlalchemy.ext.automap import automap_base
from sqlalchemy.orm import Session
from sqlalchemy import create_engine, func, inspect

In [5]:
engine = create_engine("sqlite:///Resources/hawaii.sqlite")
inspector = inspect(engine)

In [6]:
# reflect an existing database into a new model
Base = automap_base()
# reflect the tables
Base.prepare(engine, reflect=True)

In [7]:
# We can view all of the classes that automap found
Base.classes.keys()

['measurement', 'station']

In [8]:
# Save references to each table
Measurement = Base.classes.measurement
Station = Base.classes.station

In [9]:
# Create our session (link) from Python to the DB
session = Session(engine)

In [10]:
measurement = session.query(Measurement).first()
measurement.__dict__

{'_sa_instance_state': <sqlalchemy.orm.state.InstanceState at 0x1b2ace063c8>,
 'prcp': 0.08,
 'station': 'USC00519397',
 'id': 1,
 'tobs': 65.0,
 'date': '2010-01-01'}

In [29]:
# Design a query to retrieve the last 12 months of precipitation data and plot the results
# Calculate the date 1 year ago from today
year_ago = dt.date(2017, 8, 23) - dt.timedelta(days=365)
#year_ago
# Perform a query to retrieve the data and precipitation scores between 8/24/2016 and 8/23/2017 and arrange by date
precip_query = session.query(Measurement.date, Measurement.prcp).filter(Measurement.date >= year_ago).\
group_by(Measurement.date).order_by((Measurement.date).asc()).all()

precip_query

# Save the query results as a Pandas DataFrame and set the index to the date column
precip_df = pd.DataFrame(precip_query)
#precip_df.head()
precipitation = precip_df.set_index("date")
precipitation.columns = ['Precipitation']
precipitation.head()


Unnamed: 0_level_0,Precipitation
date,Unnamed: 1_level_1
2016-08-23,0.7
2016-08-24,1.45
2016-08-25,0.11
2016-08-26,0.01
2016-08-27,


In [30]:
# Use Pandas Plotting with Matplotlib to plot the data
precipitation.plot()
plt.tight_layout()
# Rotate the xticks for the dates
plt.xticks(rotation=45)
plt.show()

<IPython.core.display.Javascript object>

In [14]:
#Use Pandas to calculate the summary statistics for the precipitation data
precipitation.describe()

Unnamed: 0,Precipitation
count,344.0
mean,0.431308
std,0.70524
min,0.0
25%,0.01
50%,0.14
75%,0.565
max,6.7


In [16]:
station = session.query(Station).first()
station.__dict__

{'_sa_instance_state': <sqlalchemy.orm.state.InstanceState at 0x1b2ad792358>,
 'longitude': -157.8168,
 'name': 'WAIKIKI 717.2, HI US',
 'id': 1,
 'elevation': 3.0,
 'latitude': 21.2716,
 'station': 'USC00519397'}

In [17]:
# How many stations are available in this dataset?
station_count = session.query(Station).count()
station_count

9

In [18]:
# What are the most active stations?
# List the stations and the counts in descending order.
station_activity = session.query(Station.station, Station.name, func.count(Measurement.tobs)).\
group_by(Measurement.station).filter(Station.station ==  Measurement.station).group_by(Measurement.station).\
order_by(func.count(Measurement.tobs).desc()).all()

station_activity

[('USC00519281', 'WAIHEE 837.5, HI US', 2772),
 ('USC00519397', 'WAIKIKI 717.2, HI US', 2724),
 ('USC00513117', 'KANEOHE 838.1, HI US', 2709),
 ('USC00519523', 'WAIMANALO EXPERIMENTAL FARM, HI US', 2669),
 ('USC00516128', 'MANOA LYON ARBO 785.2, HI US', 2612),
 ('USC00514830', 'KUALOA RANCH HEADQUARTERS 886.9, HI US', 2202),
 ('USC00511918', 'HONOLULU OBSERVATORY 702.2, HI US', 1979),
 ('USC00517948', 'PEARL CITY, HI US', 1372),
 ('USC00518838', 'UPPER WAHIAWA 874.3, HI US', 511)]

In [19]:
# Using the station id from the previous query, calculate the lowest temperature recorded, 
# highest temperature recorded, and average temperature most active station?
active_station = session.query(Measurement.station, Station.name, func.min(Measurement.tobs), func.max(Measurement.tobs), func.avg(Measurement.tobs)).\
filter(Measurement.station == 'USC00519281').all()

active_station

[('USC00519281', 'WAIKIKI 717.2, HI US', 54.0, 85.0, 71.66378066378067)]

In [31]:
# Choose the station with the highest number of temperature observations.
# Query the last 12 months of temperature observation data for 'Waikiki' station
waikiki = session.query(Measurement.station, Measurement.tobs).filter(Measurement.date >= year_ago).filter(Measurement.station == 'USC00519281').all()

waikiki

# Create dataframe for Waikiki station data
waikiki_df = pd.DataFrame(waikiki)
waikiki_df.head()

Unnamed: 0,station,tobs
0,USC00519281,77.0
1,USC00519281,77.0
2,USC00519281,80.0
3,USC00519281,80.0
4,USC00519281,75.0


In [32]:
# Plot 'waikiki' station observations as histogram
waikiki_df.plot.hist(bins=12)
plt.ylabel("Frequency")
plt.title("Waikiki Station Temperature Observation")
plt.tight_layout()
plt.show()

<IPython.core.display.Javascript object>