In [1]:
# Dependencies
import datetime as dt
import numpy as np
import pandas as pd
import sqlalchemy

from sqlalchemy import *

from sqlalchemy.ext.automap import automap_base
from sqlalchemy.orm import Session
from sqlalchemy import create_engine, func
from sqlalchemy import Table, Column, Integer, String, MetaData, ForeignKey
from sqlalchemy import inspect
from flask import Flask, jsonify

In [2]:
# Access the SQLite database
engine = create_engine("sqlite:///hawaii.sqlite")
engine.echo = False
metadata = MetaData(engine)

In [3]:
Measurement = Table('Measurement', metadata, autoload=True)
Measurement

Table('Measurement', MetaData(bind=Engine(sqlite:///hawaii.sqlite)), Column('id', INTEGER(), table=<Measurement>, primary_key=True, nullable=False), Column('station', TEXT(), table=<Measurement>), Column('date', TEXT(), table=<Measurement>), Column('prcp', FLOAT(), table=<Measurement>), Column('tobs', FLOAT(), table=<Measurement>), schema=None)

In [4]:
Station = Table('Station', metadata, autoload=True)
Station

Table('Station', MetaData(bind=Engine(sqlite:///hawaii.sqlite)), Column('id', INTEGER(), table=<Station>, primary_key=True, nullable=False), Column('station', TEXT(), table=<Station>), Column('name', TEXT(), table=<Station>), Column('latitude', FLOAT(), table=<Station>), Column('longitude', FLOAT(), table=<Station>), Column('elevation', FLOAT(), table=<Station>), schema=None)

In [5]:
# Reflect the database into classes
Base = automap_base()
Base.prepare(engine, reflect=True)
# View all the classes automap found
Base.classes.keys()
# Save references to each table by creating a variable for each of the classes
Measurement = Base.classes.measurement
Station = Base.classes.station

In [6]:
# Create a session link from Python to the database
session = Session(engine)

In [7]:
# Query measurement table and convert to pandas dataframe
measurement = dt.date
measurement = session.query(
    Measurement.id, 
    Measurement.station, 
    Measurement.date, 
    Measurement.prcp, 
    Measurement.tobs)
measurement_df = pd.DataFrame(measurement, columns=['id', 'station', 'date', 'prcp', 'tobs'])
measurement_df

Unnamed: 0,id,station,date,prcp,tobs
0,1,USC00519397,2010-01-01,0.08,65.0
1,2,USC00519397,2010-01-02,0.00,63.0
2,3,USC00519397,2010-01-03,0.00,74.0
3,4,USC00519397,2010-01-04,0.00,76.0
4,5,USC00519397,2010-01-06,,73.0
...,...,...,...,...,...
19545,19546,USC00516128,2017-08-19,0.09,71.0
19546,19547,USC00516128,2017-08-20,,78.0
19547,19548,USC00516128,2017-08-21,0.56,76.0
19548,19549,USC00516128,2017-08-22,0.50,76.0


In [8]:
# Query station table and convert to pandas dataframe
station = session.query(
    Station.id, 
    Station.station, 
    Station.name, 
    Station.latitude, 
    Station.longitude,
    Station.elevation)
station_df = pd.DataFrame(station, columns=['id', 'station', 'name', 'latitude', 'longitude', 'elevation'])
station_df

Unnamed: 0,id,station,name,latitude,longitude,elevation
0,1,USC00519397,"WAIKIKI 717.2, HI US",21.2716,-157.8168,3.0
1,2,USC00513117,"KANEOHE 838.1, HI US",21.4234,-157.8015,14.6
2,3,USC00514830,"KUALOA RANCH HEADQUARTERS 886.9, HI US",21.5213,-157.8374,7.0
3,4,USC00517948,"PEARL CITY, HI US",21.3934,-157.9751,11.9
4,5,USC00518838,"UPPER WAHIAWA 874.3, HI US",21.4992,-158.0111,306.6
5,6,USC00519523,"WAIMANALO EXPERIMENTAL FARM, HI US",21.33556,-157.71139,19.5
6,7,USC00519281,"WAIHEE 837.5, HI US",21.45167,-157.84889,32.9
7,8,USC00511918,"HONOLULU OBSERVATORY 702.2, HI US",21.3152,-157.9992,0.9
8,9,USC00516128,"MANOA LYON ARBO 785.2, HI US",21.3331,-157.8025,152.4


In [9]:
# Merge measurement and station dataframes with the station column
measurement_station_df = pd.merge(measurement_df, station_df, on='station')
measurement_station_df

Unnamed: 0,id_x,station,date,prcp,tobs,id_y,name,latitude,longitude,elevation
0,1,USC00519397,2010-01-01,0.08,65.0,1,"WAIKIKI 717.2, HI US",21.2716,-157.8168,3.0
1,2,USC00519397,2010-01-02,0.00,63.0,1,"WAIKIKI 717.2, HI US",21.2716,-157.8168,3.0
2,3,USC00519397,2010-01-03,0.00,74.0,1,"WAIKIKI 717.2, HI US",21.2716,-157.8168,3.0
3,4,USC00519397,2010-01-04,0.00,76.0,1,"WAIKIKI 717.2, HI US",21.2716,-157.8168,3.0
4,5,USC00519397,2010-01-06,,73.0,1,"WAIKIKI 717.2, HI US",21.2716,-157.8168,3.0
...,...,...,...,...,...,...,...,...,...,...
19545,19546,USC00516128,2017-08-19,0.09,71.0,9,"MANOA LYON ARBO 785.2, HI US",21.3331,-157.8025,152.4
19546,19547,USC00516128,2017-08-20,,78.0,9,"MANOA LYON ARBO 785.2, HI US",21.3331,-157.8025,152.4
19547,19548,USC00516128,2017-08-21,0.56,76.0,9,"MANOA LYON ARBO 785.2, HI US",21.3331,-157.8025,152.4
19548,19549,USC00516128,2017-08-22,0.50,76.0,9,"MANOA LYON ARBO 785.2, HI US",21.3331,-157.8025,152.4


In [10]:
# Convert date column using datetime module
# Format month-year
measurement_station_df['date'] = pd.to_datetime(measurement_station_df['date'])
measurement_station_df['month_year'] = measurement_station_df['date'].dt.strftime('%B-%Y')

# June (2010-2017) statistical data for all stations

In [11]:
# Filter for June records
june_df = measurement_station_df[measurement_station_df['date'].dt.month == 6]
june_df

Unnamed: 0,id_x,station,date,prcp,tobs,id_y,name,latitude,longitude,elevation,month_year
133,134,USC00519397,2010-06-01,0.00,78.0,1,"WAIKIKI 717.2, HI US",21.2716,-157.8168,3.0,June-2010
134,135,USC00519397,2010-06-02,0.01,76.0,1,"WAIKIKI 717.2, HI US",21.2716,-157.8168,3.0,June-2010
135,136,USC00519397,2010-06-03,0.00,78.0,1,"WAIKIKI 717.2, HI US",21.2716,-157.8168,3.0,June-2010
136,137,USC00519397,2010-06-04,0.00,76.0,1,"WAIKIKI 717.2, HI US",21.2716,-157.8168,3.0,June-2010
137,138,USC00519397,2010-06-05,0.00,77.0,1,"WAIKIKI 717.2, HI US",21.2716,-157.8168,3.0,June-2010
...,...,...,...,...,...,...,...,...,...,...,...
19492,19493,USC00516128,2017-06-26,0.02,79.0,9,"MANOA LYON ARBO 785.2, HI US",21.3331,-157.8025,152.4,June-2017
19493,19494,USC00516128,2017-06-27,0.10,74.0,9,"MANOA LYON ARBO 785.2, HI US",21.3331,-157.8025,152.4,June-2017
19494,19495,USC00516128,2017-06-28,0.02,74.0,9,"MANOA LYON ARBO 785.2, HI US",21.3331,-157.8025,152.4,June-2017
19495,19496,USC00516128,2017-06-29,0.04,76.0,9,"MANOA LYON ARBO 785.2, HI US",21.3331,-157.8025,152.4,June-2017


### The average precipitation for the month of June across all stations and years is ~0.14.

In [12]:
june_df['prcp'].mean()

0.13635959339263018

### The average temperature for the month of June across all stations and years is ~74.94.

In [13]:
june_df['tobs'].mean()

74.94411764705882

### June-year with the highest precipitation for each station.

USC00511918 - June 2011 - 0.03<br>
USC00513117 - June 2011 - 0.34<br>
USC00514830 - June 2011 - 0.37<br>
USC00516128 - June 2016 - 0.67<br>
USC00517948 - June 2011 - 0.19<br>
USC00518838 - June 2011 - 0.17<br>
USC00519281 - June 2016 - 0.29<br>
USC00519397 - June 2011 - 0.07<br>
USC00519523 - June 2016 - 0.09<br>

*June 2011 is the highest precipitation June-year for 6 out of the 9 stations.

In [14]:
# Create pivot table with June-year as rows and stations as columns
prcp_june_stations = june_df.pivot_table(index=['month_year'], values=['prcp'], columns=['station'], fill_value=0)
prcp_june_stations

Unnamed: 0_level_0,prcp,prcp,prcp,prcp,prcp,prcp,prcp,prcp,prcp
station,USC00511918,USC00513117,USC00514830,USC00516128,USC00517948,USC00518838,USC00519281,USC00519397,USC00519523
month_year,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2
June-2010,0.010345,0.028667,0.026071,0.244444,0.0165,0.067778,0.053,0.001,0.011724
June-2011,0.031,0.366667,0.373333,0.633,0.185385,0.172857,0.17,0.0704,0.078333
June-2012,0.0075,0.054333,0.08625,0.395,0.032727,0.03,0.097333,0.01,0.055
June-2013,0.009333,0.074,0.03,0.620333,0.025556,0.0,0.164667,0.034667,0.059655
June-2014,0.017037,0.066333,0.078095,0.455517,0.04375,0.0,0.15,0.045333,0.051667
June-2015,0.015333,0.075,0.0904,0.488929,0.0225,0.0,0.151667,0.005667,0.023214
June-2016,0.0,0.173,0.107692,0.6732,0.15,0.0,0.286,0.009,0.087857
June-2017,0.0,0.099333,0.12381,0.357692,0.02,0.0,0.142,0.012333,0.031786


### June-year with the highest temperature for each station.

USC00511918 - June 2010 - 75.62<br>
USC00513117 - June 2017 - 75.97<br>
USC00514830 - June 2017 - 78.88<br>
USC00516128 - June 2017 - 74.93<br>
USC00517948 - June 2015 - 78.69<br>
USC00518838 - June 2011 - 75.40<br>
USC00519281 - June 2017 - 74.77<br>
USC00519397 - June 2017 - 79.20<br>
USC00519523 - June 2017 - 78.90<br>

*June 2017 is the highest temperature June-year for 6 out of the 9 stations.

In [15]:
# Create pivot table with June-year as rows and stations as columns
tobs_june_stations = june_df.pivot_table(index=['month_year'], values=['tobs'], columns=['station'], fill_value=0)
tobs_june_stations

Unnamed: 0_level_0,tobs,tobs,tobs,tobs,tobs,tobs,tobs,tobs,tobs
station,USC00511918,USC00513117,USC00514830,USC00516128,USC00517948,USC00518838,USC00519281,USC00519397,USC00519523
month_year,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2
June-2010,75.62069,74.8,73.689655,72.863636,76.75,72.565217,73.633333,77.766667,76.172414
June-2011,73.433333,73.466667,72.583333,70.733333,75.190476,75.4,74.615385,76.538462,75.0
June-2012,75.344828,72.2,75.666667,70.964286,74.95,73.2,72.7,76.233333,75.4
June-2013,74.933333,73.566667,76.25,71.2,76.0,0.0,72.766667,77.0,76.241379
June-2014,73.964286,73.833333,76.538462,71.724138,78.466667,0.0,72.733333,78.133333,76.740741
June-2015,70.157895,74.807692,77.296296,71.62069,78.692308,0.0,72.166667,78.3,77.37931
June-2016,0.0,73.866667,77.111111,71.814815,75.809524,0.0,72.966667,77.166667,77.62069
June-2017,0.0,75.966667,78.875,74.928571,78.6,0.0,74.766667,79.2,78.896552


### June (2010-2017) precipitation data distribution for each station.

USC00511918 - Precipitation data is skewed to the right<br>
USC00513117 - Precipitation data is skewed to the right<br>
USC00514830 - Precipitation data is skewed to the right<br>
USC00516128 - Precipitation data is skewed to the right<br>
USC00517948 - Precipitation data is skewed to the right<br>
USC00518838 - Precipitation data is skewed to the right<br>
USC00519281 - Precipitation data is skewed to the right<br>
USC00519397 - Precipitation data is skewed to the right<br>
USC00519523 - Precipitation data is skewed to the left<br>

*8 out of the 9 stations have precipitation data that is skewed to the right.

In [16]:
# Identify statistical data with prcp_june_stations
prcp_june_stations.describe()

Unnamed: 0_level_0,prcp,prcp,prcp,prcp,prcp,prcp,prcp,prcp,prcp
station,USC00511918,USC00513117,USC00514830,USC00516128,USC00517948,USC00518838,USC00519281,USC00519397,USC00519523
count,8.0,8.0,8.0,8.0,8.0,8.0,8.0,8.0,8.0
mean,0.011319,0.117167,0.114456,0.483514,0.062052,0.033829,0.151833,0.02355,0.049905
std,0.010084,0.109343,0.11006,0.150561,0.066419,0.0612,0.066905,0.024327,0.026379
min,0.0,0.028667,0.026071,0.244444,0.0165,0.0,0.053,0.001,0.011724
25%,0.005625,0.063333,0.066071,0.385673,0.021875,0.0,0.130833,0.008167,0.029643
50%,0.009839,0.0745,0.088325,0.472223,0.029141,0.0,0.150833,0.011167,0.053333
75%,0.015759,0.11775,0.111722,0.6235,0.070312,0.039444,0.166,0.037333,0.064325
max,0.031,0.366667,0.373333,0.6732,0.185385,0.172857,0.286,0.0704,0.087857


### June (2010-2017) temperature data distribution for each station.

USC00511918 - Temperature data is skewed to the left<br>
USC00513117 - Temperature data is skewed to the right<br>
USC00514830 - Temperature data is skewed to the left<br>
USC00516128 - Temperature data is skewed to the right<br>
USC00517948 - Temperature data is skewed to the right<br>
USC00518838 - Temperature data is skewed to the right<br>
USC00519281 - Temperature data is skewed to the right<br>
USC00519397 - Temperature data is skewed to the right<br>
USC00519523 - Temperature data is skewed to the right<br>

*7 out of the 9 stations have temperature data that is skewed to the right.

In [17]:
# Identify statistical data with tobs_june_stations
tobs_june_stations.describe()

Unnamed: 0_level_0,tobs,tobs,tobs,tobs,tobs,tobs,tobs,tobs,tobs
station,USC00511918,USC00513117,USC00514830,USC00516128,USC00517948,USC00518838,USC00519281,USC00519397,USC00519523
count,8.0,8.0,8.0,8.0,8.0,8.0,8.0,8.0,8.0
mean,55.431796,74.063462,76.001316,71.981184,76.807372,27.645652,73.29359,77.542308,76.681386
std,34.255687,1.12667,2.023272,1.357044,1.569112,38.16291,0.952684,0.991248,1.264998
min,0.0,72.2,72.583333,70.733333,74.95,0.0,72.166667,76.233333,75.0
25%,52.618421,73.541667,75.172414,71.141071,75.654762,0.0,72.725,76.884615,75.97931
50%,73.69881,73.85,76.394231,71.672414,76.375,0.0,72.866667,77.466667,76.49106
75%,75.036207,74.801923,77.157407,72.07702,78.5,72.723913,73.878846,78.175,77.439655
max,75.62069,75.966667,78.875,74.928571,78.692308,75.4,74.766667,79.2,78.896552


### June-year by precipitation (descending order)

In [18]:
# Create pivot table with June-year as rows and stations aggregate as column
prcp_june_station = june_df.pivot_table(index=['month_year'], values=['prcp'], fill_value=0)
prcp_june_station.sort_values(by=['prcp'], ascending=False)

Unnamed: 0_level_0,prcp
month_year,Unnamed: 1_level_1
June-2011,0.240142
June-2016,0.212312
June-2013,0.144195
June-2015,0.12516
June-2014,0.124372
June-2017,0.12
June-2012,0.097062
June-2010,0.042241


### June-year by temperature (descending order)

In [19]:
# Create pivot table with June-year as rows and stations aggregate as column
tobs_june_station = june_df.pivot_table(index=['month_year'], values=['tobs'], fill_value=0)
tobs_june_station.sort_values(by=['tobs'], ascending=False)

Unnamed: 0_level_0,tobs
month_year,Unnamed: 1_level_1
June-2017,77.219895
June-2016,75.175258
June-2014,75.027907
June-2015,74.990148
June-2010,74.92562
June-2013,74.599078
June-2012,74.0
June-2011,73.938326


### June (2010-2017) precipitation data distribution is skewed to the right.

In [20]:
# Identify statistical data with prcp_june_station
prcp_june_station.describe()

Unnamed: 0,prcp
count,8.0
mean,0.138185
std,0.062694
min,0.042241
25%,0.114265
50%,0.124766
75%,0.161224
max,0.240142


### June (2010-2017) temperature data distribution is skewed to the right.

In [21]:
# Identify statistical data with tobs_june_station
tobs_june_station.describe()

Unnamed: 0,tobs
count,8.0
mean,74.984529
std,1.017773
min,73.938326
25%,74.449309
50%,74.957884
75%,75.064745
max,77.219895


# December (2010-2016) statistical data for all stations

In [22]:
# Filter for December records
dec_df = measurement_station_df[measurement_station_df['date'].dt.month == 12]
dec_df

Unnamed: 0,id_x,station,date,prcp,tobs,id_y,name,latitude,longitude,elevation,month_year
305,306,USC00519397,2010-12-01,0.04,76.0,1,"WAIKIKI 717.2, HI US",21.2716,-157.8168,3.0,December-2010
306,307,USC00519397,2010-12-03,0.00,74.0,1,"WAIKIKI 717.2, HI US",21.2716,-157.8168,3.0,December-2010
307,308,USC00519397,2010-12-04,0.00,74.0,1,"WAIKIKI 717.2, HI US",21.2716,-157.8168,3.0,December-2010
308,309,USC00519397,2010-12-06,0.00,64.0,1,"WAIKIKI 717.2, HI US",21.2716,-157.8168,3.0,December-2010
309,310,USC00519397,2010-12-07,0.00,64.0,1,"WAIKIKI 717.2, HI US",21.2716,-157.8168,3.0,December-2010
...,...,...,...,...,...,...,...,...,...,...,...
19323,19324,USC00516128,2016-12-27,0.14,71.0,9,"MANOA LYON ARBO 785.2, HI US",21.3331,-157.8025,152.4,December-2016
19324,19325,USC00516128,2016-12-28,0.14,71.0,9,"MANOA LYON ARBO 785.2, HI US",21.3331,-157.8025,152.4,December-2016
19325,19326,USC00516128,2016-12-29,1.03,69.0,9,"MANOA LYON ARBO 785.2, HI US",21.3331,-157.8025,152.4,December-2016
19326,19327,USC00516128,2016-12-30,2.37,65.0,9,"MANOA LYON ARBO 785.2, HI US",21.3331,-157.8025,152.4,December-2016


### The average precipitation for the month of December across all stations and years is ~0.22.

In [23]:
dec_df['prcp'].mean()

0.21681850533807792

### The average temperature for the month of December across all stations and years is ~71.04.

In [24]:
dec_df['tobs'].mean()

71.04152933421226

### December-year with the highest precipitation for each station.

USC00511918 - December 2010 - 0.48<br>
USC00513117 - December 2010 - 0.58<br>
USC00514830 - December 2010 - 0.35<br>
USC00516128 - December 2011 - 0.73<br>
USC00517948 - December 2011 - 0.40<br>
USC00518838 - December 2010 - 0.69<br>
USC00519281 - December 2010 - 0.60<br>
USC00519397 - December 2010 - 0.19<br>
USC00519523 - December 2010 - 0.42<br>

*December 2010 is the highest precipitation December-year for 7 out of the 9 stations.

In [25]:
# Create pivot table with December-year as rows and stations as columns
prcp_dec_stations = dec_df.pivot_table(index=['month_year'], values=['prcp'], columns=['station'], fill_value=0)
prcp_dec_stations

Unnamed: 0_level_0,prcp,prcp,prcp,prcp,prcp,prcp,prcp,prcp,prcp
station,USC00511918,USC00513117,USC00514830,USC00516128,USC00517948,USC00518838,USC00519281,USC00519397,USC00519523
month_year,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2
December-2010,0.478065,0.581613,0.347241,0.544516,0.2275,0.686,0.59871,0.189167,0.422581
December-2011,0.021613,0.136774,0.099524,0.725172,0.396,0.0,0.192581,0.071613,0.101935
December-2012,0.009,0.056452,0.090833,0.278387,0.01625,0.0,0.106129,0.027857,0.069677
December-2013,0.132667,0.156774,0.060435,0.388148,0.005,0.16,0.231935,0.113871,0.140968
December-2014,0.038621,0.165806,0.193529,0.5912,0.066667,0.0,0.212903,0.046129,0.188621
December-2015,0.0,0.183,0.0765,0.4468,0.195,0.0,0.162258,0.043226,0.10913
December-2016,0.0,0.141613,0.1328,0.583103,0.116667,0.0,0.21,0.056452,0.081429


### December-year with the highest temperature for each station.

USC00511918 - December 2012 - 70.57<br>
USC00513117 - December 2015 - 73.07<br>
USC00514830 - December 2015 - 76.79<br>
USC00516128 - December 2015 - 70.64<br>
USC00517948 - December 2014 - 74.55<br>
USC00518838 - December 2012 - 73.50<br>
USC00519281 - December 2015 - 73.39<br>
USC00519397 - December 2015 - 73.19<br>
USC00519523 - December 2015 - 74.04<br>

*December 2015 is the highest temperature December-year for 6 out of the 9 stations.

In [26]:
# Create pivot table with December-year as rows and stations as columns
tobs_dec_stations = dec_df.pivot_table(index=['month_year'], values=['tobs'], columns=['station'], fill_value=0)
tobs_dec_stations

Unnamed: 0_level_0,tobs,tobs,tobs,tobs,tobs,tobs,tobs,tobs,tobs
station,USC00511918,USC00513117,USC00514830,USC00516128,USC00517948,USC00518838,USC00519281,USC00519397,USC00519523
month_year,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2
December-2010,70.0,70.967742,69.233333,69.483871,69.5,72.230769,69.0,70.64,71.612903
December-2011,70.483871,71.096774,70.653846,69.166667,70.916667,0.0,70.129032,71.870968,72.225806
December-2012,70.566667,70.064516,74.3,68.806452,73.166667,73.5,70.129032,71.1,73.516129
December-2013,69.333333,71.483871,74.333333,70.068966,70.95,72.5,69.548387,70.935484,72.16129
December-2014,68.0,70.354839,74.173913,68.222222,74.55,0.0,66.709677,69.387097,70.266667
December-2015,0.0,73.066667,76.791667,70.642857,73.384615,0.0,73.387097,73.193548,74.037037
December-2016,0.0,70.516129,74.214286,68.666667,70.15,0.0,70.419355,70.548387,73.413793


### December (2010-2016) precipitation data distribution for each station.

USC00511918 - Precipitation data is skewed to the right<br>
USC00513117 - Precipitation data is skewed to the right<br>
USC00514830 - Precipitation data is skewed to the right<br>
USC00516128 - Precipitation data is skewed to the left<br>
USC00517948 - Precipitation data is skewed to the right<br>
USC00518838 - Precipitation data is skewed to the right<br>
USC00519281 - Precipitation data is skewed to the right<br>
USC00519397 - Precipitation data is skewed to the right<br>
USC00519523 - Precipitation data is skewed to the right<br>

*8 out of the 9 stations have precipitation data that is skewed to the right.

In [27]:
# Identify statistical data with prcp_dec_stations
prcp_dec_stations.describe()

Unnamed: 0_level_0,prcp,prcp,prcp,prcp,prcp,prcp,prcp,prcp,prcp
station,USC00511918,USC00513117,USC00514830,USC00516128,USC00517948,USC00518838,USC00519281,USC00519397,USC00519523
count,7.0,7.0,7.0,7.0,7.0,7.0,7.0,7.0,7.0
mean,0.097138,0.203147,0.14298,0.50819,0.146155,0.120857,0.244931,0.078331,0.159192
std,0.174229,0.171706,0.100185,0.148263,0.138692,0.256239,0.16146,0.056108,0.122757
min,0.0,0.056452,0.060435,0.278387,0.005,0.0,0.106129,0.027857,0.069677
25%,0.0045,0.139194,0.083667,0.417474,0.041458,0.0,0.177419,0.044677,0.091682
50%,0.021613,0.156774,0.099524,0.544516,0.116667,0.0,0.21,0.056452,0.10913
75%,0.085644,0.174403,0.163165,0.587152,0.21125,0.08,0.222419,0.092742,0.164794
max,0.478065,0.581613,0.347241,0.725172,0.396,0.686,0.59871,0.189167,0.422581


### December (2010-2016) temperature data distribution for each station.

USC00511918 - Temperature data is skewed to the left<br>
USC00513117 - Temperature data is skewed to the right<br>
USC00514830 - Temperature data is skewed to the left<br>
USC00516128 - Temperature data is skewed to the right<br>
USC00517948 - Temperature data is skewed to the right<br>
USC00518838 - Temperature data is skewed to the right<br>
USC00519281 - Temperature data is skewed to the left<br>
USC00519397 - Temperature data is skewed to the right<br>
USC00519523 - Temperature data is skewed to the right<br>

*6 out of the 9 stations have temperature data that is skewed to the right.

In [28]:
# Identify statistical data with tobs_dec_stations
tobs_dec_stations.describe()

Unnamed: 0_level_0,tobs,tobs,tobs,tobs,tobs,tobs,tobs,tobs,tobs
station,USC00511918,USC00513117,USC00514830,USC00516128,USC00517948,USC00518838,USC00519281,USC00519397,USC00519523
count,7.0,7.0,7.0,7.0,7.0,7.0,7.0,7.0,7.0
mean,49.769124,71.078648,73.385768,69.293957,71.802564,31.175824,69.903226,71.096498,72.461947
std,34.009757,0.99982,2.560845,0.841844,1.891054,38.885001,1.985204,1.187029,1.30285
min,0.0,70.064516,69.233333,68.222222,69.5,0.0,66.709677,69.387097,70.266667
25%,34.0,70.435484,72.41388,68.736559,70.533333,0.0,69.274194,70.594194,71.887097
50%,69.333333,70.967742,74.214286,69.166667,70.95,0.0,70.129032,70.935484,72.225806
75%,70.241935,71.290323,74.316667,69.776418,73.275641,72.365385,70.274194,71.485484,73.464961
max,70.566667,73.066667,76.791667,70.642857,74.55,73.5,73.387097,73.193548,74.037037


### December-year by precipitation (descending order)

In [29]:
# Create pivot table with December-year as rows and stations aggregate as column
prcp_dec_station = dec_df.pivot_table(index=['month_year'], values=['prcp'], fill_value=0)
prcp_dec_station.sort_values(by=['prcp'], ascending=False)

Unnamed: 0_level_0,prcp
month_year,Unnamed: 1_level_1
December-2010,0.459087
December-2011,0.201581
December-2016,0.199494
December-2014,0.188439
December-2015,0.169506
December-2013,0.169014
December-2012,0.089604


### December-year by temperature (descending order)

In [30]:
# Create pivot table with December-year as rows and stations aggregate as column
tobs_dec_station = dec_df.pivot_table(index=['month_year'], values=['tobs'], fill_value=0)
tobs_dec_station.sort_values(by=['tobs'], ascending=False)

Unnamed: 0_level_0,tobs
month_year,Unnamed: 1_level_1
December-2015,73.423913
December-2012,71.188073
December-2016,71.13
December-2013,71.094017
December-2011,70.820628
December-2010,70.208511
December-2014,69.896861


### December (2010-2016) precipitation data distribution is skewed to the right.

In [31]:
# Identify statistical data with prcp_dec_station
prcp_dec_station.describe()

Unnamed: 0,prcp
count,7.0
mean,0.210961
std,0.115829
min,0.089604
25%,0.16926
50%,0.188439
75%,0.200538
max,0.459087


### December (2010-2016) temperature data distribution is skewed to the right.

In [32]:
# Identify statistical data with tobs_dec_station
tobs_dec_station.describe()

Unnamed: 0,tobs
count,7.0
mean,71.108858
std,1.134979
min,69.896861
25%,70.514569
50%,71.094017
75%,71.159037
max,73.423913


# June and December weather analysis
Note: there is no recorded weather data for December 2017.

### Key statistical data for temperature in June (2010-2017) across all of the stations and years
count (months): 8.00<br>
mean: 74.98<br>
std: 1.02<br>
min: 73.94<br>
25%: 74.45<br>
50%: 74.96<br>
75%: 75.06<br>
max: 77.22<br>


### Key statistical data for temperature in December across all of the stations and years
count (months): 7.00<br>
mean: 71.11<br>
std: 1.13<br>
min: 69.90<br>
25%: 70.51<br>
50%: 71.09<br>
75%: 71.16<br>
max: 73.42<br>

### Temperature Summary
The average June temperature at 74.94 is higher than the average December temperature at 71.04. With respect to each month's yearly data, 2017 was the year for June with the highest average temperature at 77.22 (there is no 2017 weather data for December), and 2015 was the year for December with the highest temperature at 73.42 (2015 was the fourth highest for June at 74.99).

The highest recorded average June temperature was in 2017 for station USC00519397 at 79.20. The highest recorded December average temperature was in 2015 for station USC00514830 at 76.79. Of the highest average temperature for each respective station for 2010-2017, June recorded station USC00516128 with the lowest average in 2017 at 74.93, December recorded station USC00519397 with the lowest average in 2010 at 0.19

### Precipitation Summary
The average December percipitation at 0.22 is higher than the average June percipitation at 0.14. With respect to each month's yearly data, 2010 was the year for December with the highest average precipitation at 0.46 (2010 had the lowest average for June at 0.04), and 2011 was the year for June with the highest average precipitation at 0.24 (2011 had the second highest average for December at 0.2).

The highest recorded average June precipitation was in 2016 for station USC00516128 at 0.67. Similarly, the highest recorded average December precipitation was also for station USC00516128 at 0.73 in 2011. Of the highest average precipitation for each respective station for 2010-2017, June recorded station USC00519397 with the lowest average in 2011 at 0.07. Similarly, December recorded station USC00519397 with the lowest average in 2010 at 0.19.

### Recommendations for future analysis
Our weather analysis so far has been helpful to understand the precipitation and temperature trends for June and December. Depending on the months of the year that the shop will be open, we can expand the dataset to provide similar weather analysis. Once we have this complete view of the monthly weather patterns, the next step will be to identify a location in Oahu. The dataset already includes the latitude and longitude of the station which provides a general location area to explore. Additional variables to consider is the proximity to beaches, surfing conditions, and availability of other similar shops.