In [1]:
#dependencies
import numpy as np
import pandas as pd
import datetime as dt
import os
import matplotlib.pyplot as plt
import seaborn as sns

import sqlalchemy
from sqlalchemy.ext.automap import automap_base
from sqlalchemy.orm import Session
from sqlalchemy import create_engine,inspect
from sqlalchemy import func

In [2]:
#start a session to query the database
engine = create_engine("sqlite:///hawaii.sqlite")

In [3]:
#show existing databases 
Base = automap_base()
Base.prepare(engine, reflect=True)

In [4]:
#view all classes
Base.classes.keys()

['clean_measurements', 'stations']

In [5]:
#save references to each table
stations = Base.classes.stations
measurements = Base.classes.clean_measurements

In [6]:
#create session from Python to databases
session = Session(engine)

In [7]:
#retrieve last 12 months of precipitation
prior_year = dt.date.today() - dt.timedelta(days=365)
precipitation = session.query(measurements.date, measurements.prcp).filter(measurements.date >= prior_year).all()

In [9]:
#save query as Panadas dataframe and set index to date column
df = pd.DataFrame(precipitation, columns=['date', 'precipitation'])
df.set_index(df['date'], inplace=True)
df.head()

Unnamed: 0_level_0,date,precipitation
date,Unnamed: 1_level_1,Unnamed: 2_level_1


In [11]:
#create DataFrame plot method
sns.set_style('darkgrid')
df.plot(rot=90)
plt.title("12 Months Precipitation")
plt.xlable("Date")
plt.ylable("Inches")
x_values = df.index
plt.xticks(np.arange(len(x_values),step=200), x_values[::200], rotation=60)
plt.show()

TypeError: Empty 'DataFrame': no numeric data to plot

In [12]:
#print summary statistics of precipitation data
df.describe()

Unnamed: 0,date,precipitation
count,0,0
unique,0,0


In [19]:
#number of stations
from sqlalchemy import func
station_count = len(session.query(stations.station).distinct().all())
print(f"There are {station_count} stations.")

There are 0 stations.


In [21]:
#active stations
active_stations = session.query(measurements.station, func.count(measurements.station)).group_by(measurements.station).order_by(func.count(measurements.station).desc()).all()
print(active_stations)

[]


In [22]:
print(f"Station with the largest number of observations: {active_stations}.")

Station with the largest number of observations: [].


In [23]:
#retrieve last 12 months of temperatures
temperature = session.query(measurements.station, measurements.tobs).filter(measurements.date).order_by(measurements.date).all()

In [None]:
#filter station with highest number of observations
station_observations = session.query(measurements.date, measurements.tobs).filter(measure.date.filter(measurements.station == "USC00519281").order_by(measurements.date).all()

In [None]:
#dataframe of values
df2 = pd.DataFrame(station_observations, columns = list(["date", "tobs"]))
df2.set_index("date", inplace=True)
df2.head()

In [None]:
#calculate min, max, & steps for binding
min = int(df2.tobs.min())
max = int(df2.tobs.max())
step = int(max - min)/12)
print(min)
print(max)
print(step)

In [None]:
#create list containing bins
bins1 = [x for x in range(min, max, step)]

In [None]:
#create bar graph
plt.hist(df2["tobs"], bins=bins1, histtype='bar')
plt.title("Station Analysis")
plt.xlable("TOBS")
plt.ylable("Frequency")

In [None]:
#start and end dates
start_date = input("Enter start date in format %Y-%m-%d: ")
end_date = input("Enter end date in format %Y-%m-%d: ")

def calculate_temperature(start_date, end_date):
        min_temp=0;
        max_temp=0;
        avg_temp=0;
        
        temp_list = session.query(measurements.tobs).filter(measurements.date >= start_date).filter(measurements.date <= end_date).order_by(measurements.date).all()
        
        clean_temp = [r[0] for r in temp_list]
        
        min_temp = np.min(clean_temp)
        max_temp = np.max(clean_temp)
        avg_temp = int(np.floor(np.average(clean_temp)))
        
#return results
results = f"Lowest temperature in fahrenheit {min_temp}\n
Highest temperature in fahrenheit {max_temp}\n
Average temperature in fahrenheit{avg_temp}"

In [None]:
#create bar graph
fig, ax = plt.subplots()
ax.bar(1, avg_temp, width=1, yerr=(avg_temp), align='center', alpha=0.5)
plt.title("Temperature Analysis")
plt.ylable("Temperature (F)")
return(results)
print(calculate_temperature(start_date, end_date))