In [1]:

# Importing dependencies
#*******************************************
import matplotlib
from matplotlib import style
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
import datetime as dt
import sqlalchemy
from sqlalchemy.ext.automap import automap_base
from sqlalchemy.orm import Session
from sqlalchemy import create_engine, inspect, func

ModuleNotFoundError: No module named 'matplotlib'

In [2]:

# SQL connection: open the SQLite database and reflect its tables into mapped classes
engine = create_engine("sqlite:///hawaii.sqlite", echo=False)
Base = automap_base()
Base.prepare(engine, reflect=True)
# ORM session bound to the engine; the temperature query further down calls session.query(...)
session = Session(engine)

NameError: name 'create_engine' is not defined

In [None]:
# List every table in the SQLite database, numbered, with each column's name and type
inspector = inspect(engine)
tbl_names = inspector.get_table_names()
i = 1
for table in tbl_names:
    print("Table {}: {}".format(i, table))
    i += 1
    columns = inspector.get_columns(table)
    for col in columns:
        print("     Col: {} ({})".format(col["name"], col["type"]))

In [None]:
# Show the primary-key constraint reflected for each of the two tables
for pk_table in ("stations", "measurements"):
    print(inspector.get_pk_constraint(pk_table))

In [None]:
# Bind the automapped ORM classes for the two reflected tables to module-level names
Stations = Base.classes.stations
Measurements = Base.classes.measurements

In [None]:
# Load the "date" and "prcp" values of the "measurements" table into a DataFrame.
# read_sql's `columns` argument only applies when a whole table is read by name,
# so it is not passed for this SQL query; the SELECT list already fixes the columns.
prcp_analysis_df = pd.read_sql("SELECT date, prcp FROM measurements", con=engine)

In [None]:
# Parse the "date" column using the YYYY-MM-DD format; values that fail to parse become NaT
parsed_dates = pd.to_datetime(prcp_analysis_df["date"], format="%Y-%m-%d", errors="coerce")
prcp_analysis_df["date"] = parsed_dates

In [None]:
# Analysis window: the latest date present in the data and the day 365 days before it
pa_max_date = prcp_analysis_df["date"].max().date()
# NOTE(review): pa_today is not read by any cell visible here — confirm it is still needed
pa_today = dt.date.today()
one_year = dt.timedelta(days=365)
pa_min_date = pa_max_date - one_year
print("Date Range: {} to {}".format(pa_min_date, pa_max_date))

In [None]:
# Keep only the records dated on or after pa_min_date (the last 12 months of data).
# pa_min_date is a datetime.date; it is wrapped in pd.Timestamp because pandas does
# not allow an ordering comparison between a datetime64 column and a plain date object.
prcp_analysis_df = prcp_analysis_df.loc[prcp_analysis_df["date"] >= pd.Timestamp(pa_min_date)]

In [None]:
# Move "date" into the index, then summarise the remaining columns
prcp_analysis_df = prcp_analysis_df.set_index("date")
prcp_analysis_df.describe(include="all")

In [None]:
# Line chart of "prcp" against the frame's index, saved to prcp_analysis.png
ax = prcp_analysis_df.plot.line(y="prcp", figsize=(20, 8), fontsize=17, color="#33B3FF")
ax.set_frame_on(b=True)
# Dark plot background; this is the only face colour that takes effect on the axes
ax.set_facecolor("#202020")
ax.grid(True, alpha=.3)
ax.set_title("Precipitation Analysis\n", fontsize=28, color="#000000", fontweight="bold")
ax.set_xlabel(xlabel="\n\nPast 12 Months of Data", fontsize=20, color="#000000", fontweight="bold")
# No `mode` is passed: the legend keeps its natural width instead of being
# stretched across the axes.
ax.legend(frameon=True, fancybox=True, facecolor="#FFFFFF", edgecolor="#000000",
          shadow=True, loc="best", fontsize=15)
ax.label_outer()
plt.tight_layout()
plt.savefig("prcp_analysis.png")

In [None]:
# Count the measurement rows per station to find the most active station.
# read_sql's `columns` argument only applies when a whole table is read by name,
# so it is not passed here; the SELECT list already names the two result columns.
active_df = pd.read_sql(
    "SELECT s.station, count(m.station) as station_count "
    "FROM measurements m, stations s "
    "WHERE m.station=s.station "
    "GROUP BY m.station",
    con=engine,
)

In [None]:
# Order the stations from the highest observation count to the lowest
active_df = active_df.sort_values(by="station_count", ascending=False)
# Use the "station" column as the index of the sorted counts
indexed_activity = active_df.set_index("station")
# Display the per-station counts, most active station first
indexed_activity

In [None]:
# Highest observation count across all stations
max_activity = indexed_activity["station_count"].max()

# Row positions whose count equals that maximum; the first one names the station reported
is_max = (indexed_activity["station_count"] == max_activity).values
most_active = list(np.flatnonzero(is_max))
station_max = str(indexed_activity.iloc[most_active].index[0])
print("Station with the most observations recorded: {}, with a count of {:,} observations".format(
    station_max, max_activity))

In [None]:
# Fetch the (date, tobs) pairs recorded by station_max on or after pa_min_date
# NOTE(review): `session` must already be an SQLAlchemy Session bound to `engine` — confirm it is created before this cell
tobs_query = session.query(Measurements.date, Measurements.tobs)
tobs_query = tobs_query.filter(Measurements.date >= pa_min_date)
tobs_query = tobs_query.filter(Measurements.station == station_max)
sh_results = tobs_query.all()

In [None]:

# Build the DataFrame straight from the query result rows.
# Routing the rows through np.array first would force the mixed (date, tobs) values
# into one common dtype, and an empty result would not yield the two expected columns.
sh_max_df = pd.DataFrame(sh_results, columns=["date", "tobs"])
sh_max_df

In [None]:
# Cast the "tobs" column to float so it is treated as numeric data
sh_max_df["tobs"] = sh_max_df["tobs"].astype(float)

In [None]:
# Verify the column types, then summarise the frame.
# Only the last expression of a cell is rendered automatically, so the dtypes
# are printed explicitly to make the check visible.
print(sh_max_df.dtypes)
sh_max_df.describe(include="all")


In [None]:

# Histogram (12 bins) of the temperature observations for station_max, saved to tobs_histogram.png
ax = sh_max_df.plot.hist(bins=12, figsize=(15, 13), color="#CF0055", fontsize=17, edgecolor="#FFC1DA")
ax.set_frame_on(b=True)
ax.set_facecolor("#222222")
ax.grid(True, alpha=.3)
ax.set_title(station_max + " Histogram\n", fontsize=32, color="#000000", fontweight="bold")
# The x axis of the histogram carries the binned tobs values, so the label names them
ax.set_xlabel(xlabel="\n\nTemperature Observations (tobs) - Past 12 Months of Data", fontsize=20,
              color="#000000", fontweight="bold")
ax.set_ylabel(ylabel="Frequency\n\n", fontsize=20, fontweight="bold")
# No `mode` is passed: the legend keeps its natural width instead of being
# stretched across the axes.
ax.legend(frameon=True, fancybox=True, facecolor="#FFFFFF", edgecolor="#FFC1DA",
          shadow=True, loc="best", fontsize=20)
ax.label_outer()
plt.tight_layout()
plt.savefig("tobs_histogram.png")