In [1]:
import pandas as pd
import numpy as np
import sqlalchemy
from sqlalchemy import create_engine
from sqlalchemy import MetaData
from sqlalchemy.ext.declarative import declarative_base
from sqlalchemy import Column, Integer, String, Numeric, Text, Float, Date, ForeignKey

In [2]:
# Load both raw CSVs. `header=0` drops each file's own header row so the
# descriptive names below take its place before the data is stored.
measure_columns = ('station_name', 'date', 'precipitation', 'temperature')
station_columns = ('station_name', 'location_name', 'latitude', 'longitude', 'elevation')

measures = pd.read_csv(
    'hawaii_measurements.csv',
    header=0,
    names=measure_columns,
    parse_dates=[1],  # positional index of the date column
)
stations = pd.read_csv(
    'hawaii_stations.csv',
    header=0,
    names=station_columns,
)

In [3]:
# Preview the first rows to check the renamed columns and the parsed date column.
measures.head()

Unnamed: 0,station_name,date,precipitation,temperature
0,USC00519397,2010-01-01,0.08,65
1,USC00519397,2010-01-02,0.0,63
2,USC00519397,2010-01-03,0.0,74
3,USC00519397,2010-01-04,0.0,76
4,USC00519397,2010-01-06,,73


In [4]:
# There are 1,447 incomplete cases in "measures" due to NaN values in measures['precipitation'].

measures.isnull().sum()

station_name        0
date                0
precipitation    1447
temperature         0
dtype: int64

In [5]:
# There are no NaN values in "stations" (the full nine-row frame is displayed for inspection).

stations

Unnamed: 0,station_name,location_name,latitude,longitude,elevation
0,USC00519397,"WAIKIKI 717.2, HI US",21.2716,-157.8168,3.0
1,USC00513117,"KANEOHE 838.1, HI US",21.4234,-157.8015,14.6
2,USC00514830,"KUALOA RANCH HEADQUARTERS 886.9, HI US",21.5213,-157.8374,7.0
3,USC00517948,"PEARL CITY, HI US",21.3934,-157.9751,11.9
4,USC00518838,"UPPER WAHIAWA 874.3, HI US",21.4992,-158.0111,306.6
5,USC00519523,"WAIMANALO EXPERIMENTAL FARM, HI US",21.33556,-157.71139,19.5
6,USC00519281,"WAIHEE 837.5, HI US",21.45167,-157.84889,32.9
7,USC00511918,"HONOLULU OBSERVATORY 702.2, HI US",21.3152,-157.9992,0.9
8,USC00516128,"MANOA LYON ARBO 785.2, HI US",21.3331,-157.8025,152.4


In [6]:
# If complete cases are needed, Pandas' .dropna() can be used to eliminate all rows with NaNs.
# Here a "clean" copy of the data is saved to a new CSV.
# index=False keeps the row index (which has gaps after dropna) out of the file,
# so the CSV holds only the four data columns.

measures_clean = measures.dropna(axis=0)
measures_clean.to_csv("hawaii_measurements_clean.csv", index=False)

In [7]:
# Create engine and connection to SQLite.
# A file-backed SQLite URL is "sqlite:///<path>"; the leading colon is only part of
# the special ":memory:" name and must not prefix a real file name.

engine = create_engine("sqlite:///hawaii_weather.sqlite")
conn = engine.connect()


In [11]:
# Declarative base class; the `station` and `measurement` models inherit from it.
base = declarative_base()

In [12]:
class station(base):
    """ORM model mapped to the ``station`` table, one row per station."""

    __tablename__ = 'station'

    # station_name is the primary key; measurement.station_name refers to it.
    station_name = Column(String(length=255), primary_key=True)
    location_name = Column(String(length=255))

    # Position and elevation as read from the stations CSV.
    # NOTE(review): Numeric is a decimal type; the CSV values are floats --
    # confirm Numeric (rather than Float) is intended.
    latitude = Column(Numeric)
    longitude = Column(Numeric)
    elevation = Column(Numeric)

    def __repr__(self):
        """Return a short debug string with the station and location names."""
        shown = (self.station_name, self.location_name)
        return '<station(%r, %r)>' % shown

class measurement(base):
    """ORM model mapped to the ``measurement`` table, one row per observation."""

    __tablename__ = 'measurement'

    # Integer primary key; the CSV-derived records carry no id of their own.
    id = Column(Integer, primary_key=True)

    # Foreign key to station.station_name.
    station_name = Column(String(length=255), ForeignKey('station.station_name'))
    date = Column(Date)

    # Both readings are declared nullable explicitly.
    precipitation = Column(Numeric, nullable=True)
    temperature = Column(Numeric, nullable=True)

    def __repr__(self):
        """Return a short debug string with the station name and date."""
        shown = (self.station_name, self.date)
        return "<measure(%r, %r)>" % shown

TypeError: evaluates_none() missing 1 required positional argument: 'self'

In [None]:
# Display the Table object behind the `station` model to check its columns.
station.__table__

In [None]:
# Display the Table object behind the `measurement` model to check its columns.
measurement.__table__

In [None]:
# Create the tables registered on base.metadata in the database behind `engine`.
base.metadata.create_all(engine)

In [None]:
# Convert the stations frame to a list with one dict per row, then show the first record.
stations_dict = stations.to_dict(orient='records')
stations_dict[0]

In [None]:
# Convert the measurements frame to a list with one dict per row, then show the first record.
# This uses `measures`, not `measures_clean`, so rows with NaN precipitation are included.
measures_dict = measures.to_dict(orient='records')
measures_dict[0]

In [None]:
# Use MetaData from SQLAlchemy to reflect the tables.
# The engine is passed to reflect() instead of being bound to the MetaData object:
# MetaData(bind=...) is deprecated in SQLAlchemy 1.4 and removed in 2.0, while
# reflect(bind=...) works across versions.
metadata = MetaData()
metadata.reflect(bind=engine)

In [None]:
# Save references to the `station` and `measurement` tables as
# `station_table` and `measurement_table`.
# autoload_with names the engine explicitly rather than relying on
# autoload=True with bound metadata, which is deprecated in SQLAlchemy 1.4.
station_table = sqlalchemy.Table('station', metadata, autoload_with=engine)
measurement_table = sqlalchemy.Table('measurement', metadata, autoload_with=engine)

In [None]:
# Insert all station records with one execute call (list of dicts, one per row).
conn.execute(station_table.insert(), stations_dict)

In [None]:
# Spot-check the inserted station rows.
# The SQL string is wrapped in text(): passing a bare string to execute() is
# deprecated in SQLAlchemy 1.4 and rejected in 2.0.
conn.execute(sqlalchemy.text("select * from station limit 5")).fetchall()

In [None]:
# Insert all measurement records with one execute call (list of dicts, one per row).
conn.execute(measurement_table.insert(), measures_dict)

In [None]:
# Spot-check the inserted measurement rows.
# The SQL string is wrapped in text(): passing a bare string to execute() is
# deprecated in SQLAlchemy 1.4 and rejected in 2.0.
conn.execute(sqlalchemy.text("select * from measurement limit 5")).fetchall()