In [1]:
import pandas as pd
import sqlite3 as sql

## Creating a connection to the weather.db database

In [2]:
# Tell sqlite3 which database file to use.
# The file is opened through a URI with mode=rw so that connect() raises
# sqlite3.OperationalError if the file is missing. With a plain path, sqlite3 would
# silently create a new, empty database and the first query would fail with "no such table".

conn = sql.connect('file:../data/weather.db?mode=rw', uri=True)

# A cursor lets us send SQL statements to the database and read back the rows they return

cur = conn.cursor()

## Checking what tables exist within a database

In [3]:
# sqlite_master is SQLite's built-in catalog; filtering on type='table' lists the table names.
# execute() returns the cursor itself, so fetchall() can be chained onto it to collect
# every row of the result into 'available_tables'.

available_tables = cur.execute(
    "SELECT name FROM sqlite_master WHERE type='table' ORDER BY name;"
).fetchall()

In [4]:
# Each result row comes back as a tuple, so every table name is wrapped in a one-element tuple
print(available_tables)

[('rainfall_2013',), ('weather_stations',)]


## Querying the 'weather_stations' table

In [5]:
# Peek at the data: LIMIT 5 stops the query after five rows,
# and fetchall() collects them into a list.

weather_stations = cur.execute("SELECT * FROM weather_stations LIMIT 5;").fetchall()

In [6]:
# What is the type of weather_stations? fetchall() gives back a plain Python list

type(weather_stations)

list

In [7]:
# Each row is a tuple; note that lat, lng and elevation come back as strings, not numbers
weather_stations

[('US1TNCH0002',
  'KINGSTON SPRINGS 0.3 ENE, TN US',
  '36.0984',
  '-87.0963',
  '177.1'),
 ('US1TNCH0001', 'ASHLAND CITY 5.6 ENE, TN US', '36.306', '-86.966', '232.9'),
 ('USC00403280',
  'FRANKLIN SEWAGE PLANT, TN US',
  '35.9417',
  '-86.8685',
  '199.6'),
 ('US1TNCH0007',
  'KINGSTON SPRINGS 1.4 SW, TN US',
  '36.0806',
  '-87.1165',
  '224.0'),
 ('US1TNCH0004',
  'KINGSTON SPRINGS 4.8 SW, TN US',
  '36.0556',
  '-87.1701',
  '238.0')]

## Using Pandas to place results into a DataFrame

In [8]:
# pd.read_sql runs the query on our open connection and returns the result as a DataFrame.
# There is no LIMIT this time, so every row of the weather_stations table is loaded.

weather_stations_df = pd.read_sql("SELECT * FROM weather_stations;", conn)

In [9]:
# Checking the type again - read_sql returns a DataFrame rather than a list

type(weather_stations_df)

pandas.core.frame.DataFrame

In [10]:
# head() shows the first 5 rows by default
weather_stations_df.head()

Unnamed: 0,station,name,lat,lng,elevation
0,US1TNCH0002,"KINGSTON SPRINGS 0.3 ENE, TN US",36.0984,-87.0963,177.1
1,US1TNCH0001,"ASHLAND CITY 5.6 ENE, TN US",36.306,-86.966,232.9
2,USC00403280,"FRANKLIN SEWAGE PLANT, TN US",35.9417,-86.8685,199.6
3,US1TNCH0007,"KINGSTON SPRINGS 1.4 SW, TN US",36.0806,-87.1165,224.0
4,US1TNCH0004,"KINGSTON SPRINGS 4.8 SW, TN US",36.0556,-87.1701,238.0


In [11]:
# shape is (number of rows, number of columns)
weather_stations_df.shape

(155, 5)

## Example of a query using the DISTINCT clause

In [12]:
# When building sql queries, it can help to write the query on its own and save it to a variable,
# then pass that variable to the function that runs it.
# Multi-line queries can use triple quotes (''' ''') to keep everything together as one string.
# DISTINCT is a keyword that applies to the whole SELECT list, not a function,
# so it is written without parentheses.

query = '''
SELECT DISTINCT elevation
FROM weather_stations;
'''

In [13]:
# Run the query stored in 'query' and load its result into a DataFrame
unique_elevations = pd.read_sql(query, conn)

In [14]:
# Long DataFrames are displayed truncated, with "..." standing in for the hidden rows
unique_elevations

Unnamed: 0,elevation
0,177.1
1,232.9
2,199.6
3,224.0
4,238.0
...,...
112,188.4
113,143.9
114,178.9
115,138.7


## Example of a query using the WHERE clause

In [15]:
# elevation is stored as text (the rows fetched with the cursor came back as strings such as '177.1').
# SQLite compares a text column against a number as text, not numerically, so without a cast
# '99.5' would count as greater than 200 while '1000.0' would not.
# CAST(... AS REAL) makes the comparison numeric.

query = '''
SELECT *
FROM weather_stations
WHERE CAST(elevation AS REAL) > 200;
'''

In [16]:
# Run the WHERE query: keep only the weather stations whose elevation is greater than 200

high_elevation = pd.read_sql(query, conn)

In [17]:
# Preview the first rows of the filtered result
high_elevation.head()

Unnamed: 0,station,name,lat,lng,elevation
0,US1TNCH0001,"ASHLAND CITY 5.6 ENE, TN US",36.306,-86.966,232.9
1,US1TNCH0007,"KINGSTON SPRINGS 1.4 SW, TN US",36.0806,-87.1165,224.0
2,US1TNCH0004,"KINGSTON SPRINGS 4.8 SW, TN US",36.0556,-87.1701,238.0
3,US1TNRB0003,"SPRINGFIELD 1.8 WSW, TN US",36.493,-86.9107,201.5
4,US1TNRB0002,"CROSS PLAINS 3.7 W, TN US",36.5502,-86.7476,223.1


## Example of a query using the BETWEEN clause

In [18]:
# BETWEEN is inclusive on both ends.
# elevation is stored as text (the rows fetched with the cursor came back as strings such as '177.1'),
# and a text column is compared against numbers as text. Without a cast, '21.5' would fall
# "between" 200 and 220 while '220.0' would be left out. CAST(... AS REAL) compares numerically.

query = '''
SELECT *
FROM weather_stations
WHERE CAST(elevation AS REAL) BETWEEN 200 AND 220;
'''

In [19]:
# Run the BETWEEN query: keep the weather stations whose elevation lies between 200 and 220

mid_elevation = pd.read_sql(query, conn)
mid_elevation.head()

Unnamed: 0,station,name,lat,lng,elevation
0,US1TNRB0003,"SPRINGFIELD 1.8 WSW, TN US",36.493,-86.9107,201.5
1,USC00406435,"NEAPOLIS EXPERIMENTAL STATION, TN US",35.7197,-86.9654,213.4
2,US1TNDV0102,"BELLE MEADE 1.2 ESE, TN US",36.094,-86.8341,200.9
3,US1TNDV0075,"BRENTWOOD 2.8 NE, TN US",36.0243,-86.7509,210.0
4,US1TNWL0010,"NOLENSVILLE 1.0 NNW, TN US",35.9597,-86.6575,216.4


## Same example, but using ORDER BY to sort on elevation

In [20]:
# Same filter as the BETWEEN example, with the result sorted by elevation.
# elevation is stored as text (the rows fetched with the cursor came back as strings such as '177.1'),
# so both the filter and the sort cast it to REAL; otherwise SQLite would compare and
# order the values as strings instead of as numbers.

query = '''
SELECT *
FROM weather_stations
WHERE CAST(elevation AS REAL) BETWEEN 200 AND 220
ORDER BY CAST(elevation AS REAL);
'''

In [21]:
# ORDER BY sorts ascending (ASC) when no direction is given; add DESC after the column to reverse it.
# Note: this assignment overwrites the mid_elevation DataFrame created by the earlier BETWEEN query.

mid_elevation = pd.read_sql(query, conn)
mid_elevation.head()

Unnamed: 0,station,name,lat,lng,elevation
0,US1TNDV0102,"BELLE MEADE 1.2 ESE, TN US",36.094,-86.8341,200.9
1,US1TNRB0035,"PLEASANT VIEW 2.8 ESE, TN US",36.3806,-86.9898,201.2
2,US1TNRB0003,"SPRINGFIELD 1.8 WSW, TN US",36.493,-86.9107,201.5
3,US1TNWN0068,"LEBANON 4.1 NW, TN US",36.2451,-86.3811,202.1
4,US1TNWL0070,"FRANKLIN 2.5 W, TN US",35.9285,-86.8872,202.1


## What's in the other table?

In [22]:
# Preview the second table: all columns of rainfall_2013, capped at five rows
query = """
SELECT *
FROM rainfall_2013
LIMIT 5;
"""

In [23]:
# Load the rainfall_2013 preview into a DataFrame and display it
rainfall_2013 = pd.read_sql(query, conn)
rainfall_2013.head()

Unnamed: 0,record_id,station,date,precip,avg_temp
0,1,US1TNCH0002,1/1/13,0.38,
1,2,US1TNCH0002,1/2/13,0.08,
2,3,US1TNCH0002,1/3/13,0.0,
3,4,US1TNCH0002,1/4/13,0.0,
4,5,US1TNCH0002,1/5/13,0.0,


## Example of using JOIN to combine tables

In [24]:
# LEFT JOIN keeps every rainfall_2013 row and attaches the weather_stations row
# that has the same station value. Because the join is written with ON and the
# query selects *, the station column of both tables appears in the result.
query = """
SELECT *
FROM rainfall_2013
LEFT JOIN weather_stations ON rainfall_2013.station = weather_stations.station
LIMIT 5;
"""

In [25]:
# Load the joined rows; the result carries the station column twice, once from each table
weather_rainfall = pd.read_sql(query, conn)
weather_rainfall.head()

Unnamed: 0,record_id,station,date,precip,avg_temp,station.1,name,lat,lng,elevation
0,1,US1TNCH0002,1/1/13,0.38,,US1TNCH0002,"KINGSTON SPRINGS 0.3 ENE, TN US",36.0984,-87.0963,177.1
1,2,US1TNCH0002,1/2/13,0.08,,US1TNCH0002,"KINGSTON SPRINGS 0.3 ENE, TN US",36.0984,-87.0963,177.1
2,3,US1TNCH0002,1/3/13,0.0,,US1TNCH0002,"KINGSTON SPRINGS 0.3 ENE, TN US",36.0984,-87.0963,177.1
3,4,US1TNCH0002,1/4/13,0.0,,US1TNCH0002,"KINGSTON SPRINGS 0.3 ENE, TN US",36.0984,-87.0963,177.1
4,5,US1TNCH0002,1/5/13,0.0,,US1TNCH0002,"KINGSTON SPRINGS 0.3 ENE, TN US",36.0984,-87.0963,177.1


## Another example using USING and other selection criteria

In [26]:
# USING(station) joins on the column both tables share and returns that column only once;
# the column name needs to be listed a single time.
# String literals belong in single quotes: SQLite treats double-quoted text as an identifier
# first and only falls back to reading it as a string when no such column exists.
# Because the WHERE clause tests weather_stations.name, rainfall rows without a matching
# station are filtered out, so this LEFT JOIN ends up behaving like an inner join.
# record_id is added as a tie-breaker so that LIMIT picks the same rows on every run
# when several rows share the same precip value.

query = '''
SELECT *
FROM rainfall_2013
LEFT JOIN weather_stations USING(station)
WHERE weather_stations.name LIKE '%NASHVILLE%'
AND precip > 0.1
ORDER BY rainfall_2013.precip, rainfall_2013.record_id
LIMIT 5;
'''

In [27]:
# Load the filtered, sorted join result. This reuses (and overwrites) the weather_rainfall name
# from the previous JOIN example.
weather_rainfall = pd.read_sql(query, conn)
weather_rainfall.head()

Unnamed: 0,record_id,station,date,precip,avg_temp,name,lat,lng,elevation
0,10092,USW00013897,1/14/13,0.11,,"NASHVILLE INTERNATIONAL AIRPORT, TN US",36.11889,-86.68917,182.9
1,10109,USW00013897,1/31/13,0.11,,"NASHVILLE INTERNATIONAL AIRPORT, TN US",36.11889,-86.68917,182.9
2,10117,USW00013897,2/8/13,0.11,,"NASHVILLE INTERNATIONAL AIRPORT, TN US",36.11889,-86.68917,182.9
3,10172,USW00013897,4/4/13,0.11,46.0,"NASHVILLE INTERNATIONAL AIRPORT, TN US",36.11889,-86.68917,182.9
4,10203,USW00013897,5/5/13,0.11,50.0,"NASHVILLE INTERNATIONAL AIRPORT, TN US",36.11889,-86.68917,182.9


## Best Practice - Close connections after use to prevent the tables from being locked

In [28]:
# Close the cursor first, then the connection it was created from.
# Neither can be used for further queries after this cell has run.
cur.close()
conn.close()