In [1]:
# Stretch elements with the `.container` CSS class to the full page width.
# `display` and `HTML` come from the public IPython.display module; importing
# `display` from IPython.core.display is deprecated.
from IPython.display import display, HTML
display(HTML("<style>.container {width:100% !important; }</style>"))

# Database connection

In [2]:
import sqlite3
import pandas as pd
import os

## Typo example - bussssss.db

In [3]:
# Deliberately misspelled filename. sqlite3.connect() does not raise for a
# file that does not exist -- it creates a new, empty database and returns
# a Connection to it, so the typo goes unnoticed at this point.
conn = sqlite3.connect("bussssss.db")
conn

<sqlite3.Connection at 0x7f88bc79ea80>

### pd.read_sql("QUERY", CONNECTION)
* Query: SELECT * FROM sqlite_master

In [4]:
# sqlite_master is SQLite's built-in catalog of tables and indexes.
# For the mistyped database the result has no rows.
pd.read_sql("SELECT * FROM sqlite_master", conn)

Unnamed: 0,type,name,tbl_name,rootpage,sql


### Let's close this bad connection

In [5]:
# Done with the mistyped database; close it before `conn` is reassigned.
conn.close()

### Now let's open the correct bus.db

In [6]:
# Open the real database. The path is checked first because
# sqlite3.connect() would otherwise create a new, empty database file
# for a wrong name instead of raising an error.
path = "bus.db"
# The message names the missing file so a failure explains itself.
assert os.path.exists(path), f"database file not found: {path!r}"
conn = sqlite3.connect(path)
conn

<sqlite3.Connection at 0x7f88bd157a80>

In [7]:
# Keep the schema catalog in `df` so its columns can be inspected below.
df = pd.read_sql("SELECT * FROM sqlite_master", conn)
df

Unnamed: 0,type,name,tbl_name,rootpage,sql
0,table,boarding,boarding,2,"CREATE TABLE ""boarding"" (\n""index"" INTEGER,\n ..."
1,index,ix_boarding_index,boarding,3,"CREATE INDEX ""ix_boarding_index""ON ""boarding"" ..."
2,table,routes,routes,55,"CREATE TABLE ""routes"" (\n""index"" INTEGER,\n ""..."
3,index,ix_routes_index,routes,57,"CREATE INDEX ""ix_routes_index""ON ""routes"" (""in..."


### Extract sql column from the DataFrame

In [8]:
# Selecting one column yields a Series holding the CREATE statements;
# the notebook display abbreviates long strings with "...".
df["sql"]

0    CREATE TABLE "boarding" (\n"index" INTEGER,\n ...
1    CREATE INDEX "ix_boarding_index"ON "boarding" ...
2    CREATE TABLE "routes" (\n"index" INTEGER,\n  "...
3    CREATE INDEX "ix_routes_index"ON "routes" ("in...
Name: sql, dtype: object

### Let's iterate over df["sql"]

In [9]:
# Print each schema statement in full, followed by a blank line.
# sqlite_master stores NULL in the sql column for indexes that SQLite creates
# automatically (PRIMARY KEY / UNIQUE constraints); pandas surfaces those as
# None, and None + "\n" would raise TypeError, so drop them first.
for code in df["sql"].dropna():
    print(code + "\n")

CREATE TABLE "boarding" (
"index" INTEGER,
  "StopID" INTEGER,
  "Route" INTEGER,
  "Lat" REAL,
  "Lon" REAL,
  "DailyBoardings" REAL
)

CREATE INDEX "ix_boarding_index"ON "boarding" ("index")

CREATE TABLE "routes" (
"index" INTEGER,
  "OBJECTID" INTEGER,
  "trips_routes_route_id" INTEGER,
  "route_short_name" INTEGER,
  "route_url" TEXT,
  "ShapeSTLength" REAL
)

CREATE INDEX "ix_routes_index"ON "routes" ("index")



### Query: SELECT * FROM \<table_name\>

In [10]:
# Fetch every row and column of the routes table into a DataFrame.
pd.read_sql("SELECT * FROM routes", conn)

Unnamed: 0,index,OBJECTID,trips_routes_route_id,route_short_name,route_url,ShapeSTLength
0,0,63,8052,1,http://www.cityofmadison.com/Metro/schedules/R...,32379.426524
1,1,64,8053,2,http://www.cityofmadison.com/Metro/schedules/R...,96906.965571
2,2,65,8054,3,http://www.cityofmadison.com/Metro/schedules/R...,76436.645644
3,3,66,8055,4,http://www.cityofmadison.com/Metro/schedules/R...,64774.133485
4,4,67,8056,5,http://www.cityofmadison.com/Metro/schedules/R...,61216.722662
...,...,...,...,...,...,...
57,57,120,8109,78,http://www.cityofmadison.com/Metro/schedules/R...,95826.277218
58,58,121,8110,80,http://www.cityofmadison.com/Metro/schedules/R...,31831.761009
59,59,122,8111,81,http://www.cityofmadison.com/Metro/schedules/R...,26536.800591
60,60,123,8112,82,http://www.cityofmadison.com/Metro/schedules/R...,23287.980173


In [11]:
# Load the entire boarding table, then display only the first rows.
# The row cap is applied by pandas after all rows have been fetched.
df = pd.read_sql("SELECT * FROM boarding", conn)
df.head(20)

Unnamed: 0,index,StopID,Route,Lat,Lon,DailyBoardings
0,0,1163,27,43.073655,-89.385427,1.03
1,1,1163,47,43.073655,-89.385427,0.11
2,2,1163,75,43.073655,-89.385427,0.34
3,3,1164,6,43.106465,-89.340021,10.59
4,4,1167,3,43.077867,-89.369993,3.11
5,5,1167,4,43.077867,-89.369993,2.23
6,6,1167,10,43.077867,-89.369993,0.11
7,7,1167,38,43.077867,-89.369993,1.36
8,8,1169,3,43.089707,-89.329817,18.9
9,9,1169,37,43.089707,-89.329817,1.35


### Adding LIMIT to the query
* SELECT * FROM \<table_name\> LIMIT \<num_of_rows\>

In [12]:
# LIMIT caps the row count inside SQLite, so only 5 rows are returned.
pd.read_sql("SELECT * FROM boarding LIMIT 5", conn)

Unnamed: 0,index,StopID,Route,Lat,Lon,DailyBoardings
0,0,1163,27,43.073655,-89.385427,1.03
1,1,1163,47,43.073655,-89.385427,0.11
2,2,1163,75,43.073655,-89.385427,0.34
3,3,1164,6,43.106465,-89.340021,10.59
4,4,1167,3,43.077867,-89.369993,3.11


# How many people get on a bus in Madison every day?
- We are interested in the `boarding` table to answer this question.

In [13]:
# Answer using pandas: fetch every DailyBoardings value from the table,
# then add them up on the Python side.
qry = """
    SELECT DailyBoardings 
    FROM boarding
"""
df = pd.read_sql(qry, conn)
# One-column selection gives a Series, whose .sum() is the total.
bus_riders = df["DailyBoardings"]
bus_riders.sum()

55987.18

In [14]:
# Answer using SQL summarization: SUM() aggregates inside SQLite, so the
# query returns a single row holding the total.
qry = """
    SELECT SUM(DailyBoardings)
    FROM boarding
"""
pd.read_sql(qry, conn)

Unnamed: 0,SUM(DailyBoardings)
0,55987.18


# Go West - which bus should I take to go as far west as possible?
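- Smallest longitude: the `Lon` values in this data are negative (west of the prime meridian), so the most negative value is the farthest west.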

In [15]:
# West means smaller longitude, so sort ascending by Lon and keep only
# the first row; its Route column is the bus to take.
qry = """
    SELECT *
    FROM boarding
    ORDER BY Lon ASC
    LIMIT 1
"""
pd.read_sql(qry, conn)

Unnamed: 0,index,StopID,Route,Lat,Lon,DailyBoardings
0,3489,4400,55,42.995476,-89.564243,59.31


In [16]:
# Close the connection that was opened on bus.db.
conn.close()