In [1]:
import sqlite3

In [2]:
sqlite_db = "test_db.sqlite"
conn = sqlite3.connect(sqlite_db)
cur = conn.cursor()

In [3]:
cur.execute('CREATE TABLE houses (field1 INTEGER PRIMARY KEY, sqft INTEGER, bdrms INTEGER, age INTEGER, price INTEGER);')

OperationalError: table houses already exists

In [4]:
conn.commit()

In [5]:
last_sale = (None,4000,5,22,619000)
cur.execute('INSERT INTO houses VALUES (?,?,?,?,?)',last_sale)
conn.commit()

In [6]:
recent_sales = [
    (None, 2390, 4, 34, 319000),
    (None, 1870, 3, 14, 289000),
    (None, 1505, 3, 90, 269000)
]

cur.executemany('INSERT INTO houses VALUES (?,?,?,?,?)', recent_sales)

conn.commit()

In [7]:
from numpy import genfromtxt

data = (genfromtxt('assets/datasets/housing-data.csv',delimiter=',',skip_header=1)).tolist()

for d in data:
    d.insert(0,None)

In [8]:
conn.commit()

In [9]:
results = cur.execute("SELECT * FROM houses WHERE bdrms = 4")

In [10]:
results.fetchall()

[(2, 2390, 4, 34, 319000), (6, 2390, 4, 34, 319000)]

In [12]:
import pandas as pd
data = pd.read_csv('assets/datasets/housing-data.csv')
data.head()

Unnamed: 0,sqft,bdrms,age,price
0,2104,3,70,399900
1,1600,3,28,329900
2,2400,3,44,369000
3,1416,2,49,232000
4,3000,4,75,539900


In [13]:
data.to_sql('houses_pandas',
           con = conn,
           if_exists='replace')

In [14]:
pd.read_sql('SELECT * FROM houses_pandas LIMIT 10;',con = conn)

Unnamed: 0,index,sqft,bdrms,age,price
0,0,2104,3,70,399900
1,1,1600,3,28,329900
2,2,2400,3,44,369000
3,3,1416,2,49,232000
4,4,3000,4,75,539900
5,5,1985,4,61,299900
6,6,1534,3,12,314900
7,7,1427,3,57,198999
8,8,1380,3,14,212000
9,9,1494,3,15,242500


## SQL SYNTAX

In [16]:
pd.read_sql('SELECT AVG(price), SUM(price)/2, MAX(price) FROM houses_pandas WHERE bdrms = 2', conn)

Unnamed: 0,AVG(price),SUM(price)/2,MAX(price)
0,280866.666667,842600,368500


In [62]:
from __future__ import division
q1 = "SELECT AVG(price) from houses_pandas WHERE bdrms=1"
q2 = "SELECT AVG(price) / 2 from houses_pandas WHERE bdrms=2"
q3 = "SELECT bdrms,COUNT(bdrms) as count from houses_pandas GROUP BY bdrms ORDER BY count DESC"
q4 = "SELECT COUNT(*) FROM houses_pandas WHERE bdrms=3"
q = "SELECT bdrms, COUNT(bdrms) * 100 / (SELECT COUNT(*) FROM houses_pandas) AS pct FROM houses_pandas where bdrms=3" % total
q6 = "SELECT MAX(age) FROM houses_pandas WHERE bdrms=3"
q7 = "SELECT MIN(age) FROM houses_pandas"
q8 = "SELECT AVG(age) from houses_pandas"
q9 = "SELECT AVG(age) from houses_pandas GROUP BY bdrms"
pd.read_sql(q,conn)

Unnamed: 0,bdrms,pct
0,3,53


## Connect to external database

In [17]:
from sqlalchemy import create_engine

In [20]:
connect_param = 'postgresql://dsi_student:gastudents@dsi.c20gkj5cvu3l.us-east-1.rds.amazonaws.com:5432/northwind'
engine = create_engine(connect_param)
pd.read_sql("SELECT * FROM pg_catalog.pg_tables WHERE schemaname='public'", con=engine)

Unnamed: 0,schemaname,tablename,tableowner,tablespace,hasindexes,hasrules,hastriggers
0,public,categories,dsi,,True,False,False
1,public,customercustomerdemo,dsi,,True,False,False
2,public,customerdemographics,dsi,,True,False,False
3,public,customers,dsi,,True,False,False
4,public,employees,dsi,,True,False,False
5,public,employeeterritories,dsi,,True,False,False
6,public,order_details,dsi,,True,False,False
7,public,orders,dsi,,True,False,False
8,public,products,dsi,,True,False,False
9,public,region,dsi,,True,False,False
