# Dow Database Solution

The database table in the file 'dow2008.csv' has records holding the
daily performance of the Dow Jones Industrial Average from the
beginning of 2008.  The table has the following columns (separated by
commas in the file; shown here aligned with spaces for readability).

    DATE        OPEN      HIGH      LOW       CLOSE     VOLUME      ADJ_CLOSE
    2008-01-02  13261.82  13338.23  12969.42  13043.96  3452650000  13043.96
    2008-01-03  13044.12  13197.43  12968.44  13056.72  3429500000  13056.72
    2008-01-04  13046.56  13049.65  12740.51  12800.18  4166000000  12800.18
    2008-01-07  12801.15  12984.95  12640.44  12827.49  4221260000  12827.49
    2008-01-08  12820.9   12998.11  12511.03  12589.07  4705390000  12589.07
    2008-01-09  12590.21  12814.97  12431.53  12735.31  5351030000  12735.31

1. Create a database table that has the same structure (use real
   for all the columns except the date column).

2. Insert all the records from dow2008.csv into the database.

3. Select (and print out) the records from the database that have a volume
   greater than 5.5 billion.   How many are there?

Bonus
~~~~~
1. Select the records which have a spread between high and low that is greater
   than 4% and sort them in order of that spread.

2. Select the records which have an absolute difference between open and close
   that is greater than 1% (of the open) and sort them in order of that
   difference.

In [1]:
import sqlite3 as db

# 1. Create an in-memory SQLite database holding a single, empty 'dow' table.

conn = db.connect(':memory:')
c = conn.cursor()

# One text column for the date followed by six numeric columns, in the same
# order as the fields of the CSV file.
sql = (
    "create table dow(date date, open float, high float, low float,\n"
    "  close float, volume float, adj_close float)"
)
c.execute(sql)

<sqlite3.Cursor at 0x103a31f10>

In [2]:
# 2. Load every record from 'dow2008.csv' into the 'dow' table.

sql = "insert into dow values(?,?,?,?,?,?,?)"

# The context manager closes the file even if an insert raises.
with open('dow2008.csv') as f:
    headers = f.readline()  # first line holds the column names, not data

    # Each remaining line becomes one 7-element parameter list.  Blank lines
    # are skipped so they do not fail with a wrong number of bindings.
    records = (line.strip().split(',') for line in f if line.strip())
    c.executemany(sql, records)

conn.commit()

In [3]:
# 3. Print the records whose volume is greater than 5.5 billion, then the
#    number of such records.

sql = "select * from dow where volume > ?"
c.execute(sql, (5.5e9,))

rows = c.fetchall()
for row in rows:
    # Single-argument print() behaves the same on Python 2 and Python 3.
    print(row)
N = len(rows)

# The two spaces after '=' reproduce the output of the original
# comma-separated print statement.
print("Number =  %d" % N)

(u'2008-01-18', 12159.94, 12441.85, 11953.71, 12099.3, 6004840000.0, 12099.3)
(u'2008-01-22', 12092.72, 12167.42, 11508.74, 11971.19, 6544690000.0, 11971.19)
(u'2008-01-24', 12272.69, 12522.82, 12114.83, 12378.61, 5735300000.0, 12378.61)
(u'2008-03-17', 11946.45, 12119.69, 11650.44, 11972.25, 5683010000.0, 11972.25)
(u'2008-03-20', 12102.43, 12434.34, 12024.68, 12361.32, 6145220000.0, 12361.32)
(u'2008-06-27', 11452.85, 11556.33, 11248.48, 11346.51, 6208260000.0, 11346.51)
(u'2008-07-01', 11344.64, 11465.79, 11106.65, 11382.26, 5846290000.0, 11382.26)
(u'2008-07-08', 11225.03, 11459.52, 11101.19, 11384.21, 6034110000.0, 11384.21)
(u'2008-07-10', 11148.01, 11351.24, 11006.01, 11229.02, 5840430000.0, 11229.02)
(u'2008-07-11', 11226.17, 11292.04, 10908.64, 11100.54, 6742200000.0, 11100.54)
(u'2008-07-15', 11050.8, 11201.67, 10731.96, 10962.54, 7363640000.0, 10962.54)
(u'2008-07-16', 10961.89, 11308.41, 10831.61, 11239.28, 6738630400.0, 11239.28)
(u'2008-07-17', 11238.39, 11538.5, 11118.46

In [4]:
# Bonus 1
# Select the records whose high-low spread, relative to the low, is greater
# than 4%, ordered by that spread (ascending, the SQL default).
sql = "select * from dow where (high-low)/low > ? order by (high-low)/low"
c.execute(sql, (0.04,))

rows = c.fetchall()
for row in rows:
    # Single-argument print() behaves the same on Python 2 and Python 3.
    print(row)
N = len(rows)
# The two spaces after ':' reproduce the output of the original
# comma-separated print statement.
print("Bonus 1 number of rows:  %d" % N)

(u'2008-09-11', 11264.44, 11461.15, 11018.72, 11433.71, 6869249600.0, 11433.71)
(u'2008-01-25', 12391.7, 12590.69, 12103.61, 12207.17, 4882250000.0, 12207.17)
(u'2008-03-17', 11946.45, 12119.69, 11650.44, 11972.25, 5683010000.0, 11972.25)
(u'2008-09-26', 11019.04, 11218.48, 10781.37, 11143.13, 5383610000.0, 11143.13)
(u'2008-01-18', 12159.94, 12441.85, 11953.71, 12099.3, 6004840000.0, 12099.3)
(u'2008-08-08', 11432.09, 11808.49, 11344.23, 11734.32, 4966810000.0, 11734.32)
(u'2008-12-10', 8693.0, 8942.46, 8589.86, 8761.42, 5942130000.0, 8761.42)
(u'2008-11-04', 9323.89, 9711.46, 9323.89, 9625.28, 5531290000.0, 9625.28)
(u'2008-09-23', 11015.69, 11214.65, 10763.77, 10854.17, 5185730000.0, 10854.17)
(u'2008-01-17', 12467.05, 12597.85, 12089.38, 12159.21, 5303130000.0, 12159.21)
(u'2008-11-07', 8696.03, 9032.54, 8661.22, 8943.81, 4931640000.0, 8943.81)
(u'2008-07-15', 11050.8, 11201.67, 10731.96, 10962.54, 7363640000.0, 10962.54)
(u'2008-07-16', 10961.89, 11308.41, 10831.61, 11239.28, 6738

In [5]:
# Bonus 2
# Select the records whose absolute open-close difference, relative to the
# open, is greater than 1%, ordered by that ratio (ascending, the SQL default).
sql = "select * from dow where abs(open-close)/open > ? order by abs(open-close)/open"
c.execute(sql, (0.01,))

rows = c.fetchall()
for row in rows:
    # Single-argument print() behaves the same on Python 2 and Python 3.
    print(row)
N = len(rows)
# The two spaces after ':' reproduce the output of the original
# comma-separated print statement.
print("Bonus 2 number of rows:  %d" % N)

(u'2008-01-22', 12092.72, 12167.42, 11508.74, 11971.19, 6544690000.0, 11971.19)
(u'2008-06-02', 12637.67, 12645.4, 12385.76, 12503.82, 3714320000.0, 12503.82)
(u'2008-06-18', 12158.68, 12212.33, 11947.07, 12029.06, 4573570000.0, 12029.06)
(u'2008-02-12', 12241.56, 12524.12, 12207.9, 12373.41, 4044640000.0, 12373.41)
(u'2008-12-17', 8921.91, 9001.96, 8701.13, 8824.34, 5907380000.0, 8824.34)
(u'2008-07-11', 11226.17, 11292.04, 10908.64, 11100.54, 6742200000.0, 11100.54)
(u'2008-05-23', 12620.9, 12637.43, 12420.2, 12479.63, 3516380000.0, 12479.63)
(u'2008-09-26', 11019.04, 11218.48, 10781.37, 11143.13, 5383610000.0, 11143.13)
(u'2008-08-19', 11478.09, 11501.45, 11260.53, 11348.55, 4159760000.0, 11348.55)
(u'2008-02-21', 12426.85, 12545.79, 12225.36, 12284.3, 3696660000.0, 12284.3)
(u'2008-01-09', 12590.21, 12814.97, 12431.53, 12735.31, 5351030000.0, 12735.31)
(u'2008-12-23', 8518.65, 8647.6, 8376.8, 8419.49, 4051970000.0, 8419.49)
(u'2008-08-12', 11781.7, 11830.39, 11541.43, 11642.47, 471