/
db.py
71 lines (48 loc) · 1.67 KB
/
db.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
import os.path
import sqlite3
import Scraper
import sys
def create_db(db_path='reellog.db'):
    """Create the SQLite database and its ``reellog`` table.

    The table has the columns ``lure``, ``body``, ``location``, ``species``
    (text), ``level`` (integer), ``weight`` (real) and ``class`` (text).
    A UNIQUE constraint spanning every column makes inserting an exact
    duplicate row raise ``sqlite3.IntegrityError``.

    Args:
        db_path: Path of the SQLite database file. Defaults to
            ``'reellog.db'`` in the current working directory.

    Raises:
        sqlite3.OperationalError: If the ``reellog`` table already exists.
    """
    conn = sqlite3.connect(db_path)
    try:
        conn.execute(
            '''CREATE TABLE reellog
               (lure text, body text, location text, species text,
                level integer, weight real, class text,
                unique(lure, body, location, species, level, weight, class))'''
        )
        conn.commit()
    finally:
        # Always release the file handle, even when CREATE TABLE fails.
        conn.close()
def sample_db_entry(db_path='reellog.db'):
    """Insert one hard-coded example row into the ``reellog`` table.

    Intended as a manual smoke test of the database layout.

    Args:
        db_path: Path of the SQLite database file. Defaults to
            ``'reellog.db'`` in the current working directory.

    Raises:
        sqlite3.IntegrityError: If the table's UNIQUE constraint rejects the
            row, e.g. because the sample row was already inserted.
        sqlite3.OperationalError: If the ``reellog`` table does not exist.
    """
    sample_row = (
        'Culprit Worm',
        'Amazon River',
        'Baia de Santa Rosa',
        'Matrincha',
        6,
        0.062,
        'Wimpy III',
    )
    conn = sqlite3.connect(db_path)
    try:
        # Bound parameters instead of string formatting: no quoting issues,
        # and the level/weight values are passed with their real types.
        conn.execute(
            "INSERT INTO reellog VALUES (?, ?, ?, ?, ?, ?, ?)", sample_row
        )
        conn.commit()
    finally:
        # Close the connection even when the insert is rejected.
        conn.close()
def parse_and_store(html_file_path, db_path='reellog.db'):
    """Scrape an HTML file and insert the resulting rows into ``reellog``.

    Each row returned by ``Scraper.scrape`` is inserted individually. A row
    that is inserted is echoed with a ``+`` prefix; a row rejected with
    ``sqlite3.IntegrityError`` is echoed with a ``=`` prefix and skipped.
    Finally the number of rows added to the table is printed.

    Args:
        html_file_path: Path of the HTML file handed to ``Scraper.scrape``.
        db_path: Path of the SQLite database file. Defaults to
            ``'reellog.db'`` in the current working directory.
    """
    conn = sqlite3.connect(db_path)
    try:
        c = conn.cursor()
        c.execute("SELECT COUNT(*) from reellog")
        (old_entry_count,) = c.fetchone()

        for row in Scraper.scrape(html_file_path):
            # NOTE(review): `row` is spliced into the SQL text verbatim, so it
            # is presumably a pre-quoted, comma-separated value list — confirm
            # against Scraper.scrape. Scraped HTML is untrusted input; if the
            # scraper can yield a sequence of values instead, switch this to a
            # parameterized INSERT with `?` placeholders.
            command = "INSERT INTO reellog VALUES (%s)" % row
            try:
                c.execute(command)
            except sqlite3.IntegrityError:
                print('= %s' % row)
            else:
                print('+ %s' % row)

        conn.commit()
        c.execute("SELECT COUNT(*) from reellog")
        (new_entry_count,) = c.fetchone()
    finally:
        # Release the database handle even if scraping or an INSERT fails.
        conn.close()

    print("%i new entries added" % (new_entry_count - old_entry_count))
if __name__ == '__main__':
    # Command-line entry point: expects exactly one argument, the path of the
    # HTML file to import into the database.
    cli_args = sys.argv[1:]
    if len(cli_args) != 1:
        print("Need one argument: path to html_file", file=sys.stderr)
        sys.exit(1)

    # Create the database on first use.
    db_present = os.path.isfile('reellog.db')
    if not db_present:
        print('No reellog.db found, creating')
        create_db()

    parse_and_store(cli_args[0])
    # sample_db_entry()
    print('Done')