-
Notifications
You must be signed in to change notification settings - Fork 1
/
demo.py
100 lines (92 loc) · 3.48 KB
/
demo.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
"""
demo.py - a web scraping demo using Beautiful Soup and Requests
by Brian Sauer for CincyPy
"""
import requests
from bs4 import BeautifulSoup
import datetime
class KenoDrawing(object):
    """
    KenoDrawing: a simple record describing one Keno drawing
    Attributes:
        - date    : when the drawing took place
        - ID      : the Keno drawing number
        - numbers : the numbers drawn
        - booster : the Keno booster value
    All values are stored exactly as passed in; nothing is validated or converted.
    """

    def __init__(self, date, ID, numbers, booster):
        """
        KenoDrawing.__init__(): stores the values that make up a Keno drawing
        Params:
            - date (datetime) : a datetime representing the time of the drawing
            - ID (string) : the Keno drawing number
            - numbers (list) : the numbers for the keno draw
            - booster : the Keno booster value
        Returns:
            - Nothing
        """
        self.date, self.ID = date, ID
        self.numbers, self.booster = numbers, booster

    def __str__(self):
        """
        KenoDrawing.__str__(): builds a human readable summary of this drawing
        Params:
            - Nothing
        Returns:
            - (string) : "<ID> ==> <numbers> : <booster>" (the date is not included)
        """
        shown = (self.ID, self.numbers, self.booster)
        return "{} ==> {} : {}".format(*shown)
def get_html(url, timeout=30):
    """
    get_html() : fetches HTML from a URL using the requests library
    Params:
        - url (string) : url to pull from
        - timeout (number or tuple) : passed straight to requests.get();
          how long to wait for the server, in seconds (default 30)
    Returns:
        - (string) : the decoded body of the response
    Raises:
        - requests.exceptions.HTTPError : the server answered with a 4xx/5xx status
        - requests.exceptions.RequestException : any other failure reported by
          requests (connection error, timeout, ...)
    """
    # Without an explicit timeout, requests waits indefinitely on a stalled server.
    response = requests.get(url, timeout=timeout)
    # Fail loudly on an error status instead of handing an error page to the parser.
    response.raise_for_status()
    return response.text
def get_drawings(year, month, day):
    """
    get_drawings() : scrapes paginated tables to get a day's Keno drawings
    Pages are requested one after another, starting at page 1. Scraping stops
    when a page repeats a drawing ID that was already collected, when a page
    contributes no new drawings, or when a page has no "keno_drawings" table.
    Params:
        - year (int) : the year for the drawing
        - month (int) : the month for the drawing
        - day (int) : the day for the drawing
    Returns:
        - (list) : a list of KenoDrawing objects. ID, numbers and booster hold
          the text of the table cells as returned by Beautiful Soup's .string
          (they are not converted to int); date is a datetime for the given day.
    Raises:
        - ValueError : if year/month/day do not form a valid date
    """
    url_template = (
        "https://www.ohiolottery.com/WinningNumbers/KenoDrawings/"
        "KenoDrawingsArchive?date={}/{}/{}&page={}"
    )
    # Every row shares the same date, so build it once. Doing it up front also
    # rejects an invalid date before any request is sent.
    date = datetime.datetime(year, month, day)

    drawings = []
    id_set = set()
    page = 1
    while True:
        # fetch and parse this page
        html = get_html(url_template.format(month, day, year, page))
        soup = BeautifulSoup(html, "html.parser")

        # find a table with a css class of keno_drawings
        table_soup = soup.find("table", class_="keno_drawings")
        if table_soup is None:
            # nothing to scrape here; stop instead of failing on None
            print("No keno_drawings table found on page {}, stopping.".format(page))
            break

        # the markup may not contain a <tbody>; fall back to the table itself
        body = table_soup.tbody if table_soup.tbody is not None else table_soup

        added = 0
        repeated = False
        for tr in body.find_all("tr"):
            tds = tr.find_all("td")
            # a drawing row has exactly 22 td's (ID, 20 numbers, booster);
            # anything else (e.g. a header row) is skipped
            if len(tds) != 22:
                print("Skipping: row has only " + str(len(tr.contents)) + " children.")
                continue

            ID = tds[0].string
            # a drawing ID we already hold means the pages have started repeating
            if ID in id_set:
                print("ID {} already set, breaking on page {}.".format(ID, page))
                repeated = True
                break

            id_set.add(ID)
            numbers = [td.string for td in tds[1:21]]
            booster = tds[21].string
            drawings.append(KenoDrawing(date, ID, numbers, booster))
            added += 1

        # a page that adds nothing new would otherwise keep the loop running forever
        if repeated or added == 0:
            break
        page += 1
    return drawings
if __name__ == "__main__":
    # Demo run: scrape the drawings for February 1, 2018 and report how many were found.
    drawings = get_drawings(year=2018, month=2, day=1)
    summary = "Successfully scraped {} drawings.".format(len(drawings))
    print(summary)
    print("Done")