-
Notifications
You must be signed in to change notification settings - Fork 2
/
Parser.py
48 lines (41 loc) · 1019 Bytes
/
Parser.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
import requests
import csv
# Address requested by get_body().
url = "https://www.vocabulary.com/lists/145774"

# Text of the downloaded page; assigned by get_body() and read by
# write_words_to_file().
body = ""

# Scraped results: parse_words() appends to both lists and save_data()
# pairs them up by position.
words = []
definitions = []
def get_body():
    """Download the page at ``url`` and store its text in ``body``.

    Sends a GET request to the module-level ``url`` and assigns the decoded
    response text to the module-level ``body``.

    Raises:
        requests.exceptions.RequestException: if the connection fails, the
            request times out, or the server answers with a 4xx/5xx status.
    """
    global body
    # requests applies no timeout by default, so without an explicit limit a
    # stalled server would block the script indefinitely.
    response = requests.get(url, timeout=30)
    # Fail loudly rather than storing an error page as if it were the list.
    response.raise_for_status()
    body = response.text
def write_words_to_file():
    """Write the module-level ``body`` string to ``site_html.txt``.

    The file is created (or truncated) relative to the current working
    directory and is opened with the platform's default text encoding.
    """
    with open("site_html.txt", mode="w") as html_file:
        html_file.write(body)
def parse_words():
    """Extract words and definitions from the cached page ``site_html.txt``.

    The file is scanned line by line:

    * A line containing ``word dynamictext`` contributes a word: the third
      ``/``-separated segment of the line, cut at the first ``"``.
    * Any other line containing ``definition`` contributes a definition: the
      second ``>``-separated segment of the line, cut at the first ``<``.
      The first two such lines in the file are counted but never captured.

    Results are appended to the module-level ``words`` and ``definitions``
    lists. Matching lines that lack the expected delimiters are skipped
    instead of raising ``IndexError``.

    Raises:
        FileNotFoundError: if ``site_html.txt`` does not exist.
    """
    # Number of "definition" lines seen so far (captured or not).
    definition_lines_seen = 0
    with open("site_html.txt", "r") as f:
        for line in f:
            if "word dynamictext" in line:
                segments = line.split('/')
                # Guard against marker text on a line without the link path.
                if len(segments) > 2:
                    words.append(segments[2].split('"')[0])
            elif "definition" in line:
                # The first two matches are skipped -- presumably page
                # boilerplate rather than list entries; TODO confirm.
                if definition_lines_seen > 1:
                    segments = line.split('>')
                    # Guard against "definition" appearing outside a tag.
                    if len(segments) > 1:
                        definitions.append(segments[1].split('<')[0])
                definition_lines_seen += 1
def save_data():
    """Write the scraped words and definitions to ``data.csv``.

    The file receives the header row ``ID, Word, Definition`` followed by one
    row per entry of the module-level ``words`` list, numbered from 1 and
    paired by position with the module-level ``definitions`` list.

    Raises:
        IndexError: if ``definitions`` has fewer entries than ``words``. The
            check runs before the file is opened, so an existing ``data.csv``
            is left untouched in that case.
    """
    if len(definitions) < len(words):
        raise IndexError(
            f"{len(words)} words but only {len(definitions)} definitions"
        )
    # The csv module requires newline="" on the file object; without it,
    # platforms that translate "\n" on write end up with "\r\r\n" row
    # endings, which appear as blank rows between records.
    with open("data.csv", "w", newline="") as f:
        writer = csv.writer(f)
        writer.writerow(["ID", "Word", "Definition"])
        writer.writerows(
            [row_id, word, definition]
            for row_id, (word, definition) in enumerate(
                zip(words, definitions), start=1
            )
        )
# Pipeline: download the page, cache it on disk, parse the cached copy, then
# export the results as CSV.
get_body()
# parse_words() reads site_html.txt, so the freshly downloaded body has to be
# written there first; otherwise the parse works on a stale file, or fails
# with FileNotFoundError when no file exists yet.
write_words_to_file()
parse_words()
save_data()