Skip to content

Commit b00a238

Browse files
committed
update
1 parent 36e8c54 commit b00a238

File tree

4 files changed

+16
-12
lines changed

4 files changed

+16
-12
lines changed

.idea/HtmlParser.iml

Lines changed: 1 addition & 1 deletion
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

.idea/workspace.xml

Lines changed: 6 additions & 3 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

html_parser.py

Lines changed: 5 additions & 6 deletions
Original file line number | Diff line number | Diff line change
@@ -9,7 +9,6 @@
99

1010
class HtmlParser:
1111
class CustomHTMLParser(HTMLParser):
12-
1312
def __init__(self):
1413
super().__init__()
1514
self.root = HtmlTag('root', {})
@@ -51,9 +50,6 @@ def __init__(self, url=None, html_s=None):
5150
if url:
5251
html_s = requests.get(url).content.decode('utf-8')
5352
self.content = html_s
54-
self.parse()
55-
56-
def parse(self):
5753
p = self.CustomHTMLParser()
5854
self.root = p.feed(self.content)
5955

@@ -95,6 +91,10 @@ def select(self, cmd):
9591
data = q.get()
9692
elem = data['element']
9793
selectors = data['selectors']
94+
95+
for child in elem.childrens:
96+
q.put({'element': child, 'selectors': selectors})
97+
9898
s_classes = None
9999
if '.' in selectors[0]:
100100
s_classes = self.get_group_list('.', selectors[0]) #list of classes
@@ -108,8 +108,7 @@ def select(self, cmd):
108108
if '#' in tag:
109109
tag = tag[:tag.find('#')]
110110

111-
for child in elem.childrens:
112-
q.put({'element': child, 'selectors': selectors})
111+
113112

114113
tag_check = True
115114
classes_check = True

main.py

Lines changed: 4 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -1,7 +1,9 @@
1-
import requests
21
from html_parser import HtmlParser
32

3+
p = HtmlParser(url='https://easypassword.ru')
4+
print(p.root)
5+
46
with open('index.html', 'r', encoding='utf-8') as f:
57
s = str(f.read())
68
p = HtmlParser(html_s=s)
7-
print(p.root.select('.logo p')[0].text)
9+
print(p.root)

0 commit comments

Comments
 (0)