Skip to content

Commit

Permalink
允许指定 title_xpath
Browse files: browse the repository at this point in the history
  • Loading branch information
kingname committed Sep 22, 2019
1 parent b02841f commit 00bc70c
Showing 1 changed file with 2 additions and 2 deletions.
4 changes: 2 additions & 2 deletions gne/__init__.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -9,11 +9,11 @@ def __init__(self):
self.author_extractor = AuthorExtractor()
self.time_extractor = TimeExtractor()

def extract(self, html, noise_node_list=None):
def extract(self, html, title_xpath='', noise_node_list=None):
element = pre_parse(html)
remove_noise_node(element, noise_node_list)
content = self.content_extractor.extract(element)
title = self.title_extractor.extract(element)
title = self.title_extractor.extract(element, title_xpath=title_xpath)
publish_time = self.time_extractor.extractor(element)
author = self.author_extractor.extractor(element)
return {'title': title,
Expand Down

0 comments on commit 00bc70c

Please sign in to comment.