Skip to content

Commit

Permalink
Fix some bugs
Browse files Browse the repository at this point in the history
  • Loading branch information
hiltay committed Feb 10, 2023
1 parent 41efc6d commit 33d44ae
Show file tree
Hide file tree
Showing 3 changed files with 37 additions and 39 deletions.
3 changes: 3 additions & 0 deletions hexo_circle_of_friends/fc_settings.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -72,6 +72,9 @@ BLOCK_SITE: [
# ^http://, # http开头的全部屏蔽
]

# 每个link主页最多获取几篇文章,此值越大,则抓取的文章上限越多,相应地运行速度也会降低,反之亦然
# 请设置一个正整数
MAX_POSTS_NUM: 5

# 启用HTTP代理,此项设为true,并且需要添加一个环境变量,名称为PROXY,值为[IP]:[端口],比如:192.168.1.106:8080
HTTP_PROXY: false
Expand Down
65 changes: 31 additions & 34 deletions hexo_circle_of_friends/spiders/hexo_circle_of_friends.py
Original file line number Diff line number Diff line change
def start_requests(self):
    """
    Generate the crawl's initial requests.

    Builds the friend-page start urls via init_start_urls() and yields one
    Request per url, attaching each url's page theme in the request meta so
    friend_poor_parse knows which parser rules to apply.

    Bug fix: the fallback branch referenced an undefined name ``li``
    (guaranteed NameError). It now drains ``self.friend_poor`` and requests
    each queued friend's link — queue items are [name, link, avatar] lists
    (see get_url.handle()), so index 1 is the link. TODO(review): confirm
    whether this fallback branch also needs a "theme" meta entry.
    """
    # 初始化起始请求链接 — build the list of friend-page start urls
    friendpage_link, friendpage_theme = self.init_start_urls()
    self.start_urls.extend(friendpage_link)
    if self.start_urls:
        for i, url in enumerate(self.start_urls):
            logger.info(f"起始url: {url}")
            yield Request(url, callback=self.friend_poor_parse, meta={"theme": friendpage_theme[i]})
    elif not self.friend_poor.empty():
        # No configured start urls, but friends were injected directly into
        # the queue — crawl each queued friend's link instead.
        while not self.friend_poor.empty():
            friend = self.friend_poor.get()
            yield Request(friend[1], callback=self.friend_poor_parse)
def init_start_urls(self):
friendpage_link = []
Expand Down Expand Up @@ -203,33 +207,26 @@ def post_feed_parse(self, response):
d = feedparser.parse(response.text)
version = d.version
entries = d.entries
l = len(entries) if len(entries) < 5 else 5
success_num = 0
try:
init_post_info = self.init_post_info(friend, version)
for i in range(l):
entry = entries[i]
for entry in entries:
if success_num > self.settings["MAX_POSTS_NUM"]:
break
# 标题
title = entry.title
# 链接
link = entry.link
self.process_link(link, friend[1])
# 创建时间
try:
created = entry.published_parsed
except:
try:
created = entry.created_parsed
except:
created = entry.updated_parsed
created = entry.get("published_parsed", entry.get("created_parsed", entry.get("updated_parsed")))
if not created:
continue
entrycreated = "{:4d}-{:02d}-{:02d}".format(created[0], created[1], created[2])
# 更新时间
try:
updated = entry.updated_parsed
except:
try:
updated = entry.created_parsed
except:
updated = entry.published_parsed
updated = entry.get("updated_parsed", entry.get("created_parsed", entry.get("published_parsed")))
if not updated:
continue
entryupdated = "{:4d}-{:02d}-{:02d}".format(updated[0], updated[1], updated[2])

yield self.generate_postinfo(
Expand Down Expand Up @@ -394,19 +391,19 @@ def process_theme_postinfo(self, friend, links, titles, createds, updateds, rule
:param updateds: 解析出的文章更新时间列表
:param rule: 来自于哪个解析器(解析规则)
"""
# 文章url不超过5篇
l = len(links) if len(links) < 5 else 5
# 文章url不超过MAX_POSTS_NUM篇
length = len(links) if len(links) < self.settings["MAX_POSTS_NUM"] else self.settings["MAX_POSTS_NUM"]
# 处理标题列表
titles = self.process_title(titles, l)
titles = self.process_title(titles, length)
# 处理创建时间和更新时间列表
createds, updateds = self.process_time(createds, updateds, l)
createds, updateds = self.process_time(createds, updateds, length)
# 初始化文章信息数据
init_post_info = self.init_post_info(friend, rule)
# 如果既没有创建时间也没有更新时间则丢弃
if not createds and not updateds:
raise
# 拼接文章信息
for i in range(l):
for i in range(length):
link = self.process_link(links[i], friend[1])
yield self.generate_postinfo(
init_post_info,
Expand All @@ -430,28 +427,28 @@ def process_link(self, link, domain):
link = domain + link.lstrip("/")
return link

def process_title(self, titles, length):
    """
    Clean article titles and normalize the list to at most ``length`` items.

    Each title has carriage returns / newlines removed and surrounding
    whitespace stripped (in place). When there are more article urls than
    titles, the missing entries are padded with the placeholder "无题"
    ("untitled").

    :param titles: list of raw article titles (may be empty or None)
    :param length: maximum list length (driven by the article url list)
    :return: cleaned list of at most ``length`` titles, or None when no
             titles were supplied
    """
    if not titles:
        return None
    for i in range(length):
        if i < len(titles):
            titles[i] = titles[i].replace("\r", "").replace("\n", "").strip()
        else:
            # A url exists but its title does not — fill with "无题".
            titles.append("无题")
    return titles[:length]

def process_time(self, createds, updateds, lenth):
def process_time(self, createds, updateds, length):
"""
将创建时间和更新时间格式化,并处理为长度统一且不超过lenth的数组并返回
:param createds: 创建时间列表
:param updateds: 更新时间列表
:param lenth: 列表最大长度限制(取决于文章url列表)
:param length: 列表最大长度限制(取决于文章url列表)
"""
# if both list are empty,return as fast as possible.
if not createds and not updateds:
Expand All @@ -461,7 +458,7 @@ def process_time(self, createds, updateds, lenth):
u_len = len(updateds)
co = min(c_len, u_len)
# 格式化长度
for i in range(lenth):
for i in range(length):
if i < co:
# 交集部分
createds[i] = createds[i].replace("\r", "").replace("\n", "").strip()
Expand All @@ -475,13 +472,13 @@ def process_time(self, createds, updateds, lenth):
createds[i] = createds[i].replace("\r", "").replace("\n", "").strip()
updateds.append(createds[i])
else:
# 长度超出createds和updateds且小于lenth,用当前时间填充
# 长度超出createds和updateds且小于length,用当前时间填充
createds.append(self.today)
updateds.append(self.today)
# 格式化时间
format_time(createds)
format_time(updateds)
return createds[:lenth], updateds[:lenth]
return createds[:length], updateds[:length]

def generate_postinfo(self, init_post_info, title, created, updated, link):
post_info = init_post_info
Expand Down
8 changes: 3 additions & 5 deletions hexo_circle_of_friends/utils/get_url.py
Original file line number Diff line number Diff line change
Expand Up @@ -136,7 +136,6 @@ def get_stellar_url(self, response, queue):
self.handle(avatar, link, name, queue, "stellar")

def handle(self, avatar, link, name, queue, theme):
user_info = []
if len(avatar) == len(link) == len(name):
...
else:
Expand All @@ -148,21 +147,20 @@ def handle(self, avatar, link, name, queue, theme):
if link[i] == "":
# 初步筛选掉不符合规则的link
continue
user_info = []
user_info.append(name[i])
user_info.append(link[i])
user_info.append(avatar[i])
queue.put(user_info)
user_info = []

def Yun_async_link_handler(self, response, queue):
    """
    Parse the Yun theme's async friend-link JSON and enqueue the friends.

    The response body is expected to be a JSON array of objects carrying
    "name", "url" and "avatar" keys; one ``[name, link, avatar]`` list is
    put on the queue per friend (the same item shape handle() produces).

    :param response: HTTP response whose ``.text`` is the JSON payload
    :param queue: queue receiving one 3-item list per friend
    """
    for friend in json.loads(response.text):
        queue.put([friend["name"], friend["url"], friend["avatar"]])

0 comments on commit 33d44ae

Please sign in to comment.