-
-
Notifications
You must be signed in to change notification settings - Fork 6.8k
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Browse files
Browse the repository at this point in the history
- Loading branch information
1 parent
3ef4ca0
commit 5199ebb
Showing
2 changed files
with
95 additions
and
47 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,56 +1,104 @@ | ||
const got = require('@/utils/got'); | ||
const { load } = require('cheerio'); | ||
const { resolve } = require('url'); | ||
|
||
const base = 'http://sdcs.sysu.edu.cn/'; | ||
const cheerio = require('cheerio'); | ||
|
||
module.exports = async (ctx) => { | ||
const { data } = await got.get('http://sdcs.sysu.edu.cn/'); | ||
const $ = load(data); | ||
|
||
const urls = $('.view-content li > a') | ||
.slice(0, 10) | ||
.map((_, ele) => $(ele).attr('href')) | ||
.toArray() | ||
.map((path) => path.match(/\/content\/(\d+)/)[1]) // extract article-id | ||
.sort() | ||
.reverse() // sort by article-id (or to say, date), latest first | ||
.map((aid) => resolve(base, `/content/${aid}`)); | ||
|
||
ctx.state.data = { | ||
title: '中山大学 - 数据科学与计算机学院', | ||
link: 'http://sdcs.sysu.edu.cn/', | ||
description: '中山大学 - 数据科学与计算机学院', | ||
item: await getDetails(ctx.cache, urls), | ||
}; | ||
}; | ||
const response = await got({ | ||
method: 'get', | ||
url: 'http://sdcs.sysu.edu.cn/', | ||
headers: { | ||
Referer: `http://sdcs.sysu.edu.cn/`, | ||
}, | ||
}); | ||
const $ = cheerio.load(response.data); | ||
|
||
const getDetails = (cache, urls) => Promise.all(urls.map((url) => cache.tryGet(url, () => getDetail(url)))); | ||
// The home page is made up of several blocks; each block has its own CSS
// selector and its own information category.
// Each entry pairs the numeric suffix used in the block's element id
// (`index`) with the category label (`description_header`) for that block.
// Indexes are the 1-based positions of the labels in the list below.
const block_index = [
    '学院新闻',
    '学院通知',
    '人才招聘',
    '学术活动',
    '学工通知',
    '学生活动',
    '教务通知',
    '科研通知',
    '人事通知',
    '党群工作',
    '校友工作',
    '社会工作',
].map((description_header, position) => ({
    index: position + 1,
    description_header,
}));
|
||
const timezone = 8; | ||
const serverOffset = new Date().getTimezoneOffset() / 60; | ||
const shiftTimezone = (date) => new Date(date.getTime() - 60 * 60 * 1000 * (timezone + serverOffset)).toUTCString(); | ||
/**
 * Build one feed entry from an anchor element of a home-page block.
 *
 * @param {{attribs: {title: string, href: string}}} item - Parsed anchor
 *   element; only its `attribs.title` and `attribs.href` are read.
 * @param {string} description_header - Category label of the block the
 *   anchor was found in; used as the title prefix and as the category.
 * @returns {{title: string, description: string, link: string, category: string}}
 *   Feed entry whose title and description are both "<header>: <title>".
 */
function getDetail(item, description_header) {
    const { title, href } = item.attribs;
    const text = `${description_header}: ${title}`;

    // Anchors on the page may carry site-relative hrefs such as
    // "/content/4961"; feed readers need absolute URLs, so resolve against
    // the site root. Already-absolute hrefs pass through unchanged.
    let link = href;
    if (href) {
        try {
            link = new URL(href, 'http://sdcs.sysu.edu.cn/').href;
        } catch (err) {
            // Unparseable href: keep the raw value rather than failing the
            // whole feed over a single bad link.
            link = href;
        }
    }

    return {
        title: text,
        description: text,
        link,
        category: description_header,
    };
}
|
||
const getDetail = async (url) => { | ||
const { data } = await got.get(url); | ||
const $ = load(data); | ||
// Collect one feed entry per link found in each home-page block.
// Each block is located by its element id suffix (`index`), and every anchor
// inside it is tagged with that block's category label.
// The per-anchor console.log debugging dump that used to sit in this loop has
// been removed; it printed every parsed DOM node on each request.
const item_data = [];
for (const { index, description_header } of block_index) {
    const block_news = $(`#block-views-homepage-block-${index}> div > div.view-content > div > ul > li > a`).toArray();
    for (const anchor of block_news) {
        item_data.push(getDetail(anchor, description_header));
    }
}
|
||
// transforming images | ||
$('.content img').each((_, ele) => { | ||
$(ele).attr('referrerpolicy', 'no-referrer'); | ||
$(ele).attr('src', resolve(base, $(ele).attr('src'))); | ||
}); | ||
/**
 * Sort comparator that puts feed items with the largest numeric id first.
 *
 * The id is the run of digits at the very end of `link` (for example 4961 in
 * "/content/4961"). Ids of any length are supported, so ordering stays correct
 * when ids are shorter or longer than four digits.
 *
 * @param {{link: string}} a - First feed item.
 * @param {{link: string}} b - Second feed item.
 * @returns {number} Negative when `a` sorts before `b`, positive when after,
 *   0 when equal. Items whose link has no trailing digits sort after all
 *   items that have an id.
 */
function compareLink(a, b) {
    // Extract the trailing numeric id of an item's link, or NaN if absent.
    const trailingId = (item) => {
        const match = /(\d+)$/.exec(String(item.link));
        return match ? parseInt(match[1], 10) : NaN;
    };

    const aId = trailingId(a);
    const bId = trailingId(b);
    const aMissing = Number.isNaN(aId);
    const bMissing = Number.isNaN(bId);

    // A comparator must never return NaN, otherwise the resulting order is
    // implementation-defined; id-less items are sent to the end instead.
    if (aMissing || bMissing) {
        return Number(aMissing) - Number(bMissing);
    }
    return bId - aId;
}
// Put newer notices first. This assumes a notice's publication order tracks its link address, and that every link looks like "/content/4961" with a four-digit id. | ||
item_data.sort(compareLink); | ||
// console.log(item_data); | ||
|
||
return { | ||
title: $('section > h1').text(), | ||
description: $('.content').html(), | ||
link: url, | ||
pubDate: shiftTimezone( | ||
new Date( | ||
$('.submitted-by') | ||
.text() | ||
.match(/(\d+\/\d+\/\d+)/)[1] | ||
) | ||
), | ||
ctx.state.data = { | ||
title: `中山大学 - 数据科学与计算机学院`, | ||
link: `http://sdcs.sysu.edu.cn`, | ||
description: `中山大学 - 数据科学与计算机学院`, | ||
language: `zh-cn`, | ||
item: item_data, | ||
}; | ||
}; |