Skip to content

Commit

Permalink
fix: update sysu/sdcs router, fix #2905 (#2906)
Browse files Browse the repository at this point in the history
  • Loading branch information
Neutrino3316 authored and DIYgod committed Aug 21, 2019
1 parent 3ef4ca0 commit 5199ebb
Show file tree
Hide file tree
Showing 2 changed files with 95 additions and 47 deletions.
2 changes: 1 addition & 1 deletion docs/university.md
Original file line number Diff line number Diff line change
Expand Up @@ -1009,4 +1009,4 @@ https://rsshub.app/**nuist**/`bulletin` 或 https://rsshub.app/**nuist**/`bullet

### 数据科学与计算机学院动态

<Route author="MegrezZhu" example="/sysu/sdcs" path="/sysu/sdcs" />
<Route author="Neutrino3316 MegrezZhu" example="/sysu/sdcs" path="/sysu/sdcs" />
140 changes: 94 additions & 46 deletions lib/routes/universities/sysu/sdcs.js
Original file line number Diff line number Diff line change
@@ -1,56 +1,104 @@
const got = require('@/utils/got');
const { load } = require('cheerio');
const { resolve } = require('url');

const base = 'http://sdcs.sysu.edu.cn/';
const cheerio = require('cheerio');

module.exports = async (ctx) => {
const { data } = await got.get('http://sdcs.sysu.edu.cn/');
const $ = load(data);

const urls = $('.view-content li > a')
.slice(0, 10)
.map((_, ele) => $(ele).attr('href'))
.toArray()
.map((path) => path.match(/\/content\/(\d+)/)[1]) // extract article-id
.sort()
.reverse() // sort by article-id (or to say, date), latest first
.map((aid) => resolve(base, `/content/${aid}`));

ctx.state.data = {
title: '中山大学 - 数据科学与计算机学院',
link: 'http://sdcs.sysu.edu.cn/',
description: '中山大学 - 数据科学与计算机学院',
item: await getDetails(ctx.cache, urls),
};
};
// Fetch the school homepage; the request carries a Referer header that points at the site itself.
const response = await got({
method: 'get',
url: 'http://sdcs.sysu.edu.cn/',
headers: {
Referer: `http://sdcs.sysu.edu.cn/`,
},
});
// Load the response body into cheerio so the page can be queried with CSS selectors.
const $ = cheerio.load(response.data);

const getDetails = (cache, urls) => Promise.all(urls.map((url) => cache.tryGet(url, () => getDetail(url))));
// The homepage is split into several blocks. Each block is addressed by its own
// CSS selector (keyed by a 1-based block number) and carries a different category
// of news, so the headers are listed here in block order and numbered from 1.
const block_index = [
    '学院新闻',
    '学院通知',
    '人才招聘',
    '学术活动',
    '学工通知',
    '学生活动',
    '教务通知',
    '科研通知',
    '人事通知',
    '党群工作',
    '校友工作',
    '社会工作',
].map((description_header, position) => ({
    index: position + 1,
    description_header,
}));

const timezone = 8;
const serverOffset = new Date().getTimezoneOffset() / 60;
const shiftTimezone = (date) => new Date(date.getTime() - 60 * 60 * 1000 * (timezone + serverOffset)).toUTCString();
/**
 * Build one feed entry from a homepage anchor node.
 *
 * @param {object} item - Parsed anchor node; its `attribs.title` and `attribs.href` are read.
 * @param {string} description_header - Name of the homepage block the anchor was found in.
 * @returns {{title: string, description: string, link: string, category: string}}
 *   Entry whose title and description are both "<header>: <anchor title>".
 */
function getDetail(item, description_header) {
    const { title, href } = item.attribs;
    const heading = `${description_header}: ${title}`;

    // Homepage anchors use site-relative hrefs such as "/content/4961"; feed readers
    // need an absolute URL. Already-absolute hrefs pass through unchanged.
    let link = href;
    if (href) {
        try {
            link = new URL(href, 'http://sdcs.sysu.edu.cn/').href;
        } catch (err) {
            // Deliberate best effort: keep the raw href rather than failing the whole feed.
            link = href;
        }
    }

    return {
        title: heading,
        description: heading,
        link,
        category: description_header,
    };
}

const getDetail = async (url) => {
const { data } = await got.get(url);
const $ = load(data);
// Collect one feed entry per anchor from every homepage block, in block order.
const item_data = [];
for (const { index, description_header } of block_index) {
    // Each block is rendered under its own element id, so the selector is built per block.
    const block_news = $(`#block-views-homepage-block-${index}> div > div.view-content > div > ul > li > a`);
    for (const anchor of block_news.toArray()) {
        item_data.push(getDetail(anchor, description_header));
    }
}

// transforming images
$('.content img').each((_, ele) => {
$(ele).attr('referrerpolicy', 'no-referrer');
$(ele).attr('src', resolve(base, $(ele).attr('src')));
});
/**
 * Sort comparator that orders feed entries newest-first, using the numeric id
 * at the end of each entry's link (e.g. "/content/4961") as a proxy for age.
 *
 * The whole trailing digit run is parsed, so ids of any length compare correctly.
 * Entries whose link has no trailing id are treated as id 0 and sort last.
 *
 * @param {{link: string}} a - First entry.
 * @param {{link: string}} b - Second entry.
 * @returns {number} Negative when `a` has the larger id, positive when `b` does, 0 when equal.
 */
function compareLink(a, b) {
    const trailingId = (entry) => {
        const match = /(\d+)\/?$/.exec(String(entry.link));
        return match ? Number.parseInt(match[1], 10) : 0;
    };
    return trailingId(b) - trailingId(a);
}
// 使得新的通知排在前面,假设通知的发布和链接地址是相关的,而且链接地址都是"/content/4961"这样,只有四位数的。
item_data.sort(compareLink);
// console.log(item_data);

return {
title: $('section > h1').text(),
description: $('.content').html(),
link: url,
pubDate: shiftTimezone(
new Date(
$('.submitted-by')
.text()
.match(/(\d+\/\d+\/\d+)/)[1]
)
),
// Expose the channel metadata and the collected, sorted entries on ctx.state.data.
ctx.state.data = {
title: `中山大学 - 数据科学与计算机学院`,
link: `http://sdcs.sysu.edu.cn`,
description: `中山大学 - 数据科学与计算机学院`,
language: `zh-cn`,
item: item_data,
};
};

0 comments on commit 5199ebb

Please sign in to comment.