Skip to content
This repository has been archived by the owner on Jun 13, 2022. It is now read-only.

Commit

Permalink
feat: 新增百度搜索路由
Browse files Browse the repository at this point in the history
  • Loading branch information
CaoMeiYouRen committed Jun 13, 2020
1 parent 5bb3589 commit c429a75
Show file tree
Hide file tree
Showing 15 changed files with 336 additions and 8 deletions.
1 change: 1 addition & 0 deletions .eslintrc.js
Expand Up @@ -74,5 +74,6 @@ module.exports = {
'no-useless-constructor': [2], // 禁用不必要的构造函数
'spaced-comment': [2, 'always'], // 要求或禁止在注释前有空白
'sort-imports': [0], // import 排序
'tno-prototype-builtins': [0],
},
}
2 changes: 1 addition & 1 deletion docs/README.md
Expand Up @@ -41,7 +41,7 @@
### 待完成的路由

- bilibili []
- 百度
- 百度 []
- 贴吧
- 微博
- 爱恋动漫
Expand Down
10 changes: 9 additions & 1 deletion docs/api.md
Expand Up @@ -19,4 +19,12 @@
<Route author="CaoMeiYouRen" example="/bilibili/?keyword=888888&page=1" path="/bilibili/?keyword=keyword&page=page&sort=sort" :paramsDesc="['搜索关键词']" />

search_type:查询类型。视频:video;番剧:media_bangumi;影视:media_ft;直播:live;专栏:article;话题:topic;用户:bili_user;相簿:photo。默认为video
sort:排序方式, 综合:totalrank 最多点击:click 最新发布:pubdate(缺省) 最多弹幕:dm 最多收藏:stow

sort:排序方式, 综合:totalrank 最多点击:click 最新发布:pubdate(缺省) 最多弹幕:dm 最多收藏:stow


## 百度

### 搜索

<Route author="CaoMeiYouRen" example="/baidu/www/?keyword=888888" path="/baidu/www/?keyword=keyword" />
1 change: 1 addition & 0 deletions package.json
Expand Up @@ -67,6 +67,7 @@
"axios": "^0.19.2",
"cheerio": "^1.0.0-rc.3",
"colors": "^1.4.0",
"dayjs": "^1.8.28",
"debug": "^4.1.1",
"dotenv": "^8.2.0",
"file-stream-rotator": "^0.5.7",
Expand Down
57 changes: 57 additions & 0 deletions src/routes/baidu/www/controllers/index.ts
@@ -0,0 +1,57 @@
import Koa = require('koa')
import queryString = require('query-string')
import cheerio = require('cheerio')
import fs = require('fs-extra')
import path = require('path')
import { HttpError, RssChannel, RssItem } from '@/models'
import { ajax, removeHtmlTag } from '@/utils'
import { IS_DEBUG, CACHE } from '@/config'

/**
 * Koa handler for the Baidu web-search route.
 *
 * Requests `https://www.baidu.com/s` with the `keyword` query parameter, scrapes every
 * `div.c-container.result` node out of the returned HTML and answers with an `RssChannel`.
 * Cookies returned by the upstream response are stored through `ctx.cache` (when it exists)
 * and sent back on the next request.
 *
 * Query parameters read from `ctx.query`:
 * - `keyword`: the search term (required).
 * - `limit`: optional maximum number of items; ignored unless it parses to a non-negative integer.
 *
 * @param ctx Koa context; `ctx.status` and `ctx.body` are written.
 * @param next Unused; kept so the function matches the Koa middleware signature.
 * @throws HttpError with status 400 when `keyword` is missing.
 */
export async function index(ctx: Koa.Context, next: Koa.Next) {
    const { keyword, limit } = ctx.query
    if (!keyword) {
        throw new HttpError(400, '提交的搜索内容为空!')
    }
    const cookieKey = 'www.baidu.com/s-cookie'
    const Cookie = await ctx.cache?.get(cookieKey) || ''

    const result = await ajax('https://www.baidu.com/s', {
        wd: keyword,
    }, {}, 'GET', {
        Cookie,
    })

    // A Set-Cookie value looks like `name=value; path=/; expires=...`. Only the leading
    // `name=value` pair belongs in a request Cookie header, and every cookie that was set
    // should be replayed, not just the first one.
    const setCookie = result.headers?.['set-cookie']
    const cookiePairs = (Array.isArray(setCookie) ? setCookie : [setCookie])
        .filter((c) => typeof c === 'string')
        .map((c) => c.split(';')[0].trim())
        .filter((c) => c.length > 0)
    if (cookiePairs.length > 0) {
        await ctx.cache?.set(cookieKey, cookiePairs.join('; '))
    }

    ctx.status = result.status
    if (ctx.status === 200) {
        const $ = cheerio.load(result.data)
        const results = $('div.c-container.result').map((i, e) => {
            const f = $(e)
            const link = f.find('h3.t>a').first().attr('href') || ''
            return new RssItem({
                title: removeHtmlTag(f.children('h3').text()).trim(),
                link,
                description: removeHtmlTag(f.find('div.c-abstract').first().text()),
                guid: link,
            })
        }).get()

        // `limit` arrives as a query string; a non-numeric or negative value means "no limit"
        // instead of silently producing an empty or truncated feed.
        const max = Number.parseInt(String(limit), 10)
        const item = max >= 0 ? results.slice(0, max) : results

        const channel: RssChannel = new RssChannel({
            title: '百度搜索',
            link: `${result.config.url}?${queryString.stringify(result.config.params)}`,
            description: '百度搜索',
            webMaster: 'CaoMeiYouRen',
            item,
            // The response body is an HTML string with no `list`/`total` fields, so the sizes
            // are taken from what was actually scraped.
            pageSize: item.length,
            count: results.length,
        })
        ctx.body = channel
    } else {
        const message = IS_DEBUG ? result['stack'] : result['message']
        ctx.body = { message }
    }
}
7 changes: 7 additions & 0 deletions src/routes/baidu/www/index.ts
@@ -0,0 +1,7 @@
import Router = require('@koa/router')
import { index } from './controllers'

// Router for this route group: GET / is answered by the `index` controller.
const router = new Router().get('/', index)

export default router
21 changes: 18 additions & 3 deletions src/routes/router.ts
@@ -1,13 +1,28 @@
import Router = require('@koa/router')
import wwwBaidu from './baidu/www'
import bilibili from './bilibili'
import imageSoCom from './image.so.com'
import pansou from './pansou'
import { HttpError } from '@/models'

// Top-level router: validates the shared `keyword` parameter, then mounts each search route.
const router = new Router()

// Registered before any mounted route, so a request without `keyword` is rejected with
// HTTP 400 before a route handler runs.
router.use(async (ctx, next) => {
    if (!ctx.query?.keyword) {
        throw new HttpError(400, '提交的搜索内容为空!')
    }
    await next()
})

router.use('/pansou', pansou.routes(), pansou.allowedMethods())

router.use('/image.so.com', imageSoCom.routes(), imageSoCom.allowedMethods())

router.use('/bilibili', bilibili.routes(), bilibili.allowedMethods())

router.use('/baidu/www', wwwBaidu.routes(), wwwBaidu.allowedMethods())

// The weibo route is not mounted yet.
// import weibo from './weibo'
// router.use('/weibo/article', weibo.routes(), weibo.allowedMethods())

export default router
50 changes: 50 additions & 0 deletions src/routes/weibo/controllers/index.ts
@@ -0,0 +1,50 @@
import Koa = require('koa')
import queryString = require('query-string')
import cheerio = require('cheerio')
import fs = require('fs-extra')
import path = require('path')
import { HttpError, RssChannel, RssItem } from '@/models'
import { ajax, removeHtmlTag, dateParser } from '@/utils'
import { IS_DEBUG, CACHE } from '@/config'

/**
 * Koa handler for the Weibo article-search route.
 *
 * Requests `https://s.weibo.com/article` with the `keyword` query parameter, scrapes every
 * `div.card-wrap` node out of the returned HTML and answers with an `RssChannel`.
 *
 * Query parameters read from `ctx.query`:
 * - `keyword`: the search term.
 * - `limit`: optional maximum number of items; ignored unless it parses to a non-negative integer.
 *
 * @param ctx Koa context; `ctx.status` and `ctx.body` are written.
 * @param next Unused; kept so the function matches the Koa middleware signature.
 */
export async function index(ctx: Koa.Context, next: Koa.Next) {
    const { keyword, limit } = ctx.query
    const result = await ajax('https://s.weibo.com/article', {
        q: keyword,
        Refer: 'weibo_article',
    })
    ctx.status = result.status
    if (ctx.status === 200) {
        // Parse the live response body. Reading `./${keyword}.html` from disk only works
        // with a hand-saved fixture and would build a file path from user input.
        const $ = cheerio.load(result.data)
        const cards = $('div.card-wrap').map((i, e) => {
            const f = $(e)
            const link = f.find('h3>a').first().attr('href') || ''
            // The spans inside the card's last <div>: the first is used as the author,
            // the last as the publication time.
            const meta = f.find('div').last().find('span')
            // dateParser returns its input unchanged when nothing matches, which would
            // produce an Invalid Date; leave pubDate unset in that case.
            const published = new Date(dateParser(removeHtmlTag(meta.last().text())))
            return new RssItem({
                title: removeHtmlTag(f.find('h3').first().text()).trim(),
                link,
                author: removeHtmlTag(meta.first().text()).replace(/@/, ''),
                description: removeHtmlTag(f.find('div.content p.txt').first().text()),
                guid: link,
                pubDate: Number.isNaN(published.getTime()) ? undefined : published,
            })
        }).get()

        // `limit` arrives as a query string; a non-numeric or negative value means "no limit"
        // instead of silently producing an empty or truncated feed.
        const max = Number.parseInt(String(limit), 10)
        const item = max >= 0 ? cards.slice(0, max) : cards

        const channel: RssChannel = new RssChannel({
            title: `微博文章搜索 - ${keyword}`,
            link: `${result.config.url}?${queryString.stringify(result.config.params)}`,
            description: '微博文章搜索',
            webMaster: 'CaoMeiYouRen',
            item,
            // The response body is an HTML string with no `list`/`total` fields, so the sizes
            // are taken from what was actually scraped.
            pageSize: item.length,
            count: cards.length,
        })
        ctx.body = channel
    } else {
        const message = IS_DEBUG ? result['stack'] : result['message']
        ctx.body = { message }
    }
}
7 changes: 7 additions & 0 deletions src/routes/weibo/index.ts
@@ -0,0 +1,7 @@
import Router = require('@koa/router')
import { index } from './controllers'

// Router for this route group: GET / is answered by the `index` controller.
const router = new Router().get('/', index)

export default router
81 changes: 81 additions & 0 deletions src/utils/date.ts
@@ -0,0 +1,81 @@
// Parses loosely formatted timestamps: relative ones ("N分钟前", "N小时前", "N天前", "N月前",
// "N年前", "今天/昨天/前天 HH:mm", "刚刚") and a range of absolute date layouts.

// Difference between UTC and the server's local time in hours (Date#getTimezoneOffset / 60),
// read once when the module loads.
const serverOffset = new Date().getTimezoneOffset() / 60

/**
 * One parsing rule: the pattern to look for, and a builder that turns the pattern's numeric
 * captures (in capture order) plus the current time into a local `Date`.
 */
type DateRule = [RegExp, (n: number[], now: Date) => Date]

// Rules are tried top to bottom and the first match wins, so more specific layouts must stay
// above the shorter ones they contain (e.g. "Y-M-D H:m" before "M-D H:m" before "M-D").
const dateRules: ReadonlyArray<DateRule> = [
    // Relative to now.
    [/(\d+)分钟前/, ([minutes], now) => {
        now.setMinutes(now.getMinutes() - minutes)
        now.setSeconds(0)
        return now
    }],
    [/(\d+)小时前/, ([hours], now) => {
        now.setHours(now.getHours() - hours)
        return now
    }],
    [/(\d+)天前/, ([days], now) => {
        now.setDate(now.getDate() - days)
        return now
    }],
    [/(\d+)月前/, ([months], now) => {
        now.setMonth(now.getMonth() - months)
        return now
    }],
    [/(\d+)年前/, ([years], now) => {
        now.setFullYear(now.getFullYear() - years)
        return now
    }],
    // Today / yesterday / the day before yesterday, with a clock time.
    [/今天\s*(\d+):(\d+)/, ([h, mi], now) => new Date(now.getFullYear(), now.getMonth(), now.getDate(), h, mi)],
    [/昨天\s*(\d+):(\d+)/, ([h, mi], now) => new Date(now.getFullYear(), now.getMonth(), now.getDate() - 1, h, mi)],
    [/前天\s*(\d+):(\d+)/, ([h, mi], now) => new Date(now.getFullYear(), now.getMonth(), now.getDate() - 2, h, mi)],
    // Absolute layouts; layouts without a year use the current year.
    [/(\d+)年(\d+)月(\d+)日(\d+)时/, ([y, mo, d, h]) => new Date(y, mo - 1, d, h)],
    [/(\d+)年(\d+)月(\d+)日/, ([y, mo, d]) => new Date(y, mo - 1, d)],
    [/(\d+)-(\d+)-(\d+) (\d+):(\d+)/, ([y, mo, d, h, mi]) => new Date(y, mo - 1, d, h, mi)],
    [/(\d+)-(\d+) (\d+):(\d+)/, ([mo, d, h, mi], now) => new Date(now.getFullYear(), mo - 1, d, h, mi)],
    [/(\d+)\/(\d+)\/(\d+)\s*(\d+):(\d+):(\d+)/, ([y, mo, d, h, mi, s]) => new Date(y, mo - 1, d, h, mi, s)],
    [/(\d+)\/(\d+)\/(\d+)\s*(\d+):(\d+)/, ([y, mo, d, h, mi]) => new Date(y, mo - 1, d, h, mi)],
    [/(\d+)\/(\d+)\s*(\d+):(\d+)/, ([mo, d, h, mi], now) => new Date(now.getFullYear(), mo - 1, d, h, mi)],
    [/(\d+)月(\d+)日 (\d+):(\d+)/, ([mo, d, h, mi], now) => new Date(now.getFullYear(), mo - 1, d, h, mi)],
    [/(\d+)月(\d+)日/, ([mo, d], now) => new Date(now.getFullYear(), mo - 1, d)],
    [/(\d+)月(\d+)号/, ([mo, d], now) => new Date(now.getFullYear(), mo - 1, d)],
    // Date-only "YYYY/MM/DD". Without this rule the "M/D" rule below takes the year as the
    // month. The year must have four digits so other three-part slash strings keep hitting
    // the "M/D" rule.
    [/(\d{4})\/(\d{1,2})\/(\d{1,2})/, ([y, mo, d]) => new Date(y, mo - 1, d)],
    [/(\d+)\/(\d+)/, ([mo, d], now) => new Date(now.getFullYear(), mo - 1, d)],
    [/(\d+)-(\d+)-(\d+)/, ([y, mo, d]) => new Date(y, mo - 1, d)],
    [/(\d+)-(\d+)/, ([mo, d], now) => new Date(now.getFullYear(), mo - 1, d)],
    [/(\d+):(\d+)/, ([h, mi], now) => new Date(now.getFullYear(), now.getMonth(), now.getDate(), h, mi)],
    // "Just now".
    [/刚刚/, (_n, now) => now],
]

/**
 * Converts a loosely formatted timestamp into a UTC date string.
 *
 * @param html Text containing the timestamp.
 * @param timeZone UTC offset, in hours, of the zone the text is written in. Defaults to the
 * server's own offset, in which case the parsed local time is used as is.
 * @returns `Date#toUTCString()` output for the first rule that matches, or `html` unchanged
 * when no rule matches.
 */
export function dateParser(html: string, timeZone = -serverOffset): string {
    for (const [pattern, build] of dateRules) {
        const match = pattern.exec(html)
        if (match) {
            const parts = match.slice(1).map((s) => parseInt(s, 10))
            const date = build(parts, new Date())
            // `date` was built as server-local time; shift it by the gap between the
            // source zone and the server zone.
            return new Date(date.getTime() - 60 * 60 * 1000 * (timeZone + serverOffset)).toUTCString()
        }
    }
    return html
}
88 changes: 88 additions & 0 deletions src/utils/dateParser.ts
@@ -0,0 +1,88 @@
import { dateParser as date } from './date'
import dayjs from 'dayjs'
import customParseFormat from 'dayjs/plugin/customParseFormat'
import utc from 'dayjs/plugin/utc'
import { Log } from './helper'
// Register the dayjs plugins this module uses: `utc` for the `dayjs.utc(...)` call below and
// `customParseFormat` so that call can be given a format string.
dayjs.extend(utc)
dayjs.extend(customParseFormat)

/**
 * Translates an unconventional locale code into the one dayjs supports
 * (https://bit.ly/2psVwIJ). The comparison ignores case; codes that are not in the
 * table are returned exactly as given.
 * @param x locale code to translate
 */
const i8nconv = (x: string) => {
    const aliases = new Map<string, string>([
        ['zh-hans', 'zh-cn'],
        ['zh-chs', 'zh-cn'],
        ['zh-sg', 'zh-cn'],
        ['zh-hant', 'zh-hk'],
        ['zh-cht', 'zh-hk'],
        ['zh-mo', 'zh-hk'],
    ])
    // String() keeps a non-string argument from throwing; it simply finds no alias.
    const canonical = aliases.get(String(x).toLowerCase())
    return canonical === undefined ? x : canonical
}

/**
 * Converts a time string to a UTC date string using an explicit dayjs format.
 *
 * Weekday names and commas are stripped from the input before parsing. When dayjs cannot
 * parse the result, or the locale cannot be loaded, the input is handed to the pattern-based
 * parser imported from `./date` instead.
 *
 * @param html The time string to convert.
 * @param customFormat dayjs format used to parse `html`.
 * @param lang Language of `html` (must be supported by dayjs). Defaults to `en`.
 * @param htmlOffset Number of hours added to the parsed time. Ignored when `html` itself
 * contains a timezone indicator such as "+0800".
 * @returns A `Date#toUTCString()` string, or whatever the fallback parser returns.
 */
function tStringParser(html: string, customFormat: string, lang: string = 'en', htmlOffset: number = 0) {
    lang = i8nconv(lang)

    // Remove weekdays and comma from the string
    // dayjs v1.8.16 is not able to parse weekdays
    // https://github.com/iamkun/dayjs/blob/dev/docs/en/Plugin.md#list-of-all-available-format-tokens
    // We don't remove weekdayMini since the month may contains weekdayMini, like "六" in "六月"
    let removeStr: any[] = []
    if (lang !== 'en') {
        try {
            require(`dayjs/locale/${lang}`)
        } catch (error) {
            Log.error(`Locale "${lang}" passed to dateParser is not supported by dayjs`)
            return date(html)
        }
        if (/^zh/.test(lang)) {
            // NOTE(review): ',' is also removed for every language further down, so this entry
            // changes nothing as written — confirm whether a full-width comma was intended.
            removeStr = removeStr.concat([','])
        }
    }
    // dayjs.locale() changes the global locale, so it has to be set on every call — including
    // for 'en' — or a call following a non-English one would parse with the previous locale.
    dayjs.locale(lang)

    Object.values(dayjs.Ls).forEach((k: any) => {
        ['weekdays', 'weekdaysShort'].forEach((x) => {
            // Only array-valued fields can be turned into a list of names.
            if (Array.isArray(k[x])) {
                const a = k[x].map((z) => `${z}`)
                removeStr = removeStr.concat(...a)
            }
        })
    })
    removeStr = removeStr.concat([',', 'Sun', 'Mon', 'Tue', 'Wed', 'Thu', 'Fri', 'Sat'])
    let htmlP = html
    removeStr.forEach((x) => {
        // Order matters
        // Names are matched literally: escape regex metacharacters so that, for example, a
        // trailing '.' in an abbreviated weekday does not match an arbitrary character.
        const literal = String(x).replace(/[.*+?^${}()|[\]\\]/g, '\\$&')
        htmlP = htmlP.replace(RegExp(literal, 'gi'), '')
    })

    const d = dayjs.utc(htmlP, customFormat)
    if (d.isValid()) {
        if (/[+-](\d{2}:?\d{2})/.test(html)) {
            // The original string carries its own offset, so no extra shift is applied.
            return d.toDate().toUTCString()
        } else {
            return d.add(htmlOffset, 'h').toDate().toUTCString()
        }
    } else {
        return date(html)
    }
}

// Public API of this module: the locale-alias converter and the format-based time parser.
export { i8nconv, tStringParser }
3 changes: 2 additions & 1 deletion src/utils/dingtalk.ts
@@ -1,5 +1,6 @@
import fs from 'fs-extra'
import path from 'path'
import colors from 'colors'
import { Robot, Text, Markdown } from 'ts-dingtalk-robot'
import { DINGTALK, ENABLE_PUSH } from '@/config'
import { getPublicIP } from './ajax'
Expand All @@ -26,7 +27,7 @@ if (ENABLE_PUSH) {
*/
export async function dingtalk(title: string, text?: string) {
if (!robot) {
console.error('robot未初始化!')
console.warn(colors.yellow('robot未初始化!'))
return
}
if (!text) {
Expand Down
2 changes: 2 additions & 0 deletions src/utils/index.ts
@@ -1,4 +1,6 @@
export * from './ajax'
export * from './date'
export * from './dateParser'
export * from './dingtalk'
export * from './encrypt'
export * from './helper'
Expand Down

0 comments on commit c429a75

Please sign in to comment.