feat: 新增百度搜索路由

CaoMeiYouRen · Jun 13, 2020 · c429a75 · c429a75
1 parent 5bb3589
commit c429a75
Show file tree

Hide file tree

Showing 15 changed files with 336 additions and 8 deletions.
diff --git a/.eslintrc.js b/.eslintrc.js
@@ -74,5 +74,6 @@ module.exports = {
         'no-useless-constructor': [2], // 禁用不必要的构造函数
         'spaced-comment': [2, 'always'], // 要求或禁止在注释前有空白
         'sort-imports': [0], // import 排序
+        'tno-prototype-builtins': [0],
     },
 }
diff --git a/docs/README.md b/docs/README.md
@@ -41,7 +41,7 @@
 ### 待完成的路由
 
 -   bilibili [√]
--   百度
+-   百度 [√]
 -   贴吧
 -   微博
 -   爱恋动漫

diff --git a/docs/api.md b/docs/api.md
@@ -19,4 +19,12 @@
 <Route author="CaoMeiYouRen" example="/bilibili/?keyword=888888&page=1" path="/bilibili/?keyword=keyword&page=page&sort=sort" :paramsDesc="['搜索关键词']" />
 
 search_type：查询类型。视频：video；番剧：media_bangumi；影视：media_ft；直播：live；专栏：article；话题：topic；用户：bili_user；相簿：photo。默认为video
-sort：排序方式, 综合:totalrank 最多点击:click 最新发布:pubdate(缺省) 最多弹幕:dm 最多收藏:stow
+
+sort：排序方式, 综合:totalrank 最多点击:click 最新发布:pubdate(缺省) 最多弹幕:dm 最多收藏:stow
+
+
+## 百度
+
+### 搜索
+
+<Route author="CaoMeiYouRen" example="/baidu/www/?keyword=888888" path="/baidu/www/?keyword=keyword"  />
diff --git a/package.json b/package.json
@@ -67,6 +67,7 @@
     "axios": "^0.19.2",
     "cheerio": "^1.0.0-rc.3",
     "colors": "^1.4.0",
+    "dayjs": "^1.8.28",
     "debug": "^4.1.1",
     "dotenv": "^8.2.0",
     "file-stream-rotator": "^0.5.7",

diff --git a/src/routes/baidu/www/controllers/index.ts b/src/routes/baidu/www/controllers/index.ts
@@ -0,0 +1,57 @@
+import Koa = require('koa')
+import queryString = require('query-string')
+import cheerio = require('cheerio')
+import fs = require('fs-extra')
+import path = require('path')
+import { HttpError, RssChannel, RssItem } from '@/models'
+import { ajax, removeHtmlTag } from '@/utils'
+import { IS_DEBUG, CACHE } from '@/config'
+
+/**
+ * Koa handler for the Baidu web-search route: requests the Baidu result page
+ * for `keyword` and returns the parsed hits as an RSS channel.
+ *
+ * Query values read from `ctx.query`:
+ * - `keyword` (required): the search text.
+ * - `limit` (optional): maximum number of items to return. Only a positive
+ *   integer is honoured; any other value (missing, empty, non-numeric,
+ *   zero or negative) means "no limit".
+ *
+ * @param ctx  Koa context; `ctx.status` and `ctx.body` are set from the upstream response.
+ * @param next Unused; kept so the signature stays compatible with a Koa middleware.
+ * @throws HttpError 400 when `keyword` is empty.
+ */
+export async function index(ctx: Koa.Context, next: Koa.Next) {
+    const { keyword, limit } = ctx.query
+    if (!keyword) {
+        throw new HttpError(400, '提交的搜索内容为空！')
+    }
+    // `limit` is a raw query value (string, string[] or undefined). Passing it
+    // straight to slice() turned inputs such as "abc" or "" into 0 and produced
+    // an empty feed, so normalise it to a positive integer or undefined first.
+    const requestedLimit = Number(Array.isArray(limit) ? limit[0] : limit)
+    const maxItems = Number.isInteger(requestedLimit) && requestedLimit > 0 ? requestedLimit : undefined
+
+    // Reuse the cookie stored by an earlier request, if the cache has one.
+    const Cookie = await ctx.cache?.get('www.baidu.com/s-cookie') || ''
+
+    const result = await ajax('https://www.baidu.com/s', {
+        wd: keyword,
+    }, {}, 'GET', {
+        Cookie,
+    })
+    // Store the first Set-Cookie value so the next request can send it back.
+    if (result.headers['set-cookie'] && result.headers['set-cookie'][0]) {
+        await ctx.cache?.set('www.baidu.com/s-cookie', result.headers['set-cookie'][0])
+    }
+    ctx.status = result.status
+    if (ctx.status === 200) {
+        const data = result.data
+        const $ = cheerio.load(data)
+        // Each organic search hit is rendered as a `div.c-container.result` element.
+        const list = $('div.c-container.result')
+
+        const channel: RssChannel = new RssChannel({
+            title: '百度搜索',
+            link: `${result.config.url}?${queryString.stringify(result.config.params)}`,
+            description: '百度搜索',
+            webMaster: 'CaoMeiYouRen',
+            item: list?.map((i, e) => {
+                const f = $(e)
+                const link = f.find('h3.t>a').first().attr('href') || ''
+                const item = new RssItem({
+                    title: removeHtmlTag(f.children('h3').text()).trim(),
+                    link,
+                    description: removeHtmlTag(f.find('div.c-abstract').first().text()),
+                    guid: link,
+                })
+                return item
+            }).get().slice(0, maxItems),
+            // NOTE(review): `data` is the value passed to cheerio.load above, so
+            // `data.list` / `data.total` are presumably always undefined here
+            // (they look copied from a JSON-based route) - confirm and replace.
+            pageSize: data?.list?.length,
+            count: data?.total,
+        })
+        ctx.body = channel
+    } else {
+        // Non-200 upstream status: expose the stack only when IS_DEBUG is set.
+        const message = IS_DEBUG ? result['stack'] : result['message']
+        ctx.body = { message }
+    }
+}
diff --git a/src/routes/baidu/www/index.ts b/src/routes/baidu/www/index.ts
@@ -0,0 +1,7 @@
+import Router = require('@koa/router')
+import { index } from './controllers'
+
+const router = new Router() // router for the Baidu web-search endpoints
+router.get('/', index) // GET / is served by the `index` controller imported from ./controllers
+
+export default router
diff --git a/src/routes/router.ts b/src/routes/router.ts
@@ -1,13 +1,28 @@
 import Router = require('@koa/router')
+import wwwBaidu from './baidu/www'
+import bilibili from './bilibili'
+import imageSoCom from './image.so.com'
+import pansou from './pansou'
+import { HttpError } from '@/models'
+
 const router = new Router()
 
-import pansou from './pansou'
+router.use(async (ctx, next) => { // registered without a path, so it guards every route mounted on this router
+    if (!ctx.query?.keyword) {
+        throw new HttpError(400, '提交的搜索内容为空！') // reject requests that carry no `keyword` query value
+    }
+    await next()
+})
+
 router.use('/pansou', pansou.routes(), pansou.allowedMethods())
 
-import imageSoCom from './image.so.com'
 router.use('/image.so.com', imageSoCom.routes(), imageSoCom.allowedMethods())
 
-import bilibili from './bilibili'
 router.use('/bilibili', bilibili.routes(), bilibili.allowedMethods())
 
+router.use('/baidu/www', wwwBaidu.routes(), wwwBaidu.allowedMethods())
+
+// import weibo from './weibo'
+// router.use('/weibo/article', weibo.routes(), weibo.allowedMethods())
+
 export default router
diff --git a/src/routes/weibo/controllers/index.ts b/src/routes/weibo/controllers/index.ts
@@ -0,0 +1,50 @@
+import Koa = require('koa')
+import queryString = require('query-string')
+import cheerio = require('cheerio')
+import fs = require('fs-extra')
+import path = require('path')
+import { HttpError, RssChannel, RssItem } from '@/models'
+import { ajax, removeHtmlTag, dateParser } from '@/utils'
+import { IS_DEBUG, CACHE } from '@/config'
+
+/**
+ * Koa handler for the Weibo article-search route: requests
+ * https://s.weibo.com/article for `keyword` and returns the parsed result
+ * cards as an RSS channel.
+ *
+ * Query values read from `ctx.query`:
+ * - `keyword`: the search text, sent upstream as `q`.
+ * - `limit` (optional): maximum number of items to return. Only a positive
+ *   integer is honoured; any other value means "no limit".
+ *
+ * @param ctx  Koa context; `ctx.status` and `ctx.body` are set from the upstream response.
+ * @param next Unused; kept so the signature stays compatible with a Koa middleware.
+ */
+export async function index(ctx: Koa.Context, next: Koa.Next) {
+    const { keyword, limit } = ctx.query
+    // `limit` is a raw query value (string, string[] or undefined); normalise it
+    // so that a non-numeric or empty value does not silently empty the feed.
+    const requestedLimit = Number(Array.isArray(limit) ? limit[0] : limit)
+    const maxItems = Number.isInteger(requestedLimit) && requestedLimit > 0 ? requestedLimit : undefined
+
+    const result = await ajax('https://s.weibo.com/article', {
+        q: keyword,
+        Refer: 'weibo_article',
+    })
+    ctx.status = result.status
+    if (ctx.status === 200) {
+        const data = result.data
+        // Parse the fetched response. The previous code read `./${keyword}.html`
+        // from disk (a debugging leftover), which ignored the live response and
+        // let the user-supplied keyword choose the file path.
+        const $ = cheerio.load(data)
+        const list = $('div.card-wrap')
+
+        const channel: RssChannel = new RssChannel({
+            title: `微博文章搜索 - ${keyword}`,
+            link: `${result.config.url}?${queryString.stringify(result.config.params)}`,
+            description: '微博文章搜索',
+            webMaster: 'CaoMeiYouRen',
+            item: list?.map((i, e) => {
+                const f = $(e)
+                const link = f.find('h3>a').first().attr('href') || ''
+                const item = new RssItem({
+                    title: removeHtmlTag(f.find('h3').first().text()).trim(),
+                    link,
+                    // Author and date are taken from the first and last <span> of the card's last <div>.
+                    author: removeHtmlTag(f.find('div').last().find('span').first().text()).replace(/@/, ''),
+                    description: removeHtmlTag(f.find('div.content p.txt').first().text()),
+                    guid: link,
+                    pubDate: new Date(dateParser(removeHtmlTag(f.find('div').last().find('span').last().text()))),
+                })
+                return item
+            }).get().slice(0, maxItems),
+            // NOTE(review): `data` is the value passed to cheerio.load above, so
+            // `data.list` / `data.total` are presumably always undefined here - confirm.
+            pageSize: data?.list?.length,
+            count: data?.total,
+        })
+        ctx.body = channel
+    } else {
+        // Non-200 upstream status: expose the stack only when IS_DEBUG is set.
+        const message = IS_DEBUG ? result['stack'] : result['message']
+        ctx.body = { message }
+    }
+}
diff --git a/src/routes/weibo/index.ts b/src/routes/weibo/index.ts
@@ -0,0 +1,7 @@
+import Router = require('@koa/router')
+import { index } from './controllers'
+
+const router = new Router() // router for the Weibo article-search endpoint
+router.get('/', index) // GET / is served by the `index` controller imported from ./controllers
+
+export default router
diff --git a/src/utils/date.ts b/src/utils/date.ts
@@ -0,0 +1,81 @@
+/*
+ * Normalises loosely formatted (mostly Chinese) time strings into a UTC date string.
+ * Recognised shapes include relative times ("N分钟前", "N小时前", "N天前", "N月前", "N年前"),
+ * day words followed by a clock time ("今天 HH:mm", "昨天 HH:mm", "前天 HH:mm"), several numeric
+ * layouts using 年/月/日, "-" or "/" separators, a bare "HH:mm", and "刚刚" (just now).
+ * The patterns are tried from top to bottom and the first match wins, so their order matters.
+ */
+const serverOffset = new Date().getTimezoneOffset() / 60 // getTimezoneOffset() is in minutes (UTC minus local time); this is the same value in hours
+export function dateParser(html: string, timeZone = -serverOffset) { // timeZone: UTC offset, in hours, of the time written in `html`; defaults to the server's own offset
+    let math
+    let date = new Date()
+    if (/(\d+)分钟前/.exec(html)) {
+        math = /(\d+)分钟前/.exec(html)
+        date.setMinutes(date.getMinutes() - math[1]) // math[1] is a string; the subtraction coerces it to a number
+        date.setSeconds(0)
+    } else if (/(\d+)小时前/.exec(html)) {
+        math = /(\d+)小时前/.exec(html)
+        date.setHours(date.getHours() - math[1])
+    } else if (/(\d+)天前/.exec(html)) {
+        math = /(\d+)天前/.exec(html)
+        date.setDate(date.getDate() - math[1])
+    } else if (/(\d+)月前/.exec(html)) {
+        math = /(\d+)月前/.exec(html)
+        date.setMonth(date.getMonth() - math[1])
+    } else if (/(\d+)年前/.exec(html)) {
+        math = /(\d+)年前/.exec(html)
+        date.setFullYear(date.getFullYear() - math[1])
+    } else if (/今天\s*(\d+):(\d+)/.exec(html)) {
+        math = /今天\s*(\d+):(\d+)/.exec(html)
+        date = new Date(date.getFullYear(), date.getMonth(), date.getDate(), math[1], math[2])
+    } else if (/昨天\s*(\d+):(\d+)/.exec(html)) {
+        math = /昨天\s*(\d+):(\d+)/.exec(html)
+        date = new Date(date.getFullYear(), date.getMonth(), date.getDate() - 1, math[1], math[2])
+    } else if (/前天\s*(\d+):(\d+)/.exec(html)) {
+        math = /前天\s*(\d+):(\d+)/.exec(html)
+        date = new Date(date.getFullYear(), date.getMonth(), date.getDate() - 2, math[1], math[2])
+    } else if (/(\d+)年(\d+)月(\d+)日(\d+)时/.exec(html)) {
+        math = /(\d+)年(\d+)月(\d+)日(\d+)时/.exec(html)
+        date = new Date(parseInt(math[1]), parseInt(math[2]) - 1, parseInt(math[3]), parseInt(math[4])) // Date months are 0-based, hence the "- 1"
+    } else if (/(\d+)年(\d+)月(\d+)日/.exec(html)) {
+        math = /(\d+)年(\d+)月(\d+)日/.exec(html)
+        date = new Date(parseInt(math[1]), parseInt(math[2]) - 1, parseInt(math[3]))
+    } else if (/(\d+)-(\d+)-(\d+) (\d+):(\d+)/.exec(html)) {
+        math = /(\d+)-(\d+)-(\d+) (\d+):(\d+)/.exec(html)
+        date = new Date(math[1], parseInt(math[2]) - 1, math[3], math[4], math[5])
+    } else if (/(\d+)-(\d+) (\d+):(\d+)/.exec(html)) {
+        math = /(\d+)-(\d+) (\d+):(\d+)/.exec(html)
+        date = new Date(date.getFullYear(), parseInt(math[1]) - 1, math[2], math[3], math[4]) // no year in the text: the current year is used
+    } else if (/(\d+)\/(\d+)\/(\d+)\s*(\d+):(\d+):(\d+)/.exec(html)) {
+        math = /(\d+)\/(\d+)\/(\d+)\s*(\d+):(\d+):(\d+)/.exec(html)
+        date = new Date(math[1], parseInt(math[2]) - 1, math[3], math[4], math[5], math[6])
+    } else if (/(\d+)\/(\d+)\/(\d+)\s*(\d+):(\d+)/.exec(html)) {
+        math = /(\d+)\/(\d+)\/(\d+)\s*(\d+):(\d+)/.exec(html)
+        date = new Date(math[1], parseInt(math[2]) - 1, math[3], math[4], math[5])
+    } else if (/(\d+)\/(\d+)\s*(\d+):(\d+)/.exec(html)) {
+        math = /(\d+)\/(\d+)\s*(\d+):(\d+)/.exec(html)
+        date = new Date(date.getFullYear(), parseInt(math[1]) - 1, math[2], math[3], math[4])
+    } else if (/(\d+)月(\d+)日 (\d+):(\d+)/.exec(html)) {
+        math = /(\d+)月(\d+)日 (\d+):(\d+)/.exec(html)
+        date = new Date(date.getFullYear(), parseInt(math[1]) - 1, math[2], math[3], math[4])
+    } else if (/(\d+)月(\d+)日/.exec(html)) {
+        math = /(\d+)月(\d+)日/.exec(html)
+        date = new Date(date.getFullYear(), parseInt(math[1]) - 1, math[2])
+    } else if (/(\d+)月(\d+)号/.exec(html)) {
+        math = /(\d+)月(\d+)号/.exec(html)
+        date = new Date(date.getFullYear(), parseInt(math[1]) - 1, math[2])
+    } else if (/(\d+)\/(\d+)/.exec(html)) {
+        math = /(\d+)\/(\d+)/.exec(html)
+        date = new Date(date.getFullYear(), parseInt(math[1]) - 1, math[2])
+    } else if (/(\d+)-(\d+)-(\d+)/.exec(html)) {
+        math = /(\d+)-(\d+)-(\d+)/.exec(html)
+        date = new Date(math[1], parseInt(math[2]) - 1, math[3])
+    } else if (/(\d+)-(\d+)/.exec(html)) {
+        math = /(\d+)-(\d+)/.exec(html)
+        date = new Date(date.getFullYear(), parseInt(math[1]) - 1, math[2])
+    } else if (/(\d+):(\d+)/.exec(html)) {
+        math = /(\d+):(\d+)/.exec(html)
+        date = new Date(date.getFullYear(), date.getMonth(), date.getDate(), math[1], math[2]) // clock time only: today's date is used
+    } else if (/刚刚/.exec(html)) {
+        math = /刚刚/.exec(html) // "just now": keep the current time and only mark the input as recognised
+    }
+
+    if (math) {
+        return new Date(date.getTime() - 60 * 60 * 1000 * (timeZone + serverOffset)).toUTCString() // `date` was built in server-local time; shift it by the gap between `timeZone` and the server offset (0 with the default timeZone)
+    }
+    return html // no pattern matched: the input is returned unchanged
+}
diff --git a/src/utils/dateParser.ts b/src/utils/dateParser.ts
@@ -0,0 +1,88 @@
+import { dateParser as date } from './date'
+import dayjs from 'dayjs'
+import customParseFormat from 'dayjs/plugin/customParseFormat'
+import utc from 'dayjs/plugin/utc'
+import { Log } from './helper'
+dayjs.extend(utc)
+dayjs.extend(customParseFormat)
+
+/**
+ * Converts an unconventional Chinese locale tag (for example "zh-hans" or "zh-cht")
+ * into the locale name supported by dayjs ("zh-cn" or "zh-hk"). https://bit.ly/2psVwIJ
+ * The comparison is case-insensitive and must cover the whole tag.
+ * @param x locale tag to convert
+ * @returns the mapped locale name, or `x` unchanged when no entry of the table matches
+ */
+const i8nconv = (x: string) => {
+    const c = { // alias -> dayjs locale name
+        'zh-hans': 'zh-cn',
+        'zh-chs': 'zh-cn',
+        'zh-sg': 'zh-cn',
+        'zh-hant': 'zh-hk',
+        'zh-cht': 'zh-hk',
+        'zh-mo': 'zh-hk',
+    }
+    for (const prop in c) {
+        if (RegExp(`^${prop}$`, 'i').test(x)) { // anchored on both ends: whole-string, case-insensitive match
+            x = c[prop]
+            break
+        }
+    }
+    return x
+}
+
+/**
+ * Parses a time string with an explicit dayjs format and returns it as a UTC date string.
+ *
+ * Weekday names, commas and English weekday abbreviations are stripped from the input
+ * before parsing. When the locale cannot be loaded, or dayjs cannot parse the cleaned
+ * string with `customFormat`, the call falls back to `dateParser` from ./date (imported
+ * here as `date`) applied to the original input.
+ *
+ * @param html          time string to convert
+ * @param customFormat  dayjs format used to parse the cleaned string
+ * @param lang          locale name (default 'en'); it is passed through `i8nconv` first and must be a locale shipped with dayjs
+ * @param htmlOffset    UTC offset of `html` in hours (default 0); ignored when `html` itself contains a "+hhmm" / "-hh:mm" style offset
+ * @returns a UTC date string, or whatever the `dateParser` fallback returns
+ */
+function tStringParser(html: string, customFormat: string, lang: string = 'en', htmlOffset: number = 0) {
+    lang = i8nconv(lang)
+
+    // Remove weekdays and comma from the string
+    // dayjs v1.8.16 is not able to parse weekdays
+    // https://github.com/iamkun/dayjs/blob/dev/docs/en/Plugin.md#list-of-all-available-format-tokens
+    // We don't remove weekdayMini since the month may contains weekdayMini, like "六" in "六月"
+    let removeStr: any[] = []
+    if (lang !== 'en') {
+        try {
+            require(`dayjs/locale/${lang}`) // loads the locale module at runtime; throws when dayjs does not ship it
+            if (/^zh/.test(lang)) {
+                removeStr = removeStr.concat(['，']) // full-width comma used in Chinese text
+            }
+            // Add locale
+            dayjs.locale(lang)
+        } catch (error) {
+            Log.error(`Locale "${lang}" passed to dateParser is not supported by dayjs`)
+            return date(html) // fall back to the pattern-based parser
+        }
+    }
+    Object.values(dayjs.Ls).forEach((k: any) => { // dayjs.Ls: presumably the table of locales loaded so far - confirm
+        ['weekdays', 'weekdaysShort'].forEach((x) => {
+            // eslint-disable-next-line no-prototype-builtins
+            if (k.hasOwnProperty(x)) {
+                const a = k[x].map((z) => `${z}`)
+                removeStr = removeStr.concat(...a)
+            }
+        })
+    })
+    removeStr = removeStr.concat([',', 'Sun', 'Mon', 'Tue', 'Wed', 'Thu', 'Fri', 'Sat'])
+    let htmlP = html
+    removeStr.forEach((x) => {
+        // Order matters
+        htmlP = htmlP.replace(RegExp(x, 'gi'), '') // NOTE(review): x is used as a raw regex source, so an entry containing regex metacharacters (e.g. ".") would match more than its literal text
+    })
+
+    const d = dayjs.utc(htmlP, customFormat)
+    // console.log(htmlP,d)
+    if (d.isValid()) {
+        if (/[+-](\d{2}:?\d{2})/.test(html)) { // the input appears to carry its own UTC offset
+            return d.toDate().toUTCString()
+        } else {
+            return d.add(htmlOffset, 'h').toDate().toUTCString() // no offset in the text: apply the caller-supplied one
+        }
+    } else {
+        return date(html) // dayjs could not parse the string with customFormat
+    }
+}
+
+export { i8nconv, tStringParser }
diff --git a/src/utils/dingtalk.ts b/src/utils/dingtalk.ts
@@ -1,5 +1,6 @@
 import fs from 'fs-extra'
 import path from 'path'
+import colors from 'colors'
 import { Robot, Text, Markdown } from 'ts-dingtalk-robot'
 import { DINGTALK, ENABLE_PUSH } from '@/config'
 import { getPublicIP } from './ajax'
@@ -26,7 +27,7 @@ if (ENABLE_PUSH) {
  */
 export async function dingtalk(title: string, text?: string) {
     if (!robot) {
-        console.error('robot未初始化！')
+        console.warn(colors.yellow('robot未初始化！'))
         return
     }
     if (!text) {

diff --git a/src/utils/index.ts b/src/utils/index.ts
@@ -1,4 +1,6 @@
 export * from './ajax'
+export * from './date'
+export * from './dateParser'
 export * from './dingtalk'
 export * from './encrypt'
 export * from './helper'