fix(script): better to download images

Chilfish · Mar 9, 2024 · 1382565 · 1382565
1 parent ccd3a15
commit 1382565
Show file tree

Hide file tree

Showing 5 changed files with 71 additions and 103 deletions.
diff --git a/scripts/cookies.txt b/scripts/cookies.txt
diff --git a/scripts/download.mjs b/scripts/download.mjs
@@ -1,101 +1,52 @@
 import { existsSync, mkdirSync } from 'node:fs'
 import { readFile, writeFile } from 'node:fs/promises'
-import { join } from 'node:path'
 import { Buffer } from 'node:buffer'
+import path from 'node:path'
 
-const imgs_path = join('imgs.csv')
+const imgs_path = path.resolve(process.argv[2] || 'imgs.csv')
 
-let cookie = ''
-
-/**
- * 读取图片列表
- */
 const url_list = await readFile(imgs_path, 'utf-8')
-  .then(text => text.trim().split(',\n'))
+  .then(text => text.split(',\n'))
   .catch((e) => {
     console.error(`未找到 imgs.csv 文件, ${e}`)
     process.exit(1)
   })
 
-const download_folder = join('images')
+const download_folder = path.resolve('images')
 if (!existsSync(download_folder))
   mkdirSync(download_folder, { recursive: true })
 
-/**
- * 确认是否为评论图片
- * @param {string} url
- */
-const isCommentPic = url => url.startsWith('https://photo.weibo.com/h5/comment')
-
-async function readCookie() {
-  const cookie_path = join(args.dir, 'cookies.txt')
-  const cookie = await readFile(cookie_path, 'utf-8')
-
-  if (!cookie) {
-    console.error(
-      '⚠未填写 cookies.txt 文件，将无法下载评论图片\n',
-      '请在浏览器中登录微博，F12 打开控制台，输入 document.cookie，将输出的内容（不含引号）复制到 cookies.txt 文件中，并保存。然后再运行一遍该脚本\n',
-    )
-  }
-  return cookie.trim()
-}
-
-/**
- * 用于兼容旧版的bug，将评论图片转为真实的图片地址
- * @param {string} url
- */
-async function getCommentPic(url) {
-  if (!cookie)
-    cookie = await readCookie()
-
-  const res = await fetch(url, {
-    headers: {
-      Cookie: cookie,
-    },
-  })
+console.log(`imgs.csv 文件路径：${imgs_path}\n图片保存路径：${download_folder}`)
+console.log(`共有 ${url_list.length} 张图片需要下载，将会跳过已存在的图片，开始下载中... Ctrl+C 可以中断下载。`)
 
-  const text = await res.text()
-  const match = text.match(/src="([^"]+)"/)
-
-  return match?.[1].replace('bmiddle', 'large')
-}
+for (let url of url_list) {
+  try {
+    url = url.trim()
+    if (!url) // skip blank entries (e.g. a trailing newline in imgs.csv)
+      continue
 
-/**
- *  下载图片
- * @param {string} url 图片地址
- */
-async function download(url) {
-  if (!url)
-    return
+    const file_name = url.split('/').pop().split('?')[0]
+    const prefix = url.match(/^(?:https?:\/\/)?([^:\/\n]+)/)?.[1]
 
-  const file_name = url.split('/').pop().split('?')[0]
-  const prefix = url.match(/^(?:https?:\/\/)?([^:\/\n]+)/)?.[1]
+    if (!prefix)
+      throw new Error(`无法获取图片地址`)
 
-  if (!prefix)
-    return
+    const file_path = path.resolve(download_folder, `${prefix}-${file_name}`)
+    if (existsSync(file_path))
+      continue
 
-  const file_path = join(download_folder, `${prefix}-${file_name}`)
-  if (existsSync(file_path))
-    return
+    const res = await fetch(url, {
+      headers: {
+        Referer: 'https://weibo.com/', // HTTP header name is "Referer"; "referrer" would be sent as an unrelated header
+      },
+    })
 
-  const res = await fetch(url, {
-    headers: {
-      referrer: 'https://weibo.com/',
-    },
-  })
-  const buffer = await res.arrayBuffer()
-
-  await writeFile(file_path, Buffer.from(buffer))
-}
-
-console.log('开始下载图片，请不要关闭')
-
-for (let url of url_list) {
-  try {
-    if (isCommentPic(url))
-      url = await getCommentPic(url)
+    // 如果不是图片，直接返回
+    if (!res.headers.get('content-type')?.startsWith('image'))
+      continue
 
-    await download(url)
+    const buffer = await res.arrayBuffer()
+    await writeFile(file_path, Buffer.from(buffer))
   }
   catch (e) {
     console.error(`下载失败：${url}，原因：${e.cause || e.message}`)

diff --git a/scripts/run.bat b/scripts/run.bat
@@ -1,8 +1,5 @@
 chcp 65001
-
 @echo off
-title 微博备份工具
-
 :begin
 cd %~dp0
 
@@ -13,10 +10,19 @@ echo 2. 启动本地的图片服务器
 set /p UserChoice="请输入您的选择（1、2）: "
 
 if "%UserChoice%"=="1" (
-  node ./download.mjs
+  REM Variables assigned inside this block are only readable through delayed expansion
+  setlocal EnableDelayedExpansion
+  set /p imgsPath="请输入 imgs.csv 文件的完整路径（留空则使用默认路径）:"
+
+  REM Fall back to the default path when nothing was entered
+  if "!imgsPath!"=="" set "imgsPath=imgs.csv"
+  echo imgsPath: !imgsPath!
+
+  node download.mjs "!imgsPath!"
+  endlocal
   pause
 ) else if "%UserChoice%"=="2" (
-  node ./server.mjs
+  node server.mjs
   pause
 ) else (
   echo 无效的选择，请输入1或2。

diff --git a/scripts/server.mjs b/scripts/server.mjs
@@ -1,35 +1,48 @@
 import * as http from 'node:http'
 import * as fs from 'node:fs'
 import * as path from 'node:path'
-import * as zlib from 'node:zlib'
+import { promisify } from 'node:util'
+import { createGzip } from 'node:zlib'
+import { pipeline } from 'node:stream/promises'
 
-const server = http.createServer((req, res) => {
-  const filePath = path.join('images', req.url)
+const access = promisify(fs.access)
+const createReadStream = fs.createReadStream
 
-  fs.access(filePath, fs.constants.F_OK, (err) => {
-    if (err) {
-      res.statusCode = 404
-      res.end('File not found')
-    }
-    else {
-      // 使用流式传输将文件发送给客户端，并使用压缩
-      const fileStream = fs.createReadStream(filePath)
-      const compressStream = fileStream.pipe(zlib.createGzip())
+const folder = 'images'
+const folderPath = path.join(process.cwd(), folder)
 
-      res.setHeader('Content-Encoding', 'gzip')
-      res.setHeader('Cache-Control', 'public, max-age=3600') // 缓存1小时
-      res.setHeader('Expires', new Date(Date.now() + 3600000).toUTCString()) // 过期时间为1小时后
+if (!fs.existsSync(folderPath)) {
+  console.error(`图片文件夹 ${folderPath} 不存在`)
+  process.exit(1)
+}
 
-      compressStream.pipe(res)
-    }
-  })
+const server = http.createServer(async (req, res) => {
+  try {
+    // Drop the query string, decode, and refuse anything that resolves outside folderPath
+    const filePath = path.join(folderPath, decodeURIComponent(req.url.split('?')[0]))
+    if (!filePath.startsWith(folderPath + path.sep))
+      throw new Error('path escapes the image folder')
+    await access(filePath, fs.constants.F_OK)
+    // Directories pass access(); keep them away from the gzip pipeline
+    if (!(await fs.promises.stat(filePath)).isFile())
+      throw new Error('not a regular file')
+    res.setHeader('Content-Encoding', 'gzip')
+    res.setHeader('Cache-Control', 'public, max-age=3600')
+    res.setHeader('Expires', new Date(Date.now() + 3600000).toUTCString())
+    await pipeline(createReadStream(filePath), createGzip(), res)
+  }
+  catch (err) {
+    res.statusCode = 404
+    res.end(`File not found: ${req.url}`)
+  }
 })
 
 function startServer(port) {
   server.listen(port)
   server.on('error', (err) => {
     if (err.code === 'EADDRINUSE') {
       console.log(`端口 ${port} 已被占用，尝试使用其他端口...`)
+      server.close()
       startServer(port + 1)
     }
     else {
@@ -44,3 +57,4 @@ function startServer(port) {
 
 const port = 3000
 startServer(port)
+console.log('图片文件夹：', folderPath)
diff --git a/scripts/只有0.2.6以前的版本才需要cookies.txt b/scripts/只有0.2.6以前的版本才需要cookies.txt