Skip to content

Commit

Permalink
fix(script): better to download images
Browse files Browse the repository at this point in the history
  • Loading branch information
Chilfish committed Mar 9, 2024
1 parent ccd3a15 commit 1382565
Show file tree
Hide file tree
Showing 5 changed files with 71 additions and 103 deletions.
Empty file removed scripts/cookies.txt
Empty file.
105 changes: 28 additions & 77 deletions scripts/download.mjs
Original file line number Diff line number Diff line change
@@ -1,101 +1,52 @@
import { existsSync, mkdirSync } from 'node:fs'
import { readFile, writeFile } from 'node:fs/promises'
import { join } from 'node:path'
import { Buffer } from 'node:buffer'
import path from 'node:path'

const imgs_path = join('imgs.csv')
const imgs_path = path.resolve(process.argv[2] || 'imgs.csv')

let cookie = ''

/**
* 读取图片列表
*/
const url_list = await readFile(imgs_path, 'utf-8')
.then(text => text.trim().split(',\n'))
.then(text => text.split(',\n'))
.catch((e) => {
console.error(`未找到 imgs.csv 文件, ${e}`)
process.exit(1)
})

const download_folder = join('images')
const download_folder = path.resolve('images')
if (!existsSync(download_folder))
mkdirSync(download_folder, { recursive: true })

/**
 * Check whether a URL is a Weibo comment-picture link.
 * @param {string} url address to test
 * @returns {boolean} true when the address begins with the comment-picture prefix
 */
const isCommentPic = (url) => {
  const commentPrefix = 'https://photo.weibo.com/h5/comment'
  return url.startsWith(commentPrefix)
}

/**
 * Read the Weibo cookie string from `cookies.txt` inside `args.dir`
 * (`args` is defined outside this block).
 *
 * The content is trimmed before it is checked, so a file holding only
 * whitespace or a lone newline counts as "not filled in". A missing file is
 * treated the same way. In both cases the guidance below is printed and an
 * empty string is returned.
 *
 * @returns {Promise<string>} the trimmed cookie string, or '' when unavailable
 * @throws any read error other than "file not found"
 */
async function readCookie() {
  const cookie_path = join(args.dir, 'cookies.txt')

  let content = ''
  try {
    content = (await readFile(cookie_path, 'utf-8')).trim()
  }
  catch (err) {
    // A missing file is equivalent to an unfilled one; other failures
    // (permissions, path is a directory, ...) are unexpected, so rethrow them.
    if (err.code !== 'ENOENT')
      throw err
  }

  if (!content) {
    console.error(
      '⚠未填写 cookies.txt 文件,将无法下载评论图片\n',
      '请在浏览器中登录微博,F12 打开控制台,输入 document.cookie,将输出的内容(不含引号)复制到 cookies.txt 文件中,并保存。然后再运行一遍该脚本\n',
    )
  }
  return content
}

/**
* 用于兼容旧版的bug,将评论图片转为真实的图片地址
* @param {string} url
*/
async function getCommentPic(url) {
if (!cookie)
cookie = await readCookie()

const res = await fetch(url, {
headers: {
Cookie: cookie,
},
})
console.log(`imgs.csv 文件路径:${imgs_path}\n图片保存路径:${download_folder}`)
console.log(`共有 ${url_list.length} 张图片需要下载,将会跳过已存在的图片,开始下载中... Ctrl+C 可以中断下载。`)

const text = await res.text()
const match = text.match(/src="([^"]+)"/)

return match?.[1].replace('bmiddle', 'large')
}
for (let url of url_list) {
try {
url = url.trim()
if (url)
continue

/**
* 下载图片
* @param {string} url 图片地址
*/
async function download(url) {
if (!url)
return
const file_name = url.split('/').pop().split('?')[0]
const prefix = url.match(/^(?:https?:\/\/)?([^:\/\n]+)/)?.[1]

const file_name = url.split('/').pop().split('?')[0]
const prefix = url.match(/^(?:https?:\/\/)?([^:\/\n]+)/)?.[1]
if (!prefix)
throw new Error(`无法获取图片地址`)

if (!prefix)
return
const file_path = path.resolve(download_folder, `${prefix}-${file_name}`)
if (existsSync(file_path))
continue

const file_path = join(download_folder, `${prefix}-${file_name}`)
if (existsSync(file_path))
return
const res = await fetch(url, {
headers: {
referrer: 'https://weibo.com/',
},
})

const res = await fetch(url, {
headers: {
referrer: 'https://weibo.com/',
},
})
const buffer = await res.arrayBuffer()

await writeFile(file_path, Buffer.from(buffer))
}

console.log('开始下载图片,请不要关闭')

for (let url of url_list) {
try {
if (isCommentPic(url))
url = await getCommentPic(url)
// 如果不是图片,直接返回
if (!res.headers.get('content-type')?.startsWith('image'))
continue

await download(url)
const buffer = await res.arrayBuffer()
await writeFile(file_path, Buffer.from(buffer))
}
catch (e) {
console.error(`下载失败:${url},原因:${e.cause || e.message}`)
Expand Down
16 changes: 11 additions & 5 deletions scripts/run.bat
Original file line number Diff line number Diff line change
@@ -1,8 +1,5 @@
chcp 65001

@echo off
title 微博备份工具

:begin
cd %~dp0

Expand All @@ -13,10 +10,19 @@ echo 2. 启动本地的图片服务器
set /p UserChoice="请输入您的选择(1、2): "

if "%UserChoice%"=="1" (
node ./download.mjs
set /p imgsPath="请输入 imgs.csv 文件的完整路径(留空则使用默认路径):"

REM 如果留空,则使用默认路径
if "%imgsPath%"=="" (
set imgsPath="imgs.csv"
)

echo imgsPath: %imgsPath%

node download.mjs "%imgsPath%"
pause
) else if "%UserChoice%"=="2" (
node ./server.mjs
node server.mjs
pause
) else (
echo 无效的选择,请输入1或2。
Expand Down
50 changes: 32 additions & 18 deletions scripts/server.mjs
Original file line number Diff line number Diff line change
@@ -1,35 +1,48 @@
import * as http from 'node:http'
import * as fs from 'node:fs'
import * as path from 'node:path'
import * as zlib from 'node:zlib'
import { promisify } from 'node:util'
import { createGzip } from 'node:zlib'
import { pipeline } from 'node:stream/promises'

const server = http.createServer((req, res) => {
const filePath = path.join('images', req.url)
const access = promisify(fs.access)
const createReadStream = fs.createReadStream

fs.access(filePath, fs.constants.F_OK, (err) => {
if (err) {
res.statusCode = 404
res.end('File not found')
}
else {
// 使用流式传输将文件发送给客户端,并使用压缩
const fileStream = fs.createReadStream(filePath)
const compressStream = fileStream.pipe(zlib.createGzip())
const folder = 'images'
const folderPath = path.join(process.cwd(), folder)

res.setHeader('Content-Encoding', 'gzip')
res.setHeader('Cache-Control', 'public, max-age=3600') // 缓存1小时
res.setHeader('Expires', new Date(Date.now() + 3600000).toUTCString()) // 过期时间为1小时后
if (!fs.existsSync(folderPath)) {
console.error(`图片文件夹 ${folderPath} 不存在`)
process.exit(1)
}

compressStream.pipe(res)
}
})
/**
 * HTTP server that streams files from `folderPath` to the client, gzip-compressed.
 *
 * For every request:
 *  - only the path part of the URL is used (the query string is dropped) and
 *    percent-escapes are decoded before the path is mapped into `folderPath`;
 *  - anything that resolves outside `folderPath`, does not exist, or is not a
 *    regular file is answered with a plain-text 404;
 *  - the gzip/caching headers are set only once the target is known to be a
 *    regular file, so an error reply never carries `Content-Encoding: gzip`.
 */
const server = http.createServer(async (req, res) => {
  const sendNotFound = () => {
    res.statusCode = 404
    // The body echoes the requested URL, so make sure it is never rendered as HTML.
    res.setHeader('Content-Type', 'text/plain; charset=utf-8')
    res.end(`File not found: ${req.url}`)
  }

  let filePath
  try {
    // The base is only needed so that the relative request URL can be parsed.
    const { pathname } = new URL(req.url, 'http://localhost')
    // decodeURIComponent throws on malformed escapes; handled below as "not found".
    filePath = path.join(folderPath, decodeURIComponent(pathname))

    // Refuse paths that escape the served folder (e.g. via an encoded "../").
    const relative = path.relative(folderPath, filePath)
    const escapesFolder = relative === '..'
      || relative.startsWith(`..${path.sep}`)
      || path.isAbsolute(relative)
    if (escapesFolder) {
      sendNotFound()
      return
    }

    // stat (not just access) so that directories are rejected up front
    // instead of failing in the middle of the stream.
    const stats = await fs.promises.stat(filePath)
    if (!stats.isFile()) {
      sendNotFound()
      return
    }
  }
  catch {
    sendNotFound()
    return
  }

  res.setHeader('Content-Encoding', 'gzip')
  res.setHeader('Cache-Control', 'public, max-age=3600') // cache for one hour
  res.setHeader('Expires', new Date(Date.now() + 3600000).toUTCString()) // expires one hour from now

  try {
    await pipeline(fs.createReadStream(filePath), createGzip(), res)
  }
  catch (err) {
    // pipeline() destroys every stream on failure, including the response,
    // so no error reply can be sent any more; just record what happened.
    console.error(`传输失败:${req.url},原因:${err.message}`)
  }
})

function startServer(port) {
server.listen(port)
server.on('error', (err) => {
if (err.code === 'EADDRINUSE') {
console.log(`端口 ${port} 已被占用,尝试使用其他端口...`)
server.close()
startServer(port + 1)
}
else {
Expand All @@ -44,3 +57,4 @@ function startServer(port) {

const port = 3000
startServer(port)
console.log('图片文件夹:', folderPath)
3 changes: 0 additions & 3 deletions scripts/只有0.2.6以前的版本才需要cookies.txt

This file was deleted.

0 comments on commit 1382565

Please sign in to comment.