forked from drawrowfly/tiktok-scraper
-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
takahisa.furuta
authored and
takahisa.furuta
committed
Jan 14, 2023
1 parent
e3e8f00
commit cbe0675
Showing
31 changed files
with
2,445 additions
and
2 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,14 @@ | ||
// Declared shape of the constants object that this module exports. | ||
declare const _default: { | ||
// Four lists of strings; the declaration fixes only their element type. | ||
scrape: string[]; | ||
chronologicalTypes: string[]; | ||
history: string[]; | ||
requiredSession: string[]; | ||
// Numeric values keyed by `user`, `music` and `trend`. | ||
sourceType: { | ||
user: number; | ||
music: number; | ||
trend: number; | ||
}; | ||
// NOTE(review): declared as returning `never`; presumably inferred by the compiler from an untyped empty array in the implementation - confirm before relying on this return type. | ||
verifyFp: () => never; | ||
// Zero-argument function that returns a string. | ||
userAgent: () => string; | ||
}; | ||
// Export assignment: the object itself is the module's export (CommonJS style). | ||
export = _default; |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,49 @@ | ||
"use strict"; | ||
module.exports = { | ||
scrape: [ | ||
'user', | ||
'hashtag', | ||
'trend', | ||
'music', | ||
'discover_user', | ||
'discover_hashtag', | ||
'discover_music', | ||
'history', | ||
'video', | ||
'from-file', | ||
'userprofile', | ||
], | ||
chronologicalTypes: ['user'], | ||
history: ['user', 'hashtag', 'trend', 'music'], | ||
requiredSession: ['user', 'hashtag', 'trend', 'music'], | ||
sourceType: { | ||
user: 8, | ||
music: 11, | ||
trend: 12, | ||
}, | ||
verifyFp: () => { | ||
const variants = []; | ||
return variants[Math.floor(Math.random() * variants.length)]; | ||
}, | ||
userAgent: () => { | ||
const os = [ | ||
'Macintosh; Intel Mac OS X 10_15_7', | ||
'Macintosh; Intel Mac OS X 10_15_5', | ||
'Macintosh; Intel Mac OS X 10_11_6', | ||
'Macintosh; Intel Mac OS X 10_6_6', | ||
'Macintosh; Intel Mac OS X 10_9_5', | ||
'Macintosh; Intel Mac OS X 10_10_5', | ||
'Macintosh; Intel Mac OS X 10_7_5', | ||
'Macintosh; Intel Mac OS X 10_11_3', | ||
'Macintosh; Intel Mac OS X 10_10_3', | ||
'Macintosh; Intel Mac OS X 10_6_8', | ||
'Macintosh; Intel Mac OS X 10_10_2', | ||
'Macintosh; Intel Mac OS X 10_10_3', | ||
'Macintosh; Intel Mac OS X 10_11_5', | ||
'Windows NT 10.0; Win64; x64', | ||
'Windows NT 10.0; WOW64', | ||
'Windows NT 10.0', | ||
]; | ||
return `Mozilla/5.0 (${os[Math.floor(Math.random() * os.length)]}) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/${Math.floor(Math.random() * 3) + 87}.0.${Math.floor(Math.random() * 190) + 4100}.${Math.floor(Math.random() * 50) + 140} Safari/537.36`; | ||
}, | ||
}; |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,21 @@ | ||
/// <reference types="node" /> | ||
import { CookieJar } from 'request'; | ||
import { MultipleBar } from '../helpers'; | ||
import { DownloaderConstructor, PostCollector, DownloadParams, Headers } from '../types'; | ||
// Type declaration for the Downloader class; field and method types only, no implementation. | ||
export declare class Downloader { | ||
progress: boolean; | ||
mbars: MultipleBar; | ||
progressBar: any[]; | ||
private proxy; | ||
noWaterMark: boolean; | ||
filepath: string; | ||
bulk: boolean; | ||
headers: Headers; | ||
cookieJar: CookieJar; | ||
// Takes a single options object of type DownloaderConstructor. | ||
constructor({ progress, proxy, noWaterMark, headers, filepath, bulk, cookieJar }: DownloaderConstructor); | ||
// Private accessor (a getter, not a method). | ||
private get getProxy(); | ||
addBar(type: boolean, len: number): any[]; | ||
// Resolves with a Buffer for the given post. | ||
toBuffer(item: PostCollector): Promise<Buffer>; | ||
// Takes a DownloadParams options object; the resolved value is typed as unknown. | ||
downloadPosts({ zip, folder, collector, fileName, asyncDownload }: DownloadParams): Promise<unknown>; | ||
// Resolves with no value. | ||
downloadSingleVideo(post: PostCollector): Promise<void>; | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,154 @@ | ||
"use strict"; | ||
// Interop helper: reuses an existing `__importDefault` on `this` if present; otherwise returns `mod` unchanged when it is flagged `__esModule`, or wraps it as `{ default: mod }`. | ||
var __importDefault = (this && this.__importDefault) || function (mod) { | ||
return (mod && mod.__esModule) ? mod : { "default": mod }; | ||
}; | ||
// Flags this module's exports object with `__esModule: true`. | ||
Object.defineProperty(exports, "__esModule", { value: true }); | ||
// Initialises the `Downloader` export to undefined before the class is defined. | ||
exports.Downloader = void 0; | ||
const request_1 = __importDefault(require("request")); | ||
const request_promise_1 = __importDefault(require("request-promise")); | ||
const fs_1 = require("fs"); | ||
const bluebird_1 = require("bluebird"); | ||
const archiver_1 = __importDefault(require("archiver")); | ||
const socks_proxy_agent_1 = require("socks-proxy-agent"); | ||
const async_1 = require("async"); | ||
const helpers_1 = require("../helpers"); | ||
/**
 * Downloads post videos into memory, into a folder, or into a zip archive,
 * optionally through an HTTP or SOCKS proxy and with console progress bars.
 */
class Downloader {
    /**
     * @param {object} options
     * @param {boolean} options.progress Requested progress-bar setting (see note below).
     * @param {string|string[]} options.proxy Single proxy or list of proxies; empty means none.
     * @param {boolean} options.noWaterMark Stored on the instance as-is.
     * @param {object} options.headers Headers sent with every download request.
     * @param {string} options.filepath Target folder used by `downloadSingleVideo`.
     * @param {boolean} options.bulk When truthy, per-download progress output is suppressed.
     * @param {object} options.cookieJar Cookie jar passed to every request.
     */
    constructor({ progress, proxy, noWaterMark, headers, filepath, bulk, cookieJar }) {
        // NOTE(review): `true || progress` always evaluates to `true`, so the `progress`
        // option is ignored. Left unchanged on purpose: honouring the option would alter
        // console output for existing callers. Confirm intent before changing.
        this.progress = true || progress;
        this.progressBar = [];
        this.noWaterMark = noWaterMark;
        this.headers = headers;
        this.filepath = filepath;
        this.mbars = new helpers_1.MultipleBar();
        this.proxy = proxy;
        this.bulk = bulk;
        this.cookieJar = cookieJar;
    }

    /**
     * Resolves the proxy to use for the next request.
     *
     * @returns {{socks: boolean, proxy: *}} `proxy` is an empty string when no proxy is
     * configured, a SocksProxyAgent when `socks` is true, otherwise the proxy string.
     */
    get getProxy() {
        const { proxy } = this;
        if (Array.isArray(proxy)) {
            // A random entry is picked per call; list entries are always reported as non-SOCKS.
            const picked = proxy.length ? proxy[Math.floor(Math.random() * proxy.length)] : '';
            return { socks: false, proxy: picked };
        }
        if (proxy === undefined || proxy === null) {
            // Previously this case threw a TypeError on `proxy.indexOf`.
            return { socks: false, proxy: '' };
        }
        if (proxy.includes('socks4://') || proxy.includes('socks5://')) {
            return { socks: true, proxy: new socks_proxy_agent_1.SocksProxyAgent(proxy) };
        }
        return { socks: false, proxy };
    }

    /**
     * Creates a new progress bar, remembers it in `progressBar` and returns it.
     *
     * @param {boolean} type Truthy labels the bar "WITHOUT WM", falsy "WITH WM".
     * @param {number} len Total number of units for the bar.
     * @returns {*} The bar object created by `mbars.newBar`.
     */
    addBar(type, len) {
        const label = !type ? 'WITH WM' : 'WITHOUT WM';
        const bar = this.mbars.newBar(`Downloading (${label}) :id [:bar] :percent`, {
            complete: '=',
            incomplete: ' ',
            width: 30,
            total: len,
        });
        this.progressBar.push(bar);
        return bar;
    }

    /**
     * Downloads one post's video into memory.
     *
     * @param {object} item Post; `videoUrlNoWaterMark` is preferred over `videoUrl`.
     * @returns {Promise<Buffer>} The downloaded bytes (zero-length if nothing was received).
     * Rejects with an Error when the request stream emits `error`.
     */
    toBuffer(item) {
        return new Promise((resolve, reject) => {
            const proxy = this.getProxy;
            let client = request_1.default;
            if (proxy.proxy) {
                client = proxy.socks
                    ? request_1.default.defaults({ agent: proxy.proxy })
                    : request_1.default.defaults({ proxy: `http://${proxy.proxy}/` });
            }
            const showProgress = () => this.progress && !this.bulk;
            // Chunks are collected and joined once at the end instead of re-copying the
            // whole buffer on every `data` event.
            const chunks = [];
            let bar;
            client
                .get({
                    url: item.videoUrlNoWaterMark ? item.videoUrlNoWaterMark : item.videoUrl,
                    headers: this.headers,
                    jar: this.cookieJar,
                })
                .on('response', response => {
                    if (!showProgress()) {
                        return;
                    }
                    const len = parseInt(response.headers['content-length'], 10);
                    if (len) {
                        bar = this.addBar(!!item.videoUrlNoWaterMark, len);
                    } else {
                        console.log(`Empty response! You can try again with a proxy! Can't download video: ${item.id}`);
                    }
                })
                .on('data', chunk => {
                    if (!chunk.length) {
                        return;
                    }
                    chunks.push(chunk);
                    if (showProgress() && bar && Object.prototype.hasOwnProperty.call(bar, 'tick')) {
                        bar.tick(chunk.length, { id: item.id });
                    }
                })
                .on('end', () => {
                    resolve(Buffer.concat(chunks));
                })
                .on('error', () => {
                    reject(new Error(`Cant download video: ${item.id}. If you were using proxy, please try without it.`));
                });
        });
    }

    /**
     * Downloads every post in `collector`, at most `asyncDownload` at a time, and stores
     * each video as `<id>.mp4` either inside `<fileName>.zip` or inside `folder`.
     * Each item gets `downloaded` set to true or false; a failed item does not abort the run.
     *
     * @param {object} params
     * @param {boolean} params.zip Write into a zip archive instead of a folder.
     * @param {string} params.folder Destination folder when `zip` is falsy.
     * @param {Array} params.collector Posts to download.
     * @param {string} params.fileName Archive path without the `.zip` extension.
     * @param {number} params.asyncDownload Concurrency limit.
     * @returns {Promise<string>} Resolves with `''` once all files (or the archive file)
     * are written; rejects on archive or output-stream errors.
     */
    downloadPosts({ zip, folder, collector, fileName, asyncDownload }) {
        return new Promise((resolve, reject) => {
            const saveDestination = zip ? `${fileName}.zip` : folder;
            let archive = null;
            if (zip) {
                archive = archiver_1.default('zip', {
                    gzip: true,
                    zlib: { level: 9 },
                });
                const output = fs_1.createWriteStream(saveDestination);
                // Resolve only when the file stream has closed, so the archive is fully on
                // disk; listeners are attached before any data flows.
                output.on('close', () => resolve(''));
                output.on('error', reject);
                archive.on('error', reject);
                archive.pipe(output);
            }
            async_1.forEachLimit(collector, asyncDownload, (item, cb) => {
                this.toBuffer(item)
                    .then(async (buffer) => {
                        if (!buffer.length) {
                            return false;
                        }
                        if (archive) {
                            archive.append(buffer, { name: `${item.id}.mp4` });
                        } else {
                            await bluebird_1.fromCallback(cback => fs_1.writeFile(`${saveDestination}/${item.id}.mp4`, buffer, cback));
                        }
                        return true;
                    })
                    // Download and write failures are best-effort: mark the item and move on.
                    .catch(() => false)
                    .then(downloaded => {
                        item.downloaded = downloaded;
                        // Invoked exactly once per item.
                        cb(null);
                    });
            }, error => {
                if (error) {
                    reject(error);
                    return;
                }
                if (archive) {
                    // `finalize()` may return a promise depending on the archiver version;
                    // route a rejection into this promise instead of leaving it unhandled.
                    Promise.resolve(archive.finalize()).catch(reject);
                } else {
                    resolve('');
                }
            });
        });
    }

    /**
     * Downloads a single post's video and writes it to `<filepath>/<id>.mp4`.
     *
     * @param {object} post Post; `videoUrlNoWaterMark` is preferred over `videoUrl`.
     * @returns {Promise<void>}
     */
    async downloadSingleVideo(post) {
        const proxy = this.getProxy;
        const options = {
            uri: post.videoUrlNoWaterMark || post.videoUrl,
            method: 'GET',
            jar: this.cookieJar,
            headers: this.headers,
            // `encoding: null` makes the request library return the body as a Buffer.
            encoding: null,
        };
        if (proxy.proxy) {
            if (proxy.socks) {
                options.agent = proxy.proxy;
            } else {
                options.proxy = `http://${proxy.proxy}/`;
            }
        }
        const result = await request_promise_1.default(options);
        await bluebird_1.fromCallback(cb => fs_1.writeFile(`${this.filepath}/${post.id}.mp4`, result, cb));
    }
}
exports.Downloader = Downloader; |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,85 @@ | ||
/// <reference types="node" /> | ||
import { CookieJar } from 'request'; | ||
import { EventEmitter } from 'events'; | ||
import { PostCollector, TikTokConstructor, Result, MusicMetadata, UserMetadata, HashtagMetadata, Headers } from '../types'; | ||
import { Downloader } from '../core'; | ||
// Type declaration for the TikTokScraper class, which extends EventEmitter. Private members are listed by name only; their types and behaviour are not visible in this declaration. | ||
export declare class TikTokScraper extends EventEmitter { | ||
private mainHost; | ||
private userIdStore; | ||
private download; | ||
private filepath; | ||
private json2csvParser; | ||
private filetype; | ||
private input; | ||
private proxy; | ||
private strictSSL; | ||
private number; | ||
private since; | ||
private asyncDownload; | ||
private asyncScraping; | ||
private collector; | ||
private event; | ||
private scrapeType; | ||
private cli; | ||
private spinner; | ||
private byUserId; | ||
private storeHistory; | ||
private historyPath; | ||
private idStore; | ||
// Public field typed as the Downloader class imported from '../core'. | ||
Downloader: Downloader; | ||
private storeValue; | ||
private maxCursor; | ||
private noWaterMark; | ||
private noDuplicates; | ||
private timeout; | ||
private bulk; | ||
private validHeaders; | ||
private csrf; | ||
private zip; | ||
private fileName; | ||
private test; | ||
private hdVideo; | ||
private webHookUrl; | ||
private method; | ||
private httpRequests; | ||
// Public field of type Headers. | ||
headers: Headers; | ||
private sessionList; | ||
private verifyFp; | ||
private store; | ||
// Public field of type CookieJar (imported from 'request'). | ||
cookieJar: CookieJar; | ||
// Takes a single options object of type TikTokConstructor. | ||
constructor({ download, filepath, filetype, proxy, strictSSL, asyncDownload, cli, event, progress, input, number, since, type, by_user_id, store_history, historyPath, noWaterMark, useTestEndpoints, fileName, timeout, bulk, zip, test, hdVideo, webHookUrl, method, headers, verifyFp, sessionList, }: TikTokConstructor); | ||
private get fileDestination(); | ||
private get folderDestination(); | ||
private get getApiEndpoint(); | ||
private get getProxy(); | ||
private request; | ||
private returnInitError; | ||
// NOTE(review): `Result | any` collapses to `any`, so callers get no type checking on the resolved value. | ||
scrape(): Promise<Result | any>; | ||
private withoutWatermark; | ||
private extractVideoId; | ||
private getUrlWithoutTheWatermark; | ||
private mainLoop; | ||
private submitScrapingRequest; | ||
private saveCollectorData; | ||
// Takes an object with `json` and `csv` (both typed any); resolves with no value. | ||
saveMetadata({ json, csv }: { | ||
json: any; | ||
csv: any; | ||
}): Promise<void>; | ||
private getDownloadedVideosFromHistory; | ||
private storeDownloadProgress; | ||
private collectPosts; | ||
private getValidHeaders; | ||
private scrapeData; | ||
private getTrendingFeedQuery; | ||
private getMusicFeedQuery; | ||
private getHashTagId; | ||
private getUserId; | ||
// Public zero-argument lookups; each resolves with the metadata type named in its signature. | ||
getUserProfileInfo(): Promise<UserMetadata>; | ||
getHashtagInfo(): Promise<HashtagMetadata>; | ||
getMusicInfo(): Promise<MusicMetadata>; | ||
// Resolved value is untyped (any). | ||
signUrl(): Promise<any>; | ||
private getVideoMetadataFromHtml; | ||
private getVideoMetadata; | ||
// Optional boolean `html` flag; resolves with a PostCollector. | ||
getVideoMeta(html?: boolean): Promise<PostCollector>; | ||
private sendDataToWebHookUrl; | ||
} |
Oops, something went wrong.