-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathpopulate_vulhubs.nim
96 lines (72 loc) · 2.9 KB
/
populate_vulhubs.nim
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
import json, streams, uri, strutils, os, re
import os, strformat, strutils, uri, db_postgres
import lib/github/[client, repository]
import db/dbutils
proc toReadmePath(url: string): string =
  ## Converts a repo URL to the repo-relative path of its README.
  ##
  ## The last two segments of the URL path are joined with `/` and
  ## `/README.md` is appended, e.g. a path ending in
  ## `.../activemq/CVE-2015-5254` gives `activemq/CVE-2015-5254/README.md`.
  ##
  ## Leading and trailing slashes of the path are ignored, so a URL that
  ## ends in `/` no longer produces an empty last segment.
  ##
  ## Raises `ValueError` when the path has fewer than two segments.
  let segments = parseUri(url).path.strip(chars = {'/'}).split('/')
  if segments.len < 2:
    raise newException(ValueError,
      "expected at least two path segments in url: " & url)
  result = segments[^2..^1].join("/")
  result.add "/README.md"
proc toRepoSubdir(url: string): string =
  ## Extracts the repo-relative sub-directory from a repo URL.
  ##
  ## Returns the last two segments of the URL path joined with `/`, e.g. a
  ## path ending in `.../activemq/CVE-2015-5254` gives
  ## `activemq/CVE-2015-5254`.
  ##
  ## Leading and trailing slashes of the path are ignored, so a URL that
  ## ends in `/` no longer produces an empty last segment.
  ##
  ## Raises `ValueError` when the path has fewer than two segments.
  let segments = parseUri(url).path.strip(chars = {'/'}).split('/')
  if segments.len < 2:
    raise newException(ValueError,
      "expected at least two path segments in url: " & url)
  result = segments[^2..^1].join("/")
proc fetchReadme(cl: GithubApiClient, url: string): string =
  ## Fetches the README that belongs to the repo URL `url`.
  ##
  ## `url` is turned into a README path with `toReadmePath`, which is then
  ## requested through `cl.getContents` with `"vulhub"`, `"vulhub"` as the
  ## first two arguments (presumably owner and repository name — confirm
  ## against the client library). The response's body stream is read to the
  ## end and returned.
  ##
  ## Example of a file this is meant to retrieve:
  ## https://github.com/vulhub/vulhub/blob/master/activemq/CVE-2015-5254/README.md
  let readmePath = toReadmePath(url)
  let response = getContents(cl, "vulhub", "vulhub", readmePath)
  result = readAll(response.bodyStream)
proc rewriteReadme(content: string, subDir: string): string =
  ## Cleans up a raw README before it is stored.
  ##
  ## Steps, in order:
  ## 1. every Markdown image (`![alt](src)`, optionally with a quoted
  ##    title) is removed;
  ## 2. the literal link to the Chinese version of the README is removed;
  ## 3. leading and trailing whitespace is stripped.
  ##
  ## `subDir` is accepted for interface compatibility but is not used by
  ## the current implementation.
  # TODO: translate from chinese to english
  # TODO: replace link destinations from relative to absolute; candidate
  #       pattern: ^\[([\w\s\d]+)\]\(((?:\/|https?:\/\/)[\w\d./?=#]+)\)$
  # NOTE(review): the original comments describe rewriting image sources
  # from relative to absolute, yet the replacement text has always been
  # empty, i.e. images are dropped. That behaviour is kept as-is here;
  # confirm whether images should instead point at
  # https://raw.githubusercontent.com/vulhub/vulhub/master/<subDir>/...
  let imgRx = re"""!\[[^\]]*\]\((?<filename>.*?)(?=\"|\))\)"""
  result = content.replacef(imgRx, "")
  result = result.replace("[中文版本(Chinese version)](README.zh-cn.md)", "")
  result = strip(result)
type Vulhub = object
  ## One record of the `vulhubs` table as handled by this script.
  rowId: string     ## filled from the `id` column
  url: string       ## filled from the `url` column
  readmeRaw: string ## filled from the `readme_raw` column
when isMainModule:
  proc reportFailure(e: ref CatchableError, vulhub: Vulhub) =
    ## Prints a failed database update together with the row it concerns.
    echo "Got exception ", repr(e), " with message ", e.msg
    echo &"{vulhub.rowId}: {vulhub.url}"

  proc main() =
    ## Script entry point.
    ##
    ## Reads `DATABASE_URL` and `GITHUB_TOKEN` from the environment, loads
    ## every row of `vulhubs`, and then, depending on compile-time flags:
    ## * `-d:populate`: fetches each README and stores it in `readme_raw`;
    ## * `-d:rewrite`: runs `rewriteReadme` on `readme_raw` and stores the
    ##   result in `readme`.
    ##
    ## A failing UPDATE is reported and the loop moves on to the next row.
    let
      connStr = parseDbUrl(getEnv("DATABASE_URL", ""))
      token = getEnv("GITHUB_TOKEN", "")
      db = db_postgres.open("", "", "", connStr)
    # Top-level `defer` is not supported, hence the wrapping proc: this
    # closes the connection on every exit path, including an uncaught
    # exception raised while fetching a README.
    defer: db.close()

    # Never print the secret itself; only report whether one was supplied.
    echo "github access token: " &
      (if token.len > 0: "<set>" else: "<not set>")

    var vulhubs: seq[Vulhub]
    # let rows1 = db.getAllRows(sql"select id, url from vulhubs where content is NULL")
    for row in db.getAllRows(sql"select id, url, readme_raw from vulhubs"):
      vulhubs.add Vulhub(rowId: row[0], url: row[1], readmeRaw: row[2])
    echo &"{len(vulhubs)} vulhubs"

    # NOTE(review): the source this was recovered from had lost its
    # indentation; the "<id>: <url>" line is assumed to be part of the
    # failure report rather than a per-row progress line — confirm.
    when defined(populate):
      let cl = newGithubApiClient(token)
      for vulhub in vulhubs:
        let content = cl.fetchReadme(vulhub.url)
        try:
          db.exec(sql("update vulhubs set readme_raw = ? where id = ?"),
                  @[content, vulhub.rowId])
        except CatchableError as e:
          # Catch only recoverable errors; Defects indicate bugs and
          # should still abort the run.
          reportFailure(e, vulhub)

    when defined(rewrite):
      for vulhub in vulhubs:
        let readme = rewriteReadme(vulhub.readmeRaw,
                                   vulhub.url.toRepoSubdir())
        try:
          db.exec(sql("update vulhubs set readme = ? where id = ?"),
                  @[readme, vulhub.rowId])
        except CatchableError as e:
          reportFailure(e, vulhub)

    echo "done"

  main()