diff --git a/.github/workflows/export-osv.yml b/.github/workflows/export-osv.yml new file mode 100644 index 00000000..937e4ab6 --- /dev/null +++ b/.github/workflows/export-osv.yml @@ -0,0 +1,86 @@ +name: Update IDs, timestamps, and export OSV + +on: + pull_request: + push: + branches: + - main + +jobs: + update-and-export: + runs-on: ubuntu-latest + + steps: + - name: Checkout repository + uses: actions/checkout@v4 + with: + token: ${{ secrets.GITHUB_TOKEN }} + + - name: Setup Julia + uses: julia-actions/setup-julia@v1 + + - name: Install Julia dependencies + run: | + julia --project=. -e 'using Pkg; Pkg.instantiate()' + + - name: Set up Go + uses: actions/setup-go@v5 + with: + go-version: 'stable' + + - name: Install osv-linter + run: | + git clone https://github.com/JuliaComputing/osv-schema.git -b juliahub/julia + cd ./osv-schema/tools/osv-linter + go build -o ../../../osv-linter ./cmd/osv + cd ../../.. + rm -rf osv-schema + + - name: Assign ids and timestamps + id: assign + run: | + julia --project=. scripts/assign_ids_and_dates.jl + + - name: Push updated advisories to main + if: ${{ steps.assign.outputs.changes && github.event_name == 'push' && github.ref_name == 'main' }} + run: | + git config --local user.email "41898282+github-actions[bot]@users.noreply.github.com" + git config --local user.name "github-actions[bot]" + git add advisories + git commit -m '${{ steps.assign.outputs.title }}' + git push origin main + + - name: Export OSV + id: export + run: | + julia --project=. 
scripts/export_osv.jl + env: + GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} + + - name: Validate JSON + run: | + output=$(./osv-linter record lint osv 2>&1 || true) + if [ -n "$output" ]; then + echo "$output" + exit 1 + fi + + - name: Publish OSV data + if: ${{ github.event_name == 'push' && github.ref_name == 'main' }} + env: + GITHUB_BRANCH: ${{ github.ref_name }} + GITHUB_SHA: ${{ github.sha }} + run: | + DATA_DIR=$PWD/osv + BRANCH=generated/osv + REF=refs/remotes/origin/$BRANCH + export GIT_WORK_TREE=$DATA_DIR + git read-tree "$REF" + git add --all --intent-to-add + git diff --quiet && exit + git add --all + TREE=$(git write-tree) + git config --local user.email "41898282+github-actions[bot]@users.noreply.github.com" + git config --local user.name "github-actions[bot]" + COMMIT=$(git commit-tree "$TREE" -p "$REF" -m "[automated] export ($GITHUB_BRANCH)@($GITHUB_SHA)") + git push origin $COMMIT:$BRANCH diff --git a/.github/workflows/sync-upstream-advisories.yml b/.github/workflows/sync-upstream-advisories.yml deleted file mode 100644 index 602fdfa7..00000000 --- a/.github/workflows/sync-upstream-advisories.yml +++ /dev/null @@ -1,78 +0,0 @@ -name: Sync upstream advisories - -on: - pull_request_review: - types: [submitted] - workflow_dispatch: - -jobs: - sync-upstream-advisories: - if: github.event_name != 'pull_request_review' || (github.event_name == 'pull_request_review' && github.event.review.state == 'approved') - runs-on: ubuntu-latest - - steps: - - name: Checkout repository (if on main) - if: github.event_name != 'pull_request_review' - uses: actions/checkout@v4 - with: - token: ${{ secrets.GITHUB_TOKEN }} - - - name: Checkout PR branch (if on PR) - if: github.event_name == 'pull_request_review' - uses: actions/checkout@v4 - with: - ref: ${{ github.event.pull_request.head.ref }} - repository: ${{ github.event.pull_request.head.repo.full_name }} - - - name: Setup Julia - uses: julia-actions/setup-julia@v1 - - - name: Setup Go - uses: actions/setup-go@v2 - 
with: - go-version: '^1.16.4' - env: - GONOPROXY: github.com/google/osv - - - name: Install Julia dependencies - run: | - julia --project=. -e 'using Pkg; Pkg.instantiate()' - - - name: Sync upstream advisories - id: sync - run: | - julia --project=. scripts/sync_upstream_advisories.jl - env: - GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} - NVD_API_KEY: ${{ secrets.NVD_API_KEY }} - - - name: Check for changes - id: git-check - run: | - [[ $(git ls-files --others --modified --exclude-standard) ]] && echo "changes=true" >> $GITHUB_OUTPUT || echo "no changes" - [[ $(git ls-files --others --exclude-standard) ]] && echo "additions=true" >> $GITHUB_OUTPUT || echo "no changes" - - - name: Auto-assign IDs if required - if: steps.git-check.outputs.additions == 'true' - run: | - go install github.com/google/osv/vulnfeeds/cmd/ids@latest - ids -dir=./packages -prefix DONOTUSEJLSEC -format json - env: - GONOPROXY: github.com/google/osv - - - name: Commit sync changes directly to the PR - if: steps.git-check.outputs.changes == 'true' && github.event_name == 'pull_request_review' - run: | - git config --local user.email "41898282+github-actions[bot]@users.noreply.github.com" - git config --local user.name "github-actions[bot]" - git add packages/ - git commit -m "${{ steps.sync.outputs.title }}" - git push - - - name: Create Pull Request if not already on a PR - if: steps.git-check.outputs.changes == 'true' && github.event_name != 'pull_request_review' - uses: peter-evans/create-pull-request@v7 - with: - title: ${{ steps.sync.outputs.title }} - body: ${{ steps.sync.outputs.body }} - branch: "sync-upstream-advisories" diff --git a/.github/workflows/validate.yaml b/.github/workflows/validate.yaml deleted file mode 100644 index da2ee99a..00000000 --- a/.github/workflows/validate.yaml +++ /dev/null @@ -1,37 +0,0 @@ -name: JSON Schema Validation -on: - pull_request: - push: - branches: - - main - workflow_dispatch: - -jobs: - validate: - runs-on: ubuntu-22.04 - steps: - - name: Checkout 
code - uses: actions/checkout@v4 - with: - fetch-depth: 0 # For git diff - - - name: Set up Go - uses: actions/setup-go@v5 - with: - go-version: 'stable' - - - name: Install osv-linter - run: | - git clone https://github.com/JuliaComputing/osv-schema.git -b juliahub/julia - cd ./osv-schema/tools/osv-linter - go build -o ../../../osv-linter ./cmd/osv - cd ../../.. - rm -rf osv-schema - - - name: Validate JSON - run: | - output=$(./osv-linter record lint packages 2>&1 || true) - if [ -n "$output" ]; then - echo "$output" - exit 1 - fi diff --git a/Project.toml b/Project.toml index 7e02fe88..87c37bd7 100644 --- a/Project.toml +++ b/Project.toml @@ -9,18 +9,20 @@ DataStructures = "864edb3b-99cc-5e75-8d2d-829cb0a9cfe8" Dates = "ade2ca70-3891-5945-98fb-dc099432e06a" HTTP = "cd3eb016-35fb-5094-929b-558a96fad6f3" JSON3 = "0f8b85d8-7281-11e9-16c2-39a750bddbf1" +Markdown = "d6f4376e-aef5-505a-96c1-9c027394607a" Pkg = "44cfe95a-1eb2-52ea-b672-e2afdf69b78f" TOML = "fa267f1f-6049-4f14-aa54-33bafae1ed76" Tar = "a4e569a6-e804-4fa4-b0f3-eef7a1d5b13e" TimeZones = "f269a46b-ccf7-5d73-abea-4c690281aa53" [compat] -julia = "1.11" CodecZlib = "0.7.8" DataStructures = "0.19.0" +Markdown = "1.11.0" Pkg = "1.11.0" Tar = "1.10.0" TimeZones = "1.22.0" +julia = "1.11" [extras] Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40" diff --git a/advisories/published/2025/DONOTUSEJLSEC-0000-3pwil8p118owt.md b/advisories/published/2025/DONOTUSEJLSEC-0000-3pwil8p118owt.md new file mode 100644 index 00000000..c3a38697 --- /dev/null +++ b/advisories/published/2025/DONOTUSEJLSEC-0000-3pwil8p118owt.md @@ -0,0 +1,73 @@ +```toml +schema_version = "1.7.3" +id = "DONOTUSEJLSEC-0000-3pwil8p118owt" +modified = 2025-09-22T21:31:21.347Z +aliases = ["GHSA-4g68-4pxg-mw93", "CVE-2025-52479"] + +[[affected]] +pkg = "HTTP" +ranges = ["<= 1.10.16"] +[[affected]] +pkg = "URIs" +ranges = ["< 1.6.0"] + +[database_specific.source] +id = "GHSA-4g68-4pxg-mw93" +modified = "2025-06-24T23:01:25Z" +published = "2025-06-24T23:01:25Z" 
+imported = "2025-09-22T21:31:21.283Z" +url = "https://api.github.com/repos/JuliaWeb/HTTP.jl/security-advisories/GHSA-4g68-4pxg-mw93" +html_url = "https://github.com/JuliaWeb/HTTP.jl/security/advisories/GHSA-4g68-4pxg-mw93" +``` + +# CR/LF injection in URIs.jl (also affects HTTP.jl) + +### Description + +The URIs.jl and HTTP.jl packages allowed the construction of URIs containing CR/LF characters. If user input was not otherwise escaped or protected, this can lead to a CRLF injection attack. + +With this simple Julia code, you can inject a custom header named `Foo` with the value `bar`: + +``` julia +import HTTP + +HTTP.get("http://localhost:1337/ HTTP/1.1\r\nFoo: bar\r\nbaz:") +``` + +The server will receive the request like this: + +``` +➜ ncat -klp 1337 +GET / HTTP/1.1 +Foo: bar <-- injected header! +baz: HTTP/1.1 +Host: locahost:1337 +Accept: */* +User-Agent: HTTP.jl/1.11.5 +Content-Length: 0 +Accept-Encoding: gzip +``` + +### Impact + +Inject headers or inject data to the request body and cause “HTTP response splitting”. + +### Patches + +Users of HTTP.jl should upgrade immediately to HTTP.jl v1.10.17. All prior versions are vulnerable. + +Users of URIs.jl should upgrade immediately to URIs.jl v1.6.0. All prior versions are vulnerable. + +The check for valid URIs is now in the URI.jl package, and the latest version of HTTP.jl incorporates that fix. + +### Workarounds + +Manually validate any URIs before passing them on to functions in this package. + +### References + +Fixed by: https://github.com/JuliaWeb/URIs.jl/pull/66 (which is available in URIs.jl v1.6.0). + +### Credits + +Thanks to *splitline* from the DEVCORE Research Team for reporting this issue. 
diff --git a/scripts/assign_ids_and_dates.jl b/scripts/assign_ids_and_dates.jl new file mode 100644 index 00000000..7d11e51c --- /dev/null +++ b/scripts/assign_ids_and_dates.jl @@ -0,0 +1,79 @@ +using AdvisoryDB +using Dates: Dates, DateTime +using TimeZones: TimeZones, ZonedDateTime + +function main() + all_advisories_path = joinpath(@__DIR__, "..", "advisories") + published_advisories_path = joinpath(@__DIR__, "..", "advisories", "published") + + # First look through all advisories for the latest identifier + # TODO: We could use a lock/mutex of the form of a sentinel upstream branch here + year = Dates.year(Dates.now(Dates.UTC)) + last_id = 0 + for (root, _, files) in walkdir(all_advisories_path), file in files + AdvisoryDB.is_jlsec_advisory_path(joinpath(root, file)) || continue + prefix = string(AdvisoryDB.PREFIX, "-", year, "-") + startswith(file, prefix) || continue + last_id = max(last_id, something(tryparse(Int, chopprefix(chopsuffix(file, ".md"), prefix)), 0)) + end + + # Then go through the published advisories and ensure all IDs are assigned + # and that the dates accurately match the commit dates + n_updated = 0 + for (root, _, files) in walkdir(published_advisories_path), file in files + path = joinpath(root, file) + AdvisoryDB.is_jlsec_advisory_path(path) || continue + advisory = AdvisoryDB.parsefile(path) + updated = false + if startswith(advisory.id, string(AdvisoryDB.PREFIX, "-0000-")) + last_id += 1 + advisory.id = string(AdvisoryDB.PREFIX, "-", year, "-", last_id) + updated = true + newpath = joinpath(root, string(advisory.id, ".md")) + @info "moving $file to $(advisory.id).md" + success(`git mv $path $newpath`) || run(`mv $path $newpath`) + path = newpath + modified = published = Dates.now(Dates.UTC) + else + git_modified = readchomp(`git log -1 --format="%ad" --date=iso-strict -- $path`) + modified = isempty(git_modified) ? 
Dates.now(Dates.UTC) : DateTime(ZonedDateTime(git_modified), Dates.UTC) + git_published = readchomp(`git log --format="%ad" --date=iso-strict --diff-filter=A -- $path`) + published = isempty(git_published) ? modified : DateTime(ZonedDateTime(git_published), Dates.UTC) + end + if something(advisory.withdrawn, typemin(DateTime)) > advisory.modified + # If the withdrawn date is _after_ the previously stored modified time, then it's a new modification + # The effective time of the widthdraw will be upon publication to this repo — the new modified time + advisory.withdrawn = modified + advisory.modified = modified + updated = true + end + if abs(advisory.modified - modified) > Dates.Minute(10) + advisory.modified = modified + updated = true + end + if abs(something(advisory.published, DateTime(0)) - published) > Dates.Minute(10) + advisory.published = published + updated = true + end + + if updated + # TODO: we could do better by applying a git diff that only includes the semantically meaningful parts + @info "writing $(advisory.id)" + open(path, "w") do io + print(io, advisory) + end + n_updated += 1 + end + end + + @info "updated $n_updated advisories" + io = haskey(ENV, "GITHUB_OUTPUT") ? 
open(ENV["GITHUB_OUTPUT"], "w") : stdout + println(io, "changes=", n_updated > 0) + if n_updated > 0 + println(io, "title=[automated] assign id/timestamp $n_updated advisories") + end +end + +if abspath(PROGRAM_FILE) == @__FILE__ + main() +end diff --git a/scripts/export_osv.jl b/scripts/export_osv.jl new file mode 100644 index 00000000..6bc34744 --- /dev/null +++ b/scripts/export_osv.jl @@ -0,0 +1,21 @@ +using AdvisoryDB: AdvisoryDB +using JSON3: JSON3 +using Dates: Dates + +function main() + published_advisories_path = joinpath(@__DIR__, "..", "advisories", "published") + output = joinpath(@__DIR__, "..", "osv") + for (root, _, files) in walkdir(published_advisories_path), file in files + advisory = AdvisoryDB.parsefile(joinpath(root, file)) + osv = AdvisoryDB.to_osv_dict(advisory) + outpath = joinpath(output, string(Dates.year(advisory.published))) + mkpath(outpath) + open(joinpath(outpath, advisory.id * ".json"), "w") do io + JSON3.write(io, osv) + end + end +end + +if abspath(PROGRAM_FILE) == @__FILE__ + main() +end diff --git a/scripts/search_upstream_advisories.jl b/scripts/search_upstream_advisories.jl index 87b277ce..514fd26e 100644 --- a/scripts/search_upstream_advisories.jl +++ b/scripts/search_upstream_advisories.jl @@ -2,55 +2,20 @@ # database: GitHub's GHSA, NIST/NVD's CVE, or ESINA's EUVD. using AdvisoryDB: AdvisoryDB, NVD, EUVD, GitHub, VersionRange using TOML: TOML - +using Dates: Dates function main() - upstream_advisories_file = joinpath(@__DIR__, "..", "upstream_advisories.toml") - upstream_advisories = isfile(upstream_advisories_file) ? 
TOML.parsefile(upstream_advisories_file) : Dict{String, Any}() input = get(ARGS, 1, "") - advisories = Dict{Tuple{String,String},Any}() - advisory_details = Dict{Tuple{String,String},Any}() - advisory_sources = Dict{Tuple{String,String},Any}() + advisories = Dict{String,Any}() info = Dict{String,Any}() info["haystack"] = input info["haystack_total"] = String[] - info["version_trouble"] = String[] - info["skips"] = String[] - specific_advisory_import = false - if startswith(input, "CVE") - vuln = NVD.fetch_cve(input) - pkgs = NVD.related_julia_packages(vuln) - push!(info["haystack_total"], "1 advisory from NVD") - specific_advisory_import = true - for (pkg, versioninfo, whys) in pkgs - advisories[(vuln.cve.id, pkg)] = versioninfo - advisory_details[(vuln.cve.id, pkg)] = whys - advisory_sources[(vuln.cve.id, pkg)] = "[$(vuln.cve.id) (NVD)](https://nvd.nist.gov/vuln/detail/$(vuln.cve.id))" - end - elseif startswith(input, "EUVD") - vuln = EUVD.fetch_enisa(input) - pkgs = EUVD.related_julia_packages(vuln) - push!(info["haystack_total"], "1 advisory from EUVD") - specific_advisory_import = true - vuln_id = EUVD.vuln_id(vuln) - for (pkg, versioninfo, whys) in pkgs - advisories[(vuln_id, pkg)] = versioninfo - advisory_details[(vuln.cve.id, pkg)] = whys - advisory_sources[(vuln.cve.id, pkg)] = "[$(vuln.cve.id) (NVD)](https://nvd.nist.gov/vuln/detail/$(vuln.cve.id))" - end - elseif endswith(input, r"GHSA-\w{4}-\w{4}-\w{4}") - vuln = GitHub.fetch_ghsa(input) - pkgs = GitHub.related_julia_packages(vuln) - push!(info["haystack_total"], "1 advisory from GitHub") - specific_advisory_import = true - vuln_id = GitHub.vuln_id(vuln, input) - for (pkg, versioninfo, whys) in pkgs - advisories[(vuln_id, pkg)] = versioninfo - advisory_details[(vuln_id, pkg)] = whys - advisory_sources[(vuln_id, pkg)] = "[$vuln_id (GHSA)]($(vuln.html_url))" - end + if startswith(input, "CVE") || startswith(input, "EUVD") || endswith(input, r"GHSA-\w{4}-\w{4}-\w{4}") + advisories[input] = 
AdvisoryDB.fetch_advisory(input) + push!(info["haystack_total"], "1 advisory ($input)") else - # A larger joint EUVD/NVD search, either by vendor:product or time + # A larger joint EUVD/NVD search, either by vendor:product or time. It's helpful to gather as + # many vulns as possible with these higher-level searches before going one-by-one due to API limits if startswith(input, "--project") _, proj= split(input, [' ','=']) vendorproducts = [k for (k,v) in AdvisoryDB.upstream_projects_by_vendor_product() if v == proj] @@ -63,178 +28,166 @@ function main() end elseif contains(input, ":") _, vendor, product = rsplit(":"*input, ":", limit=3, keepempty=true) - info["haystack"] = "search $vendor/$product" nvds, euvds = AdvisoryDB.fetch_product_matches(vendor, product) else - info["haystack"] = "searching recent NVD changes and EUVD publications" + info["haystack"] = "recent NVD/EUVD changes/publications" nvds = NVD.fetch_nvd_vulnerabilities() euvds = EUVD.fetch_vulnerabilities() end + found_nvds = length(nvds) + found_euvds = length(euvds) + additional_nvds = 0 + additional_euvds = 0 joint_ids = intersect(filter(startswith("CVE"), EUVD.vuln_id.(euvds)), (x->x.cve.id).(nvds)) - push!(info["haystack_total"], "$(length(nvds)) advisories from NVD") - for vuln in nvds - pkgs = NVD.related_julia_packages(vuln) - had_trouble = false - for (pkg, versioninfo, whys) in pkgs - advisories[(vuln.cve.id, pkg)] = versioninfo - advisory_details[(vuln.cve.id, pkg)] = whys - advisory_sources[(vuln.cve.id, pkg)] = "[$(vuln.cve.id) (NVD)](https://nvd.nist.gov/vuln/detail/$(vuln.cve.id))" - had_trouble |= any(startswith("fail"), whys) || versioninfo == ["*"] - end - if had_trouble - if vuln.cve.id in joint_ids - euvd = euvds[EUVD.vuln_id.(euvds) .== vuln.cve.id][1] - euvd_pkgs = EUVD.related_julia_packages(euvd) - for (pkg, versioninfo, whys) in euvd_pkgs - (haskey(advisories, (vuln.cve.id, pkg)) && advisories[(vuln.cve.id, pkg)] != ["*"]) && continue - advisories[(vuln.cve.id, pkg)] = 
versioninfo - advisory_details[(vuln.cve.id, pkg)] = whys - advisory_sources[(vuln.cve.id, pkg)] = "[EUVD](https://euvd.enisa.europa.eu/vulnerability/$(euvd.id))" - end - end - end - end - # Add remaining EUVDs - push!(info["haystack_total"], "$(length(euvds)) advisories from EUVD") - nvd_fixups = 0 + missing_nvds = found_euvds - length(joint_ids) for vuln in euvds + # EUVD is largely useful in having a sloppier search and some more-recently-populated + # product/version data. Always trust NVD first, only falling back to EUVD. This finds + # applicable advisories and adds them to the NVD pile. Ideally, this would always add + # all the missing NVDs here, but that's quite expensive at 6 seconds per fetch. vuln_id = EUVD.vuln_id(vuln) + @info "EUVD $vuln_id" + startswith(vuln_id, "CVE") || continue vuln_id in joint_ids && continue - pkgs = EUVD.related_julia_packages(vuln) - for (pkg, versioninfo, whys) in pkgs - advisories[(vuln_id, pkg)] = versioninfo - advisory_details[(vuln_id, pkg)] = whys - advisory_sources[(vuln_id, pkg)] = "[$vuln_id (EUVD)](https://euvd.enisa.europa.eu/vulnerability/$(vuln.id))" + # Only add the missing NVD advisory if there are only a few (6 minutes worth) or we know it's relevant + if missing_nvds <= 100 || AdvisoryDB.is_vulnerable(EUVD.advisory(vuln)) + sleep(6) + try + push!(nvds, NVD.fetch_cve(vuln_id)) + additional_nvds += 1 + catch ex + @info "failed to fetch NVD $vuln_id" ex + end end end - end - n_created = 0 - n_updated = 0 - for ((advisory_id, pkg), version_ranges) in advisories - # TODO: Skip known aliases where **we** are the originating advisory database - saved_advisory = get!(upstream_advisories, advisory_id, Dict{String,Any}()) - if haskey(saved_advisory, pkg) - if saved_advisory[pkg] isa String - push!(info["skips"], "$advisory_id: skipped because it is marked '$(saved_advisory[pkg])'") - continue - else - # We haved saved information. 
Avoid overwriting it _unless_ we very specifically asked for this very advisory explicitly - if !specific_advisory_import - # Only report this if there's a substantive change though - saved_advisory[pkg] != version_ranges && push!(info["skips"], "$advisory_id: skipped overwriting the existing $pkg=>$(saved_advisory[pkg]) with $version_ranges because we didn't explicitly ask for this advisory") - continue + for vuln in nvds + @info "NVD $(vuln.cve.id)" + adv = NVD.advisory(vuln) + if isempty(adv.affected) || !all(AdvisoryDB.has_upper_bound, adv.affected) + # See if we can get a tighter answer with EUVD data + euvd = get(euvds[EUVD.vuln_id.(euvds) .== vuln.cve.id], 1) do + additional_euvds += 1 + try EUVD.fetch_enisa(vuln.cve.id) catch _; nothing end + end + isnothing(euvd) && continue + @info "EUVD $(euvd.id)" + euvd_adv = EUVD.advisory(euvd) + if !isempty(euvd_adv.affected) && all(AdvisoryDB.has_upper_bound, euvd_adv.affected) + adv.affected = euvd_adv.affected + adv.database_specific["affected_source"] = euvd_adv.database_specific["source"] end end - n_updated += 1 - else - n_created += 1 + if AdvisoryDB.is_vulnerable(adv) + advisories[vuln.cve.id] = adv + end end - # This is a partial order sort, but these should be non-overlapping - saved_advisory[pkg] = sort(version_ranges, by=x->something(tryparse(VersionRange{VersionNumber}, x), x), lt=<) + push!(info["haystack_total"], "$found_nvds (+$additional_nvds) advisories from NVD") + push!(info["haystack_total"], "$found_euvds (+$additional_euvds) from EUVD") end - # Nice logging information for the possible + # Now create or update the found advisories: + n_modified = 0 + for (id, advisory) in advisories + @info "JLSEC for $id" + dir = mkpath(joinpath(@__DIR__, "..", "advisories", "published", string(Dates.year(Dates.now())))) + file = joinpath(dir, advisory.id * ".md") + n_modified += isfile(file) + open(file, "w") do io + AdvisoryDB.print(io, advisory) + end + end + n_total = length(advisories) + n_created = 
n_total - n_modified + + # Nice logging information for the possible pull request io = open(get(ENV, "GITHUB_OUTPUT", tempname()), "a+") - verb = n_updated > 0 && n_created == 0 ? "Update" : - n_updated == 0 && n_created > 0 ? "Newly tag" : "Modify" - unique_pkgs = unique(last.(keys(advisories))) + verb = n_modified > 0 && n_created == 0 ? "Update" : + n_modified == 0 && n_created > 0 ? "Publish" : "Publish and update" + unique_pkgs = unique(Iterators.flatten(AdvisoryDB.vulnerable_packages.(values(advisories)))) pkg_str = length(unique_pkgs) <= 3 ? join(unique_pkgs, ", ", " and ") : "$(length(unique_pkgs)) packages" - n_total = n_updated+n_created advisory_str = n_total == 1 ? "advisory" : "advisories" println(io, "title=[automatic] $verb $n_total $advisory_str for $pkg_str") println(io, "body<any(startswith("fail"), advisory_details[k]), advisories) + divide(f, x) = return (filter(f, x), filter(!f, x)) - failed_ids = unique(id for (id, pkg) in keys(failed_advisories)) - !isempty(failed_ids) && println(io, "### $(length(unique(failed_ids))) advisories had obvious failures\n\nThese advisories seem to apply to a Julia package but had trouble identifying exactly how and at which versions.") - for id in failed_ids - pkgs = last.(filter(==(id)∘first, keys(failed_advisories))) + # Now break the identified advisories into three sections. 
First, advisories for which failed to parse the upstream version: + failed_advisories, advisories = divide(((_,v),)->any(isnothing, (tryparse(AdvisoryDB.VersionRange, r) for entry in v.affected for (_,source_map) in entry.source_mapping for (r, _) in source_map)), advisories) + !isempty(failed_advisories) && println(io, "### $(length(failed_advisories)) advisories failed to parse the source version range\n\nThese advisories seem to apply to a Julia package but had trouble identifying exactly how and at which versions.") + for (id, adv) in sort(failed_advisories) + pkgs = AdvisoryDB.vulnerable_packages(adv) println(io, "* $id for packages: ", join("**" .* pkgs .* "**", ", ", ", and ")) - for pkg in pkgs - println(io, " * **$pkg**, using data from ", advisory_sources[(id, pkg)]) - println(io, " * " * join(advisory_details[(id, pkg)], "\n * ")) + for entry in adv.affected + println(io, " * **$(entry.pkg)**, matching `", join(keys(entry.source_mapping), "`, `", "`, and `"), "`. Failures include:") + for (source, version_map) in entry.source_mapping + for (v, _) in version_map + isnothing(tryparse(AdvisoryDB.VersionRange, v)) || continue + println(io, " * `", source, "` version `", v, "`") + end + end end end - !isempty(failed_ids) && println(io) + !isempty(failed_advisories) && println(io) # Next advisories for which all versions apply - star_advisories, advisories = divide(==(["*"])∘last, advisories) - star_ids = unique(id for (id, pkg) in keys(star_advisories)) - !isempty(star_ids) && println(io, "### $(length(unique(star_ids))) advisories apply to all registered versions of a package\n\nThese advisories had no obvious failures but computed a range without bounds.") - for id in star_ids - pkgs = last.(filter(==(id)∘first, keys(star_advisories))) + star_advisories, advisories = divide(((_,x),)->any(entry->entry.ranges==[VersionRange{VersionNumber}("*")], x.affected), advisories) + !isempty(star_advisories) && println(io, "### $(length(star_advisories)) advisories 
apply to all registered versions of a package\n\nThese advisories had no obvious failures but computed a range without bounds.") + for (id, adv) in sort(star_advisories) + pkgs = AdvisoryDB.vulnerable_packages(adv) println(io, "* $id for packages: ", join("**" .* pkgs .* "**", ", ", ", and ")) - for pkg in pkgs - println(io, " * **$pkg**, using data from ", advisory_sources[(id, pkg)]) - println(io, " * " * join(advisory_details[(id, pkg)], "\n * ")) + for entry in adv.affected + println(io, " * **$(entry.pkg)**, matching `", join(keys(entry.source_mapping), "`, `", "`, and `"), "`. Unbounded mappings are:") + for (source, version_map) in entry.source_mapping + for (v, r) in version_map + r == [VersionRange{VersionNumber}("*")] || continue + println(io, " * `", source, "` version `", v, "`") + end + end end end - !isempty(star_ids) && println(io) + !isempty(star_advisories) && println(io) # Next advisories for which there's an unbounded upper range - unbounded_advisories, advisories = divide(((_,v),)->any(!AdvisoryDB.has_upper_bound∘VersionRange, v), advisories) - unbounded_ids = unique(id for (id, pkg) in keys(unbounded_advisories)) - !isempty(unbounded_ids) && println(io, "### $(length(unique(unbounded_ids))) advisories apply to all registered versions of a package\n\nThese advisories had no obvious failures but computed a range without bounds.") - for id in unbounded_ids - pkgs = last.(filter(==(id)∘first, keys(unbounded_advisories))) + unbounded_advisories, advisories = divide(((_,x),)->any(entry->any(!AdvisoryDB.has_upper_bound, entry.ranges), x.affected), advisories) + !isempty(unbounded_advisories) && println(io, "### $(length(unbounded_advisories)) advisories apply to the latest version of a package and do not have a patch") + for (id, adv) in sort(unbounded_advisories) + pkgs = AdvisoryDB.vulnerable_packages(adv) println(io, "* $id for packages: ", join("**" .* pkgs .* "**", ", ", ", and ")) - for pkg in pkgs - println(io, " * **$pkg** computed 
`$(repr(unbounded_advisories[(id, pkg)]))` using data from ", advisory_sources[(id, pkg)]) - println(io, " * " * join(advisory_details[(id, pkg)], "\n * ")) + for entry in adv.affected + println(io, " * **$(entry.pkg)**, matching `", join(keys(entry.source_mapping), "`, `", "`, and `"), "`. Unbounded mappings are:") + for (source, version_map) in entry.source_mapping + for (v, r) in version_map + AdvisoryDB.has_upper_bound(r) && continue + println(io, " * `", source, "` version `", v, "` mapped to `[", join(string.(r), ", "), "]`") + end + end end end - !isempty(unbounded_ids) && println(io) + !isempty(unbounded_advisories) && println(io) # And finally all remaining advisories. - remaining_ids = unique(id for (id, pkg) in keys(advisories)) - !isempty(advisories) && println(io, "### $(length(unique(advisories))) advisories found concrete vulnerable ranges\n\n") - for id in remaining_ids - pkgs = last.(filter(==(id)∘first, keys(advisories))) - println(io, "* $id for packages: ", join("**" .* pkgs .* "**", ", ", ", and ")) - for pkg in pkgs - println(io, " * **$pkg** computed `$(repr(advisories[(id, pkg)]))` using data from ", advisory_sources[(id, pkg)]) - println(io, " * " * join(advisory_details[(id, pkg)], "\n * ")) + !isempty(advisories) && println(io, "### $(length(advisories)) advisories found concrete vulnerable ranges\n\n") + for (id, adv) in sort(advisories) + pkgs = AdvisoryDB.vulnerable_packages(adv) + html_url = get(adv.database_specific, "affected_source", adv.database_specific["source"])["html_url"] + println(io, "* [$id]($html_url) for packages: ", join("**" .* pkgs .* "**", ", ", ", and ")) + for entry in adv.affected + println(io, " * **$(entry.pkg)** at `[$(join('"'.*string.(entry.ranges).*'"', ", "))]`, matching `", join(keys(entry.source_mapping), "`, `", "`, and `"), "`") end end !isempty(advisories) && println(io) - if !isempty(info["skips"]) - println(io, "There were $(length(info["skips"])) advisories skipped for the following reasons:") - 
println(io, join("* " .* info["skips"], "\n")) - println(io) - end - println(io, """ - The version ranges tagged here should be confirmed or adjusted, and if the particular advisory is not applicable to a given package, its value should instead be a string detailing the rationale. - BODY_EOF""") + println(io, "BODY_EOF") seekstart(io) foreach(println, eachline(io)) # Also log to stdout - - open(upstream_advisories_file,"w+") do f - println(f, """ - ### This file contains a table of advisories and the packages to which they apply ### - # - # The schema is relatively simple: - # - Advisories are the top-level keys by ID - # - if multiple aliases exist, prefer CVE over GHSA over EUVD - # - Each advisory entry has Julia package names (without .jl suffix) as its keys - # - The value for a particular advisory[package] is either: - # - A list of GitHub Security Advisory style version ranges to which the advisory applies - # - A string detailing _why_ a given advisory does not apply - # - # This file is semi-automatically updated by suggested pull requests. By marking a package with an - # exception string, you prevent further automatic updates from suggesting the advisory apply. 
- """) - TOML.print(f, upstream_advisories, sorted=true) - end end -main() +if abspath(PROGRAM_FILE) == @__FILE__ + main() +end diff --git a/scripts/sync_upstream_advisories.jl b/scripts/sync_upstream_advisories.jl deleted file mode 100644 index c717a2e6..00000000 --- a/scripts/sync_upstream_advisories.jl +++ /dev/null @@ -1,53 +0,0 @@ -#!/usr/bin/env julia - -using AdvisoryDB: AdvisoryDB, NVD, EUVD, GitHub -using JSON3: JSON3 -using TOML: TOML - -function main() - upstream_advisories = TOML.parsefile(joinpath(@__DIR__, "..", "upstream_advisories.toml")) - n_published = 0 - n_updated = 0 - used_advisories = Set{String}() - used_pkgs = Set{String}() - packages_dir = joinpath(@__DIR__, "..", "packages", "General") - for (advisory_id, applicable_packages) in upstream_advisories - for (pkg, versioninfo) in applicable_packages - versioninfo isa String && continue - osv = AdvisoryDB.fetch_advisory(advisory_id, pkg => versioninfo) - jlsec_id = AdvisoryDB.corresponding_jlsec_id(pkg, advisory_id, get(osv, :aliases, String[])) - if isnothing(jlsec_id) - AdvisoryDB.create!(pkg, osv) - n_published += 1 - push!(used_pkgs, pkg) - push!(used_advisories, advisory_id) - else - did_update = AdvisoryDB.update!(joinpath(packages_dir, pkg, jlsec_id * ".json"), osv) - if did_update - n_updated += 1 - push!(used_pkgs, pkg) - push!(used_advisories, advisory_id) - end - end - end - end - n_synced = length(used_advisories) - @info "published: $n_published" - @info "updated: $n_updated" - @info "for packages: $(join(used_pkgs, ", ", ", and "))" - @info "and advisories: $(join(used_advisories, ", ", ", and "))" - io = open(get(ENV, "GITHUB_OUTPUT", tempname()), "a+") - publish_str = n_published > 0 ? "publish $n_published " : "" - update_str = n_updated > 0 ? "$(n_published > 0 ? "and " : "")update $n_updated" : "" - advisory_str = n_published + n_updated == 0 ? "no changes???" : n_published + n_updated == 1 ? "advisory" : "advisories" - pkg_str = length(used_pkgs) <= 3 ? 
join(used_pkgs, ", ", ", and ") : "$(length(used_pkgs)) packages" - println(io, "title=[automatic] sync $n_synced upstream advisories for $pkg_str: $publish_str$update_str $advisory_str") - println(io, """ - body< length(products) - throw(ArgumentError("got more vendors than products")) + elseif length(vendors) != 1 && length(products) == 1 + products = fill(products[1], length(vendors)) + else + products = vcat(products, fill(Dict(), length(vendors) - length(products))) end zip(vendors,products) end -vendor_product_versions(vuln) = [(get(get(v, :vendor, Dict()), :name, ""), get(get(p, :product, Dict), :name, ""), get(p, :product_version, "")) for (v,p) in vpzip(vuln.enisaIdVendor, vuln.enisaIdProduct)] +function vendor_product_versions(vuln) + [(get(get(v, :vendor, Dict()), :name, ""), get(get(p, :product, Dict()), :name, ""), get(p, :product_version, "")) for (v,p) in vpzip(vuln.enisaIdVendor, vuln.enisaIdProduct)] +end function build_headers() headers = [ @@ -98,6 +104,11 @@ function fetch_enisa(id) return fetch_page(string(API_BASE, "/enisaid?id=", id), headers) end +function fetch_advisory(id) + headers = build_headers() + return fetch_page(string(API_BASE, "/advisory?id=", id), headers) +end + function fetch_product_matches(vendor, product) headers = build_headers() @@ -120,13 +131,13 @@ function fetch_vulnerabilities() return fetch_all_pages(string(API_BASE, "/search"), headers, params) end -related_julia_packages(vuln) = AdvisoryDB.related_julia_packages(vuln.description, vendor_product_versions(vuln)) +affected_julia_packages(vuln) = AdvisoryDB.affected_julia_packages(vuln.description, vendor_product_versions(vuln)) function filter_julia_vulnerabilities(vulnerabilities) julia_vulnerabilities = [] for vuln in vulnerabilities - if !isempty(related_julia_packages(vuln)) + if !isempty(affected_julia_packages(vuln)) push!(julia_vulnerabilities, vuln) end end @@ -137,92 +148,35 @@ end vuln_id(vuln) = get(filter(startswith("CVE-"), split(get(vuln, :aliases, ""))), 
1, get(filter(startswith("GHSA-"), split(get(vuln, :aliases, ""))), 1, vuln.id)) -parse_euvd_datetime(str) = string(DateTime(str, dateformat"u d, y, H:M:S p")) * "Z" # TODO: WHAT'S THE TIMEZONE?? -function convert_to_osv(vuln, package_versioninfo = nothing) - osv = Dict{String, Any}() - - # Required OSV fields - osv["schema_version"] = "1.7.2" - osv["id"] = vuln.id - osv["modified"] = parse_euvd_datetime(vuln.dateUpdated) - - # Optional fields - if exists(vuln, :datePublished) - osv["published"] = parse_euvd_datetime(vuln.datePublished) - end - # No withdrawn information? - # Aliases are typically missing GHSAs; those could come from references. - aliases = String[] - if exists(vuln, :aliases) - append!(aliases, strip.(split(vuln.aliases, "\n"; keepempty=false))) - end - if !isempty(aliases) - osv["aliases"] = aliases - end - # No upstream information - # No related tags (beyond references) - - # Summary and details from descriptions, using English only - if exists(vuln, :description) - description = vuln.description - flat_description = replace(description, r"\s+"=>" ") - # Use first sentence as summary, full text as details - if length(flat_description) > 100 - summary_end = findfirst(". 
", flat_description) - if summary_end !== nothing - osv["summary"] = flat_description[1:summary_end[1]] - osv["details"] = description +parse_euvd_datetime(str) = string(DateTime(str, dateformat"u d, y, H:M:S p")) * "Z" # TODO: confirm timezone is UTC +function advisory(vuln) + affected = affected_julia_packages(vuln) + upstream_type = Dict("alias"=>:aliases,"upstream"=>:upstream)[get(unique(map(x->x.source_type, affected)), 1, "alias")] + + return Advisory(; + # withdrawn -- not structured; it's unstructured plaintext in the description :( + upstream_type => String[vuln.id, strip.(split(get(vuln, :aliases, ""), "\n"; keepempty=false))...], + # related -- nothing structured + summary = if exists(vuln, :description) extract_summary(vuln.description) end, + details = get(vuln, :description, nothing), + severity = if exists(vuln, :baseScoreVector) && exists(vuln, :baseScoreVersion) + Severity[Severity(type = "CVSS_V"*vuln.baseScoreVersion[1], score = string(vuln.baseScoreVector))] else - osv["summary"] = flat_description[1:min(100, length(description))] * "..." - osv["details"] = description - end - else - osv["summary"] = flat_description - osv["details"] = description - end - end - - # CVSS severity information - if exists(vuln, :baseScoreVector) && exists(vuln, :baseScoreVersion) - osv["severity"] = [Dict( - "type" => "CVSS_V"*vuln.baseScoreVersion[1], - "score" => string(vuln.baseScoreVector) - )] - end - - # Affected _Julia_ packages, connecting CPE data to the package. - package_versioninfos = isnothing(package_versioninfo) ? 
related_julia_packages(vuln) : [package_versioninfo] - affected = [] - for (package, versioninfo) in package_versioninfos - affected_entry = Dict{String, Any}() - affected_entry["package"] = Dict( - "ecosystem" => "Julia", - "name" => package + Severity[] + end, + affected = affected, + references = [Reference(url=ref) for ref in split(get(vuln, :references, ""), "\n"; keepempty=false)], + # credits -- not structured + database_specific = Dict{String,Any}("source" => Dict( + "id" => vuln.id, + "modified" => if exists(vuln, :dateUpdated) parse_euvd_datetime(vuln.dateUpdated) end, + "published" => if exists(vuln, :datePublished) parse_euvd_datetime(vuln.datePublished) end, + "imported" => AdvisoryDB.now(), + "url" => string(API_BASE, "/enisaid?id=", vuln.id), + "html_url" => string("https://euvd.enisa.europa.eu/vulnerability/", vuln.id) + ) ) - range_events = AdvisoryDB.osv_events(AdvisoryDB.VersionRange{VersionNumber}.(versioninfo)) - affected_entry["ranges"] = [Dict("type"=>"SEMVER", "events"=>range_events)] - push!(affected, affected_entry) - end - if !isempty(affected) - osv["affected"] = affected - end - - # References - references = [] - if haskey(vuln, :references) - for ref in split(vuln.references, "\n") - push!(references, Dict( - "type" => "WEB", - "url" => ref - )) - end - end - if !isempty(references) - osv["references"] = references - end - - # No structured credits - return osv + ) end end diff --git a/src/GitHub.jl b/src/GitHub.jl index d63ff15b..26574b95 100644 --- a/src/GitHub.jl +++ b/src/GitHub.jl @@ -5,7 +5,7 @@ using JSON3 using Dates using DataStructures: OrderedDict as Dict # watch out -using ..AdvisoryDB: AdvisoryDB, exists, VersionRange, VersionString +using ..AdvisoryDB: AdvisoryDB, exists, VersionRange, VersionString, Credit, Reference, Severity, Advisory const GITHUB_API_BASE = "https://api.github.com" const DEFAULT_HOURS = 25 @@ -225,104 +225,60 @@ function vendor_product_versions(advisory) return vpv end 
-related_julia_packages(advisory) = AdvisoryDB.related_julia_packages(get(advisory, :description, ""), vendor_product_versions(advisory)) +affected_julia_packages(advisory) = AdvisoryDB.affected_julia_packages(get(advisory, :description, ""), vendor_product_versions(advisory)) -function convert_to_osv(advisory, package_versioninfo = nothing) - osv = Dict{String, Any}() +function advisory(vuln) + affected = affected_julia_packages(vuln) + upstream_type = Dict("alias"=>:aliases,"upstream"=>:upstream)[get(unique(map(x->x.source_type, affected)), 1, "alias")] - osv["schema_version"] = "1.7.2" - osv["id"] = "DONOTUSEJLSEC-0000" - osv["published"] = AdvisoryDB.now() - osv["modified"] = AdvisoryDB.now() + # Aliases are in multiple places: + aliases = String[vuln.ghsa_id] + exists(vuln, :cve_id) && push!(aliases, vuln.cve_id) + exists(vuln, :identifiers) && append!(aliases, getproperty.(vuln.identifiers, :value)) + unique!(aliases) - if exists(advisory, :withdrawn_at) - osv["withdrawn"] = advisory.withdrawn_at - end - aliases = [advisory.ghsa_id] - if exists(advisory, :cve_id) - push!(aliases, advisory.cve_id) - end - osv["aliases"] = aliases - - # No upstream information is represented in GHSA - # No structured related information is represented in GHSA - if exists(advisory, :summary) - osv["summary"] = advisory.summary - end - if exists(advisory, :description) - osv["details"] = advisory.description - end # GitHub stores severities all over the place - severities = [] - if exists(advisory, :cvss_severities, :cvss_v3, :vector_string) - push!(severities, Dict( - "type" => "CVSS_V3", - "score" => advisory.cvss_severities.cvss_v3.vector_string - )) + severities = Severity[] + if exists(vuln, :cvss_severities, :cvss_v3, :vector_string) + push!(severities, Severity("CVSS_V3", vuln.cvss_severities.cvss_v3.vector_string)) end - if exists(advisory, :cvss_severities, :cvss_v4, :vector_string) - push!(severities, Dict( - "type" => "CVSS_V4", - "score" => 
advisory.cvss_severities.cvss_v4.vector_string - )) + if exists(vuln, :cvss_severities, :cvss_v4, :vector_string) + push!(severities, Severity("CVSS_V4", vuln.cvss_severities.cvss_v4.vector_string)) end - if exists(advisory, :cvss, :vector_string) && - !(advisory.cvss.vector_string in getindex.(severities, "score")) - cvss_ver = match(r"^CVSS:([34])", vs) - isnothing(cvss_ver) && error("Unknown CVSS vector string: $(advisory.cvss.vector_string) in $(advisory.ghsa_id)") - push!(severities, Dict( - "type" => "CVSS_V$cvss_ver", - "score" => advisory.cvss.vector_string - )) - end - if !isempty(severities) - osv["severity"] = severities + if exists(vuln, :cvss, :vector_string) + push!(severities, Severity(vuln.cvss.vector_string)) end - package_versioninfos = isnothing(package_versioninfo) ? related_julia_packages(advisory) : [package_versioninfo] - affected = [] - for (package, versioninfo) in package_versioninfos - affected_entry = Dict{String, Any}() - affected_entry["package"] = Dict( - "ecosystem" => "Julia", - "name" => package - ) - range_events = AdvisoryDB.osv_events(AdvisoryDB.VersionRange{VersionNumber}.(versioninfo)) - affected_entry["ranges"] = [Dict("type"=>"SEMVER", "events"=>range_events)] - push!(affected, affected_entry) - end - if !isempty(affected) - osv["affected"] = affected + # Credits are also messy + credits = Credit[] + for c in something(get(vuln, :credits, nothing), []) + name = something(get(c, :login, nothing), get(get(c, :user, Dict()), :login, nothing), missing) + ismissing(name) && continue + contact = get(get(c, :user, Dict()), :html_url, nothing) + type = get(c, :type, nothing) + push!(credits, Credit(name, contact === nothing ? 
String[] : [contact], type)) end - references = [] - if haskey(advisory, :html_url) - push!(references, Dict( - "type" => "ADVISORY", - "url" => advisory.html_url - )) - end - if !isempty(references) - osv["references"] = references - end - - osv["database_specific"] = Dict{String,Any}() - osv["database_specific"]["source"] = Dict("id" => advisory.ghsa_id, "url" => advisory.url) - - githubdb = Dict() - if exists(advisory, :cwe_ids) - githubdb["cwe_ids"] = collect(advisory.cwe_ids) - end - if exists(advisory, :severity) - githubdb["severity"] = advisory.severity - end - if exists(advisory, :state) - githubdb["state"] = advisory.state - end - if !isempty(githubdb) - osv["database_specific"]["source_database"] = githubdb - end - return osv + return Advisory(; + withdrawn = exists(vuln, :withdrawn_at) ? Dates.now(Dates.UTC) : nothing, + upstream_type => aliases, + # related -- nothing structured + summary = get(vuln, :summary, nothing), + details = get(vuln, :description, nothing), + severity = severities, + affected = affected, + references = [Reference(url=ref) for ref in something(get(vuln, :references, nothing), [])], + credits = credits, + database_specific = Dict{String,Any}("source" => Dict( + "id" => vuln.ghsa_id, + "modified" => vuln.updated_at, + "published" => vuln.published_at, + "imported" => AdvisoryDB.now(), + "url" => vuln.url, + "html_url" => vuln.html_url + ) + ) + ) end diff --git a/src/NVD.jl b/src/NVD.jl index f1243745..a97000a6 100644 --- a/src/NVD.jl +++ b/src/NVD.jl @@ -6,7 +6,7 @@ using Dates using TOML: TOML using DataStructures: OrderedDict as Dict # watch out -using ..AdvisoryDB: AdvisoryDB, exists +using ..AdvisoryDB: AdvisoryDB, exists, Severity, Advisory, Reference, Credit, extract_summary const NVD_API_BASE = "https://services.nvd.nist.gov/rest/json/cves/2.0" const NVD_CPE_API_BASE = "https://services.nvd.nist.gov/rest/json/cpes/2.0" @@ -242,13 +242,13 @@ function vendor_product_versions(vuln) return unique!(vpvs) end 
-related_julia_packages(vuln) = AdvisoryDB.related_julia_packages(english_description(vuln), vendor_product_versions(vuln)) +affected_julia_packages(vuln) = AdvisoryDB.affected_julia_packages(english_description(vuln), vendor_product_versions(vuln)) function filter_julia_vulnerabilities(vulnerabilities) julia_vulnerabilities = [] for vuln in vulnerabilities - if !isempty(related_julia_packages(vuln)) + if !isempty(affected_julia_packages(vuln)) push!(julia_vulnerabilities, vuln) end end @@ -267,165 +267,45 @@ function english_description(vuln) return "" end -function version_string(node) - # Output a GHSA-like version string for a given configuration node - -end - -function convert_to_osv(vuln, package_versioninfo = nothing) - osv = Dict{String, Any}() - - # Required OSV fields - osv["schema_version"] = "1.7.2" - osv["id"] = "DONOTUSEJLSEC-0000" - osv["published"] = AdvisoryDB.now() - osv["modified"] = AdvisoryDB.now() - - # NVD does not include withdrawn information - - # The vuln id _either_ becomes an alias **or** an upstream advisory. - # NVD does not provide its own structured alias/upstream info. - # TODO: I think some vulns may be slightly better described as "upstream", but it's not obvious to me - osv["aliases"] = [vuln.cve.id] - - # No related tags (beyond references) - - # Summary and details from descriptions, using English only - description = english_description(vuln) - if !isempty(description) - flat_description = replace(description, r"\s+"=>" ") - # Use first sentence as summary, full text as details - if length(flat_description) > 100 - summary_end = findfirst(". ", flat_description) - if summary_end !== nothing - osv["summary"] = flat_description[1:summary_end[1]] - osv["details"] = description - else - osv["summary"] = flat_description[1:min(100, length(description))] * "..." 
- osv["details"] = description - end - else - osv["summary"] = flat_description - osv["details"] = description - end - end - - # CVSS severity information - if exists(vuln.cve, :metrics) - severity_info = [] - - # Check for CVSS metrics - for (version, metrics) in pairs(vuln.cve.metrics) - v = match(r"^cvssMetricV([234])", string(version)) - if !isnothing(v) - for metric in metrics - if exists(metric, :cvssData) - push!(severity_info, Dict( - "type" => "CVSS_V"*v[1], - "score" => string(metric.cvssData.vectorString) - )) - break - end - end - end - end - - if !isempty(severity_info) - osv["severity"] = severity_info +function advisory(vuln) + affected = affected_julia_packages(vuln) + upstream_type = Dict("alias"=>:aliases,"upstream"=>:upstream)[get(unique(map(x->x.source_type, affected)), 1, "alias")] + + # Severities are a little complicated + severities = Severity[] + for (version, metrics) in pairs(get(vuln.cve, :metrics, Dict())) + v = match(r"^cvssMetricV([234])", string(version)) + isnothing(v) && continue + for metric in metrics + exists(metric, :cvssData, :vectorString) || continue + push!(severities, Severity( + type = "CVSS_V"*v[1], + score = string(metric.cvssData.vectorString) + )) + break # We'll just find the first such metric; there may be more end end - # Affected _Julia_ packages, either explicitly passed - package_versioninfos = isnothing(package_versioninfo) ? related_julia_packages(vuln) : [package_versioninfo] - affected = [] - for (package, versioninfo) in package_versioninfos - affected_entry = Dict{String, Any}() - affected_entry["package"] = Dict( - "ecosystem" => "Julia", - "name" => package + return Advisory(; + withdrawn = (lowercase(get(vuln.cve, :vulnStatus, "")) == "rejected") ? 
Dates.now(Dates.UTC) : nothing, + upstream_type => String[vuln.cve.id], + # related -- nothing structured + summary = extract_summary(english_description(vuln)), + details = english_description(vuln), + severity = severities, + affected = affected, + references = [Reference(url=ref.url) for ref in get(vuln.cve, :references, []) if haskey(ref, :url)], + # credits -- not structured + database_specific = Dict{String,Any}("source" => Dict( + "id" => vuln.cve.id, + "modified" => vuln.cve.lastModified * "Z", + "published" => vuln.cve.published * "Z", + "imported" => AdvisoryDB.now(), + "url" => string(NVD_API_BASE, "?cveId=", vuln.cve.id), + "html_url" => string("https://nvd.nist.gov/vuln/detail/", vuln.cve.id) + ) ) - range_events = AdvisoryDB.osv_events(AdvisoryDB.VersionRange{VersionNumber}.(versioninfo)) - affected_entry["ranges"] = [Dict("type"=>"SEMVER", "events"=>range_events)] - push!(affected, affected_entry) - end - if !isempty(affected) - osv["affected"] = affected - end - - # References - references = [] - if haskey(vuln.cve, :references) - for ref in vuln.cve.references - if haskey(ref, :url) - push!(references, Dict( - "type" => "WEB", - "url" => ref.url - )) - end - end - end - if !isempty(references) - osv["references"] = references - end - - # No structured credits - return osv -end - -function get_first_package_name_nvd(vuln) - # Extract package name from CPE data or fallback to CVE ID - if haskey(vuln.cve, :configurations) - for config in vuln.cve.configurations - if haskey(config, :nodes) - for node in config.nodes - if haskey(node, :cpeMatch) - for cpe_match in node.cpeMatch - if haskey(cpe_match, :criteria) - criteria = cpe_match.criteria - if occursin("julia", lowercase(criteria)) - cpe_parts = split(criteria, ":") - if length(cpe_parts) >= 6 - return cpe_parts[5] # product name - end - end - end - end - end - end - end - end - end - - # Fallback to using CVE ID as package name - return replace(vuln.cve.id, "CVE-" => "cve_") -end - -function 
write_nvd_advisory_files(vulnerabilities) - packages_dir = "packages" - - if !isdir(packages_dir) - mkdir(packages_dir) - end - - for vuln in vulnerabilities - osv_data = convert_to_osv(vuln) - package_name = get_first_package_name_nvd(vuln) - - package_dir = joinpath(packages_dir, package_name) - if !isdir(package_dir) - mkdir(package_dir) - end - - filename = "$(vuln.cve.id).json" - filepath = joinpath(package_dir, filename) - - println("Writing NVD advisory: $filepath") - open(filepath, "w") do f - JSON3.pretty(f, osv_data) - end - end - - println("Completed writing $(length(vulnerabilities)) NVD advisories to disk") + ) end end \ No newline at end of file diff --git a/src/advisory.jl b/src/advisory.jl new file mode 100644 index 00000000..b31bc6ac --- /dev/null +++ b/src/advisory.jl @@ -0,0 +1,309 @@ +using DataStructures: OrderedDict +using Dates: Dates, DateTime +using Markdown: Markdown + +""" + PackageVulnerability(; pkgs, ranges, source_type=nothing, source_mapping=nothing) + +Represent an item in OSV's `affected` array, but using ranges of `VersionRange` instead of named events. + +The `source_type` and `source_mapping` are a bit of *unserialized* metadata to "show the work" of doing version conversion, +particularly of importance when the type is `"upstream"` and the version mappings are nontrivial. +""" +@kwdef struct PackageVulnerability + pkg::String + ranges::Vector{VersionRange{VersionNumber}} + source_type::Union{Nothing, String} = nothing # or "upstream" or "alias" + source_mapping::Union{Nothing, AbstractDict} = nothing +end +function Base.convert(::Type{PackageVulnerability}, d::AbstractDict) + PackageVulnerability(; Dict(Symbol(k)=>(Symbol(k) == :ranges ? VersionRange{VersionNumber}.(v) : v) for (k,v) in d)...) 
+end +function Base.:(==)(a::PackageVulnerability, b::PackageVulnerability) + return a.pkg == b.pkg && a.ranges == b.ranges && a.source_type == b.source_type && a.source_mapping == b.source_mapping +end +function Base.hash(a::PackageVulnerability, h::UInt) + return hash(a.pkg, hash(a.ranges, hash(a.source_type, hash(a.source_mapping, hash(0x30652ead7d10dc57, h))))) +end +is_vulnerable(v::PackageVulnerability) = !isempty(v.ranges) +has_lower_bound(v::PackageVulnerability) = all(has_lower_bound, v.ranges) +has_upper_bound(v::PackageVulnerability) = all(has_upper_bound, v.ranges) + +""" + Reference(; url, type="WEB") + +Represent a URL in OSV's reference field. Assumes `"WEB"` if the type is not in OSV schema. +""" +@kwdef struct Reference + type::String = "WEB" + url::String + function Reference(type, url) + TYPE = uppercase(convert(String, type)) + if TYPE ∉ ("ADVISORY", "ARTICLE", "DETECTION", "DISCUSSION", "REPORT", + "FIX", "INTRODUCED", "PACKAGE", "EVIDENCE", "WEB") + @warn "unknown reference type $(repr(type))" + TYPE = "WEB" + end + return new(TYPE, convert(String, url)) + end +end +Base.convert(::Type{Reference}, s::AbstractString) = Reference(; url = s) +Base.convert(::Type{Reference}, d::AbstractDict) = Reference(; Dict(Symbol(k)=>v for (k,v) in d)...) + +""" + Credit(; name, contact=String[], type=nothing) + Credit(shorthand) + +Represent a :credit field item. Supports reading and writing a shorthand `\$name <\$email>` string syntax. +The `type` must be a value in OSV schema or will be omitted. 
+""" +@kwdef struct Credit + name::String + contact::Vector{String} = String[] + type::Union{String,Nothing} = nothing + function Credit(name, contact, type) + if type !== nothing + type = uppercase(convert(String, type)) + if type ∉ ("FINDER", "REPORTER", "ANALYST", "COORDINATOR", "REMEDIATION_DEVELOPER", + "REMEDIATION_REVIEWER", "REMEDIATION_VERIFIER", "TOOL", "SPONSOR", "OTHER") + @warn "unknown credit type $type" + type = nothing + end + end + new(convert(String, name), convert(Vector{String}, contact), type) + end +end +function Credit(shorthand::AbstractString) + # Support typical "Name " shorthand + m = match(r"^\s*(.+?)\s<([^>]+@[^>]+)>\s*$", shorthand) + if m === nothing + return Credit(shorthand, String[], nothing) + else + return Credit(m.captures[1], [string("mailto:", m.captures[2])], nothing) + end +end +Base.convert(::Type{Credit}, s::AbstractString) = Credit(s) +Base.convert(::Type{Credit}, d::AbstractDict) = Credit(; Dict(Symbol(k)=>v for (k,v) in d)...) +Base.:(==)(a::Credit, b::Credit) = isequal(a.name, b.name) && isequal(a.contact, b.contact) && isequal(a.type, b.type) +Base.hash(a::Credit, h::UInt) = hash(a.name, hash(a.contact, hash(a.type, hash(0x6a890f8c2b38fe87, h)))) + +""" + Severity(; type, score) + Severity(score) + +Represent a CVSS severity. If no type is given, CVSS_V2/3/4 are auto-detected. +""" +@kwdef struct Severity + type::String + score::String +end +function Severity(score) + s = tryparse(Severity, score) + isnothing(s) && throw(ArgumentError("cannot parse severity score $score")) + return s +end +Base.convert(::Type{Severity}, s::AbstractString) = Severity(s) +Base.convert(::Type{Severity}, d::AbstractDict) = Severity(; Dict(Symbol(k)=>v for (k,v) in d)...) 
+function Base.tryparse(::Type{Severity}, score) + if startswith(score, r"^AV:[LAN]\/AC:[HML]\/Au:[MSN]\/C:[NPC]\/I:[NPC]\/A:[NPC]") + type = "CVSS_V2" + elseif (m = match(r"^CVSS:([34])", score); m !== nothing) + type = "CVSS_V$(m.captures[1])" + else + # TODO: Should this assume medium/high/critical are Ubuntu's definitions? + return nothing + end + return Severity(type, String(score)) +end + +""" + Advisory(; osv_kwargs...) + +Represent an advisory using OSV schema's definitions for nearly all its fields. +There is just one place where we differ: +* `affected` is a vector of the differently-structured [`PackageVulnerability`](@ref) +""" +@kwdef mutable struct Advisory + ## OSV fields + schema_version::String = "1.7.3" + # The identifier and dates may be re-written by GitHub Actions upon publication and modification + id::String = string(PREFIX, "-0000-", string(rand(UInt64), base=36)) + modified::DateTime = Dates.now(Dates.UTC) + published::Union{DateTime, Nothing} = nothing + withdrawn::Union{DateTime, Nothing} = nothing + aliases::Vector{String} = String[] + upstream::Vector{String} = String[] + related::Vector{String} = String[] + summary::Union{String, Nothing} = nothing + details::Union{String, Nothing} = nothing + # The more complicated fields: + severity::Vector{Severity} = Severity[] + affected::Vector{PackageVulnerability} = PackageVulnerability[] + references::Vector{Reference} = Reference[] + credits::Vector{Credit} = Credit[] + ## JULSEC-specific fields + database_specific::Dict{String,Any} = Dict{String,Any}() # TODO: define these fields? 
+end
+# Advisory identity is purely determined by the serialization format
+function Base.:(==)(a::Advisory, b::Advisory)
+    return to_toml_frontmatter(a) == to_toml_frontmatter(b) && a.summary == b.summary && a.details == b.details
+end
+function Base.hash(a::Advisory, h::UInt)
+    return hash(to_toml_frontmatter(a), hash(a.summary, hash(a.details, hash(0x913cfa4716e3f874, h))))
+end
+"""
+    is_vulnerable(x)
+
+Return `true` if the `Advisory` or `PackageVulnerability` has a non-empty set of versions
+"""
+is_vulnerable(a::Advisory) = any(is_vulnerable, a.affected)
+vulnerable_packages(a::Advisory) = [entry.pkg for entry in a.affected if is_vulnerable(entry)]
+
+"""
+    update(original::Advisory, updates::Advisory)
+
+Given an `original` advisory and some `updates`, return a new advisory with the same ID
+and dates
+"""
+function update(original::Advisory, updates::Advisory)
+    return Advisory(;
+        # use whatever the default `schema_version` is
+        id = original.id,
+        modified = original.modified, # This may or may not get overwritten later
+        published = original.published,
+        withdrawn = something(original.withdrawn, updates.withdrawn, Some(nothing)),
+        ## All other fields are directly taken from the new advisory
+        aliases = updates.aliases,
+        upstream = updates.upstream,
+        related = updates.related,
+        summary = updates.summary,
+        details = updates.details,
+        severity = updates.severity,
+        affected = updates.affected,
+        references = updates.references,
+        credits = updates.credits,
+        database_specific = updates.database_specific,
+    )
+end
+
+#### IO and serialization ####
+
+# TOML creation. The one funny thing we do here is that the JULSEC parser supports a few
+# shorthand idioms. But only do this if _all_ values in a collection can be represented
+# with such shorthands. 
+""" + to_toml_frontmatter(x) + +Recursively convert an Advisory and all its fields (except `summary` and `details`) to serializable values for `TOML.print` +""" +to_toml_frontmatter(v::Union{VersionNumber, VersionString, VersionRange}) = string(v) +to_toml_frontmatter(x::Union{AbstractString, Integer, AbstractFloat, Bool, Dates.DateTime, Dates.Time, Dates.Date}) = x +to_toml_frontmatter(d::AbstractDict) = OrderedDict(k=>to_toml_frontmatter_collection(v, values(d)) for (k,v) in d) +to_toml_frontmatter(A::AbstractArray) = [to_toml_frontmatter_collection(x, A) for x in A] +to_toml_frontmatter_collection(x, _) = to_toml_frontmatter(x) +function to_toml_frontmatter(a::Advisory) + # Convert all fields to TOML with a few special cases: + return OrderedDict{String,Any}( + string(f) => to_toml_frontmatter( + f == :affected ? filter(is_vulnerable, getproperty(a, f)) : # Skip (empty) non-vulnerabilities + getproperty(a, f)) + for f in fieldnames(Advisory) if + is_populated(getproperty(a, f)) && (f ∉ (:summary, :details))) # Summary and details are not frontmatter +end +to_toml_frontmatter(s::Severity) = to_toml_frontmatter_collection(s, [s]) +function to_toml_frontmatter_collection(s::Severity, xs) + if all(x isa Severity && x == tryparse(Severity, x.score) for x in xs) + return s.score + else + return OrderedDict(string(f) => to_toml_frontmatter(getproperty(s, f)) for f in fieldnames(Severity)) + end +end +to_toml_frontmatter(c::Credit) = to_toml_frontmatter_collection(c, [c]) +function _credit_shorthand(c::Credit) + contact = chopprefix(get(c.contact, 1, ""), "mailto:") + return string(c.name, isempty(contact) ? 
"" : " <$contact>")
+end
+function to_toml_frontmatter_collection(c::Credit, xs)
+    if all(x isa Credit && x == Credit(_credit_shorthand(x)) for x in xs)
+        return _credit_shorthand(c)
+    else
+        return OrderedDict(string(f) => to_toml_frontmatter(getproperty(c, f)) for f in fieldnames(Credit) if is_populated(getproperty(c, f)))
+    end
+end
+to_toml_frontmatter(r::Reference) = to_toml_frontmatter_collection(r, [r])
+function to_toml_frontmatter_collection(r::Reference, xs)
+    if all(x isa Reference && x.type == "WEB" for x in xs)
+        return r.url
+    else
+        return OrderedDict(string(f) => to_toml_frontmatter(getproperty(r, f)) for f in fieldnames(Reference))
+    end
+end
+function to_toml_frontmatter(v::PackageVulnerability)
+    return OrderedDict("pkg" => to_toml_frontmatter(v.pkg),
+                       "ranges" => to_toml_frontmatter(v.ranges))
+end
+
+function Base.print(io::IO, vuln::Advisory)
+    frontmatter = sprint(TOML.print, to_toml_frontmatter(vuln))
+    nticks = maximum(x->length(x.captures[1])+1, eachmatch(r"(`+)", frontmatter), init=3)
+    println(io, repeat("`", nticks), "toml")
+    print(io, frontmatter)
+    println(io, repeat("`", nticks))
+    println(io)
+    is_populated(vuln.summary) && println(io, "# ", vuln.summary, "\n")
+    is_populated(vuln.details) && println(io, vuln.details)
+    return nothing
+end
+
+# Use the TOML/Markdown as the display:
+Base.show(io::IO, mime::MIME"text/plain", vuln::Advisory) = show(io, mime, Markdown.parse(string(vuln)))
+Base.show(io::IO, vuln::Advisory) = print(io, vuln)
+
+####### Read a Markdown/TOML advisory
+parsefile(filename) = something(open(io->tryparse(Advisory, io), filename))
+function Base.tryparse(::Type{Advisory}, s::Union{AbstractString, IO})
+    m = Markdown.parse(s).content
+    (length(m) >= 1 && m[1] isa Markdown.Code && m[1].language == "toml") || return nothing
+    frontmatter = TOML.tryparse(m[1].code)
+    frontmatter === nothing && return nothing
+    summary = if length(m) >= 2 && m[2] isa Markdown.Header
+        chopprefix(Markdown.plain(m[2]), r"^#+\s+")
+    
end + details = if length(m) >= 2+!isnothing(summary) + Markdown.plain(m[2+!isnothing(summary):end]) + end + + return try + Advisory(; Dict(Symbol(k)=>v for (k,v) in frontmatter)..., summary, details) + catch _ + nothing + end +end + +""" + to_osv_dict(x) + +Recursively convert an Advisory and all its fields (except `summary` and `details`) to serializable values for `JSON3.write` +such that it will create a valid OSV JSON +""" +to_osv_dict(v::Union{VersionNumber, VersionString, VersionRange, Dates.Time, Dates.Date}) = string(v) +to_osv_dict(x::Dates.DateTime) = chopsuffix(string(x), "Z") * "Z" # All times should be UTC; print them as such +to_osv_dict(x::Union{AbstractString, Integer, AbstractFloat, Bool}) = x +to_osv_dict(d::AbstractDict) = OrderedDict(string(k)=>to_osv_dict(v) for (k,v) in d) +to_osv_dict(A::AbstractArray) = [to_osv_dict(v) for v in A] +function to_osv_dict(a::Union{Advisory, Severity, Reference, Credit}) + return OrderedDict(string(f) => to_osv_dict(getproperty(a, f)) for f in fieldnames(typeof(a)) if is_populated(getproperty(a, f))) +end +# Package vulnerabilities are the one thing we store quite differently: +function to_osv_dict(vuln::PackageVulnerability) + return Dict{String, Any}( + "package" => OrderedDict( + "ecosystem" => "Julia", + "name" => vuln.pkg, + # TODO: "purl" => purl(vuln.pkg) + ), + "ranges" => [OrderedDict("type"=>"SEMVER", "events"=>osv_events(vuln.ranges))], + # TODO: "versions" => registered_versions_within_the_ranges(vuln.pkg, vuln.ranges) + # TODO: "database_specific" => Dict(vuln.source_type, vuln.source_mapping, etc...) ? 
+ ) +end diff --git a/src/common.jl b/src/common.jl index 1d927135..914b20b4 100644 --- a/src/common.jl +++ b/src/common.jl @@ -3,10 +3,12 @@ using TOML: TOML using Pkg: Pkg, Registry using Tar: Tar using CodecZlib: GzipDecompressorStream -using DataStructures: OrderedDict as Dict +using DataStructures: OrderedDict, DefaultDict using Dates: Dates using TimeZones: TimeZones +const PREFIX="DONOTUSEJLSEC" + exists(advisory, key) = haskey(advisory, key) && is_populated(advisory[key]) exists(advisory, key, keys...) = exists(advisory, key) && exists(advisory, keys...) is_populated(::Nothing) = false @@ -122,20 +124,20 @@ function overlaps(a::VersionRange, b::VersionRange) end function merge_ranges(ranges) ranges = issorted(ranges) ? ranges : sort(ranges) - new_ranges = [] - rng = ranges[1] - for i in 2:length(ranges) - # Incrementally widen `rng` to "gobble up" any subsequent ranges it overlaps or _touches_, if at least one endpoint is inclusive) - if overlaps(rng, ranges[i]) || (rng.ub == ranges[i].lb && (rng.ubinclusive || ranges[i].lbinclusive)) - ubtuple = max(upper_bound_tuple(rng), upper_bound_tuple(ranges[i])) - rng = VersionRange(rng.lb, ubtuple[1], rng.lbinclusive, ubtuple[2]) + new_ranges = eltype(ranges)[] + base = nothing + for rng in ranges + base === nothing && (base = rng; continue) # first iteration + # Incrementally widen `base` to "gobble up" any subsequent ranges it overlaps or _touches_ (if at least one endpoint is inclusive) + if overlaps(base, rng) || (base.ub == rng.lb && (base.ubinclusive || rng.lbinclusive)) + ubtuple = max(upper_bound_tuple(base), upper_bound_tuple(rng)) + base = VersionRange(base.lb, ubtuple[1], base.lbinclusive, ubtuple[2]) else - push!(new_ranges, rng) - rng = ranges[i] + push!(new_ranges, base) + base = rng end - i += 1 end - push!(new_ranges, rng) + base !== nothing && push!(new_ranges, base) return new_ranges end @@ -157,6 +159,22 @@ function osv_events(rng::VersionRange) return events end +function 
extract_summary(description) + N = lastindex(description) + double_newline = something(findfirst("\n\n", description), N:N)[1] + if double_newline < 100 + return strip(chopprefix(description[1:double_newline], r"^#+")) + end + + flat_description = replace(description, r"\s+"=>" ") + summary_end = findfirst(". ", flat_description) + if summary_end !== nothing && summary_end[1] < 100 + flat_description[1:prevind(flat_description, summary_end[1])] + else + flat_description[1:min(thisind(flat_description, 100), end)] * "..." + end +end + function get_registry(reg=Registry.RegistrySpec(name="General", uuid = "23338594-aafe-5451-b93e-139f81909106"); depot=Pkg.depots1()) name = joinpath(depot, "registries", reg.name) if !ispath(name) && !ispath(name * ".toml") @@ -220,11 +238,11 @@ function upstream_projects_by_vendor_product() isassigned(UPSTREAM_PROJECTS_BY_VENDOR_PRODUCT) && return UPSTREAM_PROJECTS_BY_VENDOR_PRODUCT[] d = Dict{Tuple{String,String}, String}() for (project, deets) in upstream_projects() - products = unique(skipmissing(vcat(lowercase(project), get(split(get(deets, "cpe", ""), ":"), 5, missing)))) + products = unique(skipmissing(vcat(project, get(split(get(deets, "cpe", ""), ":"), 5, missing)))) vendors = unique(skipmissing(vcat(get(deets, "vendors", String[]), get(split(get(deets, "cpe", ""), ":"), 4, missing)))) for v in vendors, p in products - haskey(d, (v,p)) && error("every vendor/product pair must uniquely identify one upstream projects") - d[(v,p)] = project + haskey(d, (lowercase(v),lowercase(p))) && d[(lowercase(v),lowercase(p))] != project && error("every vendor/product pair must uniquely identify (case-insensitive) one upstream projects") + d[(lowercase(v),lowercase(p))] = project end end UPSTREAM_PROJECTS_BY_VENDOR_PRODUCT[] = d @@ -310,62 +328,49 @@ function convert_versions(pkg_project_map, vulnerable_range) versions end -function related_julia_packages(description, vendorproductversions) - pkgs = Pair{String,String}[] - # There are four 
reasons why this might return a "*" range +function affected_julia_packages(description, vendorproductversions) + pkgs = DefaultDict{String, Any}(()->DefaultDict{String, Any}(()->OrderedDict{String, Any}())) + # There are three reasons why this might return a ["*"] range # 1. That's the correct answer - # 2. It's pessimistically returned because we failed to parse an upstream version - # 3. It's pessimistically returned because we failed to parse a Julia version - # 4. It's pessimistically returned because we failed to match a mentioned Julia package to a product - upstream_version_failures = Pair{String, Pair{String, String}}[] - whys = Dict{String, Vector{String}}() + # 2. It's pessimistically returned because we failed to parse the versions reported in the advisory + # 3. It's pessimistically returned because we failed to match a mentioned Julia package to a product julia_like_pkgs_mentioned = union((m.captures[1] for m in eachmatch(r"\b(\w+)\.jl\b", description)), (m.captures[1]*"_jll" for m in eachmatch(r"\b(\w+)_jll\b", description))) jlpkgs_mentioned = filter(registry_has_package, julia_like_pkgs_mentioned) found_match = false - for (vendor, product, version) in vendorproductversions + advisory_type = nothing + for (vendor, product, version) in unique(vendorproductversions) # First check for a known **NON-JULIA-PACKAGE** CPE: - if haskey(upstream_projects_by_vendor_product(), (vendor, product)) - matched_project = upstream_projects_by_vendor_product()[(vendor, product)] + if haskey(upstream_projects_by_vendor_product(), (lowercase(vendor), lowercase(product))) + matched_project = upstream_projects_by_vendor_product()[(lowercase(vendor), lowercase(product))] found_match = true # We have an upstream component! Compute the remapped version range if we can. 
matched_pkgs = packages_with_project(matched_project) r = tryparse(VersionRange, version) for pkg in matched_pkgs - if isnothing(r) - push!(pkgs, pkg => "*") - push!(get!(Vector{String}, whys, pkg), "failed to parse upstream $matched_project range `$(repr(version))`; this became `\"*\"`") - else - pkg_vers = string.(convert_versions(package_project_version_map(pkg, matched_project), r)) - append!(pkgs, pkg .=> pkg_vers) - push!(get!(Vector{String}, whys, pkg), "upstream $matched_project range `$(repr(version))` became `$(repr(pkg_vers))`") - end + pkgs[pkg]["$vendor:$product"][version] = isnothing(r) ? + [VersionRange{VersionNumber}("*")] : convert_versions(package_project_version_map(pkg, matched_project), r) end + isnothing(advisory_type) || @assert(advisory_type == "upstream", "advisory directly lists $pkg, but it also finds upstream components") + advisory_type = "upstream" else - r = tryparse(VersionRange{VersionNumber}, version) if (contains(lowercase(vendor), "julia") || endswith(product, ".jl")) && registry_has_package(chopsuffix(product, ".jl")) # A vendor or package _looks_ really julia-ish and is in the registry found_match = true + r = tryparse(VersionRange{VersionNumber}, version) pkg = chopsuffix(product, ".jl") - if isnothing(r) - push!(pkgs, pkg => "*") - push!(get!(Vector{String}, whys, pkg), "failed to parse version range `$(repr(version))`; this became `\"*\"`") - else - push!(pkgs, pkg => version) - push!(get!(Vector{String}, whys, pkg), "used version range `$(repr(version))` directly") - end + pkgs[pkg]["$vendor:$product"][version] = [something(r, VersionRange{VersionNumber}("*"))] + isnothing(advisory_type) || @assert(advisory_type == "alias", "advisory directly lists $pkg, but it also finds upstream components") + advisory_type = "alias" elseif !isempty(jlpkgs_mentioned) # There are packages mentioned in the description! 
First look for a possible match against the given product for pkg in jlpkgs_mentioned pkg == chopsuffix(product, ".jl") || continue found_match = true - if isnothing(r) - push!(pkgs, pkg => "*") - push!(get!(Vector{String}, whys, pkg), "failed to parse version range `$(repr(version))`; this became `\"*\"`") - else - push!(pkgs, pkg => version) - push!(get!(Vector{String}, whys, pkg), "used version range `$(repr(version))` directly") - end + r = tryparse(VersionRange{VersionNumber}, version) + pkgs[pkg]["$vendor:$product"][version] = [something(r, VersionRange{VersionNumber}("*"))] + isnothing(advisory_type) || @assert(advisory_type == "alias", "advisory directly lists $pkg, but it also finds upstream components") + advisory_type = "alias" break end end @@ -373,18 +378,35 @@ function related_julia_packages(description, vendorproductversions) end if !found_match && !isempty(jlpkgs_mentioned) # We didn't connect any vendor/product pair with a Julia package, but there are some mentioned. - # if there is only one vendor/product pair here, we blindly connect all to the mentioned verions - # Otherwise, we prefix the packages with a ? 
to make this abundantly obvious - vendor_products = unique((x->x[1:2]).(vendorproductversions)) - push!(pkgs, jlpkgs_mentioned .=> "*") + # TODO: this could potentially do better by trying to correlate the listed versions against + # the registered ones, but this is quite the rare case and not worth worrying too much about + @warn "failed to match the mentioned packages to a product with a version" + @warn "assuming that all mentioned products are vulnerable at all versions" for pkg in jlpkgs_mentioned - push!(get!(Vector{String}, whys, pkg), "failed to connect any of the mentioned packages $jlpkgs_mentioned to a product: $vendor_products") + for (vendor, product, version) in unique(vendorproductversions) + pkgs[pkg]["$vendor:$product"][version] = [VersionRange{VersionNumber}("*")] + end end + advisory_type = "alias" end - # Combine all versions for a given package into an array, merging ranges - unique_pkg_names = unique(first.(pkgs)) - return [(p, string.(merge_ranges(VersionRange{VersionNumber}.(last.(pkgs[first.(pkgs) .== p])))), unique(whys[p])) for p in unique_pkg_names] + # return pkgs + vulns = PackageVulnerability[] + for (pkg, source_mapping) in pkgs + # Use a better sorting if we can: + for (_, vs) in source_mapping + if all(!isnothing, tryparse.(VersionRange, keys(vs))) + sort!(vs, by=x->something(tryparse(AdvisoryDB.VersionRange, x), x)) + else + sort!(vs) + end + end + push!(vulns, PackageVulnerability(pkg, + merge_ranges(sort(collect(Iterators.flatten(v for (proj,map) in source_mapping for (_,v) in map)))), + advisory_type, + source_mapping)) + end + return vulns end # TODO: use the above Pkg machinery for this, too @@ -407,144 +429,47 @@ function all_jlls() filter(endswith("_jll")∘first, pkgs) end -function get_packages(osv) - pkgs = Tuple{String,String}[] - id = osv.id - println("Looking for Julia packages in $id") - if haskey(osv, :affected) && !isempty(osv.affected) - for vuln in osv.affected - if haskey(vuln, :package) && haskey(vuln.package, 
:name) && haskey(vuln.package, :ecosystem) - lowercase(string(vuln.package.ecosystem)) == "julia" || continue - pkgname = chopsuffix(strip(vuln.package.name), ".jl") - println(" - looking for $pkgname in the General registry") - uuids = get_uuids_in_general(pkgname) - if length(uuids) != 1 - println(" ⨯ found $(length(uuids)) UUIDs for $pkgname") - create_issue(ENV["GITHUB_REPOSITORY"], - title="Failed to find a registered $(pkgname) for $is", - body=""" - The advisory $id names **$pkgname** as an affected product from the - Julia ecosystem, but $(isempty(uuids) ? "no" : "more than one") match was found - in the General registry. - - The complete OSV advisory is: - - ```json - $(sprint((io,x)->JSON3.pretty(io, x, JSON3.AlignmentContext(indent=2)), osv)) - ``` - """ - ) - else - println(" - found $pkgname => $(only(uuids))") - push!(pkgs, (pkgname, only(uuids))) - end - end - end - end - return pkgs -end - -function import_osv_files(path) - packages_dir = "packages/General" - n = 0 - for filename in readdir(path) - endswith(filename, ".json") || continue - - osv_data = JSON3.read(joinpath(path, filename)) - - for (package, uuid) in get_packages(osv_data) - package_dir = joinpath(packages_dir, package) - mkpath(package_dir) - - filename = "$(osv_data.id).json" - filepath = joinpath(package_dir, filename) - - println("Writing advisory: $filepath") - open(filepath, "w") do f - JSON3.pretty(f, osv_data, JSON3.AlignmentContext(indent=2)) - end - n += 1 - end - end - - println("Completed writing $n advisories to disk") -end - """ - corresponding_jlsec_id(package, id, aliases=String[]) + corresponding_jlsec_path(id, aliases=String[]) -Given a Julia package and an upstream advisory id and an (optional) list of aliases, -return the corresponding JLSEC advisory id if it exists and `nothing` otherwise. +Given an upstream advisory id and an (optional) list of its own aliases, +return the path to the corresponding JLSEC advisory if it exists and `nothing` otherwise. 
""" -function corresponding_jlsec_id(package, id, aliases=String[]) - # The obvious cases are those where the upstream advisory has a JLSEC alias - startswith(id, "DONOTUSEJLSEC-") && return id - alias_idx = findfirst(startswith("DONOTUSEJLSEC-"), aliases) - !isnothing(alias_idx) && return aliases[alias_idx] - - # Or the JLSEC might have been created from the upstream advisory (or one of its aliases) - # So search all published package JLSECs for their alias information - path = joinpath(@__DIR__, "..", "packages", "General", package) +function corresponding_jlsec_path(id, aliases=String[]) isdir(path) || return nothing - jlsec_aliases = Dict{String, String}() - for f in readdir(path) - jlsec, ext = splitext(f) - ext == ".json" || (@warn "unexpected extension $ext in $path/$f"; continue) - for alias in get(JSON3.read(joinpath(path, f)), :aliases, String[]) - jlsec_aliases[alias] = jlsec + ids = Set(Iterators.flatten((id, aliases))) + for (root, _, files) in walkdir(path) + for file in joinpath.(root, files) + is_jlsec_advisory_path_path(file) || continue + candidate = parsefile(file) + for alias in Iterators.flatten((candidate.id, candidate.aliases, candidate.upstream)) + alias in ids && return file + end end end - # And then search for the first match - for alias in vcat(id, chopprefix(id, r".*(?=GHSA-\w{4}-\w{4}-\w{4}$)"), sort(aliases)) - haskey(jlsec_aliases, alias) && return jlsec_aliases[alias] - end return nothing end -function create!(pkg, osv) - pkg_path = joinpath(@__DIR__, "..", "packages", "General", pkg) - mkpath(pkg_path) - open(joinpath(pkg_path, string("DONOTUSEJLSEC-0000-", string(rand(UInt64), base=36, pad=13), ".json")), "w") do f - JSON3.pretty(f, osv, JSON3.AlignmentContext(indent=2)) - end -end +""" + is_jlsec_advisory_path_path(path) -stringify_keys(x) = x -stringify_keys(x::AbstractArray) = stringify_keys.(x) -stringify_keys(x::AbstractDict) = Dict(string(k)=>stringify_keys(v) for (k,v) in x) - -function update!(jlsec_path::AbstractString, 
osv) - # JSON3 gives us Symbol keys, but the osv has strings (TODO, this is messy) - original_jlsec = JSON3.read(jlsec_path) - jlsec = stringify_keys(original_jlsec) - updated = false - for key in union(keys(jlsec), keys(osv)) - key in ("id", "modified", "published") && continue - if haskey(osv, key) && (get(jlsec, key, "sentinel: wNVPEsdcSJ4N") != osv[key]) - @info "updating $(basename(jlsec_path)) because $key differs" - jlsec[key] = osv[key] - updated = true - end - end - if updated - jlsec["modified"] = now() - open(jlsec_path, "w") do f - JSON3.pretty(f, jlsec, JSON3.AlignmentContext(indent=2)) - end - end - return updated +Given a path, do a simple check to see if it looks like a JLSEC advisory +""" +function is_jlsec_advisory_path(path) + file, ext = splitext(basename(path)) + return startswith(file, PREFIX) && ext == ".md" end -function fetch_advisory(advisory_id, package_verisioninfo=nothing) +function fetch_advisory(advisory_id) if startswith(advisory_id, "CVE-") vuln = NVD.fetch_cve(advisory_id) - return NVD.convert_to_osv(vuln, package_verisioninfo) + return NVD.advisory(vuln) elseif startswith(advisory_id, "EUVD-") vuln = EUVD.fetch_enisa(advisory_id) - return EUVD.convert_to_osv(vuln, package_verisioninfo) + return EUVD.advisory(vuln) elseif endswith(advisory_id, r"GHSA-\w{4}-\w{4}-\w{4}") vuln = GitHub.fetch_ghsa(advisory_id) - return GitHub.convert_to_osv(vuln, package_verisioninfo) + return GitHub.advisory(vuln) else throw(ArgumentError("unknown advisory: $advisory_id")) end @@ -556,15 +481,5 @@ function fetch_product_matches(vendor, product) @info "got $(length(nvds)) advisories from NVD" euvds = EUVD.fetch_product_matches(vendor, product) @info "got $(length(euvds)) advisories from EUVD" - missing_ids = setdiff(filter(startswith("CVE"), EUVD.vuln_id.(euvds)), (x->x.cve.id).(nvds)) - @info "adding another $(min(length(missing_ids),200)) advisories from NVD" - for missing_id in missing_ids[1:min(end, 200)] # 20 minutes - sleep(6) - try - 
push!(nvds, NVD.fetch_cve(missing_id)) - catch ex - @info ex - end - end return nvds, euvds end diff --git a/test/runtests.jl b/test/runtests.jl index a4b005f6..27840451 100644 --- a/test/runtests.jl +++ b/test/runtests.jl @@ -37,13 +37,13 @@ end desc = "An out-of-bounds read flaw was found in the CLARRV, DLARRV, SLARRV, and ZLARRV functions in lapack through version 3.10.0, as also used in OpenBLAS before version 0.3.18. Specially crafted inputs passed to these functions could cause an application using lapack to crash or possibly disclose portions of its memory." vpv = [("lapack_project", "lapack", "<= 3.10.0"), ("openblas_project", "openblas", "< 0.3.18"), ("julialang", "julia", "<= 1.6.3"), ("julialang", "julia", "= 1.7.0-beta1"), ("julialang", "julia", "= 1.7.0-beta2"), ("julialang", "julia", "= 1.7.0-beta3"), ("julialang", "julia", "= 1.7.0-beta4"), ("julialang", "julia", "= 1.7.0-rc1"), ("redhat", "ceph_storage", "= 2.0"), ("redhat", "ceph_storage", "= 3.0"), ("redhat", "ceph_storage", "= 4.0"), ("redhat", "ceph_storage", "= 5.0"), ("redhat", "openshift_container_storage", "= 4.0"), ("redhat", "openshift_data_foundation", "= 4.0"), ("redhat", "enterprise_linux", "= 8.0"), ("fedoraproject", "fedora", "= 34"), ("fedoraproject", "fedora", "= 35")] - matches = AdvisoryDB.related_julia_packages(desc, vpv) - @test "julia" ∉ first.(matches) - @test "OpenBLAS_jll" in first.(matches) - @test "OpenBLAS32_jll" in first.(matches) - @test "OpenBLASHighCoreCount_jll" in first.(matches) + matches = AdvisoryDB.affected_julia_packages(desc, vpv) + @test "julia" ∉ (x->x.pkg).(matches) + @test "OpenBLAS_jll" in (x->x.pkg).(matches) + @test "OpenBLAS32_jll" in (x->x.pkg).(matches) + @test "OpenBLASHighCoreCount_jll" in (x->x.pkg).(matches) - @test !any(contains("∞"), (x->x[2])(only(matches[first.(matches) .== "OpenBLASHighCoreCount_jll"]))) + @test only(matches[(x->x.pkg).(matches) .== "OpenBLASHighCoreCount_jll"]).ranges == [AdvisoryDB.VersionRange{VersionNumber}("*")] end 
using AdvisoryDB: convert_versions, VersionRange