Skip to content

Commit

Permalink
url/url: Add URL parser
Browse files Browse the repository at this point in the history
  • Loading branch information
Zer0-One committed Apr 3, 2023
1 parent 525d7e1 commit 0c83d93
Show file tree
Hide file tree
Showing 32 changed files with 2,367 additions and 13 deletions.
2 changes: 2 additions & 0 deletions .bazelrc
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,8 @@ coverage --combined_report=lcov
test --test_output=errors
test --test_summary=terse
test --test_verbose_timeout_warnings
# Set ICU data directory for tests
test --test_env=HASTUR_ICU_DATA=external/icu-data/

# Bazel deprecations
# =========================================================
Expand Down
2 changes: 1 addition & 1 deletion .github/workflows/ci.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -223,7 +223,7 @@ jobs:
runs-on: ubuntu-22.04
steps:
- uses: actions/checkout@v3
- run: grep --recursive --no-filename --only-matching --exclude=WORKSPACE --exclude=*test.cpp --exclude=ci.yaml 'https://[^)(}{",# ]*' | grep -v '^https://$' | sort | uniq | xargs wget --spider
- run: grep --recursive --no-filename --only-matching --exclude-dir="*corpus" --exclude=WORKSPACE --exclude=*test.cpp --exclude=ci.yaml 'https://[^)(}{",# ]*' | grep -v '^https://$' | sort | uniq | xargs wget --spider

gitlint:
runs-on: ubuntu-22.04
Expand Down
7 changes: 7 additions & 0 deletions WORKSPACE
Original file line number Diff line number Diff line change
Expand Up @@ -116,6 +116,13 @@ http_archive(
url = "https://github.com/unicode-org/icu/releases/download/release-72-1/icu4c-72_1-src.tgz",
)

# Prebuilt ICU 72.1 data archive. The generated BUILD file exports only
# icudt72l.dat, so other packages can reference it as @icu-data//:icudt72l.dat.
http_archive(
name = "icu-data", # Unicode-DFS-2016
build_file_content = "exports_files([\"icudt72l.dat\"])",
sha256 = "1bc02487cbeaec3fc2d0dc941e8b243e7d35cd79899a201df88dc9ec9667a162",
url = "https://github.com/unicode-org/icu/releases/download/release-72-1/icu4c-72_1-data-bin-l.zip",
)

http_archive(
name = "imgui", # MIT
build_file = "//third_party:imgui.BUILD",
Expand Down
15 changes: 11 additions & 4 deletions third_party/icu.BUILD
Original file line number Diff line number Diff line change
Expand Up @@ -31,13 +31,20 @@ cc_library(
],
"//conditions:default": [],
}),
defines = [
"U_STATIC_IMPLEMENTATION",
"U_COMMON_IMPLEMENTATION",
"U_CHARSET_IS_UTF8=1",
"U_HIDE_OBSOLETE_UTF_OLD_H=1",
"UCONFIG_NO_CONVERSION=1",
],
linkopts = select({
"@platforms//os:windows": [],
"@platforms//os:windows": [
"-DEFAULTLIB:advapi32",
],
"//conditions:default": ["-ldl"],
}),
local_defines = [
"U_COMMON_IMPLEMENTATION",
],
linkstatic = True,
strip_include_prefix = "source/common/",
visibility = ["//visibility:public"],
)
30 changes: 29 additions & 1 deletion url/BUILD
Original file line number Diff line number Diff line change
@@ -1,16 +1,33 @@
load("@rules_cc//cc:defs.bzl", "cc_library", "cc_test")
load("//bzl:copts.bzl", "HASTUR_COPTS")
load("@rules_fuzzing//fuzzing:cc_defs.bzl", "cc_fuzz_test")
load("//bzl:copts.bzl", "HASTUR_COPTS", "HASTUR_FUZZ_PLATFORMS")

# Small library holding the ICU template instantiations that need RTTI.
# It is compiled with an RTTI-enabling flag added on top of HASTUR_COPTS, so
# that targets depending on it don't need that flag themselves.
cc_library(
name = "rtti_hack",
srcs = ["rtti_hack.cpp"],
hdrs = ["rtti_hack.h"],
copts = HASTUR_COPTS + select({
# MSVC flag for enabling RTTI.
"@platforms//os:windows": ["/GR"],
# Every other platform uses the GCC/Clang-style flag.
"//conditions:default": ["-frtti"],
}),
deps = ["@icu//:common"],
)

# The URL parser library. Publicly visible.
cc_library(
name = "url",
srcs = ["url.cpp"],
hdrs = ["url.h"],
copts = HASTUR_COPTS,
# ICU data file from the icu-data external repository, shipped as a runtime
# dependency. NOTE(review): presumably located at runtime through the
# HASTUR_ICU_DATA environment variable -- confirm in url.cpp.
data = ["@icu-data//:icudt72l.dat"],
visibility = ["//visibility:public"],
deps = [
# Provides the RTTI-dependent ICU template instantiations.
":rtti_hack",
"//util:base_parser",
"//util:string",
"//util:unicode",
"//util:uuid",
"@icu//:common",
"@spdlog",
],
)

Expand All @@ -22,5 +39,16 @@ cc_test(
deps = [
":url",
"//etest",
"@icu//:common",
],
)

# Fuzz test for the :url library.
cc_fuzz_test(
name = "url_fuzz_test",
size = "small",
srcs = ["url_fuzz_test.cpp"],
copts = HASTUR_COPTS,
# Every file under url_fuzz_test_corpus/ is used as the corpus.
corpus = glob(["url_fuzz_test_corpus/**"]),
# Restricts the target to the platforms listed in HASTUR_FUZZ_PLATFORMS.
target_compatible_with = HASTUR_FUZZ_PLATFORMS,
deps = [":url"],
)
7 changes: 7 additions & 0 deletions url/rtti_hack.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
// SPDX-FileCopyrightText: 2023 Robin Lindén <dev@robinlinden.eu>
//
// SPDX-License-Identifier: BSD-2-Clause

#include "url/rtti_hack.h"

// Explicit instantiation definition paired with the `extern template`
// declaration in url/rtti_hack.h. It lives in this translation unit so that the
// instantiation is emitted here rather than in every file including the header.
template class icu::StringByteSink<std::string>;
17 changes: 17 additions & 0 deletions url/rtti_hack.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
// SPDX-FileCopyrightText: 2023 Robin Lindén <dev@robinlinden.eu>
//
// SPDX-License-Identifier: BSD-2-Clause

#ifndef URL_RTTI_HACK_H_
#define URL_RTTI_HACK_H_

#include <unicode/bytestream.h>

#include <string>

// ICU needs to be compiled with RTTI, and that means that any ICU templates
// that we instantiate also require RTTI. We therefore instantiate them
// explicitly in one place and only declare them `extern` here, to try to
// shield the rest of the codebase from that requirement.
extern template class icu::StringByteSink<std::string>;

#endif

0 comments on commit 0c83d93

Please sign in to comment.