Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

url/url: Add URL parser #476

Merged
merged 1 commit into from
Jun 21, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
2 changes: 2 additions & 0 deletions .bazelrc
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,8 @@ coverage --combined_report=lcov
test --test_output=errors
test --test_summary=terse
test --test_verbose_timeout_warnings
# Set ICU data directory for tests
test --test_env=HASTUR_ICU_DATA=external/icu-data/
Zer0-One marked this conversation as resolved.
Show resolved Hide resolved

# Bazel deprecations
# =========================================================
Expand Down
3 changes: 3 additions & 0 deletions .clang-tidy
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,8 @@
#
# -bugprone-narrowing-conversions: Very noisy for not much gain.
#
# -bugprone-unchecked-optional-access: Makes clang-tidy hang during CI.
#
# -clang-analyzer-cplusplus.NewDeleteLeaks: Lots of false positives w/
# -std=c++2b when calling std::make_shared in the JS AST.
# js/ast_executor_test.cpp:176:5: error: Potential leak of memory pointed to by
Expand Down Expand Up @@ -54,6 +56,7 @@ Checks: >
readability-qualified-auto,
-bugprone-exception-escape,
-bugprone-narrowing-conversions,
-bugprone-unchecked-optional-access,
-clang-analyzer-cplusplus.NewDeleteLeaks,
-clang-analyzer-optin.cplusplus.UninitializedObject,
-clang-diagnostic-builtin-macro-redefined,
Expand Down
2 changes: 1 addition & 1 deletion .github/workflows/ci.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -266,7 +266,7 @@ jobs:
timeout-minutes: 30
steps:
- uses: actions/checkout@v3
- run: grep --recursive --no-filename --only-matching --exclude=WORKSPACE --exclude=*test.cpp --exclude=ci.yaml 'https://[^)(}{",# ]*' | grep -v '^https://$' | sort | uniq | xargs wget --spider
- run: grep --recursive --no-filename --only-matching --exclude-dir="*corpus" --exclude=WORKSPACE --exclude=*test.cpp --exclude=ci.yaml 'https://[^)(}{",# ]*' | grep -v '^https://$' | sort | uniq | xargs wget --spider

gitlint:
runs-on: ubuntu-22.04
Expand Down
7 changes: 7 additions & 0 deletions WORKSPACE
Original file line number Diff line number Diff line change
Expand Up @@ -119,6 +119,13 @@ http_archive(
)

# https://github.com/ocornut/imgui
http_archive(
name = "icu-data", # Unicode-DFS-2016
# The generated BUILD file exports only the ICU data file, which
# //url:url references as @icu-data//:icudt72l.dat.
build_file_content = "exports_files([\"icudt72l.dat\"])",
sha256 = "1bc02487cbeaec3fc2d0dc941e8b243e7d35cd79899a201df88dc9ec9667a162",
url = "https://github.com/unicode-org/icu/releases/download/release-72-1/icu4c-72_1-data-bin-l.zip",
)

http_archive(
name = "imgui", # MIT
build_file = "//third_party:imgui.BUILD",
Expand Down
15 changes: 11 additions & 4 deletions third_party/icu.BUILD
Original file line number Diff line number Diff line change
Expand Up @@ -31,13 +31,20 @@ cc_library(
],
"//conditions:default": [],
}),
defines = [
"U_STATIC_IMPLEMENTATION",
"U_COMMON_IMPLEMENTATION",
"U_CHARSET_IS_UTF8=1",
"U_HIDE_OBSOLETE_UTF_OLD_H=1",
"UCONFIG_NO_CONVERSION=1",
],
linkopts = select({
"@platforms//os:windows": [],
"@platforms//os:windows": [
"-DEFAULTLIB:advapi32",
],
"//conditions:default": ["-ldl"],
}),
local_defines = [
"U_COMMON_IMPLEMENTATION",
],
linkstatic = True,
strip_include_prefix = "source/common/",
visibility = ["//visibility:public"],
)
30 changes: 29 additions & 1 deletion url/BUILD
Original file line number Diff line number Diff line change
@@ -1,16 +1,33 @@
load("@rules_cc//cc:defs.bzl", "cc_library", "cc_test")
load("//bzl:copts.bzl", "HASTUR_COPTS")
load("@rules_fuzzing//fuzzing:cc_defs.bzl", "cc_fuzz_test")
load("//bzl:copts.bzl", "HASTUR_COPTS", "HASTUR_FUZZ_PLATFORMS")

# Holds the explicit instantiation of icu::StringByteSink<std::string>
# (rtti_hack.cpp) in its own target so it can be built with RTTI enabled.
# See the comment in rtti_hack.h for the rationale.
cc_library(
name = "rtti_hack",
srcs = ["rtti_hack.cpp"],
hdrs = ["rtti_hack.h"],
# RTTI flags are appended to the common options: /GR on Windows (MSVC),
# -frtti everywhere else.
copts = HASTUR_COPTS + select({
"@platforms//os:windows": ["/GR"],
"//conditions:default": ["-frtti"],
}),
deps = ["@icu//:common"],
)

# The url library (url.cpp / url.h), visible to every package.
cc_library(
name = "url",
srcs = ["url.cpp"],
hdrs = ["url.h"],
copts = HASTUR_COPTS,
# ICU data file exported by the "icu-data" archive declared in WORKSPACE.
data = ["@icu-data//:icudt72l.dat"],
visibility = ["//visibility:public"],
deps = [
# Provides the explicit instantiation of icu::StringByteSink<std::string>.
":rtti_hack",
"//util:base_parser",
"//util:string",
"//util:unicode",
"//util:uuid",
"@icu//:common",
"@spdlog",
],
)

Expand All @@ -22,5 +39,16 @@ cc_test(
deps = [
":url",
"//etest",
"@icu//:common",
],
)

# Fuzz test for the :url library.
cc_fuzz_test(
name = "url_fuzz_test",
size = "small",
srcs = ["url_fuzz_test.cpp"],
copts = HASTUR_COPTS,
# Every file under url_fuzz_test_corpus/ is passed as the corpus.
corpus = glob(["url_fuzz_test_corpus/**"]),
# Restricted to the platforms listed in HASTUR_FUZZ_PLATFORMS
# (loaded from //bzl:copts.bzl).
target_compatible_with = HASTUR_FUZZ_PLATFORMS,
deps = [":url"],
)
7 changes: 7 additions & 0 deletions url/rtti_hack.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
// SPDX-FileCopyrightText: 2023 Robin Lindén <dev@robinlinden.eu>
//
// SPDX-License-Identifier: BSD-2-Clause

#include "url/rtti_hack.h"

// Explicit instantiation definition matching the `extern template` declaration
// in url/rtti_hack.h. This file is built with RTTI enabled (see the :rtti_hack
// target in url/BUILD).
template class icu::StringByteSink<std::string>;
17 changes: 17 additions & 0 deletions url/rtti_hack.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
// SPDX-FileCopyrightText: 2023 Robin Lindén <dev@robinlinden.eu>
//
// SPDX-License-Identifier: BSD-2-Clause

#ifndef URL_RTTI_HACK_H_
#define URL_RTTI_HACK_H_

#include <unicode/bytestream.h>

#include <string>

// ICU needs to be compiled with RTTI, and that means that any templates of
// theirs that we instantiate also require RTTI. We therefore instantiate them
// in rtti_hack.cpp and only declare them here (extern template) to try to
// shield the rest of the codebase from that.
extern template class icu::StringByteSink<std::string>;

#endif