Skip to content

Commit

Permalink
feat: add std regex builtins
Browse files Browse the repository at this point in the history
Upstream issue: google/jsonnet#1039
  • Loading branch information
CertainLach committed Jun 15, 2023
1 parent 777cdf5 commit 33ce087
Show file tree
Hide file tree
Showing 5 changed files with 232 additions and 0 deletions.
44 changes: 44 additions & 0 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

16 changes: 16 additions & 0 deletions crates/jrsonnet-evaluator/src/typed/conversions.rs
Original file line number Diff line number Diff line change
Expand Up @@ -223,6 +223,22 @@ impl Typed for String {
}
}

/// Lets `StrValue` be used directly as a typed builtin argument or return value,
/// without going through an owned `String`.
impl Typed for StrValue {
    const TYPE: &'static ComplexValType = &ComplexValType::Simple(ValType::Str);

    /// Wraps the string value into `Val::Str`; this conversion never fails.
    fn into_untyped(value: Self) -> Result<Val> {
        Ok(Val::Str(value))
    }

    /// Type-checks `value` against `Self::TYPE`, then unwraps the string payload.
    fn from_untyped(value: Val) -> Result<Self> {
        <Self as Typed>::TYPE.check(&value)?;
        // The check above is expected to have rejected everything that is not a
        // string, so any other variant here is treated as an internal bug.
        if let Val::Str(inner) = value {
            Ok(inner)
        } else {
            unreachable!()
        }
    }
}

impl Typed for char {
const TYPE: &'static ComplexValType = &ComplexValType::Char;

Expand Down
3 changes: 3 additions & 0 deletions crates/jrsonnet-stdlib/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -42,6 +42,9 @@ serde_json = "1.0"
serde_yaml_with_quirks = "0.8.24"

num-bigint = { version = "0.4.3", optional = true }
regex = "1.8.4"
lru = "0.10.0"
rustc-hash = "1.1.0"

[build-dependencies]
jrsonnet-parser.workspace = true
Expand Down
35 changes: 35 additions & 0 deletions crates/jrsonnet-stdlib/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -44,6 +44,8 @@ mod sets;
pub use sets::*;
mod compat;
pub use compat::*;
mod regex;
pub use crate::regex::*;

pub fn stdlib_uncached(settings: Rc<RefCell<Settings>>) -> ObjValue {
let mut builder = ObjValueBuilder::new();
Expand Down Expand Up @@ -154,6 +156,8 @@ pub fn stdlib_uncached(settings: Rc<RefCell<Settings>>) -> ObjValue {
// Sets
("setMember", builtin_set_member::INST),
("setInter", builtin_set_inter::INST),
// Regex
("regexQuoteMeta", builtin_regex_quote_meta::INST),
// Compat
("__compare", builtin___compare::INST),
]
Expand Down Expand Up @@ -187,6 +191,37 @@ pub fn stdlib_uncached(settings: Rc<RefCell<Settings>>) -> ObjValue {
.value(Val::Func(FuncVal::builtin(builtin_trace { settings })))
.expect("no conflict");

// Regex
let regex_cache = RegexCache::default();
builder
.member("regexFullMatch".into())
.hide()
.value(Val::Func(FuncVal::builtin(builtin_regex_full_match {
cache: regex_cache.clone(),
})))
.expect("no conflict");
builder
.member("regexPartialMatch".into())
.hide()
.value(Val::Func(FuncVal::builtin(builtin_regex_partial_match {
cache: regex_cache.clone(),
})))
.expect("no conflict");
builder
.member("regexReplace".into())
.hide()
.value(Val::Func(FuncVal::builtin(builtin_regex_replace {
cache: regex_cache.clone(),
})))
.expect("no conflict");
builder
.member("regexGlobalReplace".into())
.hide()
.value(Val::Func(FuncVal::builtin(builtin_regex_global_replace {
cache: regex_cache.clone(),

Check warning on line 221 in crates/jrsonnet-stdlib/src/lib.rs

View workflow job for this annotation

GitHub Actions / clippy

redundant clone

warning: redundant clone --> crates/jrsonnet-stdlib/src/lib.rs:221:22 | 221 | cache: regex_cache.clone(), | ^^^^^^^^ help: remove this | note: this value is dropped without further use --> crates/jrsonnet-stdlib/src/lib.rs:221:11 | 221 | cache: regex_cache.clone(), | ^^^^^^^^^^^ = help: for further information visit https://rust-lang.github.io/rust-clippy/master/index.html#redundant_clone = note: `#[warn(clippy::redundant_clone)]` on by default
})))
.expect("no conflict");

builder
.member("id".into())
.hide()
Expand Down
134 changes: 134 additions & 0 deletions crates/jrsonnet-stdlib/src/regex.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,134 @@
use std::{cell::RefCell, hash::BuildHasherDefault, num::NonZeroUsize, rc::Rc};

use ::regex::Regex;
use jrsonnet_evaluator::{
error::{ErrorKind::*, Result},
val::StrValue,
IStr, ObjValueBuilder, Val,
};
use jrsonnet_macros::builtin;
use lru::LruCache;
use rustc_hash::FxHasher;

/// Cache of compiled regular expressions, keyed by the pattern source string.
///
/// The LRU map sits behind a `RefCell` so it can be updated through a shared
/// reference to the cache.
pub struct RegexCacheInner {
// Pattern text -> compiled regex; values are `Rc` so a hit only clones a pointer.
cache: RefCell<LruCache<IStr, Rc<Regex>, BuildHasherDefault<FxHasher>>>,
}
impl Default for RegexCacheInner {
    /// Builds an empty cache that keeps at most 20 compiled patterns and hashes
    /// its keys with `FxHasher`.
    fn default() -> Self {
        // 20 is a non-zero literal, so this `unwrap` cannot fail.
        let capacity = NonZeroUsize::new(20).unwrap();
        let hasher = BuildHasherDefault::default();
        let cache = RefCell::new(LruCache::with_hasher(capacity, hasher));
        Self { cache }
    }
}
/// Reference-counted handle to a [`RegexCacheInner`]; cloning the handle shares
/// the same underlying cache.
pub type RegexCache = Rc<RegexCacheInner>;
impl RegexCacheInner {
    /// Returns the compiled regex for `pattern`, compiling and caching it on a miss.
    ///
    /// # Errors
    ///
    /// Returns a `RuntimeError` when `pattern` is not a valid regular expression.
    /// Failed patterns are not cached.
    fn parse(&self, pattern: IStr) -> Result<Rc<Regex>> {
        let mut entries = self.cache.borrow_mut();

        // `get` also marks the entry as most recently used.
        if let Some(hit) = entries.get(&pattern).map(Rc::clone) {
            return Ok(hit);
        }

        let compiled = match Regex::new(&pattern) {
            Ok(compiled) => Rc::new(compiled),
            Err(e) => return Err(RuntimeError(format!("regex parse failed: {e}").into()).into()),
        };
        entries.push(pattern, Rc::clone(&compiled));
        Ok(compiled)
    }
}

/// Runs `regex` against `str` and converts the first match into a Jsonnet value.
///
/// Returns `null` when there is no match. Otherwise returns an object with:
/// - `string`: the text of the whole match (capture group 0),
/// - `captures`: an array with one string per capture group, where groups that
///   did not participate in the match are empty strings,
/// - `namedCaptures`: an object mapping each named group to its captured string.
pub fn regex_match_inner(regex: &Regex, str: String) -> Result<Val> {
    let Some(captured) = regex.captures(&str) else {
        return Ok(Val::Null);
    };

    // Group 0 is the whole match; only the explicit groups go into `captures`.
    let captures: Vec<Val> = captured
        .iter()
        .skip(1)
        .map(|group| {
            let text = match group {
                Some(matched) => matched.as_str().into(),
                None => IStr::empty(),
            };
            Val::Str(StrValue::Flat(text))
        })
        .collect();

    // `capture_names()` yields groups in the same order as `captures` (after
    // skipping group 0), so zipping pairs each name with its value.
    let mut named_captures = ObjValueBuilder::with_capacity(regex.capture_names().len());
    for (name, value) in regex.capture_names().skip(1).zip(&captures) {
        if let Some(name) = name {
            named_captures.member(name.into()).value(value.clone())?;
        }
    }

    let whole_match: &str = &captured[0];

    let mut out = ObjValueBuilder::with_capacity(3);
    out.member("string".into())
        .value_unchecked(Val::Str(whole_match.into()));
    out.member("captures".into())
        .value_unchecked(Val::Arr(captures.into()));
    out.member("namedCaptures".into())
        .value_unchecked(Val::Obj(named_captures.build()));

    Ok(Val::Obj(out.build()))
}

// `std.regexPartialMatch(pattern, str)`: searches `str` for the first place where
// `pattern` matches. Returns the match object built by `regex_match_inner`, or
// `null` when nothing matches. Fails if `pattern` does not compile.
#[builtin(fields(
    cache: RegexCache,
))]
pub fn builtin_regex_partial_match(
    this: &builtin_regex_partial_match,
    pattern: IStr,
    str: String,
) -> Result<Val> {
    let compiled = this.cache.parse(pattern)?;
    regex_match_inner(&*compiled, str)
}

// `std.regexFullMatch(pattern, str)`: succeeds only when `pattern` matches the
// whole of `str`. Returns the match object built by `regex_match_inner`, or
// `null` otherwise. Fails if the anchored pattern does not compile.
#[builtin(fields(
    cache: RegexCache,
))]
pub fn builtin_regex_full_match(
    this: &builtin_regex_full_match,
    pattern: StrValue,
    str: String,
) -> Result<Val> {
    // The pattern is wrapped in a non-capturing group before anchoring:
    // - without it, `a|b` would become `^a|b$`, which accepts any string that
    //   merely starts with `a` or ends with `b`;
    // - inline flags at the start of the pattern (e.g. `(?m)`) stay scoped to
    //   the group and cannot change the meaning of the outer `$`.
    // `(?:...)` does not capture, so group numbering is unchanged.
    let anchored = format!("^(?:{pattern})$").into();
    let regex = this.cache.parse(anchored)?;
    regex_match_inner(&regex, str)
}

#[builtin]
pub fn builtin_regex_quote_meta(pattern: String) -> String {
regex::escape(&pattern)
}

// `std.regexReplace(str, pattern, to)`: replaces one match of `pattern` in `str`
// with `to`, delegating to `Regex::replace`. Returns `str` unchanged when there
// is no match. Fails if `pattern` does not compile.
#[builtin(fields(
    cache: RegexCache,
))]
pub fn builtin_regex_replace(
    this: &builtin_regex_replace,
    str: String,
    pattern: IStr,
    to: String,
) -> Result<String> {
    let compiled = this.cache.parse(pattern)?;
    Ok(compiled.replace(&str, to.as_str()).into_owned())
}

// `std.regexGlobalReplace(str, pattern, to)`: replaces every match of `pattern`
// in `str` with `to`, delegating to `Regex::replace_all`. Returns `str` unchanged
// when there is no match. Fails if `pattern` does not compile.
#[builtin(fields(
    cache: RegexCache,
))]
pub fn builtin_regex_global_replace(
    this: &builtin_regex_global_replace,
    str: String,
    pattern: IStr,
    to: String,
) -> Result<String> {
    let compiled = this.cache.parse(pattern)?;
    Ok(compiled.replace_all(&str, to.as_str()).into_owned())
}

0 comments on commit 33ce087

Please sign in to comment.