Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
44 commits
Select commit Hold shift + click to select a range
495751c
Add generic trait for method parameters
alexvbrdn Jul 1, 2025
8abe573
WIP
alexvbrdn Jul 6, 2025
1e7ec95
WIP
alexvbrdn Jul 8, 2025
9cf30a6
WIP
alexvbrdn Jul 10, 2025
671f3a3
WIP
alexvbrdn Jul 10, 2025
75c06b3
add parallel intersection
alexvbrdn Jul 27, 2025
b37ef65
WIP
alexvbrdn Jul 28, 2025
3ea0dec
update readme
alexvbrdn Jul 29, 2025
a47c779
rename methods
alexvbrdn Aug 2, 2025
bcc9d7d
Update description
alexvbrdn Aug 2, 2025
8d5b66e
Update docs
alexvbrdn Aug 2, 2025
90c462b
update
alexvbrdn Aug 3, 2025
7884f73
update most descriptions
alexvbrdn Aug 3, 2025
eb79826
fix bench
alexvbrdn Aug 3, 2025
691a972
fix docs test
alexvbrdn Aug 3, 2025
4fe1d94
update docs
alexvbrdn Aug 3, 2025
a42c87a
Update README.md
alexvbrdn Aug 4, 2025
aec5c39
Update naming and docs
alexvbrdn Aug 4, 2025
19aef3f
improve test
alexvbrdn Aug 4, 2025
6878356
Fix bad repetition case
alexvbrdn Aug 6, 2025
29697f8
fix algo repeat
alexvbrdn Aug 8, 2025
e24624e
update serialization
alexvbrdn Aug 9, 2025
f874caa
remove some errors
alexvbrdn Aug 11, 2025
c2cc842
Change regex convertion algo
alexvbrdn Sep 16, 2025
135cca6
update tests
alexvbrdn Sep 16, 2025
c3d800a
fix clippy
alexvbrdn Sep 16, 2025
afbacc6
add test
alexvbrdn Sep 16, 2025
7b576f7
update readme
alexvbrdn Sep 16, 2025
0a0d91b
update readme
alexvbrdn Sep 16, 2025
05b6802
update method signature
alexvbrdn Sep 16, 2025
a2dc371
add concat all for regex
alexvbrdn Sep 17, 2025
7afac62
update docs
alexvbrdn Sep 19, 2025
2119ea6
update doc
alexvbrdn Sep 19, 2025
0315e7d
additional updates
alexvbrdn Sep 19, 2025
1fd6bfc
update readme
alexvbrdn Sep 19, 2025
499735d
update docs
alexvbrdn Sep 20, 2025
b67597d
update method signatures
alexvbrdn Sep 21, 2025
c243522
fix failed build
alexvbrdn Sep 21, 2025
02852f9
fix serialization
alexvbrdn Sep 24, 2025
9a1266f
Huge improvements in generate strings
alexvbrdn Oct 2, 2025
fb5eb1a
Fix bad implementation of to_embedding
alexvbrdn Oct 3, 2025
1e4980b
improve assert_not_timed_out clock cycle
alexvbrdn Oct 3, 2025
68e87c4
Parallelize state selection for elimination
alexvbrdn Oct 8, 2025
863fdce
Fix misuse of hashmap for determinize
alexvbrdn Oct 8, 2025
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 4 additions & 1 deletion .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -18,4 +18,7 @@ Cargo.lock
# be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore
# and can be added to the global gitignore or merged into this file. For a more nuclear
# option (not recommended) you can uncomment the following to ignore the entire idea folder.
#.idea/
#.idea/

# cargo mutants output
mutants.out*/
18 changes: 8 additions & 10 deletions Cargo.toml
Original file line number Diff line number Diff line change
@@ -1,20 +1,18 @@
[package]
name = "regexsolver"
version = "0.3.1"
edition = "2021"
version = "1.0.0"
edition = "2024"
authors = ["Alexandre van Beurden"]
repository = "https://github.com/RegexSolver/regexsolver"
license = "MIT"
keywords = ["automaton", "intersection", "union", "difference", "regex"]
description = "Manipulate regex and automaton as if they were sets."
description = "High-performance Rust library for building, combining, and analyzing regular expressions and finite automata"
readme = "README.md"

[dependencies]
serde = { version = "1.0", features = ["derive"], optional = true }
ciborium = { version = "0.2.2", optional = true }
z85 = { version = "3.0.5", optional = true }
aes-gcm-siv = { version = "0.11.1", optional = true }
sha2 = { version = "0.10.8", optional = true }
flate2 = { version = "1.0.30", features = [
"zlib-ng",
], default-features = false, optional = true }
Expand All @@ -26,6 +24,8 @@ lazy_static = "1.4.0"
regex = "1.10.3"
regex-syntax = "0.8.5"
regex-charclass = { version = "1.0.3" }
rayon = "1.10.0"
bit-set = "0.8.0"

[dev-dependencies]
criterion = { version = "0.5", features = ["html_reports"] }
Expand All @@ -34,17 +34,15 @@ serde_json = "1.0.114"


[features]
default = ["serde"]
serde = [
default = []
serializable = [
"regex-charclass/serde",
"dep:serde",
"dep:ciborium",
"dep:z85",
"dep:aes-gcm-siv",
"dep:sha2",
"dep:flate2",
]

[[bench]]
name = "my_benchmark"
harness = false
harness = false
314 changes: 276 additions & 38 deletions README.md

Large diffs are not rendered by default.

7 changes: 3 additions & 4 deletions benches/my_benchmark.rs
Original file line number Diff line number Diff line change
@@ -1,4 +1,3 @@
use ahash::AHashSet;
use criterion::{black_box, criterion_group, criterion_main, Criterion};
use regexsolver::{fast_automaton::FastAutomaton, regex::RegularExpression};

Expand All @@ -7,18 +6,18 @@ fn parse_regex(regex: &str) -> RegularExpression {
}

fn to_regex(automaton: &FastAutomaton) -> RegularExpression {
automaton.to_regex().unwrap()
automaton.to_regex()
}

fn determinize(automaton: &FastAutomaton) -> FastAutomaton {
automaton.determinize().unwrap()
automaton.determinize().unwrap().into_owned()
}

fn intersection(automaton_1: &FastAutomaton, automaton_2: &FastAutomaton) -> FastAutomaton {
automaton_1.intersection(automaton_2).unwrap()
}

fn generate_strings(automaton: &FastAutomaton) -> AHashSet<String> {
fn generate_strings(automaton: &FastAutomaton) -> Vec<String> {
automaton.generate_strings(2000).unwrap()
}

Expand Down
6 changes: 3 additions & 3 deletions src/cardinality/mod.rs
Original file line number Diff line number Diff line change
@@ -1,10 +1,10 @@
#[cfg(feature = "serde")]
#[cfg(feature = "serializable")]
use serde::{Deserialize, Serialize};

/// Represents a number.
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
#[cfg_attr(feature = "serializable", derive(Serialize, Deserialize))]
#[derive(PartialEq, Eq, Debug, Clone)]
#[cfg_attr(feature = "serde", serde(tag = "type", content = "value"))]
#[cfg_attr(feature = "serializable", serde(tag = "type", content = "value", rename_all = "camelCase"))]
pub enum Cardinality<U> {
/// An infinite number.
Infinite,
Expand Down
43 changes: 12 additions & 31 deletions src/error/mod.rs
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
use std::fmt::{self};

use crate::tokenizer::token::TokenError;
#[cfg(feature = "serializable")]
use crate::fast_automaton::serializer::tokenizer::token::TokenError;

/// An error thrown by the engine.
#[derive(Debug, PartialEq, Eq)]
Expand All @@ -9,18 +10,13 @@ pub enum EngineError {
InvalidCharacterInRegex,
/// The operation took too much time.
OperationTimeOutError,
/// The given automaton should be deterministic.
AutomatonShouldBeDeterministic,
/// The automaton has too many states.
AutomatonHasTooManyStates,
/// The regular expression can not be parsed.
RegexSyntaxError(String),
/// Too many terms are used in the operation.
TooMuchTerms(usize, usize),
/// The provided range can not be built from the spanning set.
ConditionInvalidRange,
/// The provided index is out of bound of the condition.
ConditionIndexOutOfBound,
#[cfg(feature = "serializable")]
/// There is an error with one of the tokens.
TokenError(TokenError),
}
Expand All @@ -30,33 +26,18 @@ impl fmt::Display for EngineError {
match self {
EngineError::InvalidCharacterInRegex => write!(f, "Invalid character used in regex."),
EngineError::OperationTimeOutError => write!(f, "The operation took too much time."),
EngineError::AutomatonShouldBeDeterministic => write!(f, "The given automaton should be deterministic."),
EngineError::AutomatonHasTooManyStates => write!(f, "The automaton has too many states."),
EngineError::AutomatonHasTooManyStates => {
write!(f, "The automaton has too many states.")
}
EngineError::RegexSyntaxError(err) => write!(f, "{err}."),
EngineError::TooMuchTerms(max, got) => write!(f, "Too many terms are used in this operation, the maximum allowed for your plan is {max} and you used {got}."),
EngineError::TokenError(err) => write!(f, "{err}."),
EngineError::ConditionInvalidRange => write!(f, "The provided range can not be built from the spanning set."),
EngineError::ConditionIndexOutOfBound => write!(f, "The provided index is out of bound of the condition."),
#[cfg(feature = "serializable")]
EngineError::TokenError(err) => write!(f, "{err}."),
EngineError::ConditionInvalidRange => write!(
f,
"The provided range can not be built from the spanning set."
),
}
}
}

impl std::error::Error for EngineError {}

impl EngineError {
/// Determine if the error is a server error.
/// A server error should not be shown to the end user.
pub fn is_server_error(&self) -> bool {
match self {
EngineError::InvalidCharacterInRegex => false,
EngineError::OperationTimeOutError => false,
EngineError::AutomatonShouldBeDeterministic => true,
EngineError::AutomatonHasTooManyStates => false,
EngineError::RegexSyntaxError(_) => false,
EngineError::TooMuchTerms(_, _) => false,
EngineError::TokenError(_) => false,
EngineError::ConditionInvalidRange => true,
EngineError::ConditionIndexOutOfBound => true,
}
}
}
Loading