From 5c9840eab9477c3964b1a310ede4f5e3070ccc67 Mon Sep 17 00:00:00 2001 From: Canop Date: Fri, 21 Jan 2022 22:25:25 +0100 Subject: [PATCH] change some panics into managed errors so that the calling application doesn't crash --- src/crash.rs | 27 +++++++++++ src/easy.rs | 7 +-- src/html.rs | 15 +++--- src/lib.rs | 6 +++ src/parsing/parser.rs | 78 ++++++++++++++++++-------------- src/parsing/syntax_definition.rs | 13 +++--- 6 files changed, 98 insertions(+), 48 deletions(-) create mode 100644 src/crash.rs diff --git a/src/crash.rs b/src/crash.rs new file mode 100644 index 00000000..a79a142d --- /dev/null +++ b/src/crash.rs @@ -0,0 +1,27 @@ +use { + std::fmt, +}; + +#[derive(Debug, Clone)] +pub struct CrashError { + message: String, +} + +impl CrashError { + pub fn new<S: Into<String>>(s: S) -> Self { + Self { + message: s.into(), + } + } +} + +impl fmt::Display for CrashError { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + write!(f, "Syntect crashed : {:?}", self.message) + } +} + +#[macro_export] +macro_rules! crash { + ($($arg:tt)*) => (Err(CrashError::new(&format!($($arg)*)))); +} diff --git a/src/easy.rs b/src/easy.rs index 6d33dced..8c94b098 100644 --- a/src/easy.rs +++ b/src/easy.rs @@ -8,6 +8,7 @@ use std::io::{self, BufReader}; use std::fs::File; use std::path::Path; // use util::debug_print_ops; +use crate::CrashError; /// Simple way to go directly from lines of text to colored tokens. 
/// @@ -57,14 +58,14 @@ impl<'a> HighlightLines<'a> { } /// Highlights a line of a file - pub fn highlight<'b>(&mut self, line: &'b str, syntax_set: &SyntaxSet) -> Vec<(Style, &'b str)> { + pub fn highlight<'b>(&mut self, line: &'b str, syntax_set: &SyntaxSet) -> Result<Vec<(Style, &'b str)>, CrashError> { // println!("{}", self.highlight_state.path); - let ops = self.parse_state.parse_line(line, syntax_set); + let ops = self.parse_state.parse_line(line, syntax_set)?; // use util::debug_print_ops; // debug_print_ops(line, &ops); let iter = HighlightIterator::new(&mut self.highlight_state, &ops[..], line, &self.highlighter); - iter.collect() + Ok(iter.collect()) } } diff --git a/src/html.rs b/src/html.rs index a97ba0ce..e7a1ce82 100644 --- a/src/html.rs +++ b/src/html.rs @@ -11,6 +11,7 @@ use std::fmt::Write; use std::io::{self, BufRead}; use std::path::Path; +use crate::CrashError; /// Output HTML for a line of code with `<span>` elements using class names /// @@ -86,8 +87,8 @@ impl<'a> ClassedHTMLGenerator<'a> { /// /// *Note:* This function requires `line` to include a newline at the end and /// also use of the `load_defaults_newlines` version of the syntaxes. 
- pub fn parse_html_for_line_which_includes_newline(&mut self, line: &str) { - let parsed_line = self.parse_state.parse_line(line, self.syntax_set); + pub fn parse_html_for_line_which_includes_newline(&mut self, line: &str) -> Result<(), CrashError> { + let parsed_line = self.parse_state.parse_line(line, self.syntax_set)?; let (formatted_line, delta) = line_tokens_to_classed_spans( line, parsed_line.as_slice(), @@ -96,6 +97,7 @@ impl<'a> ClassedHTMLGenerator<'a> { ); self.open_spans += delta; self.html.push_str(formatted_line.as_str()); + Ok(()) } /// Parse the line of code and update the internal HTML buffer with tagged HTML @@ -268,12 +270,12 @@ pub fn highlighted_html_for_string( ss: &SyntaxSet, syntax: &SyntaxReference, theme: &Theme, -) -> String { +) -> Result<String, CrashError> { let mut highlighter = HighlightLines::new(syntax, theme); let (mut output, bg) = start_highlighted_html_snippet(theme); for line in LinesWithEndings::from(s) { - let regions = highlighter.highlight(line, ss); + let regions = highlighter.highlight(line, ss)?; append_highlighted_html_for_styled_line( &regions[..], IncludeBackground::IfDifferent(bg), @@ -281,7 +283,7 @@ pub fn highlighted_html_for_string( ); } output.push_str("</pre>\n"); - output + Ok(output) } /// Convenience method that combines `start_highlighted_html_snippet`, `styled_line_to_highlighted_html` @@ -301,7 +303,8 @@ pub fn highlighted_html_for_file<P: AsRef<Path>>( let mut line = String::new(); while highlighter.reader.read_line(&mut line)? 
> 0 { { - let regions = highlighter.highlight_lines.highlight(&line, ss); + let regions = highlighter.highlight_lines.highlight(&line, ss) + .map_err(|_| io::Error::new(io::ErrorKind::Other, "syntect crashed"))?; append_highlighted_html_for_styled_line( &regions[..], IncludeBackground::IfDifferent(bg), diff --git a/src/lib.rs b/src/lib.rs index 6ae5cea3..2f11e5b2 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -29,6 +29,12 @@ extern crate serde_derive; #[macro_use] extern crate pretty_assertions; + +#[macro_use] +mod crash; + +pub use crash::*; + #[cfg(any(feature = "dump-load-rs", feature = "dump-load", feature = "dump-create", feature = "dump-create-rs"))] pub mod dumps; #[cfg(feature = "parsing")] diff --git a/src/parsing/parser.rs b/src/parsing/parser.rs index e44d223e..2b4f6e0d 100644 --- a/src/parsing/parser.rs +++ b/src/parsing/parser.rs @@ -19,6 +19,7 @@ use std::i32; use std::hash::BuildHasherDefault; use fnv::FnvHasher; use crate::parsing::syntax_set::{SyntaxSet, SyntaxReference}; +use crate::CrashError; /// Keeps the current parser state (the internal syntax interpreter stack) between lines of parsing. /// @@ -194,7 +195,11 @@ impl ParseState { /// [`ScopeStack::apply`]: struct.ScopeStack.html#method.apply /// [`SyntaxSet`]: struct.SyntaxSet.html /// [`ParseState`]: struct.ParseState.html - pub fn parse_line(&mut self, line: &str, syntax_set: &SyntaxSet) -> Vec<(usize, ScopeStackOp)> { + pub fn parse_line( + &mut self, + line: &str, + syntax_set: &SyntaxSet, + ) -> Result<Vec<(usize, ScopeStackOp)>, CrashError> { assert!(!self.stack.is_empty(), "Somehow main context was popped from the stack"); let mut match_start = 0; @@ -223,9 +228,9 @@ impl ParseState { &mut regions, &mut non_consuming_push_at, &mut res - ) {} + )? 
{} - res + Ok(res) } #[allow(clippy::too_many_arguments)] @@ -238,7 +243,7 @@ impl ParseState { regions: &mut Region, non_consuming_push_at: &mut (usize, usize), ops: &mut Vec<(usize, ScopeStackOp)>, - ) -> bool { + ) -> Result<bool, CrashError> { let check_pop_loop = { let (pos, stack_depth) = *non_consuming_push_at; pos == *start && stack_depth == self.stack.len() @@ -249,9 +254,9 @@ impl ParseState { self.proto_starts.pop(); } - let best_match = self.find_best_match(line, *start, syntax_set, search_cache, regions, check_pop_loop); + let best_match = self.find_best_match(line, *start, syntax_set, search_cache, regions, check_pop_loop)?; - if let Some(reg_match) = best_match { + Ok(if let Some(reg_match) = best_match { if reg_match.would_loop { // A push that doesn't consume anything (a regex that resulted // in an empty match at the current position) can not be @@ -268,11 +273,11 @@ impl ParseState { // unicode characters can be more than 1 byte. if let Some((i, _)) = line[*start..].char_indices().nth(1) { *start += i; - return true; + return Ok(true); } else { // End of line, no character to advance and no point trying // any more patterns. - return false; + return Ok(false); } } @@ -285,7 +290,7 @@ impl ParseState { // check the next "pop" for loops. Otherwise leave the state, // e.g. non-consuming "set" could also result in a loop. 
let context = reg_match.context; - let match_pattern = context.match_at(reg_match.pat_index); + let match_pattern = context.match_at(reg_match.pat_index)?; if let MatchOperation::Push(_) = match_pattern.operation { *non_consuming_push_at = (match_end, self.stack.len() + 1); } @@ -303,12 +308,12 @@ impl ParseState { let id = &self.stack[self.stack.len() - 1].context; syntax_set.get_context(id) }; - self.exec_pattern(line, &reg_match, level_context, syntax_set, ops); + self.exec_pattern(line, &reg_match, level_context, syntax_set, ops)?; true } else { false - } + }) } fn find_best_match<'a>( @@ -319,7 +324,7 @@ impl ParseState { search_cache: &mut SearchCache, regions: &mut Region, check_pop_loop: bool, - ) -> Option<RegexMatch<'a>> { + ) -> Result<Option<RegexMatch<'a>>, CrashError> { let cur_level = &self.stack[self.stack.len() - 1]; let context = syntax_set.get_context(&cur_level.context); let prototype = if let Some(ref p) = context.prototype { @@ -347,7 +352,7 @@ impl ParseState { for (from_with_proto, ctx, captures) in context_chain { for (pat_context, pat_index) in context_iter(syntax_set, syntax_set.get_context(ctx)) { - let match_pat = pat_context.match_at(pat_index); + let match_pat = pat_context.match_at(pat_index)?; if let Some(match_region) = self.search( line, start, match_pat, captures, search_cache, regions @@ -381,13 +386,13 @@ impl ParseState { if match_start == start && !pop_would_loop { // We're not gonna find a better match after this, // so as an optimization we can stop matching now. 
- return best_match; + return Ok(best_match); } } } } } - best_match + Ok(best_match) } fn search(&self, @@ -457,13 +462,13 @@ impl ParseState { level_context: &'a Context, syntax_set: &'a SyntaxSet, ops: &mut Vec<(usize, ScopeStackOp)>, - ) -> bool { + ) -> Result<bool, CrashError> { let (match_start, match_end) = reg_match.regions.pos(0).unwrap(); let context = reg_match.context; - let pat = context.match_at(reg_match.pat_index); + let pat = context.match_at(reg_match.pat_index)?; // println!("running pattern {:?} on '{}' at {}, operation {:?}", pat.regex_str, line, match_start, pat.operation); - self.push_meta_ops(true, match_start, level_context, &pat.operation, syntax_set, ops); + self.push_meta_ops(true, match_start, level_context, &pat.operation, syntax_set, ops)?; for s in &pat.scope { // println!("pushing {:?} at {}", s, match_start); ops.push((match_start, ScopeStackOp::Push(*s))); @@ -496,9 +501,9 @@ impl ParseState { // println!("popping at {}", match_end); ops.push((match_end, ScopeStackOp::Pop(pat.scope.len()))); } - self.push_meta_ops(false, match_end, &*level_context, &pat.operation, syntax_set, ops); + self.push_meta_ops(false, match_end, &*level_context, &pat.operation, syntax_set, ops)?; - self.perform_op(line, &reg_match.regions, pat, syntax_set) + Ok(self.perform_op(line, &reg_match.regions, pat, syntax_set)) } fn push_meta_ops<'a>( @@ -509,7 +514,7 @@ impl ParseState { match_op: &MatchOperation, syntax_set: &'a SyntaxSet, ops: &mut Vec<(usize, ScopeStackOp)>, - ) { + ) -> Result<(), CrashError> { // println!("metas ops for {:?}, initial: {}", // match_op, // initial); @@ -545,7 +550,7 @@ impl ParseState { } // add each context's meta scope for r in context_refs.iter() { - let ctx = r.resolve(syntax_set); + let ctx = r.resolve(syntax_set)?; if !is_set { if let Some(clear_amount) = ctx.clear_scopes { @@ -558,17 +563,23 @@ impl ParseState { } } } else { - let repush = (is_set && (!cur_context.meta_scope.is_empty() || !cur_context.meta_content_scope.is_empty())) || 
context_refs.iter().any(|r| { - let ctx = r.resolve(syntax_set); - - !ctx.meta_content_scope.is_empty() || (ctx.clear_scopes.is_some() && is_set) - }); + let mut repush = is_set && (!cur_context.meta_scope.is_empty() || !cur_context.meta_content_scope.is_empty()); + if !repush { + for r in context_refs.iter() { + let ctx = r.resolve(syntax_set)?; + if !ctx.meta_content_scope.is_empty() || (ctx.clear_scopes.is_some() && is_set) { + repush = true; + break; + } + } + } if repush { // remove previously pushed meta scopes, so that meta content scopes will be applied in the correct order - let mut num_to_pop : usize = context_refs.iter().map(|r| { - let ctx = r.resolve(syntax_set); - ctx.meta_scope.len() - }).sum(); + let mut num_to_pop : usize = 0; + for r in context_refs.iter() { + let ctx = r.resolve(syntax_set)?; + num_to_pop += ctx.meta_scope.len(); + } // also pop off the original context's meta scopes if is_set { @@ -582,7 +593,7 @@ impl ParseState { // now we push meta scope and meta context scope for each context pushed for r in context_refs { - let ctx = r.resolve(syntax_set); + let ctx = r.resolve(syntax_set)?; // for some reason, contrary to my reading of the docs, set does this after the token if is_set { @@ -603,6 +614,7 @@ impl ParseState { }, MatchOperation::None => (), } + Ok(()) } /// Returns true if the stack was changed @@ -1729,7 +1741,7 @@ contexts: expect_scope_stacks_with_syntax("aa", &["", ""], syntax); } - + #[test] fn can_include_nested_backrefs() { let syntax = SyntaxDefinition::load_from_str(r#" diff --git a/src/parsing/syntax_definition.rs b/src/parsing/syntax_definition.rs index a064e5e8..31e62a3b 100644 --- a/src/parsing/syntax_definition.rs +++ b/src/parsing/syntax_definition.rs @@ -11,6 +11,7 @@ use super::regex::{Regex, Region}; use regex_syntax::escape; use serde::{Serialize, Serializer}; use crate::parsing::syntax_set::SyntaxSet; +use crate::CrashError; pub type CaptureMapping = Vec<(usize, Vec)>; @@ -175,20 +176,20 @@ pub fn 
context_iter<'a>(syntax_set: &'a SyntaxSet, context: &'a Context) -> Matc impl Context { /// Returns the match pattern at an index, panics if the thing isn't a match pattern - pub fn match_at(&self, index: usize) -> &MatchPattern { + pub fn match_at(&self, index: usize) -> Result<&MatchPattern, CrashError> { match self.patterns[index] { - Pattern::Match(ref match_pat) => match_pat, - _ => panic!("bad index to match_at"), + Pattern::Match(ref match_pat) => Ok(match_pat), + _ => crash!("bad index to match_at"), } } } impl ContextReference { /// find the pointed to context, panics if ref is not linked - pub fn resolve<'a>(&self, syntax_set: &'a SyntaxSet) -> &'a Context { + pub fn resolve<'a>(&self, syntax_set: &'a SyntaxSet) -> Result<&'a Context, CrashError> { match *self { - ContextReference::Direct(ref context_id) => syntax_set.get_context(context_id), - _ => panic!("Can only call resolve on linked references: {:?}", self), + ContextReference::Direct(ref context_id) => Ok(syntax_set.get_context(context_id)), + _ => crash!("Can only call resolve on linked references: {:?}", self), } }