Skip to content

Commit cd11704

Browse files
committed
omg nom parsing works… (#198)
…but really, it's not super nice, especially considering that the count of consumed bytes is based on pointer arithmetic.
1 parent d9afc22 commit cd11704

File tree

3 files changed

+46
-55
lines changed

3 files changed

+46
-55
lines changed

git-object/src/commit/message.rs

Lines changed: 32 additions & 53 deletions
Original file line numberDiff line numberDiff line change
@@ -8,72 +8,51 @@ use crate::{
88
mod decode {
99
use crate::bstr::{BStr, ByteSlice};
1010
use nom::branch::alt;
11-
use nom::bytes::complete::{tag, take_till1, take_until1};
12-
use nom::combinator::{all_consuming, map, opt, peek, recognize};
11+
use nom::bytes::complete::{tag, take_till1};
12+
use nom::combinator::all_consuming;
1313
use nom::error::ParseError;
14-
use nom::multi::{fold_many1, length_data};
15-
use nom::sequence::{pair, terminated};
14+
use nom::sequence::pair;
1615
use nom::IResult;
16+
use std::convert::TryInto;
1717

1818
fn newline<'a, E: ParseError<&'a [u8]>>(i: &'a [u8]) -> IResult<&'a [u8], &'a [u8], E> {
1919
alt((tag(b"\r\n"), tag(b"\n")))(i)
2020
}
2121

22-
fn subject<'a, E: ParseError<&'a [u8]>>(i: &'a [u8]) -> IResult<&'a [u8], &'a BStr, E> {
23-
map(
24-
length_data(peek(fold_many1(
25-
recognize(terminated(take_till1(|c| c == b'\n'), tag(b"\n"))),
26-
|| 0,
27-
|acc: usize, item: &'a [u8]| acc + item.len(),
28-
))),
29-
|s| s.as_bstr(),
30-
)(i)
31-
}
32-
33-
#[cfg(test)]
34-
mod tests {
35-
use super::*;
36-
37-
#[test]
38-
fn subject_ending_in_newline_with_newline() {
39-
assert_eq!(subject::<()>(b"a\nb\n").unwrap(), (b"".as_ref(), b"a\nb\nc".as_bstr()));
40-
}
41-
42-
#[test]
43-
fn subject_with_single_newline() {
44-
assert_eq!(subject::<()>(b"a\n").unwrap(), (b"".as_ref(), b"a\n".as_bstr()));
45-
}
46-
}
47-
48-
/// Parse a signature from the bytes input `i` using `nom`.
4922
pub fn nomfoo<'a, E: ParseError<&'a [u8]>>(i: &'a [u8]) -> IResult<&'a [u8], (&'a BStr, Option<&'a BStr>), E> {
50-
let (rest, subject) = opt(terminated(subject, pair(newline, newline)))(i)?;
51-
Ok((
52-
&[],
53-
match subject {
54-
Some(subject) => (subject.as_bstr(), (!rest.is_empty()).then(|| rest.as_bstr())),
55-
None => (i.as_bstr(), None),
56-
},
57-
))
23+
let mut c = i;
24+
while !c.is_empty() {
25+
c = match take_till1::<_, _, E>(|c| c == b'\n' || c == b'\r')(c) {
26+
Ok((i1, segment)) => match pair::<_, _, _, E, _, _>(newline, newline)(i1) {
27+
Ok((body, _)) => {
28+
// SAFETY: `segment` is a sub-slice of `i`, so both pointers lie within the same allocation and the end of `segment` is never before the start of `i`.
29+
#[allow(unsafe_code)]
30+
let consumed_bytes = unsafe { segment.as_ptr_range().end.offset_from(i.as_ptr()) };
31+
return Ok((
32+
&[],
33+
(
34+
&i[0usize..consumed_bytes.try_into().expect("positive offset")].as_bstr(),
35+
(!body.is_empty()).then(|| body.as_bstr()),
36+
),
37+
));
38+
}
39+
Err(_) => match i1.get(1..) {
40+
Some(next) => next,
41+
None => break,
42+
},
43+
},
44+
Err(_) => match c.get(1..) {
45+
Some(next) => next,
46+
None => break,
47+
},
48+
};
49+
}
50+
Ok((&[], (i.as_bstr(), None)))
5851
}
5952

6053
/// Returns the title and the optional body of `message`, excluding the blank-line separator (two consecutive newlines) between them.
6154
pub fn bytes(message: &[u8]) -> (&BStr, Option<&BStr>) {
6255
all_consuming(nomfoo::<()>)(message).expect("cannot fail").1
63-
// match message
64-
// .find(b"\n\n")
65-
// .map(|pos| (2, pos))
66-
// .or_else(|| message.find(b"\r\n\r\n").map(|pos| (4, pos)))
67-
// {
68-
// Some((sep_len, end_of_title)) => {
69-
// let body = &message[end_of_title + sep_len..];
70-
// (
71-
// message[..end_of_title].as_bstr(),
72-
// if body.is_empty() { None } else { Some(body.as_bstr()) },
73-
// )
74-
// }
75-
// None => (message.as_bstr(), None),
76-
// }
7756
}
7857
}
7958

git-object/src/lib.rs

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,6 @@
11
//! This crate provides types for [read-only git objects][crate::ObjectRef] backed by bytes provided in git's serialization format
22
//! as well as [mutable versions][Object] of these. Both types of objects can be encoded.
3-
#![forbid(unsafe_code)]
4-
#![deny(rust_2018_idioms, missing_docs)]
3+
#![deny(unsafe_code, rust_2018_idioms, missing_docs)]
54

65
use std::borrow::Cow;
76

git-object/tests/immutable/commit/message.rs

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -66,6 +66,19 @@ fn title_with_whitespace_and_body() {
6666
);
6767
}
6868

69+
#[test]
70+
fn title_with_more_whitespace_and_body() {
71+
let msg = MessageRef::from_bytes(b"hello \r\r\r\n there\nanother line\n\nthe body\n\n");
72+
assert_eq!(msg.summary().as_ref(), "hello there another line");
73+
assert_eq!(
74+
msg,
75+
MessageRef {
76+
title: b"hello \r\r\r\n there\nanother line".as_bstr(),
77+
body: Some(b"the body\n\n".as_bstr())
78+
}
79+
);
80+
}
81+
6982
#[test]
7083
fn title_with_whitespace_and_body_windows_lineending() {
7184
let msg = MessageRef::from_bytes(b"hello \r\n \r\n there\nanother line\r\n\r\nthe body\n\r\n");

0 commit comments

Comments
 (0)