Skip to content

HTTPS clone URL

Subversion checkout URL

You can clone with HTTPS or Subversion.
Download ZIP
Browse files

Improved infrastructure for word mode support.

  • Loading branch information...
commit 2f0066bdec28592f26af17c6992fb6ff21d2571e 1 parent bd44cad
@JanX2 authored
View
3  DiffMatchPatch.h
@@ -158,8 +158,9 @@ typedef enum {
- (NSMutableArray *)diff_computeFromOldString:(NSString *)text1 andNewString:(NSString *)text2 checkLines:(BOOL)checklines deadline:(NSTimeInterval)deadline;
- (NSMutableArray *)diff_lineModeFromOldString:(NSString *)text1 andNewString:(NSString *)text2 deadline:(NSTimeInterval)deadline;
// Line-mode encoder taking two input strings. NOTE(review): the second parameter
// is named text2 here on the assumption that the definition uses text1/text2 — confirm.
- (NSArray *)diff_linesToCharsForFirstString:(NSString *)text1 andSecondString:(NSString *)text2;
-- (NSArray *)diff_linesToWordsForFirstString:(NSString *)text1 andSecondString:(NSString *)text1;
+// Word-mode encoder: returns the encoded text1, the encoded text2 and the array of unique strings.
+- (NSArray *)diff_wordsToCharsForFirstString:(NSString *)text1 andSecondString:(NSString *)text2;
- (NSString *)diff_linesToCharsMungeOfText:(NSString *)text lineArray:(NSMutableArray *)lineArray lineHash:(NSMutableDictionary *)lineHash;
+- (NSString *)diff_wordsToCharsMungeOfText:(NSString *)text wordArray:(NSMutableArray *)wordArray wordHash:(NSMutableDictionary *)wordHash;
- (void)diff_chars:(NSArray *)diffs toLines:(NSMutableArray *)lineArray;
- (NSMutableArray *)diff_bisectOfOldString:(NSString *)text1 andNewString:(NSString *)text2 deadline:(NSTimeInterval)deadline;
- (NSMutableArray *)diff_bisectSplitOfOldString:(NSString *)text1 andNewString:(NSString *)text2 x:(NSUInteger)x y:(NSUInteger)y deadline:(NSTimeInterval)deadline;
View
47 DiffMatchPatch.m
@@ -644,6 +644,23 @@ - (NSString *)diff_linesToCharsMungeOfText:(NSString *)text
}
/**
+ * Split a text into a list of strings. Reduce the text to a string of
+ * hashes where each Unicode character represents one word (or boundary between words).
+ * @param text NSString to encode.
+ * @param wordArray NSMutableArray of unique strings.
+ * @param wordHash Map of strings to indices.
+ * @return Encoded string.
+ */
+- (NSString *)diff_wordsToCharsMungeOfText:(NSString *)text
+ wordArray:(NSMutableArray *)wordArray
+ wordHash:(NSMutableDictionary *)wordHash;
+{
+ // Hand the Foundation arguments to the C helper as their Core Foundation types.
+ CFStringRef sourceText = (CFStringRef)text;
+ CFMutableArrayRef uniqueWords = (CFMutableArrayRef)wordArray;
+ CFMutableDictionaryRef wordIndices = (CFMutableDictionaryRef)wordHash;
+
+ // The helper returns a string it created, so this method owns it;
+ // autorelease hands it back to the caller as a plain NSString.
+ CFStringRef encoded = diff_wordsToCharsMungeCFStringCreate(sourceText, uniqueWords, wordIndices);
+ return [NSMakeCollectable(encoded) autorelease];
+}
+
+/**
* Find the 'middle snake' of a diff, split the problem in two
* and return the recursively constructed diff.
* See Myers 1986 paper: An O(ND) Difference Algorithm and Its Variations.
@@ -886,26 +903,26 @@ - (NSArray *)diff_linesToCharsForFirstString:(NSString *)text1
* encoded text2 and the NSMutableArray of unique strings. The zeroth element
* of the NSArray of unique strings is intentionally blank.
*/
-- (NSArray *)diff_linesToWordsForFirstString:(NSString *)text1
+- (NSArray *)diff_wordsToCharsForFirstString:(NSString *)text1
andSecondString:(NSString *)text2;
{
- NSMutableArray *lineArray = [NSMutableArray array]; // NSString objects
- NSMutableDictionary *lineHash = [NSMutableDictionary dictionary]; // keys: NSString, values:NSNumber
- // e.g. [lineArray objectAtIndex:4] == "Hello\n"
- // e.g. [lineHash objectForKey:"Hello\n"] == 4
+ NSMutableArray *wordArray = [NSMutableArray array]; // NSString objects
+ NSMutableDictionary *wordHash = [NSMutableDictionary dictionary]; // keys: NSString, values:NSNumber
+ // e.g. [wordArray objectAtIndex:4] == "Hello"
+ // e.g. [wordHash objectForKey:"Hello"] == 4
// "\x00" is a valid character, but various debuggers don't like it.
// So we'll insert a junk entry to avoid generating a nil character.
- [lineArray addObject:@""];
-
- NSString *words1 = (NSString *)diff_linesToWordsMungeCFStringCreate((CFStringRef)text1,
- (CFMutableArrayRef)lineArray,
- (CFMutableDictionaryRef)lineHash);
- NSString *words2 = (NSString *)diff_linesToWordsMungeCFStringCreate((CFStringRef)text2,
- (CFMutableArrayRef)lineArray,
- (CFMutableDictionaryRef)lineHash);
-
- NSArray *result = [NSArray arrayWithObjects:words1, words2, lineArray, nil];
+ [wordArray addObject:@""];
+
+ NSString *words1 = NSMakeCollectable(diff_wordsToCharsMungeCFStringCreate((CFStringRef)text1,
+ (CFMutableArrayRef)wordArray,
+ (CFMutableDictionaryRef)wordHash));
+ NSString *words2 = NSMakeCollectable(diff_wordsToCharsMungeCFStringCreate((CFStringRef)text2,
+ (CFMutableArrayRef)wordArray,
+ (CFMutableDictionaryRef)wordHash));
+
+ NSArray *result = [NSArray arrayWithObjects:words1, words2, wordArray, nil];
[words1 release];
[words2 release];
View
20 DiffMatchPatchCFUtilities.c
@@ -523,7 +523,7 @@ CFStringRef diff_linesToCharsMungeCFStringCreate(CFStringRef text, CFMutableArra
* @param tokenHash Map of strings to indices.
* @return Encoded CFStringRef.
*/
-CFStringRef diff_linesToWordsMungeCFStringCreate(CFStringRef text, CFMutableArrayRef tokenArray, CFMutableDictionaryRef tokenHash) {
+CFStringRef diff_wordsToCharsMungeCFStringCreate(CFStringRef text, CFMutableArrayRef tokenArray, CFMutableDictionaryRef tokenHash) {
CFStringRef token;
CFMutableStringRef chars = CFStringCreateMutable(kCFAllocatorDefault, 0);
@@ -532,10 +532,9 @@ CFStringRef diff_linesToWordsMungeCFStringCreate(CFStringRef text, CFMutableArra
//CFLocaleRef currentLocale = CFLocaleCopyCurrent();
- CFOptionFlags options = kCFStringTokenizerUnitWord;
+ CFOptionFlags options = kCFStringTokenizerUnitWordBoundary;
CFRange tokenizerRange = CFRangeMake(0, textLength);
- // The locale parameter is ignored for tokenizing by words
CFStringTokenizerRef tokenizer = CFStringTokenizerCreate(kCFAllocatorDefault, text, tokenizerRange, options, NULL);
//CFRelease(currentLocale);
@@ -545,31 +544,16 @@ CFStringRef diff_linesToWordsMungeCFStringCreate(CFStringRef text, CFMutableArra
// Walk the text, pulling out a substring for each word (or boundary between words).
CFRange tokenRange;
- CFIndex prevTokenEnd = 0;
while (mask != kCFStringTokenizerTokenNone) {
tokenRange = CFStringTokenizerGetCurrentTokenRange(tokenizer);
- if (tokenRange.location > prevTokenEnd) {
- token = diff_CFStringCreateJavaSubstring(text, prevTokenEnd, tokenRange.location);
- diff_linesMungeHelper(token, tokenArray, tokenHash, chars);
- CFRelease(token);
- }
-
token = diff_CFStringCreateSubstring(text, tokenRange.location, tokenRange.length);
diff_linesMungeHelper(token, tokenArray, tokenHash, chars);
CFRelease(token);
- prevTokenEnd = tokenRange.location + tokenRange.length;
-
mask = CFStringTokenizerAdvanceToNextToken(tokenizer);
}
- if (prevTokenEnd <= textLength - 1) {
- token = diff_CFStringCreateJavaSubstring(text, prevTokenEnd, textLength);
- diff_linesMungeHelper(token, tokenArray, tokenHash, chars);
- CFRelease(token);
- }
-
CFRelease(tokenizer);
return chars;
View
2  DiffMatchPatchCFUtilities.h
@@ -33,7 +33,7 @@ CFArrayRef diff_halfMatchCreate(CFStringRef text1, CFStringRef text2, const floa
CFArrayRef diff_halfMatchICreate(CFStringRef longtext, CFStringRef shorttext, CFIndex i);
CFStringRef diff_linesToCharsMungeCFStringCreate(CFStringRef text, CFMutableArrayRef lineArray, CFMutableDictionaryRef lineHash);
-CFStringRef diff_linesToWordsMungeCFStringCreate(CFStringRef text, CFMutableArrayRef tokenArray, CFMutableDictionaryRef tokenHash);
+CFStringRef diff_wordsToCharsMungeCFStringCreate(CFStringRef text, CFMutableArrayRef tokenArray, CFMutableDictionaryRef tokenHash);
CFIndex diff_cleanupSemanticScore(CFStringRef one, CFStringRef two);
View
53 Tests/DiffMatchPatchTest.m
@@ -185,7 +185,7 @@ - (void)test_diff_linesToCharsTest {
[dmp release];
}
-- (void)test_diff_linesToWordsTest {
+- (void)test_diff_wordsToCharsTest {
DiffMatchPatch *dmp = [DiffMatchPatch new];
NSArray *result;
@@ -196,11 +196,60 @@ - (void)test_diff_linesToWordsTest {
[tmpVector addObject:@" "];
[tmpVector addObject:@"beta"];
[tmpVector addObject:@"\n"];
- result = [dmp diff_linesToWordsForFirstString:@"alpha beta alpha\n" andSecondString:@"beta alpha beta\n"];
+ result = [dmp diff_wordsToCharsForFirstString:@"alpha beta alpha\n" andSecondString:@"beta alpha beta\n"];
STAssertEqualObjects(@"\001\002\003\002\001\004", [result objectAtIndex:0], @"Convert words down to characters #1");
STAssertEqualObjects(@"\003\002\001\002\003\004", [result objectAtIndex:1], @"Convert words down to characters #2");
STAssertEqualObjects(tmpVector, (NSArray *)[result objectAtIndex:2], @"Convert words down to characters #3");
+ [tmpVector removeAllObjects];
+ [tmpVector addObject:@""];
+ [tmpVector addObject:@"alpha"];
+ [tmpVector addObject:@"\r"];
+ [tmpVector addObject:@" "];
+ [tmpVector addObject:@"beta"];
+ [tmpVector addObject:@"\r\n"];
+ result = [dmp diff_wordsToCharsForFirstString:@"" andSecondString:@"alpha\r beta\r \r \r\n"];
+ STAssertEqualObjects(@"", [result objectAtIndex:0], @"Convert words down to characters #4");
+ STAssertEqualObjects(@"\001\002\003\004\002\003\002\003\005", [result objectAtIndex:1], @"Convert words down to characters #5");
+ STAssertEqualObjects(tmpVector, (NSArray *)[result objectAtIndex:2], @"Convert words down to characters #6");
+
+ [tmpVector removeAllObjects];
+ [tmpVector addObject:@""];
+ [tmpVector addObject:@"a"];
+ [tmpVector addObject:@"b"];
+ result = [dmp diff_wordsToCharsForFirstString:@"a" andSecondString:@"b"];
+ STAssertEqualObjects(@"\001", [result objectAtIndex:0], @"Convert words down to characters #7");
+ STAssertEqualObjects(@"\002", [result objectAtIndex:1], @"Convert words down to characters #8");
+ STAssertEqualObjects(tmpVector, (NSArray *)[result objectAtIndex:2], @"Convert words down to characters #9");
+
+ // More than 256 to reveal any 8-bit limitations.
+ unichar n = 300;
+ [tmpVector removeAllObjects];
+ NSMutableString *words = [NSMutableString string];
+ NSMutableString *chars = [NSMutableString string];
+
+ [words appendString:@" "];
+
+ NSString *currentWord;
+ unichar i;
+ for (unichar x = 1; x < n + 1; x++) {
+ i = x + 1;
+ currentWord = [NSString stringWithFormat:@"%d ", (int)x];
+ [tmpVector addObject:[NSString stringWithFormat:@"%d", (int)x]];
+ [words appendString:currentWord];
+ [chars appendString:[NSString stringWithFormat:@"%C\001", i]];
+ }
+ STAssertEquals((NSUInteger)n, tmpVector.count, @"Convert words down to characters #10");
+ STAssertEquals((NSUInteger)n, chars.length/2, @"Convert words down to characters #11");
+ [tmpVector insertObject:@"" atIndex:0];
+ [tmpVector insertObject:@" " atIndex:1];
+ [chars insertString:@"\001" atIndex:0];
+ result = [dmp diff_wordsToCharsForFirstString:words andSecondString:@""];
+ NSMutableString *charsCmp = [result objectAtIndex:0];
+ STAssertEqualObjects(chars, charsCmp, @"Convert words down to characters #12");
+ STAssertEqualObjects(@"", [result objectAtIndex:1], @"Convert words down to characters #13");
+ STAssertEqualObjects(tmpVector, (NSArray *)[result objectAtIndex:2], @"Convert words down to characters #14");
+
[dmp release];
}
Please sign in to comment.
Something went wrong with that request. Please try again.