Skip to content

Commit

Permalink
Added whitespace normalization
Browse files Browse the repository at this point in the history
  • Loading branch information
odrobnik committed Jan 17, 2011
1 parent 04a449d commit 7c41248
Show file tree
Hide file tree
Showing 6 changed files with 115 additions and 24 deletions.
38 changes: 20 additions & 18 deletions Classes/NSAttributedString+HTML.m
Expand Up @@ -209,12 +209,6 @@ - (id)initWithHTML:(NSData *)data options:(NSDictionary *)options documentAttrib

CTParagraphStyleRef paragraphStyle = createParagraphStyle(0, 0, 0, 0);

// [localAttributes setObject:(id)paragraphStyle forKey:(id)kCTParagraphStyleAttributeName];

// NSString *fontColor = [currentTagAttributes objectForKey:@"color"];

// [attributes setObject:(id)[color CGColor] forKey:(id)kCTForegroundColorAttributeName];

NSMutableDictionary *localAttributes = [NSMutableDictionary dictionaryWithObjectsAndKeys:attachment, @"DTTextAttachment",
(id)embeddedObjectRunDelegate, kCTRunDelegateAttributeName,
(id)paragraphStyle, kCTParagraphStyleAttributeName, nil];
Expand Down Expand Up @@ -510,6 +504,9 @@ - (id)initWithHTML:(NSData *)data options:(NSDictionary *)options documentAttrib

if ([scanner scanUpToString:@"<" intoString:&tagContents])
{
tagContents = [tagContents stringByNormalizingWhitespace];


NSMutableDictionary *fontAttributes = [NSMutableDictionary dictionary];
NSMutableDictionary *fontStyleAttributes = [NSMutableDictionary dictionary];

Expand Down Expand Up @@ -647,18 +644,6 @@ - (id)initWithHTML:(NSData *)data options:(NSDictionary *)options documentAttrib
}


// TODO: Needs better handling of whitespace compression and adding space between tags if there are newlines
if (![tagContents hasPrefix:@" "])
{


if ([[tmpString string] length] && ![[tmpString string] hasSuffix:@" "] && ![[tmpString string] hasSuffix:@"\n"])
{
tagContents = [@" " stringByAppendingString:tagContents];
}
}


// Add newline after block contents if a new block follows
NSString *nextTag = [scanner peekNextTagSkippingClosingTags:YES];

Expand Down Expand Up @@ -691,6 +676,23 @@ - (id)initWithHTML:(NSData *)data options:(NSDictionary *)options documentAttrib
}
needsNewLineBefore = NO;
}
else // might be a continuation of a paragraph, then we might need space before it
{
// TODO: Needs better handling of whitespace compression and adding space between tags if there are newlines
if (![tagContents hasPrefix:@" "])
{
NSString *stringSoFar = [tmpString string];

if ([stringSoFar length] && ![stringSoFar hasSuffix:@" "] && ![stringSoFar hasSuffix:@"\n"] && ![stringSoFar hasSuffix:UNICODE_LINE_FEED])
{
// add space prefix unless punctuation character
if (![tagContents hasPrefixCharacterFromSet:[NSCharacterSet punctuationCharacterSet]])
{
tagContents = [@" " stringByAppendingString:tagContents];
}
}
}
}

NSAttributedString *tagString = [[NSAttributedString alloc] initWithString:tagContents attributes:attributes];
[tmpString appendAttributedString:tagString];
Expand Down
18 changes: 18 additions & 0 deletions Classes/NSAttributedStringHTMLTest.m
Expand Up @@ -71,6 +71,24 @@ - (void)testImageParagraphs
STAssertEqualObjects(resultOnIOS, resultOnMac, @"Output on List Test differs");
}

/**
 Verifies that runs of whitespace and newlines inside a paragraph are collapsed
 to single spaces when HTML is converted into an attributed string.

 The expected value is the hex dump (NSData -description) of the UTF-8 bytes of
 "Now there is some bold text and spaces should be normalized.\n".
 */
- (void)testSpaceNormalization
{
	// Input mixes a newline directly after an inline tag and a "newline + space" run inside the text.
	NSString *html = @"<p>Now there is some <b>bold</b>\ntext and spaces\n should be normalized.</p>";

	NSData *data = [html dataUsingEncoding:NSUTF8StringEncoding];

	// NOTE(review): `string` is alloc'd here and never released in this method —
	// that is a leak if this file is built with manual reference counting; confirm.
	NSAttributedString *string = [[NSAttributedString alloc] initWithHTML:data documentAttributes:NULL];

	// Compare the plain text as a hex dump, same as the neighbouring tests do.
	NSData *dump = [[string string] dataUsingEncoding:NSUTF8StringEncoding];
	NSString *resultOnIOS = [dump description];

	NSString *resultOnMac = @"<4e6f7720 74686572 65206973 20736f6d 6520626f 6c642074 65787420 616e6420 73706163 65732073 686f756c 64206265 206e6f72 6d616c69 7a65642e 0a>";

	// The failure message names this test (it previously said "List Test", copied from another test).
	STAssertEqualObjects(resultOnIOS, resultOnMac, @"Output on Space Normalization Test differs");
}






@end
2 changes: 2 additions & 0 deletions Classes/NSString+HTML.h
Expand Up @@ -10,5 +10,7 @@

- (NSUInteger)integerValueFromHex;
- (BOOL)isInlineTag;
- (NSString *)stringByNormalizingWhitespace;
- (BOOL)hasPrefixCharacterFromSet:(NSCharacterSet *)characterSet;

@end
55 changes: 55 additions & 0 deletions Classes/NSString+HTML.m
Expand Up @@ -32,4 +32,59 @@ - (BOOL)isInlineTag
return [inlineTags containsObject:[self lowercaseString]];
}


/**
 Returns a copy of the receiver in which every run of whitespace and newline
 characters is collapsed into a single space.

 A leading run becomes one leading space and a trailing run becomes one trailing
 space; they are not trimmed away. A receiver consisting only of whitespace
 yields a single space, and an empty receiver yields an empty string.

 @return The whitespace-normalized string.
 */
- (NSString *)stringByNormalizingWhitespace
{
	NSCharacterSet *blanks = [NSCharacterSet whitespaceAndNewlineCharacterSet];

	// The scanner must not skip anything on its own, otherwise leading and
	// trailing whitespace could not be detected.
	NSScanner *wordScanner = [NSScanner scannerWithString:self];
	[wordScanner setCharactersToBeSkipped:nil];

	NSMutableString *normalized = [NSMutableString string];

	// Leading whitespace is represented by exactly one space.
	if ([wordScanner scanCharactersFromSet:blanks intoString:NULL])
	{
		[normalized appendString:@" "];
	}

	// YES while a whitespace run has been consumed that has not been emitted yet.
	BOOL separatorPending = NO;

	while (![wordScanner isAtEnd])
	{
		NSString *word = nil;

		if ([wordScanner scanUpToCharactersFromSet:blanks intoString:&word])
		{
			// Words are always joined by a single space.
			if (separatorPending)
			{
				[normalized appendString:@" "];
			}

			[normalized appendString:word];
		}

		// Remember whether whitespace followed this word; if it is the last
		// word, this decides whether the result ends in a space.
		separatorPending = [wordScanner scanCharactersFromSet:blanks intoString:NULL];
	}

	// Trailing whitespace is represented by exactly one space.
	if (separatorPending)
	{
		[normalized appendString:@" "];
	}

	// Hand back an immutable string, independent of the working buffer.
	return [NSString stringWithString:normalized];
}


/**
 Tests whether the first character of the receiver is a member of a character set.

 The first character is evaluated as a full Unicode code point: if the receiver
 begins with a valid UTF-16 surrogate pair, the pair is combined before the
 membership test, so characters outside the Basic Multilingual Plane are
 matched correctly.

 @param characterSet The set to test the first character against.
 @return YES if the receiver is non-empty and its first character is in
 characterSet, NO otherwise (including when the receiver is empty).
 */
- (BOOL)hasPrefixCharacterFromSet:(NSCharacterSet *)characterSet
{
	NSUInteger length = [self length];

	if (!length)
	{
		return NO;
	}

	unichar firstChar = [self characterAtIndex:0];

	// A lone UTF-16 unit cannot represent a non-BMP character; combine a
	// leading surrogate pair into one code point before testing membership.
	if (length > 1 && CFStringIsSurrogateHighCharacter(firstChar))
	{
		unichar secondChar = [self characterAtIndex:1];

		if (CFStringIsSurrogateLowCharacter(secondChar))
		{
			UTF32Char codePoint = CFStringGetLongCharacterForSurrogatePair(firstChar, secondChar);

			return [characterSet longCharacterIsMember:codePoint];
		}
	}

	// BMP character (or an unpaired surrogate): test the single unit as before.
	return [characterSet characterIsMember:firstChar];
}

@end
19 changes: 19 additions & 0 deletions CoreTextExtensions.xcodeproj/project.pbxproj
Expand Up @@ -61,6 +61,16 @@
B2B440C712E030DF00497B74 /* DTAttributedTextContentView.m in Sources */ = {isa = PBXBuildFile; fileRef = A73BC48D12DA078100F064C6 /* DTAttributedTextContentView.m */; };
/* End PBXBuildFile section */

/* Begin PBXContainerItemProxy section */
A78A3F9E12E47D4E007CE622 /* PBXContainerItemProxy */ = {
isa = PBXContainerItemProxy;
containerPortal = 29B97313FDCFA39411CA2CEA /* Project object */;
proxyType = 1;
remoteGlobalIDString = 6A5A32BF12DD108D0019AAF1 /* UnitTest */;
remoteInfo = UnitTest;
};
/* End PBXContainerItemProxy section */

/* Begin PBXFileReference section */
1D30AB110D05D00D00671497 /* Foundation.framework */ = {isa = PBXFileReference; lastKnownFileType = wrapper.framework; name = Foundation.framework; path = System/Library/Frameworks/Foundation.framework; sourceTree = SDKROOT; };
1D3623240D0F684500981E51 /* DemoAppDelegate.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = DemoAppDelegate.h; sourceTree = "<group>"; };
Expand Down Expand Up @@ -290,6 +300,7 @@
buildRules = (
);
dependencies = (
A78A3F9F12E47D4E007CE622 /* PBXTargetDependency */,
);
name = CoreTextExtensions;
productName = CoreTextExtensions;
Expand Down Expand Up @@ -435,6 +446,14 @@
};
/* End PBXSourcesBuildPhase section */

/* Begin PBXTargetDependency section */
A78A3F9F12E47D4E007CE622 /* PBXTargetDependency */ = {
isa = PBXTargetDependency;
target = 6A5A32BF12DD108D0019AAF1 /* UnitTest */;
targetProxy = A78A3F9E12E47D4E007CE622 /* PBXContainerItemProxy */;
};
/* End PBXTargetDependency section */

/* Begin XCBuildConfiguration section */
1D6058940D05DD3E006BFB54 /* Debug */ = {
isa = XCBuildConfiguration;
Expand Down
7 changes: 1 addition & 6 deletions Resources/CurrentTest.html
@@ -1,6 +1 @@
<p>Now there is some <b>bold</b>
text and spaces should be normalized.</p>

<h1>List Oddity</h1>
<ul><li>A List Item</li></ul>
<p>This line below a list is too close if not a trick is used. I would want the extra spacing to be achieved by adding paragraph spacing before.</p>
<p>Before</p><img src="Oliver.jpg"><h1>Header</h2><p>after</p><p>Some inline <img src="Oliver.jpg"> text.</p>

0 comments on commit 7c41248

Please sign in to comment.