/
NSScanner+HTML.m
306 lines (250 loc) · 8 KB
/
NSScanner+HTML.m
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
//
// NSScanner+HTML.m
// DTCoreText
//
// Created by Oliver Drobnik on 1/12/11.
// Copyright 2011 Drobnik.com. All rights reserved.
//
#import "DTCoreText.h"
#import "NSScanner+HTML.h"
#import "NSCharacterSet+HTML.h"
#import "DTColorFunctions.h"
@implementation NSScanner (HTML)
#pragma mark CSS
// scan a single element from a style list
/**
 Scans one `name: value;` declaration from a CSS style list.

 The value is split into entries: tokens separated by whitespace are joined with a single
 space into the current entry, and a comma starts a new entry. For the `font` property and
 for properties whose name contains "color", "shadow" or "background" the comma is instead
 kept inside the current entry.

 @param name On success receives the attribute name. May be NULL.
 @param value On success receives an empty string if no value was found, an NSString if
 exactly one entry was found, or an array of NSString if there were several. May be NULL.
 @returns YES if a declaration was scanned. NO if there is no attribute name at the scan
 location, if the colon is missing, or if nothing could be scanned after an opening quote;
 in the latter two cases the scan location is reset to where it was on entry.
 */
- (BOOL)scanCSSAttribute:(NSString * __autoreleasing*)name value:(id __autoreleasing*)value
{
    NSString *attrName = nil;
    NSUInteger initialScanLocation = [self scanLocation];

    NSCharacterSet *whiteCharacterSet = [NSCharacterSet whitespaceAndNewlineCharacterSet];

    // an unquoted value token is a run of anything except whitespace, ';' and ','
    NSMutableCharacterSet *nonWhiteCommaCharacterSet = [[NSCharacterSet whitespaceAndNewlineCharacterSet] mutableCopy];
    [nonWhiteCommaCharacterSet formUnionWithCharacterSet:[NSCharacterSet characterSetWithCharactersInString:@";,"]];
    [nonWhiteCommaCharacterSet invert];

    // alphanumeric plus -
    NSCharacterSet *cssStyleAttributeNameCharacterSet = [NSCharacterSet cssStyleAttributeNameCharacterSet];

    if (![self scanCharactersFromSet:cssStyleAttributeNameCharacterSet intoString:&attrName])
    {
        return NO;
    }

    // skip whitespace
    [self scanCharactersFromSet:whiteCharacterSet intoString:NULL];

    // expect :
    if (![self scanString:@":" intoString:NULL])
    {
        [self setScanLocation:initialScanLocation];
        return NO;
    }

    // skip whitespace
    [self scanCharactersFromSet:whiteCharacterSet intoString:NULL];

    NSMutableArray *results = [NSMutableArray array];

    // YES while the next token has to open a new entry instead of extending the last one;
    // whenever this is NO, results is guaranteed to be non-empty
    BOOL nextIterationAddsNewEntry = YES;

    while (![self isAtEnd] && ![self scanString:@";" intoString:NULL])
    {
        // skip whitespace
        [self scanCharactersFromSet:whiteCharacterSet intoString:NULL];

        NSString *quote = nil;

        // NOTE(review): this can consume a run of several quote characters (e.g. `""`), and the
        // terminator search below then looks for that whole run - confirm whether empty quoted
        // values need dedicated handling
        if ([self scanCharactersFromSet:[NSCharacterSet quoteCharacterSet] intoString:&quote])
        {
            NSString *quotedValue = nil;

            // attribute is quoted
            if (![self scanUpToString:quote intoString:&quotedValue])
            {
                [self setScanLocation:initialScanLocation];
                return NO;
            }

            if (nextIterationAddsNewEntry)
            {
                // a quoted token that opens an entry is stored without its quotes
                [results addObject:quotedValue];
                nextIterationAddsNewEntry = NO;
            }
            else
            {
                // a quoted token that extends an entry keeps its quotes
                quotedValue = [NSString stringWithFormat:@"%@ %@%@%@", [results lastObject], quote, quotedValue, quote];
                [results removeLastObject];
                [results addObject:quotedValue];
            }

            // skip ending quote
            [self scanString:quote intoString:NULL];

            //TODO: decode unicode sequences like "\2022"
        }
        else
        {
            // attribute is not quoted, we append elements until we find a ; or the string is at the end
            NSString *valueString = nil;

            if ([self scanString:@"rgb(" intoString:&valueString])
            {
                // only the exact lowercase spelling is treated as a color function
                if ([valueString isEqualToString:@"rgb("])
                {
                    // the remainder goes into its own variable: if nothing follows "rgb(" the scan
                    // fails, and reusing valueString would duplicate the prefix as "rgb(rgb("
                    NSString *rgbRemainder = nil;
                    [self scanUpToString:@";" intoString:&rgbRemainder];

                    NSString *formattedRGBString = [NSString stringWithFormat:@"rgb(%@", rgbRemainder ?: @""];

                    if (nextIterationAddsNewEntry)
                    {
                        [results addObject:formattedRGBString];
                        nextIterationAddsNewEntry = NO;
                    }
                    else
                    {
                        valueString = [NSString stringWithFormat:@"%@ %@", [results lastObject], formattedRGBString];
                        [results removeLastObject];
                        [results addObject:valueString];
                    }
                }
            }
            else if ([self scanString:@"," intoString:NULL])
            {
                BOOL keepsCommaInsideEntry = [attrName isEqualToString:@"font"] ||
                    ([attrName rangeOfString:@"color"].location != NSNotFound) ||
                    ([attrName rangeOfString:@"shadow"].location != NSNotFound) ||
                    ([attrName rangeOfString:@"background"].location != NSNotFound);

                if (keepsCommaInsideEntry)
                {
                    NSString *previousEntry = [results lastObject];

                    // a comma with nothing before it is dropped; formatting a nil entry would
                    // otherwise produce the literal text "(null),"
                    if (previousEntry)
                    {
                        [results removeLastObject];
                        [results addObject:[previousEntry stringByAppendingString:@","]];
                    }
                }
                else
                {
                    // for all other properties the comma separates entries
                    nextIterationAddsNewEntry = YES;
                }
            }
            else if ([self scanCharactersFromSet:nonWhiteCommaCharacterSet intoString:&valueString])
            {
                if (nextIterationAddsNewEntry)
                {
                    [results addObject:valueString];
                    nextIterationAddsNewEntry = NO;
                }
                else
                {
                    valueString = [NSString stringWithFormat:@"%@ %@", [results lastObject], valueString];
                    [results removeLastObject];
                    [results addObject:valueString];
                }
            }
        }

        // skip whitespace
        [self scanCharactersFromSet:whiteCharacterSet intoString:NULL];
    }

    // Success
    if (name)
    {
        *name = attrName;
    }

    if (value)
    {
        if (results.count == 0)
        {
            *value = @"";
        }
        else if (results.count == 1)
        {
            *value = [results objectAtIndex:0];
        }
        else
        {
            *value = results;
        }
    }

    return YES;
}
/*
Source: http://www.w3.org/TR/CSS1/#url
The format of a URL value is 'url(' followed by optional white space followed by an optional single quote (') or double quote (") character followed by the URL itself (as defined in [11]) followed by an optional single quote (') or double quote (") character followed by optional whitespace followed by ')'. Quote characters that are not part of the URL itself must be balanced.
Parentheses, commas, whitespace characters, single quotes (') and double quotes (") appearing in a URL must be escaped with a backslash: '\(', '\)', '\,'.
Partial URLs are interpreted relative to the source of the style sheet, not relative to the document:
*/
// NOTE: Simplified, we assume that there are no quotes in the URL
/**
 Scans a CSS `url(...)` value, with or without quotes around the URL.

 HTML entities in the scanned URL are decoded. The closing parenthesis is not consumed by
 this method; the scanner is left in front of it.

 @param urlString Receives the URL text. This is an empty string for an empty quoted URL and
 may be nil if nothing could be scanned after `url(`. May be NULL.
 @returns YES if the scanner was positioned at `url(`, NO otherwise.
 */
- (BOOL)scanCSSURL:(NSString * __autoreleasing*)urlString
{
    if (![self scanString:@"url(" intoString:NULL])
    {
        return NO;
    }

    NSCharacterSet *quoteCharacterSet = [NSCharacterSet quoteCharacterSet];

    NSString *quote = nil;
    NSString *attrValue = nil;

    if ([self scanCharactersFromSet:quoteCharacterSet intoString:&quote])
    {
        if ([quote length]==1)
        {
            // regular case: the URL runs up to the matching closing quote
            [self scanUpToString:quote intoString:&attrValue];
            [self scanString:quote intoString:NULL];
        }
        else
        {
            // several quote characters were scanned in one run, most likely an empty URL like url("")
            attrValue = @"";
        }

        // decode HTML entities
        attrValue = [attrValue stringByReplacingHTMLEntities];
    }
    else
    {
        // non-quoted URL, ends at )
        if ([self scanUpToString:@")" intoString:&attrValue])
        {
            // decode HTML entities
            attrValue = [attrValue stringByReplacingHTMLEntities];
        }
    }

    if (urlString)
    {
        *urlString = attrValue;
    }

    return YES;
}
/**
 Scans an HTML color at the current scan location, for callers that do not need the
 matched color name.

 @param color Passed through to -scanHTMLColor:HTMLName:.
 @returns The result of -scanHTMLColor:HTMLName: called without a name out-parameter.
 */
- (BOOL)scanHTMLColor:(DTColor * __autoreleasing*)color
{
    // forward to the full variant, not asking for the name
    BOOL didScanColor = [self scanHTMLColor:color HTMLName:NULL];

    return didScanColor;
}
/**
 Scans an HTML color at the current scan location and resolves it via DTColorCreateWithHTMLName.

 Three notations are tried in order: a token starting with `#` (taken up to the next
 whitespace or comma), a functional notation starting with `rgb` (taken up to and including
 the closing parenthesis), and otherwise a run of alphanumeric characters.

 @param color On success receives the resolved color. May be NULL.
 @param name On success receives the text that was resolved into the color. May be NULL.
 @returns YES if a color was found. NO otherwise, in which case the scan location is reset
 to where it was on entry.
 */
- (BOOL)scanHTMLColor:(DTColor * __autoreleasing*)color HTMLName:(NSString * __autoreleasing*)name
{
    NSUInteger indexBefore = [self scanLocation];

    NSString *colorName = nil;

    // a hex color token ends at whitespace or at a comma
    NSMutableCharacterSet *tokenEndSet = [[NSCharacterSet whitespaceAndNewlineCharacterSet] mutableCopy];
    [tokenEndSet addCharactersInString:@","];

    if ([self scanString:@"#" intoString:NULL])
    {
        // rewind so that the # becomes part of the color name
        self.scanLocation = indexBefore;
        [self scanUpToCharactersFromSet:tokenEndSet intoString:&colorName];
    }
    else if ([self scanString:@"rgb" intoString:NULL])
    {
        // The closing parenthesis has to be scanned explicitly. If it is missing, scanUpToString:
        // stops at the end of the string, and blindly advancing the scan location by one from
        // there would move it out of range and raise an exception.
        if ([self scanUpToString:@")" intoString:NULL] && [self scanString:@")" intoString:NULL])
        {
            NSRange colorRange = NSMakeRange(indexBefore, self.scanLocation - indexBefore);

            colorName = [[self string] substringWithRange:colorRange];
            colorName = [colorName stringByTrimmingCharactersInSet:[NSCharacterSet whitespaceAndNewlineCharacterSet]];
        }
    }
    else
    {
        // could be a plain html color name
        [self scanCharactersFromSet:[NSCharacterSet alphanumericCharacterSet] intoString:&colorName];
    }

    DTColor *foundColor = nil;

    if (colorName)
    {
        foundColor = DTColorCreateWithHTMLName(colorName);
    }

    if (!foundColor)
    {
        // nothing usable found, leave the scanner where it was
        self.scanLocation = indexBefore;
        return NO;
    }

    if (color)
    {
        *color = foundColor;
    }

    if (name)
    {
        *name = colorName;
    }

    return YES;
}
@end