Skip to content

Commit

Permalink
Allow - and _ in CSS ID selectors.
Browse files Browse the repository at this point in the history
Closes jhy#10.
  • Loading branch information
jhy committed Jul 2, 2011
1 parent 7199cf1 commit ec69d09
Show file tree
Hide file tree
Showing 3 changed files with 24 additions and 5 deletions.
7 changes: 4 additions & 3 deletions src/main/java/org/jsoup/parser/TokenQueue.java
Expand Up @@ -213,10 +213,11 @@ public String consumeWord() {
}

/**
Consume a HTML class name off the queue (letter, digit, -, _)
@return classname
Consume a CSS identifier (ID or class) off the queue (letter, digit, -, _)
http://www.w3.org/TR/CSS2/syndata.html#value-def-identifier
@return identifier
*/
public String consumeClassName() {
public String consumeCssIdentifier() {
StringBuilder accum = new StringBuilder();
Character c = queue.peek();
while (!queue.isEmpty() && (Character.isLetterOrDigit(c) || c.equals('-') || c.equals('_'))) {
Expand Down
4 changes: 2 additions & 2 deletions src/main/java/org/jsoup/select/Selector.java
Expand Up @@ -153,7 +153,7 @@ private void intersectElements(Collection<Element> intersect) {
}

private Elements byId() {
String id = tq.consumeWord();
String id = tq.consumeCssIdentifier();
Validate.notEmpty(id);

Element found = root.getElementById(id);
Expand All @@ -164,7 +164,7 @@ private Elements byId() {
}

private Elements byClass() {
String className = tq.consumeClassName();
String className = tq.consumeCssIdentifier();
Validate.notEmpty(className);

return root.getElementsByClass(className);
Expand Down
18 changes: 18 additions & 0 deletions src/test/java/org/jsoup/select/SelectorTest.java
Expand Up @@ -2,6 +2,7 @@

import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.junit.Test;
import static org.junit.Assert.*;

Expand Down Expand Up @@ -272,4 +273,21 @@ public class SelectorTest {
assertEquals(1, els.size());
assertEquals("Three", els.first().text());
}

/**
 * Regression test for http://github.com/jhy/jsoup/issues#issue/10.
 *
 * Checks that id and class values made of letters, digits, '-' and '_' (the character set this
 * test takes from the CSS identifier rules) can be found both through the direct DOM lookups and
 * through the "#id" / ".class" selector syntax. A '.' inside an identifier is deliberately not
 * covered: it is acceptable in HTML but not in a CSS identifier.
 */
@Test public void testCharactersInIdAndClass() {
    Document doc = Jsoup.parse("<div><p id='a1-foo_bar'>One</p><p class='b2-qux_bif'>Two</p></div>");

    // Direct lookups by id and by class name.
    assertEquals("One", doc.getElementById("a1-foo_bar").text());
    assertEquals("Two", doc.getElementsByClass("b2-qux_bif").first().text());

    // The same elements, reached through the selector syntax.
    assertEquals("One", doc.select("#a1-foo_bar").first().text());
    assertEquals("Two", doc.select(".b2-qux_bif").first().text());
}
}

0 comments on commit ec69d09

Please sign in to comment.