<?xml version="1.0" encoding="UTF-8"?>
<commit>
  <added type="array"/>
  <modified type="array">
    <modified>
      <diff>@@ -79,38 +79,33 @@ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE, DAMMIT.
 from __future__ import generators
 
 __author__ = &quot;Leonard Richardson (leonardr@segfault.org)&quot;
-__version__ = &quot;3.1.0&quot;
+__version__ = &quot;3.0.7a&quot;
 __copyright__ = &quot;Copyright (c) 2004-2008 Leonard Richardson&quot;
 __license__ = &quot;New-style BSD&quot;
 
+from sgmllib import SGMLParser, SGMLParseError
 import codecs
 import markupbase
 import types
 import re
-from HTMLParser import HTMLParser, HTMLParseError
+import sgmllib
 try:
-    from htmlentitydefs import name2codepoint
+  from htmlentitydefs import name2codepoint
 except ImportError:
-    name2codepoint = {}
+  name2codepoint = {}
 try:
     set
 except NameError:
     from sets import Set as set
 
 #These hacks make Beautiful Soup able to parse XML with namespaces
+sgmllib.tagfind = re.compile('[a-zA-Z][-_.:a-zA-Z0-9]*')
 markupbase._declname_match = re.compile(r'[a-zA-Z][-_.:a-zA-Z0-9]*\s*').match
 
 DEFAULT_OUTPUT_ENCODING = &quot;utf-8&quot;
 
 # First, the classes that represent markup elements.
 
-def sob(unicode, encoding):
-    &quot;&quot;&quot;Returns either the given Unicode string or its encoding.&quot;&quot;&quot;
-    if encoding is None:
-        return unicode
-    else:
-        return unicode.encode(encoding)
-
 class PageElement:
     &quot;&quot;&quot;Contains the navigational information for some part of the page
     (either a tag or a piece of text)&quot;&quot;&quot;
@@ -415,7 +410,7 @@ class NavigableString(unicode, PageElement):
         return unicode.__new__(cls, value, DEFAULT_OUTPUT_ENCODING)
 
     def __getnewargs__(self):
-        return (unicode(self),)
+        return (NavigableString.__str__(self),)
 
     def __getattr__(self, attr):
         &quot;&quot;&quot;text.string gives you text. This is for backwards
@@ -426,32 +421,34 @@ class NavigableString(unicode, PageElement):
         else:
             raise AttributeError, &quot;'%s' object has no attribute '%s'&quot; % (self.__class__.__name__, attr)
 
-    def encode(self, encoding=DEFAULT_OUTPUT_ENCODING):
-        return self.decode().encode(encoding)
+    def __unicode__(self):
+        return str(self).decode(DEFAULT_OUTPUT_ENCODING)
 
-    def decodeGivenEventualEncoding(self, eventualEncoding):
-        return self
+    def __str__(self, encoding=DEFAULT_OUTPUT_ENCODING):
+        if encoding:
+            return self.encode(encoding)
+        else:
+            return self
 
 class CData(NavigableString):
 
-    def decodeGivenEventualEncoding(self, eventualEncoding):
-        return u'&lt;![CDATA[' + self + u']]&gt;'
+    def __str__(self, encoding=DEFAULT_OUTPUT_ENCODING):
+        return &quot;&lt;![CDATA[%s]]&gt;&quot; % NavigableString.__str__(self, encoding)
 
 class ProcessingInstruction(NavigableString):
-
-    def decodeGivenEventualEncoding(self, eventualEncoding):
+    def __str__(self, encoding=DEFAULT_OUTPUT_ENCODING):
         output = self
-        if u'%SOUP-ENCODING%' in output:
-            output = self.substituteEncoding(output, eventualEncoding)
-        return u'&lt;?' + output + u'?&gt;'
+        if &quot;%SOUP-ENCODING%&quot; in output:
+            output = self.substituteEncoding(output, encoding)
+        return &quot;&lt;?%s?&gt;&quot; % self.toEncoding(output, encoding)
 
 class Comment(NavigableString):
-    def decodeGivenEventualEncoding(self, eventualEncoding):
-        return u'&lt;!--' + self + u'--&gt;'
+    def __str__(self, encoding=DEFAULT_OUTPUT_ENCODING):
+        return &quot;&lt;!--%s--&gt;&quot; % NavigableString.__str__(self, encoding)
 
 class Declaration(NavigableString):
-    def decodeGivenEventualEncoding(self, eventualEncoding):
-        return u'&lt;!' + self + u'&gt;'
+    def __str__(self, encoding=DEFAULT_OUTPUT_ENCODING):
+        return &quot;&lt;!%s&gt;&quot; % NavigableString.__str__(self, encoding)
 
 class Tag(PageElement):
 
@@ -612,7 +609,10 @@ class Tag(PageElement):
 
     def __repr__(self, encoding=DEFAULT_OUTPUT_ENCODING):
         &quot;&quot;&quot;Renders this tag as a string.&quot;&quot;&quot;
-        return self.decode(eventualEncoding=encoding)
+        return self.__str__(encoding)
+
+    def __unicode__(self):
+        return self.__str__(None)
 
     BARE_AMPERSAND_OR_BRACKET = re.compile(&quot;([&lt;&gt;]|&quot;
                                            + &quot;&amp;(?!#\d+;|#x[0-9a-fA-F]+;|\w+;)&quot;
@@ -623,30 +623,24 @@ class Tag(PageElement):
         appropriate XML entity for an XML special character.&quot;&quot;&quot;
         return &quot;&amp;&quot; + self.XML_SPECIAL_CHARS_TO_ENTITIES[x.group(0)[0]] + &quot;;&quot;
 
-    def __unicode__(self):
-        return self.decode()
-
-    def __str__(self):
-        return self.encode()
+    def __str__(self, encoding=DEFAULT_OUTPUT_ENCODING,
+                prettyPrint=False, indentLevel=0):
+        &quot;&quot;&quot;Returns a string or Unicode representation of this tag and
+        its contents. To get Unicode, pass None for encoding.
 
-    def encode(self, encoding=DEFAULT_OUTPUT_ENCODING,
-               prettyPrint=False, indentLevel=0):
-        return self.decode(prettyPrint, indentLevel, encoding).encode(encoding)
+        NOTE: since Python's HTML parser consumes whitespace, this
+        method is not certain to reproduce the whitespace present in
+        the original string.&quot;&quot;&quot;
 
-    def decode(self, prettyPrint=False, indentLevel=0,
-               eventualEncoding=DEFAULT_OUTPUT_ENCODING):
-        &quot;&quot;&quot;Returns a string or Unicode representation of this tag and
-        its contents. To get Unicode, pass None for encoding.&quot;&quot;&quot;
+        encodedName = self.toEncoding(self.name, encoding)
 
         attrs = []
         if self.attrs:
             for key, val in self.attrs:
                 fmt = '%s=&quot;%s&quot;'
                 if isString(val):
-                    if (self.containsSubstitutions
-                        and eventualEncoding is not None
-                        and '%SOUP-ENCODING%' in val):
-                        val = self.substituteEncoding(val, eventualEncoding)
+                    if self.containsSubstitutions and '%SOUP-ENCODING%' in val:
+                        val = self.substituteEncoding(val, encoding)
 
                     # The attribute value either:
                     #
@@ -676,21 +670,21 @@ class Tag(PageElement):
                     # to escape those to XML entities too.
                     val = self.BARE_AMPERSAND_OR_BRACKET.sub(self._sub_entity, val)
 
-                attrs.append(fmt % (key, val))
+                attrs.append(fmt % (self.toEncoding(key, encoding),
+                                    self.toEncoding(val, encoding)))
         close = ''
         closeTag = ''
         if self.isSelfClosing:
             close = ' /'
         else:
-            closeTag = '&lt;/%s&gt;' % self.name
+            closeTag = '&lt;/%s&gt;' % encodedName
 
         indentTag, indentContents = 0, 0
         if prettyPrint:
             indentTag = indentLevel
             space = (' ' * (indentTag-1))
             indentContents = indentTag + 1
-        contents = self.decodeContents(prettyPrint, indentContents,
-                                       eventualEncoding)
+        contents = self.renderContents(encoding, prettyPrint, indentContents)
         if self.hidden:
             s = contents
         else:
@@ -700,7 +694,7 @@ class Tag(PageElement):
                 attributeString = ' ' + ' '.join(attrs)
             if prettyPrint:
                 s.append(space)
-            s.append('&lt;%s%s%s&gt;' % (self.name, attributeString, close))
+            s.append('&lt;%s%s%s&gt;' % (encodedName, attributeString, close))
             if prettyPrint:
                 s.append(&quot;\n&quot;)
             s.append(contents)
@@ -725,23 +719,19 @@ class Tag(PageElement):
         self.extract()
 
     def prettify(self, encoding=DEFAULT_OUTPUT_ENCODING):
-        return self.encode(encoding, True)
+        return self.__str__(encoding, True)
 
-    def encodeContents(self, encoding=DEFAULT_OUTPUT_ENCODING,
+    def renderContents(self, encoding=DEFAULT_OUTPUT_ENCODING,
                        prettyPrint=False, indentLevel=0):
-        return self.decodeContents(prettyPrint, indentLevel).encode(encoding)
-
-    def decodeContents(self, prettyPrint=False, indentLevel=0,
-                       eventualEncoding=DEFAULT_OUTPUT_ENCODING):
         &quot;&quot;&quot;Renders the contents of this tag as a string in the given
         encoding. If encoding is None, returns a Unicode string..&quot;&quot;&quot;
         s=[]
         for c in self:
             text = None
             if isinstance(c, NavigableString):
-                text = c.decodeGivenEventualEncoding(eventualEncoding)
+                text = c.__str__(encoding)
             elif isinstance(c, Tag):
-                s.append(c.decode(prettyPrint, indentLevel, eventualEncoding))
+                s.append(c.__str__(encoding, prettyPrint, indentLevel))
             if text and prettyPrint:
                 text = text.strip()
             if text:
@@ -782,7 +772,7 @@ class Tag(PageElement):
         return self._findAll(name, attrs, text, limit, generator, **kwargs)
     findChildren = findAll
 
-    # Pre-3.x compatibility methods. Will go away in 4.0.
+    # Pre-3.x compatibility methods
     first = find
     fetch = findAll
 
@@ -792,15 +782,6 @@ class Tag(PageElement):
     def firstText(self, text=None, recursive=True):
         return self.find(text=text, recursive=recursive)
 
-    # 3.x compatibility methods. Will go away in 4.0.
-    def renderContents(self, encoding=DEFAULT_OUTPUT_ENCODING,
-                       prettyPrint=False, indentLevel=0):
-        if encoding is None:
-            return self.decodeContents(prettyPrint, indentLevel, encoding)
-        else:
-            return self.encodeContents(encoding, prettyPrint, indentLevel)
-
-
     #Private methods
 
     def _getAttrMap(self):
@@ -813,22 +794,24 @@ class Tag(PageElement):
         return self.attrMap
 
     #Generator methods
-    def recursiveChildGenerator(self):
-        if not len(self.contents):
-            raise StopIteration
-        stopNode = self._lastRecursiveChild().next
-        current = self.contents[0]
-        while current is not stopNode:
-            yield current
-            current = current.next
-
     def childGenerator(self):
-        if not len(self.contents):
-            raise StopIteration
-        current = self.contents[0]
-        while current:
-            yield current
-            current = current.nextSibling
+        for i in range(0, len(self.contents)):
+            yield self.contents[i]
+        raise StopIteration
+
+    def recursiveChildGenerator(self):
+        stack = [(self, 0)]
+        while stack:
+            tag, start = stack.pop()
+            if isinstance(tag, Tag):
+                for i in range(start, len(tag.contents)):
+                    a = tag.contents[i]
+                    yield a
+                    if isinstance(a, Tag) and tag.contents:
+                        if i &lt; len(tag.contents) - 1:
+                            stack.append((tag, i+1))
+                        stack.append((a, 0))
+                        break
         raise StopIteration
 
 # Next, a couple classes to represent queries and their results.
@@ -931,14 +914,13 @@ class SoupStrainer:
             #other ways of matching match the tag name as a string.
             if isinstance(markup, Tag):
                 markup = markup.name
-            if markup is not None and not isString(markup):
+            if markup and not isString(markup):
                 markup = unicode(markup)
             #Now we know that chunk is either a string, or None.
             if hasattr(matchAgainst, 'match'):
                 # It's a regexp object.
                 result = markup and matchAgainst.search(markup)
-            elif (isList(matchAgainst)
-                  and (markup is not None or not isString(matchAgainst))):
+            elif isList(matchAgainst):
                 result = markup in matchAgainst
             elif hasattr(matchAgainst, 'items'):
                 result = markup.has_key(matchAgainst)
@@ -964,8 +946,8 @@ class ResultSet(list):
 def isList(l):
     &quot;&quot;&quot;Convenience method that works with all 2.x versions of Python
     to determine whether or not something is listlike.&quot;&quot;&quot;
-    return ((hasattr(l, '__iter__') and not isString(l))
-            or (type(l) in (types.ListType, types.TupleType)))
+    return hasattr(l, '__iter__') \
+           or (type(l) in (types.ListType, types.TupleType))
 
 def isString(s):
     &quot;&quot;&quot;Convenience method that works with all 2.x versions of Python
@@ -985,7 +967,7 @@ def buildTagMap(default, *args):
             #It's a map. Merge it.
             for k,v in portion.items():
                 built[k] = v
-        elif isList(portion) and not isString(portion):
+        elif isList(portion):
             #It's a list. Map each item to the default.
             for k in portion:
                 built[k] = default
@@ -996,122 +978,7 @@ def buildTagMap(default, *args):
 
 # Now, the parser classes.
 
-class HTMLParserBuilder(HTMLParser):
-
-    def __init__(self, soup):
-        HTMLParser.__init__(self)
-        self.soup = soup
-
-    # We inherit feed() and reset().
-
-    def handle_starttag(self, name, attrs):
-        if name == 'meta':
-            self.soup.extractCharsetFromMeta(attrs)
-        else:
-            self.soup.unknown_starttag(name, attrs)
-
-    def handle_endtag(self, name):
-        self.soup.unknown_endtag(name)
-
-    def handle_data(self, content):
-        self.soup.handle_data(content)
-
-    def _toStringSubclass(self, text, subclass):
-        &quot;&quot;&quot;Adds a certain piece of text to the tree as a NavigableString
-        subclass.&quot;&quot;&quot;
-        self.soup.endData()
-        self.handle_data(text)
-        self.soup.endData(subclass)
-
-    def handle_pi(self, text):
-        &quot;&quot;&quot;Handle a processing instruction as a ProcessingInstruction
-        object, possibly one with a %SOUP-ENCODING% slot into which an
-        encoding will be plugged later.&quot;&quot;&quot;
-        if text[:3] == &quot;xml&quot;:
-            text = u&quot;xml version='1.0' encoding='%SOUP-ENCODING%'&quot;
-        self._toStringSubclass(text, ProcessingInstruction)
-
-    def handle_comment(self, text):
-        &quot;Handle comments as Comment objects.&quot;
-        self._toStringSubclass(text, Comment)
-
-    def handle_charref(self, ref):
-        &quot;Handle character references as data.&quot;
-        if self.soup.convertEntities:
-            data = unichr(int(ref))
-        else:
-            data = '&amp;#%s;' % ref
-        self.handle_data(data)
-
-    def handle_entityref(self, ref):
-        &quot;&quot;&quot;Handle entity references as data, possibly converting known
-        HTML and/or XML entity references to the corresponding Unicode
-        characters.&quot;&quot;&quot;
-        data = None
-        if self.soup.convertHTMLEntities:
-            try:
-                data = unichr(name2codepoint[ref])
-            except KeyError:
-                pass
-
-        if not data and self.soup.convertXMLEntities:
-                data = self.soup.XML_ENTITIES_TO_SPECIAL_CHARS.get(ref)
-
-        if not data and self.soup.convertHTMLEntities and \
-            not self.soup.XML_ENTITIES_TO_SPECIAL_CHARS.get(ref):
-                # TODO: We've got a problem here. We're told this is
-                # an entity reference, but it's not an XML entity
-                # reference or an HTML entity reference. Nonetheless,
-                # the logical thing to do is to pass it through as an
-                # unrecognized entity reference.
-                #
-                # Except: when the input is &quot;&amp;carol;&quot; this function
-                # will be called with input &quot;carol&quot;. When the input is
-                # &quot;AT&amp;T&quot;, this function will be called with input
-                # &quot;T&quot;. We have no way of knowing whether a semicolon
-                # was present originally, so we don't know whether
-                # this is an unknown entity or just a misplaced
-                # ampersand.
-                #
-                # The more common case is a misplaced ampersand, so I
-                # escape the ampersand and omit the trailing semicolon.
-                data = &quot;&amp;amp;%s&quot; % ref
-        if not data:
-            # This case is different from the one above, because we
-            # haven't already gone through a supposedly comprehensive
-            # mapping of entities to Unicode characters. We might not
-            # have gone through any mapping at all. So the chances are
-            # very high that this is a real entity, and not a
-            # misplaced ampersand.
-            data = &quot;&amp;%s;&quot; % ref
-        self.handle_data(data)
-
-    def handle_decl(self, data):
-        &quot;Handle DOCTYPEs and the like as Declaration objects.&quot;
-        self._toStringSubclass(data, Declaration)
-
-    def parse_declaration(self, i):
-        &quot;&quot;&quot;Treat a bogus SGML declaration as raw data. Treat a CDATA
-        declaration as a CData object.&quot;&quot;&quot;
-        j = None
-        if self.rawdata[i:i+9] == '&lt;![CDATA[':
-             k = self.rawdata.find(']]&gt;', i)
-             if k == -1:
-                 k = len(self.rawdata)
-             data = self.rawdata[i+9:k]
-             j = k+3
-             self._toStringSubclass(data, CData)
-        else:
-            try:
-                j = HTMLParser.parse_declaration(self, i)
-            except HTMLParseError:
-                toHandle = self.rawdata[i:]
-                self.handle_data(toHandle)
-                j = i + len(toHandle)
-        return j
-
-
-class BeautifulStoneSoup(Tag):
+class BeautifulStoneSoup(Tag, SGMLParser):
 
     &quot;&quot;&quot;This class contains the basic parser and search code. It defines
     a parser that knows nothing about tag behavior except for the
@@ -1157,15 +1024,14 @@ class BeautifulStoneSoup(Tag):
 
     def __init__(self, markup=&quot;&quot;, parseOnlyThese=None, fromEncoding=None,
                  markupMassage=True, smartQuotesTo=XML_ENTITIES,
-                 convertEntities=None, selfClosingTags=None, isHTML=False,
-                 builder=HTMLParserBuilder):
+                 convertEntities=None, selfClosingTags=None, isHTML=False):
         &quot;&quot;&quot;The Soup object is initialized as the 'root tag', and the
         provided markup (which can be a string or a file-like object)
         is fed into the underlying parser.
 
-        HTMLParser will process most bad HTML, and the BeautifulSoup
+        sgmllib will process most bad HTML, and the BeautifulSoup
         class has some tricks for dealing with some HTML that kills
-        HTMLParser, but Beautiful Soup can nonetheless choke or lose data
+        sgmllib, but Beautiful Soup can nonetheless choke or lose data
         if your data uses self-closing tags or declarations
         incorrectly.
 
@@ -1175,7 +1041,7 @@ class BeautifulStoneSoup(Tag):
         you'll get better performance.
 
         The default parser massage techniques fix the two most common
-        instances of invalid HTML that choke HTMLParser:
+        instances of invalid HTML that choke sgmllib:
 
          &lt;br/&gt; (No space between name of closing tag and tag close)
          &lt;! --Comment--&gt; (Extraneous whitespace in declaration)
@@ -1213,8 +1079,7 @@ class BeautifulStoneSoup(Tag):
             self.escapeUnrecognizedEntities = False
 
         self.instanceSelfClosingTags = buildTagMap(None, selfClosingTags)
-        self.builder = builder(self)
-        self.reset()
+        SGMLParser.__init__(self)
 
         if hasattr(markup, 'read'):        # It's a file-type object.
             markup = markup.read()
@@ -1224,8 +1089,17 @@ class BeautifulStoneSoup(Tag):
             self._feed(isHTML=isHTML)
         except StopParsing:
             pass
-        self.markup = None                 # The markup can now be GCed.
-        self.builder = None                # So can the builder.
+        self.markup = None                 # The markup can now be GCed
+
+    def convert_charref(self, name):
+        &quot;&quot;&quot;This method fixes a bug in Python's SGMLParser.&quot;&quot;&quot;
+        try:
+            n = int(name)
+        except ValueError:
+            return
+        if not 0 &lt;= n &lt;= 127 : # ASCII ends at 127, not 255
+            return
+        return self.convert_codepoint(n)
 
     def _feed(self, inDocumentEncoding=None, isHTML=False):
         # Convert the document to Unicode.
@@ -1252,14 +1126,27 @@ class BeautifulStoneSoup(Tag):
                 # was relying on the existence of markupMassage, this
                 # might cause problems.
                 del(self.markupMassage)
-        self.builder.reset()
+        self.reset()
 
-        self.builder.feed(markup)
+        SGMLParser.feed(self, markup)
         # Close out any unfinished strings and close all the open tags.
         self.endData()
         while self.currentTag.name != self.ROOT_TAG_NAME:
             self.popTag()
 
+    def __getattr__(self, methodName):
+        &quot;&quot;&quot;This method routes method call requests to either the SGMLParser
+        superclass or the Tag superclass, depending on the method name.&quot;&quot;&quot;
+        #print &quot;__getattr__ called on %s.%s&quot; % (self.__class__, methodName)
+
+        if methodName.find('start_') == 0 or methodName.find('end_') == 0 \
+               or methodName.find('do_') == 0:
+            return SGMLParser.__getattr__(self, methodName)
+        elif methodName.find('__') != 0:
+            return Tag.__getattr__(self, methodName)
+        else:
+            raise AttributeError
+
     def isSelfClosingTag(self, name):
         &quot;&quot;&quot;Returns true iff the given string is the name of a
         self-closing tag according to this parser.&quot;&quot;&quot;
@@ -1269,7 +1156,7 @@ class BeautifulStoneSoup(Tag):
     def reset(self):
         Tag.__init__(self, self, self.ROOT_TAG_NAME)
         self.hidden = 1
-        self.builder.reset()
+        SGMLParser.reset(self)
         self.currentData = []
         self.currentTag = None
         self.tagStack = []
@@ -1434,9 +1321,99 @@ class BeautifulStoneSoup(Tag):
     def handle_data(self, data):
         self.currentData.append(data)
 
-    def extractCharsetFromMeta(self, attrs):
-        self.unknown_starttag('meta', attrs)
+    def _toStringSubclass(self, text, subclass):
+        &quot;&quot;&quot;Adds a certain piece of text to the tree as a NavigableString
+        subclass.&quot;&quot;&quot;
+        self.endData()
+        self.handle_data(text)
+        self.endData(subclass)
+
+    def handle_pi(self, text):
+        &quot;&quot;&quot;Handle a processing instruction as a ProcessingInstruction
+        object, possibly one with a %SOUP-ENCODING% slot into which an
+        encoding will be plugged later.&quot;&quot;&quot;
+        if text[:3] == &quot;xml&quot;:
+            text = u&quot;xml version='1.0' encoding='%SOUP-ENCODING%'&quot;
+        self._toStringSubclass(text, ProcessingInstruction)
 
+    def handle_comment(self, text):
+        &quot;Handle comments as Comment objects.&quot;
+        self._toStringSubclass(text, Comment)
+
+    def handle_charref(self, ref):
+        &quot;Handle character references as data.&quot;
+        if self.convertEntities:
+            data = unichr(int(ref))
+        else:
+            data = '&amp;#%s;' % ref
+        self.handle_data(data)
+
+    def handle_entityref(self, ref):
+        &quot;&quot;&quot;Handle entity references as data, possibly converting known
+        HTML and/or XML entity references to the corresponding Unicode
+        characters.&quot;&quot;&quot;
+        data = None
+        if self.convertHTMLEntities:
+            try:
+                data = unichr(name2codepoint[ref])
+            except KeyError:
+                pass
+
+        if not data and self.convertXMLEntities:
+                data = self.XML_ENTITIES_TO_SPECIAL_CHARS.get(ref)
+
+        if not data and self.convertHTMLEntities and \
+            not self.XML_ENTITIES_TO_SPECIAL_CHARS.get(ref):
+                # TODO: We've got a problem here. We're told this is
+                # an entity reference, but it's not an XML entity
+                # reference or an HTML entity reference. Nonetheless,
+                # the logical thing to do is to pass it through as an
+                # unrecognized entity reference.
+                #
+                # Except: when the input is &quot;&amp;carol;&quot; this function
+                # will be called with input &quot;carol&quot;. When the input is
+                # &quot;AT&amp;T&quot;, this function will be called with input
+                # &quot;T&quot;. We have no way of knowing whether a semicolon
+                # was present originally, so we don't know whether
+                # this is an unknown entity or just a misplaced
+                # ampersand.
+                #
+                # The more common case is a misplaced ampersand, so I
+                # escape the ampersand and omit the trailing semicolon.
+                data = &quot;&amp;amp;%s&quot; % ref
+        if not data:
+            # This case is different from the one above, because we
+            # haven't already gone through a supposedly comprehensive
+            # mapping of entities to Unicode characters. We might not
+            # have gone through any mapping at all. So the chances are
+            # very high that this is a real entity, and not a
+            # misplaced ampersand.
+            data = &quot;&amp;%s;&quot; % ref
+        self.handle_data(data)
+
+    def handle_decl(self, data):
+        &quot;Handle DOCTYPEs and the like as Declaration objects.&quot;
+        self._toStringSubclass(data, Declaration)
+
+    def parse_declaration(self, i):
+        &quot;&quot;&quot;Treat a bogus SGML declaration as raw data. Treat a CDATA
+        declaration as a CData object.&quot;&quot;&quot;
+        j = None
+        if self.rawdata[i:i+9] == '&lt;![CDATA[':
+             k = self.rawdata.find(']]&gt;', i)
+             if k == -1:
+                 k = len(self.rawdata)
+             data = self.rawdata[i+9:k]
+             j = k+3
+             self._toStringSubclass(data, CData)
+        else:
+            try:
+                j = SGMLParser.parse_declaration(self, i)
+            except SGMLParseError:
+                toHandle = self.rawdata[i:]
+                self.handle_data(toHandle)
+                j = i + len(toHandle)
+        return j
 
 class BeautifulSoup(BeautifulStoneSoup):
 
@@ -1544,7 +1521,7 @@ class BeautifulSoup(BeautifulStoneSoup):
     # Used to detect the charset in a META tag; see start_meta
     CHARSET_RE = re.compile(&quot;((^|;)\s*charset=)([^;]*)&quot;, re.M)
 
-    def extractCharsetFromMeta(self, attrs):
+    def start_meta(self, attrs):
         &quot;&quot;&quot;Beautiful Soup can detect a charset included in a META tag,
         try to convert the document to that charset, and re-parse the
         document from the beginning.&quot;&quot;&quot;
@@ -1591,7 +1568,6 @@ class BeautifulSoup(BeautifulStoneSoup):
         if tag and tagNeedsEncodingSubstitution:
             tag.containsSubstitutions = True
 
-
 class StopParsing(Exception):
     pass
 
@@ -1772,18 +1748,15 @@ class UnicodeDammit:
         self.unicode = u
         if not u: self.originalEncoding = None
 
-    def _subMSChar(self, match):
+    def _subMSChar(self, orig):
         &quot;&quot;&quot;Changes a MS smart quote character to an XML or HTML
         entity.&quot;&quot;&quot;
-        orig = match.group(1)
         sub = self.MS_CHARS.get(orig)
         if type(sub) == types.TupleType:
             if self.smartQuotesTo == 'xml':
-                sub = '&amp;#x'.encode() + sub[1].encode() + ';'.encode()
+                sub = '&amp;#x%s;' % sub[1]
             else:
-                sub = '&amp;'.encode() + sub[0].encode() + ';'.encode()
-        else:
-            sub = sub.encode()
+                sub = '&amp;%s;' % sub[0]
         return sub
 
     def _convertFrom(self, proposed):
@@ -1798,9 +1771,9 @@ class UnicodeDammit:
         if self.smartQuotesTo and proposed.lower() in(&quot;windows-1252&quot;,
                                                       &quot;iso-8859-1&quot;,
                                                       &quot;iso-8859-2&quot;):
-            smart_quotes_re = &quot;([\x80-\x9f])&quot;
-            smart_quotes_compiled = re.compile(smart_quotes_re)
-            markup = smart_quotes_compiled.sub(self._subMSChar, markup)
+            markup = re.compile(&quot;([\x80-\x9f])&quot;).sub \
+                     (lambda(x): self._subMSChar(x.group(1)),
+                      markup)
 
         try:
             # print &quot;Trying to convert document to %s&quot; % proposed
@@ -1889,15 +1862,13 @@ class UnicodeDammit:
                 pass
         except:
             xml_encoding_match = None
-        xml_encoding_re = '^&lt;\?.*encoding=[\'&quot;](.*?)[\'&quot;].*\?&gt;'.encode()
-        xml_encoding_match = re.compile(xml_encoding_re).match(xml_data)
+        xml_encoding_match = re.compile(
+            '^&lt;\?.*encoding=[\'&quot;](.*?)[\'&quot;].*\?&gt;').match(xml_data)
         if not xml_encoding_match and isHTML:
-            meta_re = '&lt;\s*meta[^&gt;]+charset=([^&gt;]*?)[;\'&quot;&gt;]'.encode()
-            regexp = re.compile(meta_re, re.I)
+            regexp = re.compile('&lt;\s*meta[^&gt;]+charset=([^&gt;]*?)[;\'&quot;&gt;]', re.I)
             xml_encoding_match = regexp.search(xml_data)
         if xml_encoding_match is not None:
-            xml_encoding = xml_encoding_match.groups()[0].decode(
-                'ascii').lower()
+            xml_encoding = xml_encoding_match.groups()[0].lower()
             if isHTML:
                 self.declaredHTMLEncoding = xml_encoding
             if sniffed_xml_encoding and \</diff>
      <filename>BeautifulSoup.py</filename>
    </modified>
    <modified>
      <diff>@@ -30,6 +30,7 @@ class MangaFrame (wx.MDIChildFrame):
     Image = None
     Scrolled2Bottom = None
     auto_open = True
+    last_ctrl = None
     def __init__ (self, parent, id, backend, *args, **kwargs):
         self.BackEnd = __import__(backend)
         self.Engine = BatchEngine(self.BackEnd)
@@ -133,6 +134,8 @@ class MangaFrame (wx.MDIChildFrame):
             if state_type not in state:
                 return
             values = state[state_type]
+            if values[0] == -1:
+                return
             if state_type == &quot;chapter&quot;:
                 control = self.cb_ctrls[&quot;manga&quot;]
             elif state_type == &quot;page&quot;:
@@ -159,6 +162,8 @@ class MangaFrame (wx.MDIChildFrame):
         panel.SetBackgroundStyle(wx.BG_STYLE_SYSTEM)
         self.ButtonBox = MyButtonBox(self, panel)
         ButtonBoxItems = (
+            [&quot;Refresh&quot;, _(&quot;Refresh&quot;)], 
+            &quot;&lt;sep&gt;&quot;, 
             [&quot;Zoom&quot;, _(&quot;Change Zoom&quot;)], 
             [&quot;Original Size&quot;, _(&quot;Resize Image to the original size&quot;)], 
             [&quot;Fit&quot;, _(&quot;Fit to screen&quot;)], 
@@ -177,6 +182,18 @@ class MangaFrame (wx.MDIChildFrame):
         
         self.Show()
     
+    def Refresh (self, refresh_type):
+        if not refresh_type:
+            self.parent.ClearCache()
+        elif refresh_type in (&quot;manga&quot;, &quot;chapter&quot;, &quot;page&quot;):
+            for path in (self.Engine.last_value_path, self.Engine.last_pickle_path):
+                if not path:
+                    continue
+                if os.path.isfile(path):
+                    os.remove(path)
+            getattr(self, &quot;OnComboBox_&quot; + refresh_type)(None)
+    def OnRefresh (self, event):
+        self.Refresh(self.last_ctrl)
     def Zoom (self, zoom_value):
         self.Image.Resize(zoom_value)
         self.UpdateImage()
@@ -212,7 +229,7 @@ class MangaFrame (wx.MDIChildFrame):
         self.AutoFit = True
         self.scrolledwindow.SetFocus()
     def OnPrevious (self, event):
-        if not self.Image:
+        if not self.last_ctrl == &quot;page&quot;:
             wx.MessageBox(_(&quot;You should load a page before!&quot;))
             return
         self.cb_ctrls[&quot;page&quot;].Previous()
@@ -221,7 +238,7 @@ class MangaFrame (wx.MDIChildFrame):
         self.parent.OnFullScreen(event)
         self.scrolledwindow.SetFocus()
     def OnForward (self, event):
-        if not self.Image:
+        if not self.last_ctrl == &quot;page&quot;:
             wx.MessageBox(_(&quot;You should load a page before!&quot;))
             return
         self.cb_ctrls[&quot;page&quot;].Forward()
@@ -242,11 +259,11 @@ class MangaFrame (wx.MDIChildFrame):
         self.scrolledwindow.SetFocus()
     
     def OnComboBox_manga (self, event):
-        self.OnComboBox(self.cb_ctrls[&quot;manga&quot;])
+        self.OnComboBox(&quot;manga&quot;)
     def OnComboBox_chapter (self, event):
-        self.OnComboBox(self.cb_ctrls[&quot;chapter&quot;])
+        self.OnComboBox(&quot;chapter&quot;)
     def OnComboBox_page (self, event):
-        self.OnComboBox(self.cb_ctrls[&quot;page&quot;])
+        self.OnComboBox(&quot;page&quot;)
         
         def start ():
             manga_ctrl = self.cb_ctrls[&quot;manga&quot;]
@@ -261,7 +278,9 @@ class MangaFrame (wx.MDIChildFrame):
             self.EngineThread = threading.Thread(target=self.Engine.run)
             self.EngineThread.start()
         threading.Thread(target=start).start()
-    def OnComboBox (self, ctrl):
+    def OnComboBox (self, ctrl_name):
+        self.last_ctrl = ctrl_name
+        ctrl = self.cb_ctrls[ctrl_name]
         wx.BeginBusyCursor()
         self.title = self._title + _(&quot; ...Loading&quot;)
         self.SetTitle(self.title)</diff>
      <filename>gmangas/mangaframe.py</filename>
    </modified>
    <modified>
      <diff>@@ -16,6 +16,7 @@
 # along with this program.  If not, see &lt;http://www.gnu.org/licenses/&gt;.
 import threading
 import os
+import shutil
 try:
     import cPickle as pickle
 except:
@@ -65,6 +66,8 @@ class MDIFrame (wx.MDIParentFrame):
             
             [_(&quot;Download Manga&quot;), _(&quot;Show FullScreen&quot;), self.OnDownloadManga, _(&quot;Utils&quot;)],
             [_(&quot;Download Chapter&quot;), _(&quot;Show FullScreen&quot;), self.OnDownloadChapter, _(&quot;Utils&quot;)],
+            [&quot;&lt;sep&gt;&quot;, _(&quot;Utils&quot;)],
+            [_(&quot;Clear Cache&quot;), _(&quot;Clear Cache (not manga's images!)&quot;), self.OnClearCache, _(&quot;Utils&quot;)],
             
             [_(&quot;About&quot;), _(&quot;About window&quot;), self.OnAbout, _(&quot;Help&quot;), lambda id:wx.App.SetMacAboutMenuItemId(id)]]
 
@@ -146,6 +149,12 @@ class MDIFrame (wx.MDIParentFrame):
         DownloadDialog(self, self.MenuBar.SelectedBackend, &quot;manga&quot;)
     def OnDownloadChapter (self, event):
         DownloadDialog(self, self.MenuBar.SelectedBackend, &quot;chapter&quot;)
+    def ClearCache (self):
+        remove_dirs([hdir(&quot;pickled&quot;), hdir(&quot;values&quot;)])
+        path = hdir(&quot;STATE&quot;)
+        if os.path.isfile(path):
+            os.remove(path)
+    OnClearCache = lambda event: ClearCache()
 
     def NewManga (self, id, backend, state=None): 
         self.Frames[id] = mangaWin = MangaFrame(self, id, backend)</diff>
      <filename>gmangas/mdi.py</filename>
    </modified>
    <modified>
      <diff>@@ -41,10 +41,18 @@ def cdir (*args):
 def hdir (*args):
     args = filter(lambda x:x, args)
     return os.path.join(homedir, *args)
+def remove_dir (directory):
+    if os.path.isdir(directory):
+        shutil.rmtree(directory)
+def remove_dirs (dirs):
+    for directory in dirs:
+        remove_dir(directory)
 __builtin__.cdir = cdir
 __builtin__.curdir = os.path.dirname(cdir(&quot;gmangas&quot;))
 __builtin__.hdir = hdir
 __builtin__.homedir = homedir
+__builtin__.remove_dir = remove_dir
+__builtin__.remove_dirs = remove_dirs
 sys.path.append(os.path.join(curdir, &quot;backends&quot;))
 
 __builtin__.__url__ = &quot;http://gmangas.itascan.info/&quot;
@@ -66,10 +74,7 @@ def check_cache_v ():
             if l_cache_v != __version__:
                 print &quot;Changed Gmangas2 version: &quot;\
                 +&quot;Removing pickled/values caches&quot;
-                if os.path.isdir(hdir(&quot;pickled&quot;)):
-                    shutil.rmtree(hdir(&quot;pickled&quot;))
-                if os.path.isdir(hdir(&quot;values&quot;)):
-                    shutil.rmtree(hdir(&quot;values&quot;))
+                remove_dirs([hdir(&quot;pickled&quot;), hdir(&quot;values&quot;)])
                 write()
     else:
         write()
@@ -97,6 +102,7 @@ class Engine:
     clear_url = lambda self,url:url.replace(&quot;/&quot;,&quot;&quot;).replace(&quot;?&quot;,&quot;&quot;)
     images = {}
     forced_chapters = []
+    last_value_path,last_pickle_path,last_pickle_dir = None,None,None
     def __init__ (self, BackEnd):
         self.GetSavePath = lambda ending: hdir(&quot;values&quot;, self.BackEnd.name, ending)
         
@@ -112,8 +118,8 @@ class Engine:
         self.image_mode = &quot;image&quot;
     
     def Pickle (self, pickle_path, CallBack, url=None, ext=None, force=False):
-        pickle_path = hdir(&quot;pickled&quot;, self.BackEnd.name, *pickle_path)
-        pickle_dir = os.path.dirname(pickle_path)
+        self.last_pickle_path = pickle_path = hdir(&quot;pickled&quot;, self.BackEnd.name, *pickle_path)
+        self.last_pickle_dir = pickle_dir = os.path.dirname(pickle_path)
         if not os.path.isdir(pickle_dir):
             os.makedirs(pickle_dir)
         
@@ -140,11 +146,12 @@ class Engine:
                 data = self.__Download(*url)
                 funct = getattr(self.BackEnd, &quot;parse_&quot;+mode)
                 return funct(data)
-            if (mode == &quot;chapter&quot;) and (url not in self.forced_chapters):
-                force = True
-                self.forced_chapters.append(url)
-            else:
-                force = False
+            #if (mode == &quot;chapter&quot;) and (url not in self.forced_chapters):
+            #    force = True
+            #    self.forced_chapters.append(url)
+            #else:
+            #    force = False
+            force = False
             if len(url) &gt; 1:
                 result = self.Pickle(pickle_path, Pickle_CallBack, url[0], url[1], force=force)
             else:
@@ -236,7 +243,7 @@ class Engine:
         return [result, old_result]
     
     def __Download (self, url, ext=None, path=None):
-        path = self.GetSavePath(self.clear_url(url))
+        self.last_value_path = path = self.GetSavePath(self.clear_url(url))
         if not url:
             path = os.path.join(path, &quot;main&quot;)
         dirname = os.path.dirname(path)
@@ -267,7 +274,8 @@ class BatchEngine (Engine):
         if hasattr(self.parent, &quot;gauge1&quot;):
             wx.CallAfter(self.parent.gauge1.SetRange, (len(self.Values[&quot;page&quot;][0])))
         for img_id, img_value in enumerate(self.Values[&quot;page&quot;][0]):
-            if self.stopped or (not app.Running): return
+            if self.stopped or (not app.Running):
+                return
             self.State[&quot;image&quot;] = (img_id, img_value)
             self.LoadImage(img_id, chapter[1], download_only=True, save=True)
             if hasattr(self.parent, &quot;gauge1&quot;):</diff>
      <filename>gmangas2.py</filename>
    </modified>
  </modified>
  <removed type="array"/>
  <parents type="array">
    <parent>
      <id>c4f6fe02eaf293cc90ba37665490017306a199cb</id>
    </parent>
  </parents>
  <author>
    <name>luxkun</name>
    <email>luciano.ferraro@gmail.com</email>
  </author>
  <url>http://github.com/luxkun/gmangas2/commit/d8a96898f76e0924f2d95a9c14b9fc7bfaeae114</url>
  <id>d8a96898f76e0924f2d95a9c14b9fc7bfaeae114</id>
  <committed-date>2009-01-05T17:05:51-08:00</committed-date>
  <authored-date>2009-01-05T17:05:51-08:00</authored-date>
  <message>Now gmangas won't go mad if you try to go forward with an old image, while loading a new manga;added clear cache menuitem;added refresh button;removed auto-always-loading of manga's chapter list;various fixes</message>
  <tree>1cbc1d907b5bf6e5347c02bf2a855d5a8fc02ab4</tree>
  <committer>
    <name>luxkun</name>
    <email>luciano.ferraro@gmail.com</email>
  </committer>
</commit>
