Permalink
Browse files

Adds a first test (for html.NeutralHTMLReconstructor).

  • Loading branch information...
1 parent 467bc7e commit a596b271ed41ef0914c1990b320a7c48083c15c6 @BertrandBordage committed Sep 8, 2012
@@ -0,0 +1 @@
+from html import *
@@ -0,0 +1 @@
+from valid_html import ValidHTMLTestCase
@@ -0,0 +1,42 @@
+<!DOCTYPE html>
+<html>
+
+ <head>
+ <meta name="author" content="Monty Python's Flying Circus" />
+ <title>
+ The Lumberjack Song
+ </title>
+ </head>
+
+ <body>
+
+ <h1>The Lumberjack Song</h1>
+
+ <p>
+ I'm a lumberjack and I'm OK,<br />
+ I sleep all night and I work all day,
+ </p>
+
+ <p>
+ <strong>
+ He's a lumberjack and he's OK,<br />
+ He sleeps all night and he works all day,
+ </strong>
+ </p>
+
+ <p>
+ I cut down trees, I eat my lunch,<br />
+ I go to the lavatory;<br />
+ On Wednesdays I go shopping,<br />
+ And have buttered scones for tea!
+ </p>
+
+ <em>Et cetera</em>,
+ <a href="https://www.google.fr/search?q=lumberjack+song">
+ Google this song
+ </a>
+ to hear it or get all its lyrics.
+
+ </body>
+
+</html>
@@ -0,0 +1,27 @@
+import os.path
+from unittest import TestCase
+from terms.html import NeutralHTMLReconstructor
+
+CURRENT_PATH = os.path.abspath(os.path.dirname(__file__))
+
+
+class ValidHTMLTestCase(TestCase):
+ def test(self):
+ '''
+ After being reconstructed, valid_html should be exactly the same.
+ And after being reconstructed, valid_html_with_extra_spaces should
+ be exactly the same as valid_html (since extra whitespaces within tags
+ are stripped).
+ '''
+ filename = 'valid_html.html'
+ html = open(os.path.join(CURRENT_PATH, filename)).read()
+ filename = 'valid_html_with_extra_spaces.html'
+ html_w_extra_spaces = open(os.path.join(CURRENT_PATH, filename)).read()
+
+ r = NeutralHTMLReconstructor()
+ r.feed(html)
+ self.assertEqual(html, r.out)
+
+ r.reset()
+ r.feed(html_w_extra_spaces)
+ self.assertEqual(html, r.out)
@@ -0,0 +1,42 @@
+<!DOCTYPE html>
+<html>
+
+ <head >
+ <meta name = "author" content="Monty Python's Flying Circus" />
+ <title>
+ The Lumberjack Song
+ </title >
+ </head>
+
+ <body >
+
+ <h1>The Lumberjack Song</h1 >
+
+ <p >
+ I'm a lumberjack and I'm OK,<br />
+ I sleep all night and I work all day,
+ </p>
+
+ <p>
+ <strong>
+ He's a lumberjack and he's OK,<br />
+ He sleeps all night and he works all day,
+ </strong>
+ </p>
+
+ <p>
+ I cut down trees, I eat my lunch,<br />
+ I go to the lavatory;<br />
+ On Wednesdays I go shopping,<br />
+ And have buttered scones for tea!
+ </p>
+
+ <em>Et cetera</em>,
+ <a href = "https://www.google.fr/search?q=lumberjack+song" >
+ Google this song
+ </a >
+ to hear it or get all its lyrics.
+
+ </body>
+
+</html>
@@ -0,0 +1 @@
+from NeutralHTMLReconstructor import *

0 comments on commit a596b27

Please sign in to comment.