Browse files

Added more tags to converter

  • Loading branch information...
1 parent 6a745ef commit 64c581a865fde0db65125ef9a11cbd4f0b7000f6 Ewald Zietsman committed May 21, 2012
View
39 html2latex.py
@@ -5,14 +5,17 @@
import sys
import os
import re, htmlentitydefs
+import urllib
from lxml import etree
from jinja2 import Template, FileSystemLoader, Environment
from jinja2.exceptions import TemplateNotFound
+import PIL
# Some boilerplate to use jinja more elegantly with LaTeX
# http://flask.pocoo.org/snippets/55/
+
LATEX_SUBS = (
(re.compile(r'\\'), r'\\textbackslash'),
(re.compile(r'([{}_#%&$])'), r'\\\1'),
@@ -46,23 +49,27 @@ def delegate(element):
>>> root = etree.HTML('<h1>Title</h1>')
>>> print delegate(root[0][0])
\chapter{Title}'''
-
-# print element.tag, element.text
+ #print '%', element.tag, element.attrib
# delegate the work to classes handling special cases
- if element.tag =='div':
+ if element.tag == 'div':
if 'class' not in element.attrib:
element.attrib['class'] = ''
if element.attrib['class'] == 'keyconcepts':
+ #import pdb; pdb.set_trace()
myElement = div_keyconcepts(element)
elif element.attrib['class'] == 'investigation':
myElement = div_investigation(element)
elif 'investigation-' in element.attrib['class']:
myElement = div_investigation_header(element)
-
else:
myElement = html_element(element)
-
+
+ elif element.tag == 'table':
+ myElement = table(element)
+
+ elif element.tag == 'img':
+ myElement = img(element)
else:
# no special handling required
myElement = html_element(element)
@@ -102,6 +109,28 @@ def render_children(self):
self.content['text'] += delegate(child)
class table(html_element):
    """Handler for ``<table>`` elements, rendered with ``table.tex``.

    Fills ``content['cols']`` with the tabularx column specification and
    ``content['ncols']`` with a column count derived from the first row.
    """

    def __init__(self, element):
        """Derive the column layout from the first ``<tr>`` of *element*.

        A table without any ``<tr>`` is treated as having zero cells instead
        of raising ``AttributeError`` on the missing row.
        """
        html_element.__init__(self, element)

        # The number of columns is taken from the <td> cells of the first row.
        first_row = element.find('.//tr')
        if first_row is None:
            cell_count = 0
        else:
            cell_count = len(first_row.findall('.//td'))

        # One extra column: presumably because td.tex ends every cell with
        # '&', which leaves an empty trailing column in each row.
        ncols = cell_count + 1

        self.template = texenv.get_template('table.tex')
        # NOTE(review): table.tex only reads content.cols; the extra +1 here
        # is kept unchanged in case other code relies on it -- confirm.
        self.content['ncols'] = ncols + 1
        self.content['cols'] = '|' + '|'.join(['c'] * ncols)
+
+
class img(html_element):
    """Handler for ``<img>`` elements.

    Stores the image file name in ``content['imagename']`` and makes sure a
    copy of the image exists in the local ``images`` directory, downloading
    it from the ``src`` URL when it is not there yet.
    """

    def __init__(self, element):
        """Record the image name and fetch the image if it is not cached.

        Raises ``KeyError`` when *element* has no ``src`` attribute.
        """
        html_element.__init__(self, element)

        # The file name is the last path component of the image URL.
        src = element.attrib['src']
        name = src.rpartition('/')[-1]
        self.content['imagename'] = name

        # Create the cache directory on first use instead of crashing in
        # os.listdir() when it does not exist.
        image_dir = os.path.join(os.curdir, 'images')
        if not os.path.isdir(image_dir):
            os.makedirs(image_dir)

        target = os.path.join(image_dir, name)
        if not os.path.exists(target):
            # Close the connection and the output file explicitly rather
            # than relying on garbage collection.
            response = urllib.urlopen(src)
            try:
                data = response.read()
            finally:
                response.close()
            # TODO: get mimetype
            with open(target, 'wb') as handle:
                handle.write(data)
+
class div_keyconcepts(html_element):
def __init__(self, element):
View
1 templates/br.tex
@@ -0,0 +1 @@
+
View
0 templates/colgroup.tex
No changes.
View
2 templates/div.tex
@@ -1,3 +1,3 @@
-%%% div
+%%% div (((content.class)))
(((content.text)))
(((content.tail.strip())))
View
1 templates/hr.tex
@@ -1,3 +1,2 @@
\hrule
-
View
5 templates/img.tex
@@ -0,0 +1,5 @@
+
+\begin{center}
+ \includegraphics[width=\textwidth]{images/(((content.imagename)))}
+\end{center}
+
View
2 templates/li.tex
@@ -1,2 +1,2 @@
- \item (((content['text'])))(((content.tail)))
+\item (((content['text'].lstrip())))(((content.tail.strip())))
View
1 templates/not_implemented.tex
@@ -3,3 +3,4 @@
\textbf{(((content['tag'])))(((content['class']))) not yet implemented!}
%
%
+(((content.tail)))
View
2 templates/ol.tex
@@ -1,5 +1,5 @@
\begin{enumerate}
-(((content.text)))
+(((content.text.lstrip())))
\end{enumerate}
(((content.tail)))
View
5 templates/p.tex
@@ -1,5 +1,2 @@
-%p
-
-(((content.text)))
-(((content.tail.strip())))
+(((content.text)))(((content.tail.strip())))
View
1 templates/span.tex
@@ -0,0 +1 @@
+(((content.text.strip()))) (((content.tail.strip())))
View
2 templates/strong.tex
@@ -0,0 +1,2 @@
+\emph{(((content.text)))}
+(((content.tail)))
View
4 templates/table.tex
@@ -0,0 +1,4 @@
+\begin{tabularx}{\textwidth}{(((content.cols)))}
+\hline
+(((content.text)))(((content.tail)))
+\end{tabularx}
View
1 templates/tbody.tex
@@ -0,0 +1 @@
+(((content.text)))(((content.tail)))
View
1 templates/td.tex
@@ -0,0 +1 @@
+(((content.text.lstrip())))(((content.tail.rstrip()))) &
View
2 templates/tr.tex
@@ -0,0 +1,2 @@
+(((content.text.lstrip())))(((content.tail.rstrip()))) \\ \hline
+
View
4 templates/ul.tex
@@ -1,5 +1,5 @@
-
\begin{itemize}
-(((content.text)))
+(((content.text.lstrip())))
\end{itemize}
(((content.tail)))
+

0 comments on commit 64c581a

Please sign in to comment.