diff --git a/README.rst b/README.rst index a1e8889..14e7bde 100755 --- a/README.rst +++ b/README.rst @@ -175,7 +175,7 @@ html 문서를 text로 변환 설치 방법 ------------------- -`설치 `_ 문서를 참조하십시오. +설치 문서를 참조하십시오: http://pyufp.readthedocs.org/installation.html 소스 코드 ------------------- diff --git a/changelog.rst b/changelog.rst index 3f0d821..b78bb35 100644 --- a/changelog.rst +++ b/changelog.rst @@ -1,6 +1,13 @@ 변경사항 ============== +v1.5.1 +------- + ++ tempfile.mkstemp로 생성한 파일 디스크립터가 닫히지 않았던 버그 수정. [`tb69wn6127`_] ++ 'UnicodeEncodeError: 'ascii' codec can't encode character' 버그 수정. [`tb69wn6127`_] ++ ufp.html.toText 함수에서 converter 옵션을 'w3m'으로 할 경우, 반환되는 텍스트가 bytes이던 점 수정. [`tb69wn6127`_] + v1.5.0 ------- diff --git a/setup.py b/setup.py index a80ce9c..8cbebee 100755 --- a/setup.py +++ b/setup.py @@ -81,7 +81,7 @@ def read(fname): "Topic :: System :: Shells", "Topic :: Utilities", "Topic :: Text Processing :: General", - "Topic :: Software Development :: Libraries", + "Topic :: Software Development :: Libraries :: Python Modules", "Operating System :: Unix", "Operating System :: POSIX", "Operating System :: MacOS", diff --git a/ufp/__init__.py b/ufp/__init__.py index 553c1ed..85e9e02 100755 --- a/ufp/__init__.py +++ b/ufp/__init__.py @@ -4,10 +4,12 @@ from __future__ import unicode_literals, absolute_import __title__ = 'ufp' -__version__ = '1.5.0' +__version__ = '1.5.1' __author__ = '별님' +__author_email__ = 'w7dn1ng75r@gmail.com' __license__ = 'GPL v3' __copyright__ = 'Copyright 2015 별님' +__url__ = 'https://github.com/Thestars3/pyufp/' from .ufp import * diff --git a/ufp/gui/notepad.py b/ufp/gui/notepad.py index 888849e..c3c89b3 100755 --- a/ufp/gui/notepad.py +++ b/ufp/gui/notepad.py @@ -22,13 +22,12 @@ def write(self, content): :param content: 쓸 내용. list의 경우 각 항목을 줄 단위로 분할하여 기록합니다. list내에 존재하는 unicode는 그대로 기록하고, 그 외는 pprint.pformat함수를 호출하여 텍스트로 바꾸어 기록합니다. 그 외 요소는 모두 pprint.pformat함수를 호출하여 기록합니다. 
""" if isinstance(content, unicode): - write = content - pass + write = content.encode('UTF-8') elif isinstance(content, list): write = list() for i in content: if isinstance(i, unicode): - write.append(i) + write.append(i.encode('UTF-8')) else: write.append(pprint.pformat(i)) write = '\n'.join(write) @@ -36,7 +35,6 @@ def write(self, content): write = pprint.pformat(content) self._process.stdin.write(write) self._process.stdin.close() - pass def close(self): """ diff --git a/ufp/html.py b/ufp/html.py index 79378e7..48528c9 100755 --- a/ufp/html.py +++ b/ufp/html.py @@ -4,7 +4,6 @@ from __future__ import unicode_literals, absolute_import, division, print_function import subprocess import tempfile -import os import pattern.web import tidylib @@ -65,17 +64,15 @@ def toText(html, converter='pattern.web', linebreaks=10, strip=False) : :rtype: unicode """ if converter == 'w3m': - tempPath = tempfile.mkstemp(prefix='.tmp_', suffix='.html')[1] - with open(tempPath, 'w+b') as f: - f.write(html) - pass - text = subprocess.check_output(['w3m', '-cols', '98304', '-dump' ,tempPath]) - os.remove(tempPath) + with tempfile.NamedTemporaryFile('wb', prefix='.tmp_', suffix='.html') as tempFile: + tempFile.write(html.encode('UTF-8')) + tempFile.flush() + text = subprocess.check_output(['w3m', '-cols', '98304', '-dump', tempFile.name]) if linebreaks is not None: text = pattern.web.collapse_linebreaks(text, linebreaks) if strip: text = text.strip() - return text + return text.decode('UTF-8') if converter == 'pattern.web': html = pattern.web.strip_javascript(html) @@ -93,4 +90,4 @@ def toText(html, converter='pattern.web', linebreaks=10, strip=False) : text = text.strip() return text - raise ValueError("'{0}'는 지원하지 않는 변환기입니다.".format(converter)) + raise ValueError("'{converter}'는 지원하지 않는 변환기입니다.".format(converter=converter)) diff --git a/ufp/image.py b/ufp/image.py index a1c2f77..6a75060 100755 --- a/ufp/image.py +++ b/ufp/image.py @@ -90,7 +90,7 @@ def changeColorDepth(image, 
colorCount): change = lambda value: math.trunc(value/raito)*raito return PIL.Image.eval(image, change) - raise ValueError('{0} 모드의 이미지는 사용 할 수 없습니다.'.format(image.mode)) + raise ValueError('{mode} 모드의 이미지는 사용 할 수 없습니다.'.format(mode=image.mode)) def mostPopularEdgeColor(image): """ @@ -213,7 +213,7 @@ def trim(image, backgroundColor=None, fuzz=0): bbox = diff.getbbox() # 이미지의 진짜 둘레를 찾는다. else: - raise ValueError('{0} 모드의 이미지는 처리가 불가능 합니다.'.format(image.mode)) + raise ValueError('{mode} 모드의 이미지는 처리가 불가능 합니다.'.format(mode=image.mode)) if bbox: image = image.crop(bbox) diff --git a/ufp/path.py b/ufp/path.py index 9d3e8b4..345e910 100644 --- a/ufp/path.py +++ b/ufp/path.py @@ -206,7 +206,7 @@ def toUrl(path): :return: file:///형식으로된 주소 :rtype: unicode """ - buffer = urllib.pathname2url(str(path)) + buffer = urllib.pathname2url(path.encode('UTF-8')) return urlparse.urljoin('file:', buffer) def replaceSpiecalChar(string, **options) : @@ -321,7 +321,7 @@ def unique(targetPath, spliteExt = True) : #중복되는 대상이 존재하는지 확인 existDuplicateFile = False; buffer = re.escape(targetBasename); - fullmatchRe = re.compile(r"^{0}$".format(buffer), re.IGNORECASE | re.UNICODE) + fullmatchRe = re.compile(r"^{targetBasename}$".format(targetBasename=buffer), re.IGNORECASE | re.UNICODE) for fileName in fileList: if fullmatchRe.search(fileName): existDuplicateFile = True; @@ -346,9 +346,9 @@ def unique(targetPath, spliteExt = True) : #중복 파일들의 숫자를 가져옴. 
escapedTargetFileName = re.escape(targetFileName); if spliteExt : - extractDupCountRe = re.compile(r"^%(escapedTargetFileName)s \(d(?P[0-9]+)\)\.%(targetFileExt)s$" % locals(), re.DOTALL | re.IGNORECASE | re.UNICODE); + extractDupCountRe = re.compile(r"^{escapedTargetFileName} \(d(?P[0-9]+)\)\.{targetFileExt}$".format(escapedTargetFileName=escapedTargetFileName, targetFileExt=targetFileExt), re.DOTALL | re.IGNORECASE | re.UNICODE); else : - extractDupCountRe = re.compile(r"^%(escapedTargetFileName)s \(d(?P[0-9]+)\)$" % locals(), re.DOTALL | re.IGNORECASE | re.UNICODE); + extractDupCountRe = re.compile(r"^{escapedTargetFileName} \(d(?P[0-9]+)\)$".format(escapedTargetFileName=escapedTargetFileName), re.DOTALL | re.IGNORECASE | re.UNICODE); counts = []; for fileName in fileList : m = extractDupCountRe.search(fileName); @@ -367,9 +367,9 @@ def unique(targetPath, spliteExt = True) : #중복 회피 이름 생성 if spliteExt : - uniqueName = "%(targetFileName)s (d%(notDuplicatedNumber)d).%(targetFileExt)s" % locals(); + uniqueName = "{targetFileName} (d{notDuplicatedNumber}).{targetFileExt}".format(targetFileName=targetFileName, notDuplicatedNumber=notDuplicatedNumber, targetFileExt=targetFileExt) else : - uniqueName = "%(targetFileName)s (d%(notDuplicatedNumber)d)" % locals(); + uniqueName = "{targetFileName} (d{notDuplicatedNumber})".format(targetFileName=targetFileName, notDuplicatedNumber=notDuplicatedNumber) return os.path.join(targetDirname, uniqueName); diff --git a/ufp/pdf.py b/ufp/pdf.py index c40ff4e..49226f8 100755 --- a/ufp/pdf.py +++ b/ufp/pdf.py @@ -69,8 +69,8 @@ def toBmps(pdf, format='bmp16m', dpi=200): """ #명령 설정 cmd = _ghostscriptCommand[:] - cmd.insert(-2, '-sDEVICE={0}'.format(format)) #출력 형식 - cmd.insert(-2, '-r{0}x{0}'.format(dpi)) #DPI + cmd.insert(-2, '-sDEVICE={format}'.format(format=format)) #출력 형식 + cmd.insert(-2, '-r{dpi}x{dpi}'.format(dpi=dpi)) #DPI try: #실행 @@ -179,8 +179,8 @@ def toBmp(pdf, format='bmp32b', dpi=200): :rtype: bytes """ cmd = 
_ghostscriptCommand[:] - cmd.insert(-2, '-sDEVICE={0}'.format(format)) #출력 형식 - cmd.insert(-2, '-r{0}x{0}'.format(dpi)) #DPI + cmd.insert(-2, '-sDEVICE={format}'.format(format=format)) #출력 형식 + cmd.insert(-2, '-r{dpi}x{dpi}'.format(dpi=dpi)) #DPI devnull = open(os.devnull, 'w') gs = subprocess.Popen(cmd, stdin=subprocess.PIPE, stdout=subprocess.PIPE, stderr=devnull) bmp = gs.communicate(pdf)[0] diff --git a/ufp/shell.py b/ufp/shell.py index 659052d..4d50b3e 100755 --- a/ufp/shell.py +++ b/ufp/shell.py @@ -41,5 +41,5 @@ def quote(string): :rtype: unicode """ buffer = string.replace("'", r"'\''") - return "'{0}'".format(buffer) + return "'{string}'".format(string=buffer) \ No newline at end of file diff --git a/ufp/web.py b/ufp/web.py index ade0114..2042d3a 100755 --- a/ufp/web.py +++ b/ufp/web.py @@ -7,7 +7,6 @@ import re import cookielib import chardet -import os from . import path as _p_path from . import string as _p_string @@ -55,7 +54,6 @@ def trimFilename(filename, **options): fromEncoding = None else: fromEncoding = options[u'from_encoding'] - pass #url 디코딩 filename = urllib.unquote(filename) @@ -124,7 +122,6 @@ def dequoteJsStr(jsStr) : ]; for before, after in REGEXS : jsStr = jsStr.replace(before, after) - pass return jsStr; def loadNetscapeCookie(session, cookiePath): @@ -142,10 +139,7 @@ def loadNetscapeCookie(session, cookiePath): :type cookiePath: unicode """ #임시 파일 생성 - tmpCookiePath = tempfile.mkstemp(prefix='.tmp_', suffix='.cookie')[1] - - #임시 파일 객체 생성 - with open(tmpCookiePath, mode='w+b') as tmpCookie: + with tempfile.NamedTemporaryFile('wb', prefix='.tmp_', suffix='.cookie') as tmpCookie: #넷스케이프 헤더 작성 tmpCookie.write('# Netscape HTTP Cookie File\n') tmpCookie.write('# http://www.netscape.com/newsref/std/cookie_spec.html\n') @@ -154,16 +148,12 @@ def loadNetscapeCookie(session, cookiePath): #기존 쿠키 파일의 내용을 삽입; 윈도우식 줄바꿈 -> 리눅스식으로 치환. 
with open(cookiePath, 'r') as f: - buffer = f.read().replace('\r\n', '\n') - tmpCookie.write(buffer) - pass - - #쿠키 파일 불러오기 - cookieJar = cookielib.MozillaCookieJar(tmpCookiePath) - cookieJar.load(ignore_discard=True, ignore_expires=True) - session.cookies = cookieJar - - #임시 파일 삭제 - os.remove(tmpCookiePath) + buffer = f.read().replace('\r', '\n') + tmpCookie.write(buffer.encode('UTF-8')) + tmpCookie.flush() + + #쿠키 파일 불러오기 + cookieJar = cookielib.MozillaCookieJar(tmpCookie.name) + cookieJar.load(ignore_discard=True, ignore_expires=True) + session.cookies = cookieJar pass - \ No newline at end of file