Skip to content

Commit

Permalink
Error with useful comment when user file isn't UTF-8
Browse files Browse the repository at this point in the history
re: issue #799
  • Loading branch information
stannam committed Feb 22, 2022
1 parent a536dab commit 5f5fc1b
Show file tree
Hide file tree
Showing 3 changed files with 27 additions and 8 deletions.
19 changes: 12 additions & 7 deletions corpustools/corpus/io/csv.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@

from .helper import parse_transcription, AnnotationType, SyllableBaseAnnotation

from corpustools.exceptions import DelimiterError, PCTError
from corpustools.exceptions import DelimiterError, PCTEncodingError, PCTError
import corpustools.gui.modernize as modernize

import time
Expand Down Expand Up @@ -44,12 +44,17 @@ def inspect_csv(path, num_lines = 10, coldelim = None, transdelim = None):
else:
trans_delimiters = ['.',' ', ';', ',']

with open(path, 'r', encoding='utf-8-sig') as f:
lines = []
head = f.readline().strip()
for line in f.readlines():
if line != '\n':
lines.append(line.strip())
try:
with open(path, 'r', encoding='utf-8-sig') as f:
lines = []
head = f.readline().strip()
for line in f.readlines():
if line != '\n':
lines.append(line.strip())
except UnicodeDecodeError:
raise(PCTEncodingError("PCT cannot decode your text file. Make sure it is in UTF-8.\n\n"
"To convert your file to UTF-8, please open it in Notepad or TextEdit "
"and then 'Save as' with the encoding set to 'UTF-8.'"))

best = '' ## best guess for the column delimiter (candidates: ',', 'tab', ':', and '|')
num = 1
Expand Down
9 changes: 9 additions & 0 deletions corpustools/exceptions.py
Original file line number Diff line number Diff line change
Expand Up @@ -59,6 +59,15 @@ class PCTContextError(PCTError):
"""
pass

## External file loading exceptions

class PCTEncodingError(PCTError):
    """
    Raised when an external file supplied by the user cannot be decoded.

    The typical cause is a text file saved in an encoding other than
    UTF-8 (for example, UTF-16).
    """

## Corpus loading exceptions

class PCTOSError(PCTError):
Expand Down
7 changes: 6 additions & 1 deletion corpustools/gui/iogui.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
import webbrowser

from .imports import *
from corpustools.exceptions import PCTError, PCTPythonError, MissingFeatureError
from corpustools.exceptions import PCTError, PCTPythonError, MissingFeatureError, PCTEncodingError
from corpustools.decorators import check_for_errors
from corpustools.corpus.io.binary import load_binary, save_binary, PCTUnpickler
from corpustools.corpus.io.csv import (inspect_csv, load_corpus_csv,
Expand Down Expand Up @@ -755,6 +755,11 @@ def inspect(self):
if self.textType == 'csv':
try:
atts, coldelim = inspect_csv(self.pathWidget.value())
except PCTEncodingError as error:
# the selected file could not be decoded as UTF-8; show the error message to the user
reply = QMessageBox.critical(self, "Error encountered", str(error))
self.updateColumnFrame([])
return
except PCTError:
self.updateColumnFrame([])
return
Expand Down

0 comments on commit 5f5fc1b

Please sign in to comment.