Skip to content

Commit

Permalink
UTF-8 CSV reader used in place of standard Python 2 one which can't h…
Browse files Browse the repository at this point in the history
…andle unicode.
  • Loading branch information
JamesBradbury committed Apr 12, 2018
1 parent 6e8e7a0 commit 3eff4aa
Showing 1 changed file with 35 additions and 3 deletions.
38 changes: 35 additions & 3 deletions geokey_dataimports/helpers/model_helpers.py
@@ -1,14 +1,46 @@
import csv
import codecs

from django.utils.html import strip_tags


def import_from_csv(features, fields, file):
reader = csv.reader(file)
class UTF8Recoder(object):
    """
    Iterator that reads an encoded stream and reencodes the input to UTF-8.

    Each iteration step pulls one item from a ``codecs`` stream reader
    wrapped around ``f`` (decoding it from ``encoding``) and returns that
    item encoded as a UTF-8 byte string.

    :param f: byte stream to read from.
    :param encoding: name of the codec used to decode ``f``.
    """

    def __init__(self, f, encoding):
        self.reader = codecs.getreader(encoding)(f)

    def __iter__(self):
        return self

    def next(self):
        # Use the ``next()`` builtin rather than ``self.reader.next()``:
        # the stream reader only exposes ``__next__`` on Python 3, and the
        # builtin dispatches to whichever method the interpreter provides.
        return next(self.reader).encode("utf-8")

    # Python 3 iterator protocol; ``next`` is kept for Python 2 callers.
    __next__ = next


class UnicodeReader(object):
    """
    A CSV reader which will iterate over lines in the CSV file "f",
    which is encoded in the given encoding.

    Every row is returned as a list of text (unicode) strings.

    :param f: byte stream containing the CSV data.
    :param dialect: CSV dialect passed through to ``csv.reader``.
    :param encoding: name of the codec used to decode ``f``.
    :param kwds: extra formatting parameters passed to ``csv.reader``.
    """

    def __init__(self, f, dialect=csv.excel, encoding="utf-8", **kwds):
        # ``bytes is str`` is only true on Python 2, where the csv module
        # works on byte strings: the input is recoded to UTF-8 bytes and
        # each cell is decoded again in ``next``. On Python 3 the csv
        # module works on text, so the decoded stream is used directly.
        self._decode_cells = bytes is str
        if self._decode_cells:
            f = UTF8Recoder(f, encoding)
        else:
            f = codecs.getreader(encoding)(f)
        self.reader = csv.reader(f, dialect=dialect, **kwds)

    def __iter__(self):
        return self

    def next(self):
        # The ``next()`` builtin works with both the Python 2 (``next``)
        # and Python 3 (``__next__``) iterator protocols.
        row = next(self.reader)
        if self._decode_cells:
            return [s.decode("utf-8") for s in row]
        return row

    # Python 3 iterator protocol; ``next`` is kept for Python 2 callers.
    __next__ = next


def import_from_csv(features, fields, file_obj):
reader = UnicodeReader(file_obj)
for fieldname in next(reader, None):
fields.append({
'name': strip_tags(fieldname),
'good_types': set(['TextField', 'LookupField']),
'good_types': {'TextField', 'LookupField'},
'bad_types': set([])
})
line = 0
Expand Down

0 comments on commit 3eff4aa

Please sign in to comment.