Skip to content

Commit

Permalink
Merge pull request #73 from CheViana/develop
Browse files: browse the repository at this point in the history
Fix file upload for python3
  • Loading branch information
looselycoupled committed Jun 3, 2016
2 parents c8dc2c4 + 447ce19 commit e9df108
Show file tree
Hide file tree
Showing 3 changed files with 21 additions and 14 deletions.
24 changes: 18 additions & 6 deletions dataset/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,8 @@
from markupfield.fields import MarkupField
from model_utils.models import TimeStampedModel
from django.core.urlresolvers import reverse
import csv
import codecs

##########################################################################
## Helper Models
Expand Down Expand Up @@ -139,13 +141,23 @@ def name(self):

def read_csv_headers(self):
"""
Returns the headers of the file
Reads the first line of the data file to retrieve the list of column names (header).
Also counts the number of rows remaining in the CSV file after the header.
Returns a tuple (header, number_of_rows) for a CSV file.
Returns None for a non-CSV file (TODO).
NOTE: Why can't we close this file without errors?
"""
self.dataset.open('r')
reader = csv.reader(self.dataset, delimiter=self.delimiter.encode('utf-8'))
header = reader.next()
self.dataset.close()
return header
self.data.open('rb')
if self.datatype == self.DATATYPE.csv:
# FieldFile produces a stream of bytes, but csv expects a stream of strings;
# lazily decode the stream as UTF-8 (undecodable bytes become the U+FFFD replacement character)
first_line_decoded = codecs.iterdecode(self.data, encoding='utf-8', errors='replace')
reader = csv.reader(first_line_decoded, delimiter=self.delimiter)
header = next(reader)
length = sum(1 for row in reader) # might be a bit slow
# No close method allowed?
return header, length
return None # ?? what to return for not a csv

def __unicode__(self):
return self.name
Expand Down
10 changes: 3 additions & 7 deletions dataset/signals.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,6 @@

import base64
import hashlib
import unicodecsv as csv

from dataset.models import DataFile
from django.db.models.signals import pre_delete, pre_save
Expand All @@ -37,7 +36,7 @@ def datafile_file_compute(sender, instance, **kwargs):
TODO: Switch over to Celery async processing for this
TODO: Make this a lot better.
NOTE: Why can't we close this file without errors?
"""
sha = hashlib.sha256()
instance.data.open('rb')
Expand All @@ -48,16 +47,13 @@ def datafile_file_compute(sender, instance, **kwargs):
sha.update(instance.data.read())

if instance.datatype == instance.DATATYPE.csv:
reader = csv.reader(instance.data, delimiter=instance.delimiter.encode('utf-8'))
header = reader.next()
header, length = instance.read_csv_headers()
instance.dimensions = len(header)
instance.length = sum(1 for row in reader)
instance.length = length

instance.signature = base64.b64encode(sha.digest())
instance.filesize = instance.data.size

# No close method allowed?


@receiver(pre_delete, sender=DataFile)
def datafile_delete(sender, instance, **kwargs):
Expand Down
1 change: 0 additions & 1 deletion requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,6 @@ PyJWT==1.4.0
# Utilities
six==1.10.0
boto==2.39.0
unicodecsv==0.14.1
Markdown==2.6.6
bleach==1.4.2
python-dateutil==2.5.2
Expand Down

0 comments on commit e9df108

Please sign in to comment.