Skip to content

Commit

Permalink
Rewrote file exports to use native and speedy OGR transformations
Browse files Browse the repository at this point in the history
  • Loading branch information
Carissa Brittain authored and Carissa Brittain committed Nov 17, 2011
1 parent 6567a37 commit a5173f2
Show file tree
Hide file tree
Showing 3 changed files with 128 additions and 138 deletions.
3 changes: 2 additions & 1 deletion settings.py
Original file line number Original file line Diff line number Diff line change
Expand Up @@ -2,7 +2,8 @@


#from settings_philadelphia import * #from settings_philadelphia import *
#from settings_sanfrancisco import * #from settings_sanfrancisco import *
from settings_sacramento import * from settings_greenprint import *
#from settings_dctreekit import *


DEBUG = True DEBUG = True
TEMPLATE_DEBUG = DEBUG TEMPLATE_DEBUG = DEBUG
Expand Down
188 changes: 66 additions & 122 deletions treemap/spreadsheet.py
Original file line number Original file line Diff line number Diff line change
Expand Up @@ -11,6 +11,47 @@
from django.utils.encoding import smart_str, smart_unicode from django.utils.encoding import smart_str, smart_unicode
from django.utils.translation import ugettext as _ from django.utils.translation import ugettext as _


class LazyIterator(object):
    """Iterate over a Django queryset in fixed-size chunks.

    Avoids materializing the entire result set at once: rows are pulled
    ``chunk_size`` at a time by slicing ``query_set``, so memory stays
    bounded while exporting very large tables.
    """

    def __init__(self, query_set, chunk_size=100):
        self.query_set = query_set
        self.chunk_size = chunk_size
        self.chunk_index = 0          # offset of the next chunk to fetch
        self.current_list_index = 0   # cursor within the current chunk
        self.current_list_length = 0  # number of rows in the current chunk

    def __iter__(self):
        return self

    def __len__(self):
        # Raise TypeError instead of a bare Exception: stdlib helpers such
        # as list() / operator.length_hint() swallow TypeError and fall back
        # to plain iteration, whereas a bare Exception would propagate and
        # crash callers that merely probe for a length.
        raise TypeError("You can't do that because calling length would make it not lazy")

    def reset(self):
        """Rewind so iteration starts again from the first row."""
        self.chunk_index = 0
        self.current_list_index = 0
        self.current_list_length = 0

    def next(self):
        """Return the next item, fetching the next chunk when needed."""
        # Iterative instead of the original tail-recursion: loop until an
        # item is available or the queryset is exhausted.
        while self.current_list_index >= self.current_list_length:
            self.next_chunk()
            if self.current_list_length == 0:
                raise StopIteration
        current_item = self.current_list[self.current_list_index]
        self.current_list_index += 1
        return current_item

    # Python 3 compatibility: the iterator protocol calls __next__.
    __next__ = next

    def next_chunk(self):
        # Slice the next window out of the queryset and advance the offset.
        # An empty slice marks the end of the data.
        next_index = self.chunk_index + self.chunk_size
        self.current_list = list(self.query_set[self.chunk_index:next_index])
        self.chunk_index = next_index
        self.current_list_index = 0
        self.current_list_length = len(self.current_list)

# from: http://www.djangosnippets.org/snippets/1151/ # from: http://www.djangosnippets.org/snippets/1151/
class ExcelResponse(HttpResponse): class ExcelResponse(HttpResponse):


Expand All @@ -20,136 +61,39 @@ def __init__(self, data, output_name='excel_data', headers=None,
# Make sure we've got the right type of data to work with # Make sure we've got the right type of data to work with
valid_data = False valid_data = False
if isinstance(data, ValuesQuerySet): if isinstance(data, ValuesQuerySet):
data = list(data) data = LazyIterator(data)
#data = list(data)
elif isinstance(data, QuerySet): elif isinstance(data, QuerySet):
data = list(data.values()) data = LazyIterator(data.values())
if hasattr(data, '__getitem__'): headers = next(data).keys()
if isinstance(data[0], dict):
if headers is None:
headers = data[0].keys()
data = [[row[col] for col in headers] for row in data]
data.insert(0, headers)
if hasattr(data[0], '__getitem__'):
valid_data = True
assert valid_data is True, "ExcelResponse requires a sequence of sequences"


import StringIO import StringIO
output = StringIO.StringIO() output = StringIO.StringIO()
# Excel has a limit on number of rows; if we have more than that, make a csv output.write('"%s"\n' % '","'.join(headers))
use_xls = False flush_index = 0
if len(data) <= 65536 and force_csv is not True: for row in data:
try: row = [row[col] for col in headers]
import xlwt out_row = []
except ImportError: for value in row:
# xlwt doesn't exist; fall back to csv if not isinstance(value, basestring):
pass value = unicode(value)
else: value = value.encode(encoding)
use_xls = True out_row.append(value.replace('"', '""'))
if use_xls: output.write('"%s"\n' %
book = xlwt.Workbook(encoding=encoding) '","'.join(out_row))
sheet = book.add_sheet('Sheet 1') flush_index = flush_index + 1
styles = {'datetime': xlwt.easyxf(num_format_str='yyyy-mm-dd hh:mm:ss'), if flush_index == 1000:
'date': xlwt.easyxf(num_format_str='yyyy-mm-dd'), output.flush()
'time': xlwt.easyxf(num_format_str='hh:mm:ss'), print "Flushing!"
'default': xlwt.Style.default_style} flush_index = 0


for rowx, row in enumerate(data): mimetype = 'text/csv'
for colx, value in enumerate(row): file_ext = 'csv'
if isinstance(value, datetime.datetime):
cell_style = styles['datetime']
elif isinstance(value, datetime.date):
cell_style = styles['date']
elif isinstance(value, datetime.time):
cell_style = styles['time']
else:
cell_style = styles['default']
sheet.write(rowx, colx, value, style=cell_style)
book.save(output)
mimetype = 'application/vnd.ms-excel'
file_ext = 'xls'
else:
for row in data:
out_row = []
for value in row:
if not isinstance(value, basestring):
value = unicode(value)
value = value.encode(encoding)
out_row.append(value.replace('"', '""'))
output.write('"%s"\n' %
'","'.join(out_row))
mimetype = 'text/csv'
file_ext = 'csv'
output.seek(0) output.seek(0)
super(ExcelResponse, self).__init__(content=output.getvalue(), super(ExcelResponse, self).__init__(content=output.getvalue(),
mimetype=mimetype) mimetype=mimetype)
self['Content-Disposition'] = 'attachment;filename="%s.%s"' % \ self['Content-Disposition'] = 'attachment;filename="%s.%s"' % \
(output_name.replace('"', '\"'), file_ext) (output_name.replace('"', '\"'), file_ext)


# from: http://www.djangosnippets.org/snippets/1151/
# from: http://www.djangosnippets.org/snippets/1151/
def queryset_to_excel_file(qs, filename, force_csv=False, headers=None, encoding='utf8'):
    """Write a queryset (or sequence of sequences) to disk as a spreadsheet.

    Writes ``filename`` + '.xls' when xlwt is installed, the data fits
    Excel's 65536-row limit and ``force_csv`` is not True; otherwise
    writes ``filename`` + '.csv'. Returns the full output file name.
    """
    data = qs

    # Make sure we've got the right type of data to work with
    valid_data = False
    if isinstance(data, ValuesQuerySet):
        data = list(data)
    elif isinstance(data, QuerySet):
        data = list(data.values())
    # Guard against empty data: the original indexed data[0] unconditionally,
    # raising IndexError instead of the intended validation failure.
    if hasattr(data, '__getitem__') and len(data) > 0:
        if isinstance(data[0], dict):
            if headers is None:
                headers = data[0].keys()
            data = [[row[col] for col in headers] for row in data]
            data.insert(0, headers)
        if hasattr(data[0], '__getitem__'):
            valid_data = True
    assert valid_data is True, "ExcelResponse requires a sequence of sequences"

    # Excel has a limit on number of rows; if we have more than that, make a csv
    use_xls = False
    file_ext = '.csv'
    if len(data) <= 65536 and force_csv is not True:
        try:
            import xlwt
        except ImportError:
            # xlwt doesn't exist; fall back to csv
            pass
        else:
            use_xls = True
            file_ext = '.xls'
    name = filename + file_ext
    # open() instead of the file() builtin (file() is removed in Python 3).
    output = open(name, 'wb')
    try:
        if use_xls:
            book = xlwt.Workbook(encoding=encoding)
            sheet = book.add_sheet('Sheet 1')
            styles = {'datetime': xlwt.easyxf(num_format_str='yyyy-mm-dd hh:mm:ss'),
                      'date': xlwt.easyxf(num_format_str='yyyy-mm-dd'),
                      'time': xlwt.easyxf(num_format_str='hh:mm:ss'),
                      'default': xlwt.Style.default_style}

            for rowx, row in enumerate(data):
                for colx, value in enumerate(row):
                    if isinstance(value, datetime.datetime):
                        cell_style = styles['datetime']
                    elif isinstance(value, datetime.date):
                        cell_style = styles['date']
                    elif isinstance(value, datetime.time):
                        cell_style = styles['time']
                    else:
                        cell_style = styles['default']
                    sheet.write(rowx, colx, value, style=cell_style)
            book.save(output)
        else:
            for row in data:
                out_row = []
                for value in row:
                    if not isinstance(value, basestring):
                        value = unicode(value)
                    value = value.encode(encoding)
                    # Double embedded quotes per the CSV quoting convention.
                    out_row.append(value.replace('"', '""'))
                output.write('"%s"\n' %
                             '","'.join(out_row))
    finally:
        # The original closed the handle only on the csv path, leaking the
        # file descriptor for xls output; always close it.
        output.close()
    return name
75 changes: 60 additions & 15 deletions treemap/views.py
Original file line number Original file line Diff line number Diff line change
Expand Up @@ -21,8 +21,6 @@
from django.forms.formsets import formset_factory from django.forms.formsets import formset_factory
from django.forms.models import inlineformset_factory, modelformset_factory from django.forms.models import inlineformset_factory, modelformset_factory


from shapes.views import ShpResponder

import simplejson import simplejson


from models import * from models import *
Expand All @@ -33,6 +31,14 @@
import time import time
from time import mktime, strptime from time import mktime, strptime
from datetime import timedelta from datetime import timedelta
import tempfile
import zipfile
import subprocess

try:
from cStringIO import StringIO
except ImportError:
from StringIO import StringIO




app_models = {'UserProfile':'profiles','User':'auth'} app_models = {'UserProfile':'profiles','User':'auth'}
Expand Down Expand Up @@ -1238,6 +1244,51 @@ def _build_tree_search_result(request):


return trees, geog_obj, ' AND '.join(tile_query) return trees, geog_obj, ' AND '.join(tile_query)



def zip_shp(shapefile_path, archive_name):
    """Bundle the four shapefile components (.shp, .shx, .prj, .dbf) into an
    in-memory zip archive and return the raw archive bytes."""
    source_base = shapefile_path.replace('.shp', '')
    target_base = archive_name.replace('.shp', '')
    out = StringIO()
    archive = zipfile.ZipFile(out, 'w', zipfile.ZIP_DEFLATED)
    for extension in ('shp', 'shx', 'prj', 'dbf'):
        archive.write('%s.%s' % (source_base, extension),
                      arcname='%s.%s' % (target_base, extension))
    archive.close()
    out.flush()
    payload = out.getvalue()
    out.close()
    return payload


def zip_file(file_path, archive_name):
    """Compress a single file into an in-memory zip archive, stored under
    ``archive_name``, and return the raw archive bytes."""
    out = StringIO()
    archive = zipfile.ZipFile(out, 'w', zipfile.ZIP_DEFLATED)
    archive.write(file_path, arcname=archive_name)
    archive.close()
    out.flush()
    payload = out.getvalue()
    out.close()
    return payload

def ogr_conversion(output_type, sql, extension=None):
    """Export the rows selected by ``sql`` via the ogr2ogr command-line tool.

    output_type -- an OGR driver name, e.g. 'ESRI Shapefile', 'KML', 'CSV'
    sql         -- SQL SELECT statement run against the default database
    extension   -- file extension for single-file formats; when None, a
                   temp directory is used (multi-file formats like shapefiles)

    Returns an HttpResponse serving a 'trees.zip' attachment, or a JSON
    error payload when ogr2ogr exits non-zero.
    """
    import os
    import shutil

    dbsettings = settings.DATABASES['default']
    if extension:
        tmp = tempfile.NamedTemporaryFile(suffix='.%s' % extension, mode='w+b')
        # we must close the file for GDAL to be able to open and write to it
        tmp.close()
        tmp_name = tmp.name
    else:
        # Multi-file formats: give ogr2ogr a directory to write into.
        tmp_name = tempfile.mkdtemp()

    connection = 'PG:dbname=%s host=%s port=%s password=%s user=%s' % (
        dbsettings['NAME'], dbsettings['HOST'], dbsettings['PORT'],
        dbsettings['PASSWORD'], dbsettings['USER'])
    command = ['ogr2ogr', '-sql', sql, '-f', output_type, tmp_name, connection]
    try:
        if subprocess.call(command) != 0:
            return render_to_json({'status': 'error'})
        if os.path.isdir(tmp_name):
            # BUG FIX: the original passed the directory itself to zip_file(),
            # which archives only the directory entry, not the exported
            # shapefile components. Archive each file inside the directory.
            # (renamed from 'zipfile' which shadowed the zipfile module)
            out = StringIO()
            archive = zipfile.ZipFile(out, 'w', zipfile.ZIP_DEFLATED)
            for entry in os.listdir(tmp_name):
                archive.write(os.path.join(tmp_name, entry), arcname=entry)
            archive.close()
            payload = out.getvalue()
            out.close()
        else:
            payload = zip_file(tmp_name, 'trees')
        response = HttpResponse(payload, mimetype='application/zip')
        response['Content-Disposition'] = 'attachment; filename=trees.zip'
        return response
    finally:
        # Always remove the temporary output so repeated exports don't
        # accumulate files on disk (the original never cleaned up).
        if os.path.isdir(tmp_name):
            shutil.rmtree(tmp_name, ignore_errors=True)
        elif os.path.exists(tmp_name):
            os.remove(tmp_name)

def advanced_search(request, format='json'): def advanced_search(request, format='json'):
""" """
urlparams: urlparams:
Expand All @@ -1263,21 +1314,15 @@ def advanced_search(request, format='json'):
if format == "geojson": if format == "geojson":
return render_to_geojson(trees, geom_field='geometry', additional_data={'summaries': esj}) return render_to_geojson(trees, geom_field='geometry', additional_data={'summaries': esj})
elif format == "shp": elif format == "shp":
print 'shp for %s trees' % len(trees) sql = str(trees.query)
shpresponder = ShpResponder(trees,geo_field='geometry') return ogr_conversion('ESRI Shapefile', sql)
tmp = shpresponder.write_shapefile_to_tmp_file(shpresponder.queryset)
zipfile = shpresponder.zip_response(tmp,shpresponder.file_name,shpresponder.mimetype,shpresponder.readme)
response = HttpResponse(zipfile, mimetype='application/zip')
response['Content-Disposition'] = 'attachment; filename=trees.zip'
return response
elif format == "kml": elif format == "kml":
print 'kml for %s trees' % len(trees) sql = str(trees.query)
trees = trees.kml() return ogr_conversion('KML', sql, 'kml')
print 'kml for %s trees' % len(trees)
return render_to_kml("treemap/kml_output.kml", {'trees': trees,'root_url':settings.ROOT_URL})
elif format == "csv": elif format == "csv":
return ExcelResponse(trees, force_csv=True) sql = str(trees.query)

return ogr_conversion('CSV', sql, 'csv')



geography = None geography = None
summaries, benefits = None, None summaries, benefits = None, None
Expand Down

0 comments on commit a5173f2

Please sign in to comment.