Skip to content

Commit

Permalink
Rewrote file exports to use native and speedy OGR transformations
Browse files Browse the repository at this point in the history
  • Loading branch information
Carissa Brittain authored and Carissa Brittain committed Nov 17, 2011
1 parent 6567a37 commit a5173f2
Show file tree
Hide file tree
Showing 3 changed files with 128 additions and 138 deletions.
3 changes: 2 additions & 1 deletion settings.py
Original file line number Original file line Diff line number Diff line change
Expand Up @@ -2,7 +2,8 @@


#from settings_philadelphia import * #from settings_philadelphia import *
#from settings_sanfrancisco import * #from settings_sanfrancisco import *
from settings_sacramento import * from settings_greenprint import *
#from settings_dctreekit import *


DEBUG = True DEBUG = True
TEMPLATE_DEBUG = DEBUG TEMPLATE_DEBUG = DEBUG
Expand Down
188 changes: 66 additions & 122 deletions treemap/spreadsheet.py
Original file line number Original file line Diff line number Diff line change
Expand Up @@ -11,6 +11,47 @@
from django.utils.encoding import smart_str, smart_unicode from django.utils.encoding import smart_str, smart_unicode
from django.utils.translation import ugettext as _ from django.utils.translation import ugettext as _


class LazyIterator(object):
    """Iterate over a Django queryset in fixed-size chunks.

    Avoids materializing the entire result set at once: rows are pulled
    ``chunk_size`` at a time by slicing ``query_set``, so memory stays
    bounded while exporting very large tables.
    """

    def __init__(self, query_set, chunk_size=100):
        self.query_set = query_set
        self.chunk_size = chunk_size
        self.chunk_index = 0          # offset of the next chunk to fetch
        self.current_list_index = 0   # cursor within the current chunk
        self.current_list_length = 0  # number of rows in the current chunk

    def __iter__(self):
        return self

    def __len__(self):
        # Raise TypeError instead of a bare Exception: stdlib helpers such
        # as list() / operator.length_hint() swallow TypeError and fall back
        # to plain iteration, whereas a bare Exception would propagate and
        # crash callers that merely probe for a length.
        raise TypeError("You can't do that because calling length would make it not lazy")

    def reset(self):
        """Rewind so iteration starts again from the first row."""
        self.chunk_index = 0
        self.current_list_index = 0
        self.current_list_length = 0

    def next(self):
        """Return the next item, fetching the next chunk when needed."""
        # Iterative instead of the original tail-recursion: loop until an
        # item is available or the queryset is exhausted.
        while self.current_list_index >= self.current_list_length:
            self.next_chunk()
            if self.current_list_length == 0:
                raise StopIteration
        current_item = self.current_list[self.current_list_index]
        self.current_list_index += 1
        return current_item

    # Python 3 compatibility: the iterator protocol calls __next__.
    __next__ = next

    def next_chunk(self):
        # Slice the next window out of the queryset and advance the offset.
        # An empty slice marks the end of the data.
        next_index = self.chunk_index + self.chunk_size
        self.current_list = list(self.query_set[self.chunk_index:next_index])
        self.chunk_index = next_index
        self.current_list_index = 0
        self.current_list_length = len(self.current_list)

# from: http://www.djangosnippets.org/snippets/1151/ # from: http://www.djangosnippets.org/snippets/1151/
class ExcelResponse(HttpResponse): class ExcelResponse(HttpResponse):


Expand All @@ -20,136 +61,39 @@ def __init__(self, data, output_name='excel_data', headers=None,
# Make sure we've got the right type of data to work with # Make sure we've got the right type of data to work with
valid_data = False valid_data = False
if isinstance(data, ValuesQuerySet): if isinstance(data, ValuesQuerySet):
data = list(data) data = LazyIterator(data)
#data = list(data)
elif isinstance(data, QuerySet): elif isinstance(data, QuerySet):
data = list(data.values()) data = LazyIterator(data.values())
if hasattr(data, '__getitem__'): headers = next(data).keys()
if isinstance(data[0], dict):
if headers is None:
headers = data[0].keys()
data = [[row[col] for col in headers] for row in data]
data.insert(0, headers)
if hasattr(data[0], '__getitem__'):
valid_data = True
assert valid_data is True, "ExcelResponse requires a sequence of sequences"


import StringIO import StringIO
output = StringIO.StringIO() output = StringIO.StringIO()
# Excel has a limit on number of rows; if we have more than that, make a csv output.write('"%s"\n' % '","'.join(headers))
use_xls = False flush_index = 0
if len(data) <= 65536 and force_csv is not True: for row in data:
try: row = [row[col] for col in headers]
import xlwt out_row = []
except ImportError: for value in row:
# xlwt doesn't exist; fall back to csv if not isinstance(value, basestring):
pass value = unicode(value)
else: value = value.encode(encoding)
use_xls = True out_row.append(value.replace('"', '""'))
if use_xls: output.write('"%s"\n' %
book = xlwt.Workbook(encoding=encoding) '","'.join(out_row))
sheet = book.add_sheet('Sheet 1') flush_index = flush_index + 1
styles = {'datetime': xlwt.easyxf(num_format_str='yyyy-mm-dd hh:mm:ss'), if flush_index == 1000:
'date': xlwt.easyxf(num_format_str='yyyy-mm-dd'), output.flush()
'time': xlwt.easyxf(num_format_str='hh:mm:ss'), print "Flushing!"
'default': xlwt.Style.default_style} flush_index = 0


for rowx, row in enumerate(data): mimetype = 'text/csv'
for colx, value in enumerate(row): file_ext = 'csv'
if isinstance(value, datetime.datetime):
cell_style = styles['datetime']
elif isinstance(value, datetime.date):
cell_style = styles['date']
elif isinstance(value, datetime.time):
cell_style = styles['time']
else:
cell_style = styles['default']
sheet.write(rowx, colx, value, style=cell_style)
book.save(output)
mimetype = 'application/vnd.ms-excel'
file_ext = 'xls'
else:
for row in data:
out_row = []
for value in row:
if not isinstance(value, basestring):
value = unicode(value)
value = value.encode(encoding)
out_row.append(value.replace('"', '""'))
output.write('"%s"\n' %
'","'.join(out_row))
mimetype = 'text/csv'
file_ext = 'csv'
output.seek(0) output.seek(0)
super(ExcelResponse, self).__init__(content=output.getvalue(), super(ExcelResponse, self).__init__(content=output.getvalue(),
mimetype=mimetype) mimetype=mimetype)
self['Content-Disposition'] = 'attachment;filename="%s.%s"' % \ self['Content-Disposition'] = 'attachment;filename="%s.%s"' % \
(output_name.replace('"', '\"'), file_ext) (output_name.replace('"', '\"'), file_ext)


# from: http://www.djangosnippets.org/snippets/1151/
# from: http://www.djangosnippets.org/snippets/1151/
def queryset_to_excel_file(qs, filename, force_csv=False, headers=None, encoding='utf8'):
    """Write a queryset (or sequence of sequences) to disk as a spreadsheet.

    Writes ``filename`` + '.xls' when xlwt is installed, the data fits
    Excel's 65536-row limit and ``force_csv`` is not True; otherwise
    writes ``filename`` + '.csv'. Returns the full output file name.
    """
    data = qs

    # Make sure we've got the right type of data to work with
    valid_data = False
    if isinstance(data, ValuesQuerySet):
        data = list(data)
    elif isinstance(data, QuerySet):
        data = list(data.values())
    # Guard against empty data: the original indexed data[0] unconditionally,
    # raising IndexError instead of the intended validation failure.
    if hasattr(data, '__getitem__') and len(data) > 0:
        if isinstance(data[0], dict):
            if headers is None:
                headers = data[0].keys()
            data = [[row[col] for col in headers] for row in data]
            data.insert(0, headers)
        if hasattr(data[0], '__getitem__'):
            valid_data = True
    assert valid_data is True, "ExcelResponse requires a sequence of sequences"

    # Excel has a limit on number of rows; if we have more than that, make a csv
    use_xls = False
    file_ext = '.csv'
    if len(data) <= 65536 and force_csv is not True:
        try:
            import xlwt
        except ImportError:
            # xlwt doesn't exist; fall back to csv
            pass
        else:
            use_xls = True
            file_ext = '.xls'
    name = filename + file_ext
    # open() instead of the file() builtin (file() is removed in Python 3).
    output = open(name, 'wb')
    try:
        if use_xls:
            book = xlwt.Workbook(encoding=encoding)
            sheet = book.add_sheet('Sheet 1')
            styles = {'datetime': xlwt.easyxf(num_format_str='yyyy-mm-dd hh:mm:ss'),
                      'date': xlwt.easyxf(num_format_str='yyyy-mm-dd'),
                      'time': xlwt.easyxf(num_format_str='hh:mm:ss'),
                      'default': xlwt.Style.default_style}

            for rowx, row in enumerate(data):
                for colx, value in enumerate(row):
                    if isinstance(value, datetime.datetime):
                        cell_style = styles['datetime']
                    elif isinstance(value, datetime.date):
                        cell_style = styles['date']
                    elif isinstance(value, datetime.time):
                        cell_style = styles['time']
                    else:
                        cell_style = styles['default']
                    sheet.write(rowx, colx, value, style=cell_style)
            book.save(output)
        else:
            for row in data:
                out_row = []
                for value in row:
                    if not isinstance(value, basestring):
                        value = unicode(value)
                    value = value.encode(encoding)
                    # Double embedded quotes per the CSV quoting convention.
                    out_row.append(value.replace('"', '""'))
                output.write('"%s"\n' %
                             '","'.join(out_row))
    finally:
        # The original closed the handle only on the csv path, leaking the
        # file descriptor for xls output; always close it.
        output.close()
    return name
75 changes: 60 additions & 15 deletions treemap/views.py
Original file line number Original file line Diff line number Diff line change
Expand Up @@ -21,8 +21,6 @@
from django.forms.formsets import formset_factory from django.forms.formsets import formset_factory
from django.forms.models import inlineformset_factory, modelformset_factory from django.forms.models import inlineformset_factory, modelformset_factory


from shapes.views import ShpResponder

import simplejson import simplejson


from models import * from models import *
Expand All @@ -33,6 +31,14 @@
import time import time
from time import mktime, strptime from time import mktime, strptime
from datetime import timedelta from datetime import timedelta
import tempfile
import zipfile
import subprocess

try:
from cStringIO import StringIO
except ImportError:
from StringIO import StringIO




app_models = {'UserProfile':'profiles','User':'auth'} app_models = {'UserProfile':'profiles','User':'auth'}
Expand Down Expand Up @@ -1238,6 +1244,51 @@ def _build_tree_search_result(request):


return trees, geog_obj, ' AND '.join(tile_query) return trees, geog_obj, ' AND '.join(tile_query)



def zip_shp(shapefile_path, archive_name):
    """Bundle the four shapefile components (.shp, .shx, .prj, .dbf) into an
    in-memory zip archive and return the raw archive bytes."""
    source_base = shapefile_path.replace('.shp', '')
    target_base = archive_name.replace('.shp', '')
    out = StringIO()
    archive = zipfile.ZipFile(out, 'w', zipfile.ZIP_DEFLATED)
    for extension in ('shp', 'shx', 'prj', 'dbf'):
        archive.write('%s.%s' % (source_base, extension),
                      arcname='%s.%s' % (target_base, extension))
    archive.close()
    out.flush()
    payload = out.getvalue()
    out.close()
    return payload


def zip_file(file_path, archive_name):
    """Compress a single file into an in-memory zip archive, stored under
    ``archive_name``, and return the raw archive bytes."""
    out = StringIO()
    archive = zipfile.ZipFile(out, 'w', zipfile.ZIP_DEFLATED)
    archive.write(file_path, arcname=archive_name)
    archive.close()
    out.flush()
    payload = out.getvalue()
    out.close()
    return payload

def ogr_conversion(output_type, sql, extension=None):
    """Export the rows selected by ``sql`` via the ogr2ogr command-line tool.

    output_type -- an OGR driver name, e.g. 'ESRI Shapefile', 'KML', 'CSV'
    sql         -- SQL SELECT statement run against the default database
    extension   -- file extension for single-file formats; when None, a
                   temp directory is used (multi-file formats like shapefiles)

    Returns an HttpResponse serving a 'trees.zip' attachment, or a JSON
    error payload when ogr2ogr exits non-zero.
    """
    import os
    import shutil

    dbsettings = settings.DATABASES['default']
    if extension:
        tmp = tempfile.NamedTemporaryFile(suffix='.%s' % extension, mode='w+b')
        # we must close the file for GDAL to be able to open and write to it
        tmp.close()
        tmp_name = tmp.name
    else:
        # Multi-file formats: give ogr2ogr a directory to write into.
        tmp_name = tempfile.mkdtemp()

    connection = 'PG:dbname=%s host=%s port=%s password=%s user=%s' % (
        dbsettings['NAME'], dbsettings['HOST'], dbsettings['PORT'],
        dbsettings['PASSWORD'], dbsettings['USER'])
    command = ['ogr2ogr', '-sql', sql, '-f', output_type, tmp_name, connection]
    try:
        if subprocess.call(command) != 0:
            return render_to_json({'status': 'error'})
        if os.path.isdir(tmp_name):
            # BUG FIX: the original passed the directory itself to zip_file(),
            # which archives only the directory entry, not the exported
            # shapefile components. Archive each file inside the directory.
            # (renamed from 'zipfile' which shadowed the zipfile module)
            out = StringIO()
            archive = zipfile.ZipFile(out, 'w', zipfile.ZIP_DEFLATED)
            for entry in os.listdir(tmp_name):
                archive.write(os.path.join(tmp_name, entry), arcname=entry)
            archive.close()
            payload = out.getvalue()
            out.close()
        else:
            payload = zip_file(tmp_name, 'trees')
        response = HttpResponse(payload, mimetype='application/zip')
        response['Content-Disposition'] = 'attachment; filename=trees.zip'
        return response
    finally:
        # Always remove the temporary output so repeated exports don't
        # accumulate files on disk (the original never cleaned up).
        if os.path.isdir(tmp_name):
            shutil.rmtree(tmp_name, ignore_errors=True)
        elif os.path.exists(tmp_name):
            os.remove(tmp_name)

def advanced_search(request, format='json'): def advanced_search(request, format='json'):
""" """
urlparams: urlparams:
Expand All @@ -1263,21 +1314,15 @@ def advanced_search(request, format='json'):
if format == "geojson": if format == "geojson":
return render_to_geojson(trees, geom_field='geometry', additional_data={'summaries': esj}) return render_to_geojson(trees, geom_field='geometry', additional_data={'summaries': esj})
elif format == "shp": elif format == "shp":
print 'shp for %s trees' % len(trees) sql = str(trees.query)
shpresponder = ShpResponder(trees,geo_field='geometry') return ogr_conversion('ESRI Shapefile', sql)
tmp = shpresponder.write_shapefile_to_tmp_file(shpresponder.queryset)
zipfile = shpresponder.zip_response(tmp,shpresponder.file_name,shpresponder.mimetype,shpresponder.readme)
response = HttpResponse(zipfile, mimetype='application/zip')
response['Content-Disposition'] = 'attachment; filename=trees.zip'
return response
elif format == "kml": elif format == "kml":
print 'kml for %s trees' % len(trees) sql = str(trees.query)
trees = trees.kml() return ogr_conversion('KML', sql, 'kml')
print 'kml for %s trees' % len(trees)
return render_to_kml("treemap/kml_output.kml", {'trees': trees,'root_url':settings.ROOT_URL})
elif format == "csv": elif format == "csv":
return ExcelResponse(trees, force_csv=True) sql = str(trees.query)

return ogr_conversion('CSV', sql, 'csv')



geography = None geography = None
summaries, benefits = None, None summaries, benefits = None, None
Expand Down

0 comments on commit a5173f2

Please sign in to comment.