Merge pull request #285 from alimanfoo/upgrade-blosc-1.7.0-minimal

Minimally upgrade blosc 1.7.0
Blosc · Dec 16, 2015 · 562fd30 · 562fd30
2 parents 3f696d1 + e61d2f6
commit 562fd30
Show file tree

Hide file tree

Showing 83 changed files with 10,754 additions and 5,182 deletions.
diff --git a/.gitignore b/.gitignore
@@ -1,14 +1,26 @@
 build
 bcolz/carray_ext.c
-bcolz/carray_ext.so
+bcolz/*.so
 bcolz/version.py
 **/*.pyc
-
-*.pyd
-bcolz.egg-info/
+
+*.pyd
+bcolz.egg-info/
+.eggs
+
+# data files generated by benchmark scripts
+*.h5
+*.bcolz
+myarray
+large_carray-bench
 
 # airspeed velocity
 bench_asv/env
 bench_asv/bcolz/
 bench_asv/results
 bench_asv/project
+bench_asv/html
+
+# pycharm
+.idea
+
diff --git a/bench/column-iter.py b/bench/column-iter.py
@@ -1,7 +1,16 @@
 from __future__ import print_function
+import sys
 import contextlib, time
 import bcolz, numpy
 
+
+PY2 = sys.version_info[0] == 2
+if not PY2:
+    xrange = range
+    def range(*args):
+        return list(xrange(*args))
+
+
 @contextlib.contextmanager
 def ctime(label=""):
     "Counts the time spent in some context"

diff --git a/bench/pandas-fromdataframe-strings.py b/bench/pandas-fromdataframe-strings.py
@@ -1,12 +1,20 @@
 # Benchmark for evaluating the best ways to convert from a pandas dataframe
 # (version with a mix of columns of ints and strings)
 
+import sys
 import bcolz
 import pandas as pd
-import numpy as np
 from time import time
 
-NR = int(1e6)
+
+PY2 = sys.version_info[0] == 2
+if not PY2:
+    xrange = range
+    def range(*args):
+        return list(xrange(*args))
+
+
+NR = int(1e4)
 NC = 100
 
 #bcolz.cparams.setdefaults(clevel=0)

diff --git a/bench/pytables-fromhdf5.py b/bench/pytables-fromhdf5.py
@@ -1,10 +1,19 @@
 # Benchmark for evaluating the best ways to read from a PyTables Table
 
+import sys
 import bcolz
 import tables as tb
 import numpy as np
 from time import time
 
+
+PY2 = sys.version_info[0] == 2
+if not PY2:
+    xrange = range
+    def range(*args):
+        return list(xrange(*args))
+
+
 filepath = 'fromhdf5.h5'
 nodepath = '/ctable'
 NR = int(1e6)

diff --git a/c-blosc/ANNOUNCE.rst b/c-blosc/ANNOUNCE.rst
@@ -1,14 +1,30 @@
 ===============================================================
- Announcing c-blosc 1.4.1
- A blocking, shuffling and lossless compression library
+ Announcing c-blosc 1.7.0
+ A blocking, shuffling and lossless compression library for C
 ===============================================================
 
 What is new?
 ============
 
-This is a maintenance release, where a bug in blosc_getitem() introduced
-in 1.4.0 has been tracked down and squashed.  If you use this function,
-please update your c-blosc copy.
+This is quite a big release, introducing some exciting new features:
+
+* A new 'bitshuffle' filter is here.  This is similar to the
+  existing 'shuffle' filter, but the shuffle takes place at bit level,
+  and not at byte level.  With it you can expect higher compression
+  ratios while still getting pretty good speed.  For more info, see:
+  http://blosc.org/blog/new-bitshuffle-filter.html
+
+* Implemented a new acceleration mode for LZ4 (updated to 1.7.0) and
+  BloscLZ codecs that comes into operation at all compression levels
+  except for the highest (9).  This allows for a significant boost in
+  speed with minimal compression ratio loss.
+
+* Jack Pappas made great contributions allowing SSE2 operation in more
+  scenarios (like types larger than 16 bytes or buffers not being a
+  multiple of typesize * vectorsize).  Another contribution is a much
+  more comprehensive test suite for SSE2 and AVX2 operation.
+
+* Zbyszek Szmek fixed compilation on non-Intel archs (tested on ARM).
 
 For more info, please see the release notes in:
 
@@ -18,20 +34,15 @@ https://github.com/Blosc/c-blosc/wiki/Release-notes
 What is it?
 ===========
 
-Blosc (http://www.blosc.org) is a high performance compressor
+Blosc (http://www.blosc.org) is a high performance meta-compressor
 optimized for binary data.  It has been designed to transmit data to
 the processor cache faster than the traditional, non-compressed,
 direct memory fetch approach via a memcpy() OS call.
 
-Blosc is the first compressor (that I'm aware of) that is meant not
-only to reduce the size of large datasets on-disk or in-memory, but
-also to accelerate object manipulations that are memory-bound.
-
-Blosc has a Python wrapper called python-blosc
-(https://github.com/Blosc/python-blosc) with a high-performance
-interface to NumPy too.  There is also a handy command line for Blosc
-called Bloscpack (https://github.com/Blosc/bloscpack) that allows you to
-compress large binary datafiles on-disk.
+Blosc has internal support for different compressors like its internal
+BloscLZ, but also LZ4, LZ4HC, Snappy and Zlib.  This way these can
+automatically leverage the multithreading and pre-filtering
+(shuffling) capabilities that come with Blosc.
 
 
 Download sources
@@ -60,9 +71,3 @@ http://groups.google.es/group/blosc
 
 Enjoy Data!
 
-
-.. Local Variables:
-.. mode: rst
-.. coding: utf-8
-.. fill-column: 70
-.. End: