Merge pull request #401 from bburan/bburan/pandas-from-items-deprecation

Fix deprecation of DataFrame.from_items
Blosc · Apr 10, 2020 · 3fc7e55
2 parents d95dc83 + 07cec4a
commit 3fc7e55
Show file tree

Hide file tree

Showing 4 changed files with 12 additions and 9 deletions.
diff --git a/bcolz/ctable.py b/bcolz/ctable.py
@@ -8,7 +8,7 @@
 
 from __future__ import absolute_import
 
-from collections import namedtuple
+from collections import namedtuple, OrderedDict
 from itertools import islice
 import json
 from keyword import iskeyword
@@ -803,8 +803,8 @@ def todataframe(self, columns=None, orient='columns'):
             columns = None
         # Use a generator here to minimize the number of column copies
         # existing simultaneously in-memory
-        df = pd.DataFrame.from_items(
-            ((key, self[key][:]) for key in keys),
+        df = pd.DataFrame.from_dict(
+            OrderedDict((key, self[key][:]) for key in keys),
             columns=columns, orient=orient)
         return df
 

diff --git a/bench/pandas-fromdataframe-strings.py b/bench/pandas-fromdataframe-strings.py
@@ -1,6 +1,7 @@
 # Benchmark for evaluate best ways to convert from a pandas dataframe
 # (version with a mix of columns of ints and strings)
 
+from collections import OrderedDict
 import sys
 import bcolz
 import pandas as pd
@@ -22,7 +23,7 @@ def range(*args):
 print("Creating inputs...")
 a = bcolz.arange(NR, dtype='i4')
 s = bcolz.fromiter(("%d"%i for i in xrange(NR)), dtype='S7', count=NR)
-df = pd.DataFrame.from_items((
+df = pd.DataFrame.from_dict(OrderedDict(
     ('f%d'%i, a[:] if i < (NC//2) else s[:]) for i in range(NC)))
 
 dsize = (NR * (NC//2) * (a.dtype.itemsize + s.dtype.itemsize)) / 2. ** 20

diff --git a/bench/pandas-fromdataframe.py b/bench/pandas-fromdataframe.py
@@ -1,5 +1,6 @@
 # Benchmark for evaluate best ways to convert from a pandas dataframe
 
+from collections import OrderedDict
 import bcolz
 import pandas as pd
 import numpy as np
@@ -12,7 +13,7 @@
 
 print("Creating inputs...")
 a = bcolz.arange(NR, dtype='i4')
-df = pd.DataFrame.from_items((('f%d'%i, a[:]) for i in range(NC)))
+df = pd.DataFrame.from_dict(OrderedDict(('f%d'%i, a[:]) for i in range(NC)))
 
 dsize = (NR * NC * 4) / 2. ** 30
 

diff --git a/bench/pandas-todataframe.py b/bench/pandas-todataframe.py
@@ -1,5 +1,6 @@
 # Benchmark for evaluate best ways to convert into a pandas dataframe
 
+from collections import OrderedDict
 import bcolz
 import pandas as pd
 from time import time
@@ -17,18 +18,18 @@
 t0 = time()
 tnames = list(t.names)
 firstk = tnames.pop(0)
-df = pd.DataFrame.from_items([(firstk, t[firstk][:])])
+df = pd.DataFrame.from_dict(OrderedDict([(firstk, t[firstk][:])]))
 for key in tnames:
     df[key] = t[key][:]
 tt = time() - t0
-print("time with from_items (adding cols): %.2f (%.2f GB/s)" % (tt, dsize / tt))
+print("time with from_dict (adding cols): %.2f (%.2f GB/s)" % (tt, dsize / tt))
 del df
 
 # Using a generator
 t0 = time()
-df = pd.DataFrame.from_items(((key, t[key][:]) for key in t.names))
+df = pd.DataFrame.from_dict(OrderedDict((key, t[key][:]) for key in t.names))
 tt = time() - t0
-print("time with from_items: %.2f (%.2f GB/s)" % (tt, dsize / tt))
+print("time with from_dict: %.2f (%.2f GB/s)" % (tt, dsize / tt))
 
 # Using generic implementation
 t0 = time()