Skip to content
This repository has been archived by the owner on Dec 11, 2023. It is now read-only.

Commit

Permalink
Merge pull request #401 from bburan/bburan/pandas-from-items-deprecation
Browse files Browse the repository at this point in the history
Fix deprecation of DataFrame.from_items
  • Loading branch information
FrancescAlted committed Apr 10, 2020
2 parents d95dc83 + 07cec4a commit 3fc7e55
Show file tree
Hide file tree
Showing 4 changed files with 12 additions and 9 deletions.
6 changes: 3 additions & 3 deletions bcolz/ctable.py
Expand Up @@ -8,7 +8,7 @@

from __future__ import absolute_import

from collections import namedtuple
from collections import namedtuple, OrderedDict
from itertools import islice
import json
from keyword import iskeyword
Expand Down Expand Up @@ -803,8 +803,8 @@ def todataframe(self, columns=None, orient='columns'):
columns = None
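# Use a generator here to minimize the number of column copies
# existing simultaneously in memory.
# NOTE(review): the OrderedDict built from this generator holds every
# column copy until the DataFrame is constructed -- confirm that the
# generator still reduces peak memory.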
df = pd.DataFrame.from_items(
((key, self[key][:]) for key in keys),
df = pd.DataFrame.from_dict(
OrderedDict((key, self[key][:]) for key in keys),
columns=columns, orient=orient)
return df

Expand Down
3 changes: 2 additions & 1 deletion bench/pandas-fromdataframe-strings.py
@@ -1,6 +1,7 @@
# Benchmark for evaluating the best ways to convert from a pandas dataframe
# (version with a mix of columns of ints and strings)

from collections import OrderedDict
import sys
import bcolz
import pandas as pd
Expand All @@ -22,7 +23,7 @@ def range(*args):
print("Creating inputs...")
a = bcolz.arange(NR, dtype='i4')
s = bcolz.fromiter(("%d"%i for i in xrange(NR)), dtype='S7', count=NR)
df = pd.DataFrame.from_items((
df = pd.DataFrame.from_dict(OrderedDict(
('f%d'%i, a[:] if i < (NC//2) else s[:]) for i in range(NC)))

dsize = (NR * (NC//2) * (a.dtype.itemsize + s.dtype.itemsize)) / 2. ** 20
Expand Down
3 changes: 2 additions & 1 deletion bench/pandas-fromdataframe.py
@@ -1,5 +1,6 @@
# Benchmark for evaluating the best ways to convert from a pandas dataframe

from collections import OrderedDict
import bcolz
import pandas as pd
import numpy as np
Expand All @@ -12,7 +13,7 @@

print("Creating inputs...")
a = bcolz.arange(NR, dtype='i4')
df = pd.DataFrame.from_items((('f%d'%i, a[:]) for i in range(NC)))
df = pd.DataFrame.from_dict(OrderedDict(('f%d'%i, a[:]) for i in range(NC)))

dsize = (NR * NC * 4) / 2. ** 30

Expand Down
9 changes: 5 additions & 4 deletions bench/pandas-todataframe.py
@@ -1,5 +1,6 @@
# Benchmark for evaluating the best ways to convert into a pandas dataframe

from collections import OrderedDict
import bcolz
import pandas as pd
from time import time
Expand All @@ -17,18 +18,18 @@
t0 = time()
tnames = list(t.names)
firstk = tnames.pop(0)
df = pd.DataFrame.from_items([(firstk, t[firstk][:])])
df = pd.DataFrame.from_dict(OrderedDict([(firstk, t[firstk][:])]))
for key in tnames:
df[key] = t[key][:]
tt = time() - t0
print("time with from_items (adding cols): %.2f (%.2f GB/s)" % (tt, dsize / tt))
print("time with from_dict (adding cols): %.2f (%.2f GB/s)" % (tt, dsize / tt))
del df

# Using a generator
t0 = time()
df = pd.DataFrame.from_items(((key, t[key][:]) for key in t.names))
df = pd.DataFrame.from_dict(OrderedDict((key, t[key][:]) for key in t.names))
tt = time() - t0
print("time with from_items: %.2f (%.2f GB/s)" % (tt, dsize / tt))
print("time with from_dict: %.2f (%.2f GB/s)" % (tt, dsize / tt))

# Using generic implementation
t0 = time()
Expand Down

0 comments on commit 3fc7e55

Please sign in to comment.