Skip to content

Commit

Permalink
fixups
Browse files Browse the repository at this point in the history
  • Loading branch information
TomAugspurger committed Mar 12, 2019
1 parent 6f619b5 commit 94a7baf
Show file tree
Hide file tree
Showing 4 changed files with 64 additions and 44 deletions.
53 changes: 32 additions & 21 deletions pandas/core/arrays/sparse.py
Original file line number Diff line number Diff line change
Expand Up @@ -686,22 +686,34 @@ def from_spmatrix(cls, data):
Parameters
----------
data : scipy.sparse.spmatrix
This should be a 2-D SciPy sparse where the size
This should be a SciPy sparse matrix where the size
of the second dimension is 1. In other words, a
sparse matrix with a single column.
Returns
-------
SparseArray.
"""
assert data.ndim == 2
SparseArray
Examples
--------
>>> import scipy.sparse
>>> mat = scipy.sparse.coo_matrix((4, 1))
>>> pd.SparseArray.from_spmatrix(mat)
[0.0, 0.0, 0.0, 0.0]
Fill: 0.0
IntIndex
Indices: array([], dtype=int32)
"""
length, ncol = data.shape

assert ncol == 1
if ncol != 1:
raise ValueError(
"'data' must have a single column, not '{}'".format(ncol)
)

arr = data.data
idx, _ = data.nonzero()
idx.sort()
zero = np.array(0, dtype=arr.dtype).item()
dtype = SparseDtype(arr.dtype, zero)
index = IntIndex(length, idx)
Expand Down Expand Up @@ -1921,28 +1933,32 @@ def _make_index(length, indices, kind):
# ----------------------------------------------------------------------------
# Accessor

_validation_msg = "Can only use the '.sparse' accessor with Sparse data."

class BaseAccessor(object):
_validation_msg = "Can only use the '.sparse' accessor with Sparse data."

def __init__(self, data=None):
self._parent = data
self._validate(data)

def _validate(self, data):
raise NotImplementedError


@delegate_names(SparseArray, ['npoints', 'density', 'fill_value',
'sp_values'],
typ='property')
class SparseAccessor(PandasDelegate):
class SparseAccessor(BaseAccessor, PandasDelegate):
"""
Accessor for sparse data, including conversion from other sparse matrix data types.
"""

def __init__(self, data=None):
# Store the Series since we need that for to_coo
self._parent = data
self._validate(data)

def _validate(self, data):
if not isinstance(data.dtype, SparseDtype):
raise AttributeError(_validation_msg)
raise AttributeError(self._validation_msg)

def _delegate_property_get(self, name, *args, **kwargs):
return getattr(self._parent.values, name)
return getattr(self._parent.array, name)

def _delegate_method(self, name, *args, **kwargs):
if name == 'from_coo':
Expand Down Expand Up @@ -2064,17 +2080,12 @@ def to_dense(self):
name=self._parent.name)


class SparseFrameAccessor(PandasDelegate):

def __init__(self, data=None):
# Store the Series since we need that for to_coo
self._parent = data
self._validate(data)
class SparseFrameAccessor(BaseAccessor, PandasDelegate):

def _validate(self, data):
dtypes = data.dtypes
if not all(isinstance(t, SparseDtype) for t in dtypes):
raise AttributeError(_validation_msg)
raise AttributeError(self._validation_msg)

@classmethod
def from_spmatrix(cls, data, index=None, columns=None):
Expand Down
23 changes: 3 additions & 20 deletions pandas/core/sparse/frame.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,12 +19,11 @@
from pandas.core.dtypes.missing import isna, notna

import pandas.core.algorithms as algos
from pandas.core.arrays.sparse import SparseArray
from pandas.core.arrays.sparse import SparseArray, SparseFrameAccessor
import pandas.core.common as com
from pandas.core.frame import DataFrame
import pandas.core.generic as generic
from pandas.core.index import Index, MultiIndex, ensure_index
import pandas.core.indexes.base as ibase
from pandas.core.internals import (
BlockManager, create_block_manager_from_arrays)
from pandas.core.internals.construction import extract_index, prep_ndarray
Expand Down Expand Up @@ -198,7 +197,7 @@ def _init_matrix(self, data, index, columns, dtype=None):
Init self from ndarray or list of lists.
"""
data = prep_ndarray(data, copy=False)
index, columns = self._prep_index(data, index, columns)
index, columns = SparseFrameAccessor._prep_index(data, index, columns)
data = {idx: data[:, i] for i, idx in enumerate(columns)}
return self._init_dict(data, index, columns, dtype)

Expand All @@ -207,7 +206,7 @@ def _init_spmatrix(self, data, index, columns, dtype=None,
"""
Init self from scipy.sparse matrix.
"""
index, columns = self._prep_index(data, index, columns)
index, columns = SparseFrameAccessor._prep_index(data, index, columns)
data = data.tocoo()
N = len(index)

Expand All @@ -234,21 +233,6 @@ def _init_spmatrix(self, data, index, columns, dtype=None,

return self._init_dict(sdict, index, columns, dtype)

def _prep_index(self, data, index, columns):
N, K = data.shape
if index is None:
index = ibase.default_index(N)
if columns is None:
columns = ibase.default_index(K)

if len(columns) != K:
raise ValueError('Column length mismatch: {columns} vs. {K}'
.format(columns=len(columns), K=K))
if len(index) != N:
raise ValueError('Index length mismatch: {index} vs. {N}'
.format(index=len(index), N=N))
return index, columns

def to_coo(self):
"""
Return the contents of the frame as a sparse SciPy COO matrix.
Expand All @@ -271,7 +255,6 @@ def to_coo(self):
float32. By numpy.find_common_type convention, mixing int64 and
uint64 will result in a float64 dtype.
"""
from pandas.core.arrays.sparse import SparseFrameAccessor
return SparseFrameAccessor(self).to_coo()

def __array_wrap__(self, result):
Expand Down
14 changes: 11 additions & 3 deletions pandas/tests/arrays/sparse/test_accessor.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,10 +5,11 @@

import pandas as pd
import pandas.util.testing as tm
import pandas.util._test_decorators as td


class TestSeriesAccessor(object):
# TODO: collect other accessor tests
# TODO: collect other Series accessor tests
def test_to_dense(self):
s = pd.Series([0, 1, 0, 10], dtype='Sparse[int64]')
result = s.sparse.to_dense()
Expand All @@ -17,15 +18,22 @@ def test_to_dense(self):


class TestFrameAccessor(object):

def test_accessor_raises(self):
df = pd.DataFrame({"A": [0, 1]})
with pytest.raises(AttributeError, match='sparse'):
df.sparse

@pytest.mark.parametrize('format', ['csc', 'csr', 'coo'])
@pytest.mark.parametrize("labels", [
None,
list(string.ascii_letters[:10]),
])
@pytest.mark.parametrize('dtype', ['float64', 'int64'])
@td.skip_if_no_scipy
def test_from_spmatrix(self, format, labels, dtype):
pytest.importorskip("scipy")
import scipy.sparse

sp_dtype = pd.SparseDtype(dtype, np.array(0, dtype=dtype).item())

mat = scipy.sparse.eye(10, format=format, dtype=dtype)
Expand All @@ -39,8 +47,8 @@ def test_from_spmatrix(self, format, labels, dtype):
).astype(sp_dtype)
tm.assert_frame_equal(result, expected)

@td.skip_if_no_scipy
def test_to_coo(self):
pytest.importorskip("scipy")
import scipy.sparse

df = pd.DataFrame({
Expand Down
18 changes: 18 additions & 0 deletions pandas/tests/arrays/sparse/test_array.py
Original file line number Diff line number Diff line change
Expand Up @@ -172,6 +172,24 @@ def test_constructor_inferred_fill_value(self, data, fill_value):
else:
assert result == fill_value

@pytest.mark.parametrize('format', ['coo', 'csc', 'csr'])
def test_from_spmatrix(self, format):
pytest.importorskip('scipy')
import scipy.sparse

mat = scipy.sparse.random(10, 1, density=0.5, format=format)
result = SparseArray.from_spmatrix(mat)
np.testing.assert_array_equal(mat.data, result.sp_values)

def test_from_spmatrix_raises(self):
pytest.importorskip('scipy')
import scipy.sparse

mat = scipy.sparse.eye(5, 4, format='csc')

with pytest.raises(ValueError, match="not '4'"):
SparseArray.from_spmatrix(mat)

@pytest.mark.parametrize('scalar,dtype', [
(False, SparseDtype(bool, False)),
(0.0, SparseDtype('float64', 0)),
Expand Down

0 comments on commit 94a7baf

Please sign in to comment.