Skip to content

Commit

Permalink
add some tests for sparse module, improve docs
Browse files Browse the repository at this point in the history
  • Loading branch information
Mark Levy committed Nov 11, 2013
1 parent 236973d commit e91c086
Show file tree
Hide file tree
Showing 3 changed files with 144 additions and 18 deletions.
4 changes: 2 additions & 2 deletions Makefile
Expand Up @@ -20,12 +20,12 @@ clean:

# Tests...
#
test-code: in
test-code:
$(NOSETESTS) -s mrec

test-coverage:
$(NOSETESTS) -s --with-coverage --cover-html --cover-html-dir=coverage \
--cover-package=mrec mrec

test: test-code test-doc
test: test-code

84 changes: 68 additions & 16 deletions mrec/sparse.py
Expand Up @@ -13,7 +13,6 @@ def loadtxt(filepath,comments='#',delimiter=None,skiprows=0,usecols=None,index_o
Parameters
----------
filepath : file or str
File containing simply formatted row,col,val sparse matrix data.
comments : str, optional
Expand All @@ -33,7 +32,6 @@ def loadtxt(filepath,comments='#',delimiter=None,skiprows=0,usecols=None,index_o
Returns
-------
mat : scipy.sparse.coo_matrix
The sparse matrix.
"""
Expand All @@ -46,29 +44,36 @@ def loadtxt(filepath,comments='#',delimiter=None,skiprows=0,usecols=None,index_o
shape = (max(row)+1,max(col)+1)
return coo_matrix((data,(row,col)),shape=shape)

def savez(d,filepath):
def savez(d,file):
"""
Save a sparse matrix to file in numpy binary format.
Parameters
----------
d : scipy sparse matrix
The sparse matrix to save.
filepath : str
The filepath to write to.
file : str or file
Either the file name (string) or an open file (file-like object)
where the matrix will be saved. If file is a string, the ``.npz``
extension will be appended to the file name if it is not already there.
"""
np.savez(filepath,row=d.row,col=d.col,data=d.data,shape=d.shape)
np.savez(file,row=d.row,col=d.col,data=d.data,shape=d.shape)

def loadz(filepath):
def loadz(file):
"""
Load a sparse matrix saved to file with savez.
Parameters
----------
filepath : str
The filepath to read from.
file : str or file
The open file or filepath to read from.
Returns
-------
mat : scipy.sparse.coo_matrix
The sparse matrix.
"""
y = np.load(filepath)
y = np.load(file)
return coo_matrix((y['data'],(y['row'],y['col'])),shape=y['shape'])

class fast_sparse_matrix(object):
Expand Down Expand Up @@ -113,22 +118,56 @@ def __init__(self,X,col_view=None):

@property
def shape(self):
"""Return the shape of the underlying matrix."""
"""
Return the shape of the underlying matrix.
"""
return self.X.shape

def fast_get_col(self,j):
"""Return column j."""
"""
Return column j of the underlying matrix.
Parameters
----------
j : int
Index of column to get.
Returns
-------
col : scipy.sparse.csc_matrix
Copy of column j of the matrix.
"""
col = self.col_view[:,j].copy()
col.data = self.X.data[col.data]
return col

def fast_update_col(self,j,vals):
"""Update values of existing non-zeros in column j."""
"""
Update values of existing non-zeros in column j
of the underlying matrix.
Parameters
----------
j : int
Index of the column to update.
vals : array like
The new values to be assigned, must satisfy
len(vals) == X[:,j].nnz i.e. this method can
only change the value of existing non-zero entries
of column j, it cannot add new ones.
"""
dataptr = self.col_view[:,j].data
self.X.data[dataptr] = vals

def save(self,filepath):
"""Save to file as arrays in numpy binary format."""
"""
Save to file as arrays in numpy binary format.
Parameters
----------
filepath : str
The filepath to write to.
"""
d = self.X.tocoo(copy=False)
v = self.col_view.tocoo(copy=False)
np.savez(filepath,row=d.row,col=d.col,data=d.data,shape=d.shape,
Expand All @@ -138,6 +177,11 @@ def save(self,filepath):
def load(filepath):
"""
Load a fast_sparse_matrix from file written by fast_sparse_matrix.save().
Parameters
----------
filepath : str
The filepath to load.
"""
y = np.load(filepath,mmap_mode='r')
X = coo_matrix((y['data'],(y['row'],y['col'])),shape=y['shape'])
Expand All @@ -152,7 +196,6 @@ def loadtxt(filepath,comments='#',delimiter=None,skiprows=0,usecols=None,index_o
Parameters
----------
filepath : file or str
File containing simply formatted row,col,val sparse matrix data.
comments : str, optional
Expand All @@ -169,6 +212,11 @@ def loadtxt(filepath,comments='#',delimiter=None,skiprows=0,usecols=None,index_o
Offset applied to the row and col indices in the input data (default: 1).
The default offset is chosen so that 1-indexed data on file results in a
fast_sparse_matrix holding 0-indexed matrices.
Returns
-------
mat : mrec.sparse.fast_sparse_matrix
A fast_sparse_matrix holding the data in the file.
"""
X = loadtxt(filepath,comments=comments,delimiter=delimiter,skiprows=skiprows,usecols=usecols)
return fast_sparse_matrix(X)
Expand All @@ -180,9 +228,13 @@ def loadmm(filepath):
Parameters
----------
filepath : file or str
The matrixmarket file to read.
Returns
-------
mat : mrec.sparse.fast_sparse_matrix
A fast_sparse_matrix holding the data in the file.
"""
X = mmread(filepath)
return fast_sparse_matrix(X)
Expand Down
74 changes: 74 additions & 0 deletions mrec/tests/test_sparse.py
@@ -0,0 +1,74 @@
import tempfile
import os
from sklearn.utils.testing import assert_equal
from sklearn.utils.testing import assert_array_equal

from mrec.testing import get_random_coo_matrix

from mrec.sparse import loadtxt
from mrec.sparse import savez
from mrec.sparse import loadz
from mrec.sparse import fast_sparse_matrix

def test_loadtxt():
    """Round-trip a random matrix through a tab-separated text file and loadtxt()."""
    X = get_random_coo_matrix()
    # mkstemp() hands back an already-open OS-level descriptor; wrap it
    # instead of reopening the path so the descriptor is not leaked.
    fd, path = tempfile.mkstemp(suffix='.npz')
    try:
        with os.fdopen(fd, 'w') as f:
            for i, j, v in zip(X.row, X.col, X.data):
                # Indices are written 1-based; the final assertion expects
                # loadtxt() to map them back to the original 0-based ones.
                f.write('{0}\t{1}\t{2}\n'.format(i + 1, j + 1, v))
        Y = loadtxt(path)
    finally:
        # Always remove the temporary file, even if writing or loading fails.
        os.remove(path)
    assert_array_equal(X.toarray(), Y.toarray())

def test_savez_loadz():
    """Check that loadz() recovers a matrix written by savez()."""
    m = get_random_coo_matrix()
    fd, path = tempfile.mkstemp(suffix='.npz')
    # Only the path is needed: close the descriptor mkstemp() opened so it
    # is not leaked while savez() writes to the same path.
    os.close(fd)
    try:
        savez(m, path)
        n = loadz(path)
    finally:
        # Always remove the temporary file, even if saving or loading fails.
        os.remove(path)
    assert_array_equal(n.toarray(), m.toarray())

def test_init_fast_sparse_matrix():
    """Build a fast_sparse_matrix from COO, CSR and CSC input and compare contents and shape."""
    coo = get_random_coo_matrix()
    # All three representations are created before any wrapper is built.
    sources = (coo, coo.tocsr(), coo.tocsc())
    for source in sources:
        wrapped = fast_sparse_matrix(source)
        assert_array_equal(wrapped.X.toarray(), source.toarray())
        assert_equal(wrapped.shape, source.shape)

def test_fast_get_col():
    """Compare fast_get_col(j) with plain column slicing for every column."""
    reference = get_random_coo_matrix().tocsc()
    wrapped = fast_sparse_matrix(reference)
    num_cols = reference.shape[1]
    for j in xrange(num_cols):
        fetched = wrapped.fast_get_col(j).toarray()
        sliced = reference[:, j].toarray()
        assert_array_equal(fetched, sliced)

def test_fast_update_col():
    """Add one to every stored entry of each column via fast_update_col() and verify."""
    X = get_random_coo_matrix().tocsc()
    m = fast_sparse_matrix(X)
    for j in xrange(X.shape[1]):
        # Row positions of the entries stored in column j of the reference.
        stored_rows = X[:, j].indices
        new_vals = [X[i, j] + 1 for i in stored_rows]
        m.fast_update_col(j, new_vals)
        # Bump exactly the stored positions that were updated above, rather
        # than testing for "> 0", which would miss negative or explicitly
        # stored zero entries.
        expected = X[:, j].toarray()
        expected[stored_rows, 0] += 1
        assert_array_equal(m.fast_get_col(j).toarray(), expected)

def test_save_load():
    """Check that fast_sparse_matrix.load() restores what save() wrote."""
    X = get_random_coo_matrix()
    m = fast_sparse_matrix(X)
    fd, path = tempfile.mkstemp(suffix='.npz')
    # Only the path is needed: close the descriptor mkstemp() opened so it
    # is not leaked while save() writes to the same path.
    os.close(fd)
    try:
        m.save(path)
        n = fast_sparse_matrix.load(path)
    finally:
        # Always remove the temporary file, even if saving or loading fails.
        os.remove(path)
    assert_equal(m.shape, n.shape)
    assert_array_equal(m.X.toarray(), n.X.toarray())
    assert_array_equal(m.col_view.toarray(), n.col_view.toarray())

0 comments on commit e91c086

Please sign in to comment.