In [10]:
import numpy as np
import numpy.linalg as la
import matplotlib.pyplot as plt
import scipy.sparse as sparse

# print(plt.style.available) # uncomment to print all styles
import seaborn as sns
sns.set(font_scale=2)
plt.style.use('seaborn-whitegrid')
plt.rcParams['figure.figsize'] = (10,10)
%matplotlib inline

<img src="sparse.png" alt="Sparse" style="width: 300px;"/>

## Create a Sparse Matrix in COO

In [11]:
# Nonzeros of the example matrix as (row, column, value) triplets.
entries = [
    (0, 1,  1.9),
    (0, 3, -5.2),
    (2, 0,  4.4),
    (2, 1,  5.8),
    (2, 2,  3.6),
    (3, 2,  7.2),
    (3, 3,  2.7),
]
# Split the triplets into the three parallel lists that coo_matrix takes.
i, j, data = (list(column) for column in zip(*entries))
A = sparse.coo_matrix((data, (i, j)))

# Sparse listing first, then the equivalent dense matrix.
print(A, A.todense(), sep='\n')

# Convert to CSR and show its three arrays: values, row pointers, column indices.
A = A.tocsr()
print(A.data, A.indptr, A.indices, sep='\n')

In [12]:
def bmatrix(a):
    r"""Return a LaTeX bmatrix for an array of at most two dimensions.

    Entries are formatted the way NumPy prints them; each printed row becomes
    one line of ``&``-separated entries terminated by ``\\``.

    :a: numpy array (any object with ``.shape`` that NumPy can convert)
    :returns: LaTeX bmatrix as a string
    :raises ValueError: if ``a`` has more than two dimensions
    """
    if len(a.shape) > 2:
        raise ValueError('bmatrix can at most display two dimensions')
    arr = np.asarray(a)
    # Plain str(a) wraps long rows at NumPy's print line width and abbreviates
    # large arrays with '...'; either would yield wrong LaTeX rows. Disable both:
    # an unlimited line width, and a threshold above the element count.
    text = np.array2string(arr, max_line_width=np.inf, threshold=arr.size + 1)
    rows = text.replace('[', '').replace(']', '').splitlines()
    out = [r'\begin{bmatrix}']
    out.extend('  ' + ' & '.join(row.split()) + r'\\' for row in rows)
    out.append(r'\end{bmatrix}')
    return '\n'.join(out)

In [13]:
# Emit the CSR column-index array as LaTeX source.
col_latex = bmatrix(A.indices)
print(col_latex)

$$data = 
\begin{bmatrix}
  1.9 & -5.2 & 4.4 & 5.8 & 3.6 & 7.2 & 2.7\\
\end{bmatrix}$$

$$rowptr = 
\begin{bmatrix}
  0 & 2 & 2 & 5 & 7\\
\end{bmatrix}
$$

$$col = 
\begin{bmatrix}
  1 & 3 & 0 & 1 & 2 & 2 & 3\\
\end{bmatrix}
$$

$$A  = \begin{bmatrix}
  0. & 1.9 & 0. & -5.2\\
  0. & 0. & 0. & 0.\\
  4.4 & 5.8 & 3.6 & 0.\\
  0. & 0. & 7.2 & 2.7\\
\end{bmatrix}$$

In [14]:
# (row, column, value) triplets; note row 0 is listed out of column order.
entries = [
    (0, 3, -5.2),
    (0, 1,  1.9),
    (1, 0,  0.3),
    (1, 2,  9.1),
    (2, 0,  4.4),
    (2, 1,  5.8),
    (2, 2,  3.6),
    (3, 2,  7.2),
    (3, 3,  2.7),
]
# Unzip into the parallel row / column / value lists.
i, j, data = map(list, zip(*entries))
A = sparse.coo_matrix((data, (i, j)))
print(A.todense())

In [15]:
# Report the three parallel COO arrays (values, row indices, column indices),
# each followed by its dtype and length.
print(A.data)
print(A.data.dtype, 'Length: ', len(A.data))
print('-')
print(A.row)
print(A.row.dtype, 'Length: ', len(A.row))
print('-')
print(A.col)
# Measure the column array itself rather than the row array.
print(A.col.dtype, 'Length: ', len(A.col))

## Convert to CSR

In [16]:
# Re-store the matrix in CSR format.
A = A.tocsr()
# Sparse listing first, dense view second.
print(A, A.todense(), sep='\n')

In [17]:
# Walk the three CSR arrays -- values, row pointers, column indices -- printing
# each one followed by its dtype and length, with a '-' line between reports.
for position, values in enumerate((A.data, A.indptr, A.indices)):
    if position > 0:
        print('-')
    print(values)
    print(values.dtype, 'Length: ', len(values))

## Try some timings: small, `Harvard500`

In [18]:
import scipy.io as sio
# Read the MATLAB file and convert the stored matrix to CSR.
d = sio.loadmat('Harvard500.mat')
# Select the matrix by field name instead of by position in the 'Problem' struct.
# NOTE(review): assumes the SuiteSparse layout, where the matrix field is named 'A'
# (the field previously reached as index 2) -- confirm against the file.
A = d['Problem']['A'][0, 0].tocsr()

In [19]:
# Bare expression: the notebook displays A's own summary representation.
A

In [20]:
# Sparsity pattern of A on a 10x10 figure, using explicit figure/axes handles.
fig, ax = plt.subplots(figsize=(10,10))
ax.spy(A, ms=5)

In [21]:
# Number of rows of A.
A.shape[0]

In [22]:
# Two random vectors with one entry per row of A; w is the operand timed below.
n_rows = A.shape[0]
v = np.random.random_sample(n_rows)
w = np.random.random_sample(n_rows)

In [23]:
%timeit v = A * w

In [24]:
# Dense copy of A for the comparison timing. toarray() yields a plain ndarray;
# todense() would yield the discouraged np.matrix type.
Adense = A.toarray()

In [25]:
# Time the same product using the dense copy, for comparison with the sparse timing.
%timeit v = Adense.dot(w)

## Medium `wb-cs-stanford`

In [26]:
# Read the MATLAB file and convert the stored matrix to CSR.
d = sio.loadmat('wb-cs-stanford.mat')
# Select the matrix by field name instead of by position in the 'Problem' struct.
# NOTE(review): assumes the SuiteSparse layout, where the matrix field is named 'A'
# (the field previously reached as index 2) -- confirm against the file.
A = d['Problem']['A'][0, 0].tocsr()

In [27]:
# Sparsity pattern of A on a 10x10 figure, using explicit figure/axes handles.
fig, ax = plt.subplots(figsize=(10,10))
ax.spy(A, ms=5)

In [28]:
# Bare expression: the notebook displays A's own summary representation.
A

In [29]:
# Two random vectors with one entry per row of A; w is the operand timed below.
n_rows = A.shape[0]
v = np.random.random_sample(n_rows)
w = np.random.random_sample(n_rows)

In [30]:
%timeit v = A * w

In [31]:
# Dense copy of A for the comparison timing. toarray() yields a plain ndarray;
# todense() would yield the discouraged np.matrix type.
Adense = A.toarray()

In [32]:
# Time the same product using the dense copy, for comparison with the sparse timing.
%timeit v = Adense.dot(w)

## Large `email-Enron`

In [33]:
# Read the MATLAB file and convert the stored matrix to CSR.
d = sio.loadmat('email-Enron.mat')
# Select the matrix by field name instead of by position in the 'Problem' struct.
# NOTE(review): assumes the SuiteSparse layout, where the matrix field is named 'A'
# (the field previously reached as index 2) -- confirm against the file.
A = d['Problem']['A'][0, 0].tocsr()

In [34]:
# Sparsity pattern of A on a 10x10 figure, using explicit figure/axes handles.
fig, ax = plt.subplots(figsize=(10,10))
ax.spy(A, ms=5)

In [35]:
# Bare expression: the notebook displays A's own summary representation.
A

In [36]:
# Two random vectors with one entry per row of A; w is the operand timed below.
n_rows = A.shape[0]
v = np.random.random_sample(n_rows)
w = np.random.random_sample(n_rows)

In [37]:
%timeit v = A * w

In [38]:
# Dense copy of A for the comparison timing. toarray() yields a plain ndarray;
# todense() would yield the discouraged np.matrix type.
# NOTE(review): a dense copy stores all shape[0] * shape[1] entries regardless of
# sparsity; for the "large" dataset this may exceed available memory -- confirm
# before running.
Adense = A.toarray()

In [None]:
# Time the same product using the dense copy, for comparison with the sparse timing.
%timeit v = Adense.dot(w)