In [3]:
import numpy as np
import pandas as pd

import scipy.sparse as sp

In [2]:
# Parse the comma-separated velocity file into a NumPy array.
with open("./data/pemsd7-m/vel.csv", encoding='utf-8') as vel_file:
    data = np.loadtxt(vel_file, delimiter=',')
# loadtxt has already read everything, so the shape can be reported
# once the file handle is closed.
print(data.shape)

(12672, 228)


# 稀疏矩阵

In [5]:
# Load a sparse matrix saved in SciPy's .npz format
# (presumably the graph adjacency matrix, judging by the file name).
data = sp.load_npz("./data/pemsd7-m/adj.npz")
# Show the shape first, then the stored "(row, col) value" entries.
print(data.shape, data, sep="\n")

(228, 228)
  (0, 0)	1.0
  (1, 0)	0.874723658573536
  (2, 0)	0.3612846282098772
  (3, 0)	0.150751115862349
  (4, 0)	0.44773753416261347
  (7, 0)	0.9366682108438592
  (8, 0)	0.6294460508233404
  (11, 0)	0.7839333057097398
  (12, 0)	0.3309976422216514
  (15, 0)	0.6802770768089623
  (58, 0)	0.10052330878754748
  (65, 0)	0.10478651398778062
  (68, 0)	0.12590298087652654
  (108, 0)	0.5608901322885123
  (109, 0)	0.6899634477684644
  (112, 0)	0.20452601018760724
  (114, 0)	0.32572485193348727
  (115, 0)	0.8756092297080661
  (116, 0)	0.1010639188922501
  (118, 0)	0.30866295600959187
  (120, 0)	0.7924221612667589
  (123, 0)	0.9318102066961166
  (124, 0)	0.4643323538620246
  (126, 0)	0.5328487422206488
  (127, 0)	0.8412964212185728
  :	:
  (185, 227)	0.406905517928299
  (186, 227)	0.15049343836804044
  (187, 227)	0.18220630019898112
  (188, 227)	0.3979505510953252
  (192, 227)	0.17455919974749026
  (193, 227)	0.4049386337428703
  (195, 227)	0.13101336046275158
  (197, 227)	0.13362495666231547
  (

# 压缩格式存储

In [6]:
import numpy as np
from scipy.sparse import csr_matrix, coo_matrix, csc_matrix


In [8]:
# Worked example: the three arrays behind the CSR (Compressed Sparse Row) format.
data = np.array([1, 2, 3, 4, 5, 6])        # stored values, laid out row after row
indices = np.array([0, 2, 2, 0, 1, 2])     # column index of each stored value
indptr = np.array([0, 2, 3, 6])            # row i owns the slice indptr[i]:indptr[i+1]
# Build the CSR matrix and expand it right away; note that despite its name,
# `sparse_matrix` therefore holds a dense ndarray.
sparse_matrix = csr_matrix(
    (data, indices, indptr),
    shape=(3, 3),
).toarray()
print(sparse_matrix)
# CSR compresses along rows.
# For row i, the columns holding stored values are indices[indptr[i]:indptr[i+1]]
# and the values themselves are data[indptr[i]:indptr[i+1]].
# In this example:
#   row 0: columns indices[indptr[0]:indptr[1]] = indices[0:2] = [0, 2]
#          values  data[indptr[0]:indptr[1]]    = data[0:2]    = [1, 2]
#          -> row 0 has 1 in column 0 and 2 in column 2
#   row 1: columns indices[indptr[1]:indptr[2]] = indices[2:3] = [2]
#          values  data[indptr[1]:indptr[2]]    = data[2:3]    = [3]
#          -> row 1 has 3 in column 2
#   row 2: columns indices[indptr[2]:indptr[3]] = indices[3:6] = [0, 1, 2]
#          values  data[indptr[2]:indptr[3]]    = data[3:6]    = [4, 5, 6]
#          -> row 2 has 4 in column 0, 5 in column 1 and 6 in column 2

[[1 0 2]
 [0 0 3]
 [4 5 6]]


In [9]:
# Same three arrays as the CSR example, now interpreted as CSC
# (Compressed Sparse Column).
data = np.array([1, 2, 3, 4, 5, 6])        # stored values, laid out column after column
indices = np.array([0, 2, 2, 0, 1, 2])     # row index of each stored value
indptr = np.array([0, 2, 3, 6])            # column j owns the slice indptr[j]:indptr[j+1]
# Build the CSC matrix and expand it right away; note that despite its name,
# `sparse_matrix` therefore holds a dense ndarray.
sparse_matrix = csc_matrix(
    (data, indices, indptr),
    shape=(3, 3),
).toarray()
print(sparse_matrix)
# CSC compresses along columns.
# For column j, the rows holding stored values are indices[indptr[j]:indptr[j+1]]
# and the values themselves are data[indptr[j]:indptr[j+1]].
# In this example:
#   column 0: rows   indices[indptr[0]:indptr[1]] = indices[0:2] = [0, 2]
#             values data[indptr[0]:indptr[1]]    = data[0:2]    = [1, 2]
#             -> column 0 has 1 in row 0 and 2 in row 2
#   column 1: rows   indices[indptr[1]:indptr[2]] = indices[2:3] = [2]
#             values data[indptr[1]:indptr[2]]    = data[2:3]    = [3]
#             -> column 1 has 3 in row 2
#   column 2: rows   indices[indptr[2]:indptr[3]] = indices[3:6] = [0, 1, 2]
#             values data[indptr[2]:indptr[3]]    = data[3:6]    = [4, 5, 6]
#             -> column 2 has 4 in row 0, 5 in row 1 and 6 in row 2

[[1 0 4]
 [0 0 5]
 [2 3 6]]


In [11]:
# COO (coordinate) format: entry k places data[k] at position (row[k], col[k]).
# Positions (0, 0) and (1, 1) appear more than once below; COO keeps every
# entry as given, and the conversions sum the repeats (2 + 1 + 4 = 7, 3 + 2 = 5).
data = np.array([2, 1, 3, 1, 2, 1, 4])
row = np.array([0, 0, 1, 3, 1, 0, 0])
col = np.array([0, 2, 1, 3, 1, 0, 0])
A = coo_matrix((data, (row, col)), shape=(4, 4))
print("-----------coo_matrix----------------", A, sep="\n")

# Dense ndarray view of A, with repeated coordinates added together.
print("-----------toarray()----------------", A.toarray(), sep="\n")

# Convert to Compressed Sparse Column storage.
B = A.tocsc()
print("-----------tocsc()----------------", B, sep="\n")

# Dense form of the CSC matrix.
C = B.todense()
print("-----------todense()----------------", C, sep="\n")

-----------coo_matrix----------------
  (0, 0)	2
  (0, 2)	1
  (1, 1)	3
  (3, 3)	1
  (1, 1)	2
  (0, 0)	1
  (0, 0)	4
-----------toarray()----------------
[[7 0 1 0]
 [0 5 0 0]
 [0 0 0 0]
 [0 0 0 1]]
-----------tocsc()----------------
  (0, 0)	7
  (1, 1)	5
  (0, 2)	1
  (3, 3)	1
-----------todense()----------------
[[7 0 1 0]
 [0 5 0 0]
 [0 0 0 0]
 [0 0 0 1]]
