# NumPy 的 matrix 类

In [2]:
import numpy as np

In [3]:
# Draw a 4x4 array of standard-normal samples. No seed is set, so a re-run
# produces different values from the recorded outputs below.
X = np.random.randn(4, 4)

In [4]:
X

array([[-2.40548499,  1.37805667, -0.01911392,  1.4639204 ],
       [ 0.826417  , -1.52044687,  0.12776436,  1.54065964],
       [-1.20665949,  0.5092859 , -1.3601811 , -0.94645261],
       [-0.02962913,  0.17889911,  1.01584549, -1.55881591]])

In [6]:
# Slice (":1") rather than index ("0") the column axis so the first column
# stays two-dimensional, i.e. a (4, 1) column vector.
y = X[:, :1]

In [7]:
X

array([[-2.40548499,  1.37805667, -0.01911392,  1.4639204 ],
       [ 0.826417  , -1.52044687,  0.12776436,  1.54065964],
       [-1.20665949,  0.5092859 , -1.3601811 , -0.94645261],
       [-0.02962913,  0.17889911,  1.01584549, -1.55881591]])

In [8]:
y

array([[-2.40548499],
       [ 0.826417  ],
       [-1.20665949],
       [-0.02962913]])

In [9]:
# Quadratic form y^T X y with plain ndarrays: matrix products need nested
# np.dot calls. The result is a (1, 1) array.
np.dot(y.T, np.dot(X, y))

array([[-25.4521853]])

In [10]:
# The inner product X y from the expression above, shown on its own: a (4, 1) column.
np.dot(X, y)

array([[ 6.90489682],
       [-3.44427332],
       [ 4.9928018 ],
       [-0.96047555]])

In [11]:
# Wrap the same values in NumPy's matrix class to compare its syntax with ndarray.
Xm = np.matrix(X)

In [12]:
# Unlike an ndarray, indexing a matrix with an integer column still returns a
# 2-D result: ym is a (4, 1) matrix, so no ":1" slice is needed.
ym = Xm[:, 0]

In [13]:
Xm

matrix([[-2.40548499,  1.37805667, -0.01911392,  1.4639204 ],
        [ 0.826417  , -1.52044687,  0.12776436,  1.54065964],
        [-1.20665949,  0.5092859 , -1.3601811 , -0.94645261],
        [-0.02962913,  0.17889911,  1.01584549, -1.55881591]])

In [14]:
ym

matrix([[-2.40548499],
        [ 0.826417  ],
        [-1.20665949],
        [-0.02962913]])

In [15]:
# On matrix objects "*" is matrix multiplication, so this is the same quadratic
# form y^T X y computed earlier with nested np.dot calls.
ym.T * Xm * ym

matrix([[-25.4521853]])

In [16]:
# The .I attribute of a matrix returns its inverse.
Xm.I

matrix([[-0.35539782, -0.5535578 , -0.48496727, -0.58641931],
        [-0.0372028 , -0.95540474, -0.5645459 , -0.63644431],
        [ 0.20614689,  0.13920337, -0.32864952,  0.53072262],
        [ 0.13682691, -0.00841056, -0.26974625, -0.35754835]])

In [17]:
# Sanity check: inverse times original should be the identity. Off-diagonal
# entries come out around 1e-16 rather than exactly 0 because of floating-point
# round-off.
Xm.I * Xm

matrix([[  1.00000000e+00,   0.00000000e+00,  -1.11022302e-16,
           1.66533454e-16],
        [  1.11022302e-16,   1.00000000e+00,  -2.22044605e-16,
           4.99600361e-16],
        [  8.32667268e-17,  -5.55111512e-17,   1.00000000e+00,
          -2.22044605e-16],
        [  5.76795556e-17,  -1.38777878e-17,   0.00000000e+00,
           1.00000000e+00]])

# 高级数组输入输出

## 内存映像文件

In [18]:
# Create a file-backed array. mode='w+' creates the file 'mymmap' (overwriting
# any existing one) for reading and writing. 10000 x 10000 float64 values
# occupy 800,000,000 bytes on disk. The path is relative to the working directory.
mmap = np.memmap('mymmap', dtype='float64', mode='w+', shape=(10000, 10000))

In [19]:
mmap

memmap([[ 0.,  0.,  0., ...,  0.,  0.,  0.],
       [ 0.,  0.,  0., ...,  0.,  0.,  0.],
       [ 0.,  0.,  0., ...,  0.,  0.,  0.],
       ..., 
       [ 0.,  0.,  0., ...,  0.,  0.,  0.],
       [ 0.,  0.,  0., ...,  0.,  0.,  0.],
       [ 0.,  0.,  0., ...,  0.,  0.,  0.]])

In [20]:
# Take the first 5 rows. The slice refers to the same file-backed data, so
# writes through `section` show up in `mmap` (see the display further down).
section = mmap[:5]

In [21]:
# Assign through "[:]" so the random values are written into the existing
# buffer; plain "section = ..." would only rebind the name.
section[:] = np.random.randn(5, 10000)

In [23]:
# Write any pending changes in the array out to the file on disk.
mmap.flush()

In [24]:
mmap

memmap([[ 0.88044869,  1.40057701, -1.73986662, ...,  1.08356244,
         0.87726008, -0.41127651],
       [ 0.04396808,  1.94078257,  2.33590916, ..., -0.73248696,
         0.18581784, -0.2254842 ],
       [-0.97653542, -0.47446806,  0.05884199, ...,  1.00957699,
        -0.26135453,  0.33832396],
       ..., 
       [ 0.        ,  0.        ,  0.        , ...,  0.        ,
         0.        ,  0.        ],
       [ 0.        ,  0.        ,  0.        , ...,  0.        ,
         0.        ,  0.        ],
       [ 0.        ,  0.        ,  0.        , ...,  0.        ,
         0.        ,  0.        ]])

In [25]:
# Drop this handle on the memory map; the data was already flushed to disk above.
del mmap

In [26]:
# Reopen the existing file with the same dtype and shape it was created with.
# mode is omitted here, so NumPy's default mode applies (not 'w+', which would
# overwrite the file). The display below shows the rows written earlier.
mmap = np.memmap('mymmap', dtype='float64', shape=(10000, 10000))

In [27]:
mmap

memmap([[ 0.88044869,  1.40057701, -1.73986662, ...,  1.08356244,
         0.87726008, -0.41127651],
       [ 0.04396808,  1.94078257,  2.33590916, ..., -0.73248696,
         0.18581784, -0.2254842 ],
       [-0.97653542, -0.47446806,  0.05884199, ...,  1.00957699,
        -0.26135453,  0.33832396],
       ..., 
       [ 0.        ,  0.        ,  0.        , ...,  0.        ,
         0.        ,  0.        ],
       [ 0.        ,  0.        ,  0.        , ...,  0.        ,
         0.        ,  0.        ],
       [ 0.        ,  0.        ,  0.        , ...,  0.        ,
         0.        ,  0.        ]])

## HDF5 及其他数组存储方式

# 性能建议

## 连续内存的重要性 

In [28]:
# C (row-major) layout: the elements of each row are adjacent in memory.
arr_c = np.ones((1000, 1000), order='C')

In [29]:
# Fortran (column-major) layout: the elements of each column are adjacent in memory.
arr_f = np.ones((1000, 1000), order='F')

In [30]:
arr_c.flags

  C_CONTIGUOUS : True
  F_CONTIGUOUS : False
  OWNDATA : True
  WRITEABLE : True
  ALIGNED : True
  UPDATEIFCOPY : False

In [31]:
arr_f.flags

  C_CONTIGUOUS : False
  F_CONTIGUOUS : True
  OWNDATA : True
  WRITEABLE : True
  ALIGNED : True
  UPDATEIFCOPY : False

In [32]:
# Individual flags can also be read as attributes of the flags object.
arr_f.flags.f_contiguous

True

In [33]:
# Sum along axis 1 (across each row). In the C-ordered array each row is a
# contiguous run of memory.
%timeit arr_c.sum(1)

1000 loops, best of 3: 571 µs per loop


In [34]:
# Same row-wise sum on the Fortran-ordered array, where the elements of a row
# are not adjacent in memory; the recorded timing is slower than for arr_c.
%timeit arr_f.sum(1)

1000 loops, best of 3: 663 µs per loop


In [35]:
# copy('C') returns a new array laid out in C order, whatever the source layout was.
arr_f.copy('C').flags

  C_CONTIGUOUS : True
  F_CONTIGUOUS : False
  OWNDATA : True
  WRITEABLE : True
  ALIGNED : True
  UPDATEIFCOPY : False

In [36]:
# The first 50 rows of a C-ordered array form one unbroken chunk of memory, so
# this view is still contiguous.
arr_c[:50].flags.contiguous

True

In [37]:
# Taking the first 50 columns skips the remainder of every row, so this view is
# neither C- nor F-contiguous, and it does not own its data (OWNDATA is False).
arr_c[:, :50].flags

  C_CONTIGUOUS : False
  F_CONTIGUOUS : False
  OWNDATA : False
  WRITEABLE : True
  ALIGNED : True
  UPDATEIFCOPY : False

## 其他加速手段：Cython、f2py、C

    from numpy cimport ndarray, float64_t

    def sum_elements(ndarray[float64_t] arr):
        """Return the sum of the elements of a float64 array.

        The loop index, the length and the accumulator are all declared with C
        types so that Cython can compile the loop to plain C instead of
        dispatching through Python objects on every iteration.

        Parameters
        ----------
        arr : ndarray[float64_t]
            Array of float64 values, indexed with a single integer.

        Returns
        -------
        float64_t
            Sum of all elements; 0 for an empty array.
        """
        # Py_ssize_t is the C integer type for sizes and indexes. The type name
        # is followed directly by the variables being declared (no comma).
        cdef Py_ssize_t i, n = len(arr)
        cdef float64_t result = 0

        for i in range(n):
            result += arr[i]

        return result