#mat_handler.py

The `mat_handler.py` module contains the `matrix` class, which is the backbone of `pyemu`.  The `matrix` class overloads all common mathematical operators and also uses an "auto-align" functionality to line up matrix objects for multiplication, addition, etc. 



In [4]:
from __future__ import print_function
import os
import numpy as np
from mat_handler import matrix, cov

Here is the most basic instantiation of the `matrix` class:

In [5]:
m = matrix()

Here we will generate a `matrix` object with a random ndarray

In [7]:
# Build a 5x5 array of uniform random values and wrap it in a `matrix`
# with zero-padded, human-readable row and column labels.
a = np.random.random((5, 5))
# Build the label lists directly; a comprehension should produce the list,
# not be used for its append() side effects.
row_names = ["row_{0:02d}".format(i) for i in range(5)]
col_names = ["col_{0:02d}".format(i) for i in range(5)]
m = matrix(x=a, row_names=row_names, col_names=col_names)
print(m)

row names: ['row_00', 'row_01', 'row_02', 'row_03', 'row_04']
col names: ['col_00', 'col_01', 'col_02', 'col_03', 'col_04']
[[ 0.08791102  0.95028496  0.44800621  0.12231117  0.91329927]
 [ 0.71335391  0.71420174  0.10496111  0.63835796  0.35291927]
 [ 0.16778418  0.10886583  0.86851255  0.68595219  0.30715702]
 [ 0.11102555  0.00589094  0.74617443  0.52127884  0.34070508]
 [ 0.32784154  0.44609205  0.43907851  0.3882215   0.97062915]]


#File I/O with `matrix`
`matrix` supports several PEST-compatible I/O routines as well as some others:

In [8]:
# Round-trip `m` through an ASCII matrix file: write it out, then load the
# file into a fresh, empty `matrix` and print it for comparison with `m`.
ascii_name = "mat_test.mat"
m.to_ascii(ascii_name)
m2 = matrix()
m2.from_ascii(ascii_name)  # populates m2 in place from the file
print(m2)

row names: ['row_00', 'row_01', 'row_02', 'row_03', 'row_04']
col names: ['col_00', 'col_01', 'col_02', 'col_03', 'col_04']
[[ 0.08791103  0.95028496  0.44800621  0.12231117  0.91329927]
 [ 0.71335391  0.71420174  0.10496111  0.63835796  0.35291927]
 [ 0.16778418  0.10886583  0.86851255  0.68595219  0.30715702]
 [ 0.11102555  0.00589094  0.74617443  0.52127884  0.34070508]
 [ 0.32784154  0.44609205  0.43907851  0.3882215   0.97062915]]


In [10]:
# Same round trip as the ASCII example, but through the binary file format.
# NOTE(review): the recorded output for this cell is a TypeError
# ("'zip' does not support the buffer interface"), so this round trip did
# not complete in the environment the notebook was run in -- confirm
# whether the binary I/O routines work on the Python version in use.
bin_name = "mat_test.bin"
m.to_binary(bin_name)
m3 = matrix()
m3.from_binary(bin_name)
print(m3)

TypeError: 'zip' does not support the buffer interface

`matrix` also implements `to_dataframe()` and `to_sparse()`, which return a `pandas` `DataFrame` and a `scipy.sparse` (compressed sparse row) matrix, respectively:

In [12]:
print(type(m.to_dataframe()))
print(type(m.to_sparse()))
m.to_dataframe() #looks really nice in the notebook!

<class 'pandas.core.frame.DataFrame'>
<class 'scipy.sparse.csr.csr_matrix'>


Unnamed: 0,col_00,col_01,col_02,col_03,col_04
row_00,0.087911,0.950285,0.448006,0.122311,0.913299
row_01,0.713354,0.714202,0.104961,0.638358,0.352919
row_02,0.167784,0.108866,0.868513,0.685952,0.307157
row_03,0.111026,0.005891,0.746174,0.521279,0.340705
row_04,0.327842,0.446092,0.439079,0.388221,0.970629


#Convenience methods of `matrix`

Several cool things are implemented in `matrix` and accessed through `@property` decorated methods.  For example, the SVD components of a `matrix` object are simply accessed by name.  The SVD routine is called on demand and the components are cast to `matrix` objects, all opaque to the user:

In [13]:
print(m.s) #the singular values of m cast into a matrix object.  the SVD() is called on demand...
m.s.to_ascii("test_sv.mat") #save the singular values to a PEST-compatible ASCII file

row names: ['sing_val_1', 'sing_val_2', 'sing_val_3', 'sing_val_4', 'sing_val_5']
col names: ['sing_val_1', 'sing_val_2', 'sing_val_3', 'sing_val_4', 'sing_val_5']
[[ 2.37454194]
 [ 1.04643439]
 [ 0.76242284]
 [ 0.34040994]
 [ 0.01371905]]


In [14]:
m.v.to_ascii("test_v.mat") #the right singular vectors of m.
m.u.to_dataframe()# a data frame of the left singular vectors of m

Unnamed: 0,left_sing_vec_1,left_sing_vec_2,left_sing_vec_3,left_sing_vec_4,left_sing_vec_5
row_00,-0.522731,-0.484323,0.47828,-0.509056,0.065518
row_01,-0.435025,-0.25956,-0.850255,-0.090995,0.110307
row_02,-0.405026,0.622467,-0.031065,-0.28349,-0.605935
row_03,-0.334096,0.541706,0.102335,0.022186,0.764179
row_04,-0.511701,-0.130958,0.192032,0.807294,-0.180035


The matrix inverse operation is accessed the same way, but requires a square matrix:

In [15]:
m.inv.to_dataframe()

Unnamed: 0,col_00,col_01,col_02,col_03,col_04
row_00,2.209231,5.753375,-27.950736,34.844121,-7.556408
row_01,1.030882,0.077795,2.621131,-3.283449,-0.675201
row_02,2.555754,3.059519,-18.06036,24.076345,-6.253159
row_03,-3.503411,-4.613829,28.504399,-35.344917,8.360391
row_04,-0.974855,-1.517655,5.005045,-7.014398,3.377655


#Manipulating `matrix` shape
`matrix` has lots of functionality to support getting submatrices by row and col names:

In [17]:

print(m.get(row_names="row_00",col_names=["col_01","col_03"]))

row names: ['row_00']
col names: ['col_01', 'col_03']
[[ 0.95028496  0.12231117]]


`extract()` calls `get()` then `drop()`:

In [18]:
# Work on a deep copy so that `m` itself is left intact for later cells:
# extract() is described above as get() followed by drop().
from copy import deepcopy
m_copy = deepcopy(m)
sub_m = m_copy.extract(row_names="row_00",col_names=["col_01","col_03"])
# Only the last expression of a notebook cell is displayed, so the frame
# from m_copy is not shown in the recorded output -- only sub_m's is.
m_copy.to_dataframe()
sub_m.to_dataframe()

Unnamed: 0,col_01,col_03
row_00,0.950285,0.122311


#Operator overloading
The operator overloading uses the auto-align functionality as well as the `isdiagonal` flag for super easy linear algebra.  The "inner join" of the two objects is found and the rows and cols are aligned appropriately:

In [19]:
#a new matrix object that is not "aligned" with m
row_names = ["row_03","row_02","row_00"]
col_names = ["col_01","col_10","col_100"]
m_mix = matrix(x=np.random.random((3,3)),row_names=row_names,col_names=col_names)
m_mix.to_dataframe()


Unnamed: 0,col_01,col_10,col_100
row_03,0.056911,0.786522,0.146138
row_02,0.735065,0.70696,0.488473
row_00,0.812376,0.846878,0.153442


In [20]:
m.to_dataframe()

Unnamed: 0,col_00,col_01,col_02,col_03,col_04
row_00,0.087911,0.950285,0.448006,0.122311,0.913299
row_01,0.713354,0.714202,0.104961,0.638358,0.352919
row_02,0.167784,0.108866,0.868513,0.685952,0.307157
row_03,0.111026,0.005891,0.746174,0.521279,0.340705
row_04,0.327842,0.446092,0.439079,0.388221,0.970629


In [21]:
# Multiply m by the transpose of m_mix.  The inner dimension is aligned by
# name: per the recorded output, only "col_01" is shared between m's
# columns and m_mix.T's rows, so each entry is a single product
# m[row, col_01] * m_mix[row', col_01].
prod = m * m_mix.T
prod.to_dataframe()

Unnamed: 0,row_03,row_02,row_00
row_00,0.054081,0.698521,0.771988
row_01,0.040646,0.524985,0.5802
row_02,0.006196,0.080023,0.08844
row_03,0.000335,0.00433,0.004786
row_04,0.025387,0.327907,0.362394


In [22]:
# Reverse the operand order.  Here the inner dimension is the row names:
# per the recorded output, the shared names row_03, row_02 and row_00 are
# used, and the result keeps m_mix's column names as rows and m's column
# names as columns.
prod2 = m_mix.T * m
prod2.to_dataframe()

Unnamed: 0,col_00,col_01,col_02,col_03,col_04
col_01,0.201068,0.852347,1.044828,0.633249,0.987112
col_10,0.280391,0.886373,1.580293,0.998521,1.258573
col_100,0.111672,0.199852,0.602033,0.430016,0.339966


In [23]:
# Element-wise addition keeps only the names common to both operands: per
# the recorded output, rows row_03/row_02/row_00 (in m_mix's order) and
# the single shared column col_01.
(m_mix + m).to_dataframe()

Unnamed: 0,col_01
row_03,0.062802
row_02,0.843931
row_00,1.762661


#The `cov` derived type
The `cov` type of `mat_handler` is designed specifically to handle covariance matrices.  It makes some assumptions, such as symmetry (and, accordingly, that `row_names == col_names`). 

In [24]:
c = cov(m.newx,m.row_names)

The `cov` class supports several additional I/O routines, including the PEST uncertainty file (.unc):

In [25]:
c.to_uncfile("test.unc")

In [27]:
c1 = cov()
c1.from_uncfile("test.unc")
print(c1)

row names: ['row_00', 'row_01', 'row_02', 'row_03', 'row_04']
col names: ['row_00', 'row_01', 'row_02', 'row_03', 'row_04']
[[ 0.08791103  0.95028496  0.44800621  0.12231117  0.91329927]
 [ 0.71335391  0.71420174  0.10496111  0.63835796  0.35291927]
 [ 0.16778418  0.10886583  0.86851255  0.68595219  0.30715702]
 [ 0.11102555  0.00589094  0.74617443  0.52127884  0.34070508]
 [ 0.32784154  0.44609205  0.43907851  0.3882215   0.97062915]]


We can also build `cov` objects implied by PEST control file parameter bounds or observation weights:

In [28]:
# Build two covariance objects from the same control file
# (henry/pest.pst): one from the parameter bounds and one from the
# observation weights.  Each starts as an empty `cov` and is filled in
# place by its from_* method.
parcov = cov()
parcov.from_parbounds(os.path.join("henry","pest.pst"))
obscov = cov()
obscov.from_obsweights(os.path.join("henry","pest.pst"))

In [29]:
parcov.to_dataframe() #to_dataframe for diagonal types builds a full matrix dataframe - can be costly

Unnamed: 0,mult1,kr01c01,kr01c02,kr01c03,kr01c04,kr01c05,kr01c06,kr01c07,kr01c08,kr01c09,...,kr10c51,kr10c52,kr10c53,kr10c54,kr10c55,kr10c56,kr10c57,kr10c58,kr10c59,kr10c60
mult1,0.022655,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,...,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00
kr01c01,0.000000,0.25,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,...,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00
kr01c02,0.000000,0.00,0.25,0.00,0.00,0.00,0.00,0.00,0.00,0.00,...,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00
kr01c03,0.000000,0.00,0.00,0.25,0.00,0.00,0.00,0.00,0.00,0.00,...,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00
kr01c04,0.000000,0.00,0.00,0.00,0.25,0.00,0.00,0.00,0.00,0.00,...,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00
kr01c05,0.000000,0.00,0.00,0.00,0.00,0.25,0.00,0.00,0.00,0.00,...,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00
kr01c06,0.000000,0.00,0.00,0.00,0.00,0.00,0.25,0.00,0.00,0.00,...,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00
kr01c07,0.000000,0.00,0.00,0.00,0.00,0.00,0.00,0.25,0.00,0.00,...,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00
kr01c08,0.000000,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.25,0.00,...,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00
kr01c09,0.000000,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.25,...,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00


In [30]:
obscov.to_dataframe()# notice the zero-weight obs have been assigned a really large uncertainty

Unnamed: 0,h_obs01_1,h_obs01_2,h_obs02_1,h_obs02_2,h_obs03_1,h_obs03_2,h_obs04_1,h_obs04_2,h_obs05_1,h_obs05_2,...,c_obs12_2,c_obs13_1,c_obs13_2,c_obs14_1,c_obs14_2,c_obs15_1,c_obs15_2,pd_one,pd_ten,pd_half
h_obs01_1,0.000043,0.000000e+00,0.000000,0.000000e+00,0.000000,0.000000e+00,0.000000,0.000000e+00,0.000000,0.000000e+00,...,0.000000e+00,0.0000,0.000000e+00,0.00000,0.000000e+00,0.000000,0.000000e+00,0.000000e+00,0.000000e+00,0.000000e+00
h_obs01_2,0.000000,1.000000e+60,0.000000,0.000000e+00,0.000000,0.000000e+00,0.000000,0.000000e+00,0.000000,0.000000e+00,...,0.000000e+00,0.0000,0.000000e+00,0.00000,0.000000e+00,0.000000,0.000000e+00,0.000000e+00,0.000000e+00,0.000000e+00
h_obs02_1,0.000000,0.000000e+00,0.000043,0.000000e+00,0.000000,0.000000e+00,0.000000,0.000000e+00,0.000000,0.000000e+00,...,0.000000e+00,0.0000,0.000000e+00,0.00000,0.000000e+00,0.000000,0.000000e+00,0.000000e+00,0.000000e+00,0.000000e+00
h_obs02_2,0.000000,0.000000e+00,0.000000,1.000000e+60,0.000000,0.000000e+00,0.000000,0.000000e+00,0.000000,0.000000e+00,...,0.000000e+00,0.0000,0.000000e+00,0.00000,0.000000e+00,0.000000,0.000000e+00,0.000000e+00,0.000000e+00,0.000000e+00
h_obs03_1,0.000000,0.000000e+00,0.000000,0.000000e+00,0.000043,0.000000e+00,0.000000,0.000000e+00,0.000000,0.000000e+00,...,0.000000e+00,0.0000,0.000000e+00,0.00000,0.000000e+00,0.000000,0.000000e+00,0.000000e+00,0.000000e+00,0.000000e+00
h_obs03_2,0.000000,0.000000e+00,0.000000,0.000000e+00,0.000000,1.000000e+60,0.000000,0.000000e+00,0.000000,0.000000e+00,...,0.000000e+00,0.0000,0.000000e+00,0.00000,0.000000e+00,0.000000,0.000000e+00,0.000000e+00,0.000000e+00,0.000000e+00
h_obs04_1,0.000000,0.000000e+00,0.000000,0.000000e+00,0.000000,0.000000e+00,0.000043,0.000000e+00,0.000000,0.000000e+00,...,0.000000e+00,0.0000,0.000000e+00,0.00000,0.000000e+00,0.000000,0.000000e+00,0.000000e+00,0.000000e+00,0.000000e+00
h_obs04_2,0.000000,0.000000e+00,0.000000,0.000000e+00,0.000000,0.000000e+00,0.000000,1.000000e+60,0.000000,0.000000e+00,...,0.000000e+00,0.0000,0.000000e+00,0.00000,0.000000e+00,0.000000,0.000000e+00,0.000000e+00,0.000000e+00,0.000000e+00
h_obs05_1,0.000000,0.000000e+00,0.000000,0.000000e+00,0.000000,0.000000e+00,0.000000,0.000000e+00,0.000043,0.000000e+00,...,0.000000e+00,0.0000,0.000000e+00,0.00000,0.000000e+00,0.000000,0.000000e+00,0.000000e+00,0.000000e+00,0.000000e+00
h_obs05_2,0.000000,0.000000e+00,0.000000,0.000000e+00,0.000000,0.000000e+00,0.000000,0.000000e+00,0.000000,1.000000e+60,...,0.000000e+00,0.0000,0.000000e+00,0.00000,0.000000e+00,0.000000,0.000000e+00,0.000000e+00,0.000000e+00,0.000000e+00
