In [1]:
import numpy as np
import matplotlib.pyplot as plt
import numpy.random as rnd
from numpy.linalg import cholesky
%matplotlib inline

In [2]:
def genData(M, N = 100):
    rnd.seed(0)
    Sigma = np.array([[1, 0], [0, 1]])
    R = cholesky(Sigma)
    
    mu = np.array([[-M, 0]])
    s1 = np.dot(np.random.randn(N, 2), R) + mu
    mu = np.array([[M, 0]])
    s2 = np.dot(np.random.randn(N, 2), R) + mu
    
    one = np.ones(s1.shape[0]).reshape(100,1)
    
    s1 = np.concatenate((s1, -1 * one), axis = 1)
    s2 = np.concatenate((s2,  1 * one), axis = 1)
    s_t = np.concatenate((s1, s2), axis=0)
    s = np.concatenate((np.ones((s_t.shape[0],1)), s_t), axis=1)
    
    return s

In [5]:
S = genData(5)
print(S[:3], S[-3:])

[[ 1.         -3.23594765  0.40015721 -1.        ]
 [ 1.         -4.02126202  2.2408932  -1.        ]
 [ 1.         -3.13244201 -0.97727788 -1.        ]] [[ 1.          4.70816264 -0.76149221  1.        ]
 [ 1.          5.85792392  1.14110187  1.        ]
 [ 1.          6.46657872  0.85255194  1.        ]]


In [4]:
def genPlt(S, w, plt):
    y = np.linspace(-3,3)
    x = (w[0] + w[2] * y)/w[1]

    plt.plot(S[:101,1],S[:101,2],'+')
    plt.plot(S[101:,1],S[101:,2],'o')
    plt.plot(x,y)
    
    return plt

In [27]:
def cal_cov_and_avg(samples):
    """
    给定一个类别的数据，计算协方差矩阵和平均向量
    :param samples: 
    :return: 
    """
    u1 = np.mean(samples, axis=0)
    cov_m = np.zeros((samples.shape[1], samples.shape[1]))
    for s in samples:
        t = s - u1
        cov_m += t * t.T
    return cov_m, u1


def fisher(c_1, c_2):
    """
    fisher算法实现(请参考上面推导出来的公式，那个才是精华部分)
    :param c_1: 
    :param c_2: 
    :return: 
    """
    cov_1, u1 = cal_cov_and_avg(c_1)
    cov_2, u2 = cal_cov_and_avg(c_2)
    s_w = cov_1 + cov_2
    # u, s, v = np.linalg.svd(s_w)  # 奇异值分解
    # s_w_inv = np.dot(np.dot(v.T, np.linalg.inv(np.diag(s))), u.T)
    s_w_inv = np.linalg.pinv(s_w)
    return np.dot(s_w_inv, u1 - u2)

In [29]:
cov1, u1 = cal_cov_and_avg(S[:S.shape[0]//2, :-1])
print(cov1, '\n\n\n\n\n', u1)
cov2, u2 = cal_cov_and_avg(S[S.shape[0]//2:, :-1])
print(cov2, '\n\n\n\n\n', u2)

[[  0.         104.43979414 103.18809343]
 [  0.         104.43979414 103.18809343]
 [  0.         104.43979414 103.18809343]] 




 [ 1.         -5.00095768  0.14277867]
[[ 0.         92.86441447 85.03739282]
 [ 0.         92.86441447 85.03739282]
 [ 0.         92.86441447 85.03739282]] 




 [ 1.          4.85692167 -0.11295063]


In [40]:
# w = fisher(S[:S.shape[0]//2, :-1],S[S.shape[0]//2:, :-1])
s_w = cov1 + cov2
np.linalg.pinv(s_w)

array([[0.        , 0.        , 0.        ],
       [0.00088448, 0.00088448, 0.00088448],
       [0.00084378, 0.00084378, 0.00084378]])