创建演示数据：

In [1]:
import numpy as np
from sklearn.datasets import make_classification

# Synthetic two-class data set: 1000 samples with two informative features and
# no redundant ones; the fixed random_state keeps the data reproducible.
X, y = make_classification(
    n_samples=1000,
    n_features=2,
    n_informative=2,
    n_redundant=0,
    n_classes=2,
    random_state=0,
)

绘制演示数据的散点图：

In [2]:
import numpy as np
import matplotlib.pyplot as plt

%matplotlib notebook

# Use a locally installed font that can render the Chinese title.
plt.rcParams['font.sans-serif'] = ['PingFang HK']
fig, ax = plt.subplots()
ax.set_facecolor('#f8f9fa')

# Split the samples by label; column 0 / column 1 are the two features.
class0 = X[y == 0]
class1 = X[y == 1]
x1, y1 = class0[:, 0], class0[:, 1]
x2, y2 = class1[:, 0], class1[:, 1]
p1 = plt.scatter(x1, y1, c='#e63946', marker='o', s=20)
p2 = plt.scatter(x2, y2, c='#457b9d', marker='x', s=20)

# One shared colour for the title, axis labels and tick labels.
label_color = '#264653'
ax.set_title('二次判别分析', color=label_color)
ax.set_xlabel('X1', color=label_color)
ax.set_ylabel('X2', color=label_color)
ax.tick_params(labelcolor=label_color)
plt.legend([p1, p2], ["0", "1"], loc="upper left")
plt.show()

<IPython.core.display.Javascript object>

In [3]:
from sklearn.discriminant_analysis import QuadraticDiscriminantAnalysis

# Create the scikit-learn QDA estimator and fit it on the demo data in one
# step (fit returns the fitted estimator).
qda = QuadraticDiscriminantAnalysis().fit(X, y)
# Predict the training samples; kept as the reference result for comparison.
result_sklearn = qda.predict(X)
print(result_sklearn)

[0 1 1 1 1 1 0 1 0 0 1 1 1 0 0 0 1 1 1 1 0 0 0 0 1 0 0 1 0 1 0 1 1 1 1 0 1
 0 0 1 1 1 1 0 0 0 0 1 0 1 0 0 1 0 1 0 1 0 1 0 0 0 1 1 1 1 0 1 0 1 0 1 0 1
 0 0 1 1 0 0 1 1 1 1 1 1 0 0 1 1 1 0 0 1 1 1 0 1 0 0 0 0 0 0 0 0 0 0 0 1 1
 1 0 0 1 1 1 0 1 1 1 1 1 1 0 1 0 0 0 1 1 0 0 1 0 0 1 0 0 0 1 1 1 1 1 1 0 0
 0 1 1 0 1 1 1 1 0 1 1 1 1 1 1 0 0 1 0 0 0 0 1 1 1 1 1 1 1 0 1 1 1 0 0 1 1
 0 0 0 0 1 0 1 0 0 0 1 1 0 1 0 0 0 0 0 1 1 0 1 1 1 0 0 0 1 0 0 1 0 1 1 1 1
 1 0 1 0 0 0 0 1 0 0 0 1 1 1 0 0 0 1 1 1 0 0 1 1 1 1 1 1 0 0 1 1 0 0 0 0 0
 0 0 1 1 1 0 1 1 1 0 0 1 1 0 1 1 0 1 1 0 0 1 0 0 1 1 0 1 0 0 0 1 1 1 1 0 0
 0 0 1 1 0 1 1 0 1 0 0 0 1 0 0 1 0 0 0 1 0 1 0 0 1 0 1 1 0 1 1 0 1 0 1 0 0
 0 1 1 1 0 0 0 1 0 1 0 1 0 0 0 0 0 0 1 0 1 0 0 1 0 1 0 0 0 1 1 0 1 1 1 0 1
 1 1 0 1 0 0 1 1 1 0 1 0 0 0 1 0 1 1 0 1 0 1 0 1 1 0 1 1 0 0 1 1 0 0 0 0 0
 1 1 0 0 1 0 1 0 0 0 1 0 0 0 1 1 1 1 0 1 0 0 1 1 1 1 0 0 0 1 1 1 1 1 1 1 1
 1 0 1 0 0 1 0 0 1 1 0 0 0 0 0 0 0 1 0 1 1 0 1 1 1 0 0 1 0 0 0 1 1 1 1 0 0
 1 0 0 1 1 1 1 1 0 1 0 1 

可视化 scikit-learn 二次判别分析的决策区域：

In [4]:
import numpy as np
import matplotlib.pyplot as plt
from matplotlib.colors import LinearSegmentedColormap

%matplotlib notebook

# Use a locally installed font that can render the Chinese title.
plt.rcParams['font.sans-serif'] = ['PingFang HK']
fig, ax = plt.subplots()
ax.set_facecolor('#f8f9fa')

# Dense grid covering the data range, padded by 0.5 on every side.
x_min, x_max = X[:, 0].min() - .5, X[:, 0].max() + .5
y_min, y_max = X[:, 1].min() - .5, X[:, 1].max() + .5
xx, yy = np.meshgrid(np.arange(x_min, x_max, .01), np.arange(y_min, y_max, .01))

# Let the fitted scikit-learn estimator label every grid point, then fold the
# flat prediction vector back into the grid shape for pcolormesh.
grid_points = np.column_stack([xx.ravel(), yy.ravel()])
Z = qda.predict(grid_points).reshape(xx.shape)

# Two-colour map: one background colour per predicted class.
clist = ['#ffadad', '#8ecae6']
newcmp = LinearSegmentedColormap.from_list('point_color', clist)
plt.pcolormesh(xx, yy, Z, cmap=newcmp)
plt.xlim(xx.min(), xx.max())
plt.ylim(yy.min(), yy.max())

# Overlay the training samples, split by their true label.
class0 = X[y == 0]
class1 = X[y == 1]
x1, y1 = class0[:, 0], class0[:, 1]
x2, y2 = class1[:, 0], class1[:, 1]
p1 = plt.scatter(x1, y1, c='#e63946', marker='o', s=20)
p2 = plt.scatter(x2, y2, c='#457b9d', marker='x', s=20)

label_color = '#264653'
ax.set_title('二次判别分析', color=label_color)
ax.set_xlabel('X1', color=label_color)
ax.set_ylabel('X2', color=label_color)
ax.tick_params(labelcolor=label_color)
plt.legend([p1, p2], ["0", "1"], loc="upper left")
plt.show()

<IPython.core.display.Javascript object>



In [5]:
def qda(X, y):
    """
    Estimate the per-class parameters of quadratic discriminant analysis (QDA).

    NOTE: defining this function rebinds the notebook-level name ``qda``,
    which an earlier cell uses for the fitted scikit-learn estimator; cells
    that call ``qda.predict`` must be run before this one.

    args:
        X - training samples, one row per sample
        y - target label of every sample
    return:
        y_classes - sorted unique labels
        priors - per-class prior, i.e. class size / total number of samples
        means - per-class mean vector
        sigmags - per-class PSEUDO-INVERSE of the covariance matrix (the
                  covariance itself is normalised by the class size)
        dets - per-class determinant of the covariance matrix
    """
    y_classes = np.unique(y)
    priors, means, sigmags, dets = [], [], [], []
    for label in y_classes:
        # Rows of X that belong to the current class.
        members = X[y == label]
        n_members = members.shape[0]
        priors.append(n_members / X.shape[0])
        center = np.mean(members, axis=0)
        means.append(center)
        # Covariance of the centred samples, divided by the class size.
        centered = members - center
        cov = centered.T.dot(centered) / n_members
        # Store the pseudo-inverse so the discriminant can use it directly.
        sigmags.append(np.linalg.pinv(cov))
        dets.append(np.linalg.det(cov))
    return y_classes, priors, means, sigmags, dets

def discriminant(X, y_classes, priors, means, sigmags, dets):
    """
    Classify samples with the quadratic discriminant score

        delta_k(x) = -0.5 * ((x - mu_k)^T S_k (x - mu_k) + log(det_k)) + log(prior_k)

    where S_k is the matrix passed in ``sigmags[k]``, and return for every
    sample the label with the highest score.

    args:
        X - samples to classify, one row per sample
        y_classes - class labels
        priors - per-class prior probability
        means - per-class mean vector
        sigmags - per-class matrix used in the quadratic form (the inverse
                  covariance as returned by ``qda``)
        dets - per-class determinant of the covariance matrix
    return:
        array with the predicted label of every row of X
    """
    y_classes = np.asarray(y_classes)
    scores = []
    for idx in range(len(y_classes)):
        centered = X - means[idx]
        # Row-wise quadratic form (x - mu)^T S (x - mu).
        quad_form = np.sum(np.multiply(centered.dot(sigmags[idx]), centered), axis=1)
        # The prior enters the score through its logarithm; adding the raw
        # probability shifts the decision boundary whenever the classes are
        # not balanced.
        score = -0.5 * (quad_form + np.log(dets[idx])) + np.log(priors[idx])
        scores.append(score)
    # scores is (n_classes, n_samples): pick the best class per sample.
    return y_classes.take(np.array(scores).T.argmax(1))

拟合数据、预测，并与 scikit-learn 的预测结果对比：

In [6]:
# Estimate the per-class parameters with the from-scratch implementation and
# keep them under the individual names the later cells use.
qda_params = qda(X, y)
y_classes, priors, means, sigmags, dets = qda_params
# Classify the training samples with those parameters.
result = discriminant(X, *qda_params)
print(result)
# Check the predictions against the scikit-learn reference result.
print(np.allclose(result, result_sklearn))

[0 1 1 1 1 1 0 1 0 0 1 1 1 0 0 0 1 1 1 1 0 0 0 0 1 0 0 1 0 1 0 1 1 1 1 0 1
 0 0 1 1 1 1 0 0 0 0 1 0 1 0 0 1 0 1 0 1 0 1 0 0 0 1 1 1 1 0 1 0 1 0 1 0 1
 0 0 1 1 0 0 1 1 1 1 1 1 0 0 1 1 1 0 0 1 1 1 0 1 0 0 0 0 0 0 0 0 0 0 0 1 1
 1 0 0 1 1 1 0 1 1 1 1 1 1 0 1 0 0 0 1 1 0 0 1 0 0 1 0 0 0 1 1 1 1 1 1 0 0
 0 1 1 0 1 1 1 1 0 1 1 1 1 1 1 0 0 1 0 0 0 0 1 1 1 1 1 1 1 0 1 1 1 0 0 1 1
 0 0 0 0 1 0 1 0 0 0 1 1 0 1 0 0 0 0 0 1 1 0 1 1 1 0 0 0 1 0 0 1 0 1 1 1 1
 1 0 1 0 0 0 0 1 0 0 0 1 1 1 0 0 0 1 1 1 0 0 1 1 1 1 1 1 0 0 1 1 0 0 0 0 0
 0 0 1 1 1 0 1 1 1 0 0 1 1 0 1 1 0 1 1 0 0 1 0 0 1 1 0 1 0 0 0 1 1 1 1 0 0
 0 0 1 1 0 1 1 0 1 0 0 0 1 0 0 1 0 0 0 1 0 1 0 0 1 0 1 1 0 1 1 0 1 0 1 0 0
 0 1 1 1 0 0 0 1 0 1 0 1 0 0 0 0 0 0 1 0 1 0 0 1 0 1 0 0 0 1 1 0 1 1 1 0 1
 1 1 0 1 0 0 1 1 1 0 1 0 0 0 1 0 1 1 0 1 0 1 0 1 1 0 1 1 0 0 1 1 0 0 0 0 0
 1 1 0 0 1 0 1 0 0 0 1 0 0 0 1 1 1 1 0 1 0 0 1 1 1 1 0 0 0 1 1 1 1 1 1 1 1
 1 0 1 0 0 1 0 0 1 1 0 0 0 0 0 0 0 1 0 1 1 0 1 1 1 0 0 1 0 0 0 1 1 1 1 0 0
 1 0 0 1 1 1 1 1 0 1 0 1 

可视化：

In [7]:
import numpy as np
import matplotlib.pyplot as plt
from matplotlib.colors import LinearSegmentedColormap

%matplotlib notebook

# Use a locally installed font that can render the Chinese title.
plt.rcParams['font.sans-serif'] = ['PingFang HK']
fig, ax = plt.subplots()
ax.set_facecolor('#f8f9fa')

# Dense grid covering the data range, padded by 0.5 on every side.
x_min, x_max = X[:, 0].min() - .5, X[:, 0].max() + .5
y_min, y_max = X[:, 1].min() - .5, X[:, 1].max() + .5
xx, yy = np.meshgrid(np.arange(x_min, x_max, .01), np.arange(y_min, y_max, .01))

# Label every grid point with the from-scratch classifier and bind the result
# to Z, so the figure shows this model's regions rather than a Z left over
# from an earlier cell. `result` (the training predictions) is left untouched.
Z = discriminant(np.c_[xx.ravel(), yy.ravel()], y_classes, priors, means, sigmags, dets)
Z = Z.reshape(xx.shape)

# Two-colour map: one background colour per predicted class.
clist = ['#ffadad', '#8ecae6']
newcmp = LinearSegmentedColormap.from_list('point_color', clist)
plt.pcolormesh(xx, yy, Z, cmap=newcmp)
plt.xlim(xx.min(), xx.max())
plt.ylim(yy.min(), yy.max())

# Overlay the training samples, split by their true label.
x1 = X[y == 0][:, 0]
y1 = X[y == 0][:, 1]
x2 = X[y == 1][:, 0]
y2 = X[y == 1][:, 1]
p1 = plt.scatter(x1, y1, c='#e63946', marker='o', s=20)
p2 = plt.scatter(x2, y2, c='#457b9d', marker='x', s=20)

ax.set_title('二次判别分析', color='#264653')
ax.set_xlabel('X1', color='#264653')
ax.set_ylabel('X2', color='#264653')
ax.tick_params(labelcolor='#264653')
plt.legend([p1, p2], ["0", "1"], loc="upper left")
plt.show()

<IPython.core.display.Javascript object>

