## Iris鸢尾花分类
采用产生式分类器对鸢尾花进行分类

In [1]:
import pandas as pd
import numpy as np

from sklearn.naive_bayes import GaussianNB
from sklearn.discriminant_analysis import QuadraticDiscriminantAnalysis

# 模型性能的评价,本案例为分类问题，用正确率作为模型性能评价指标
from sklearn.metrics import accuracy_score 

#作图
import matplotlib.pyplot as plt 
import seaborn as sns
%matplotlib inline

#显示中文
plt.rcParams['font.sans-serif'] = ['Arial Unicode MS']

In [2]:
#读取数据
# csv文件没有列名，增加列名
feat_names = ['sepal-length', 'sepal-width', 'petal-length', 'petal-width', 'species']

dpath = "./data/"
df = pd.read_csv(dpath + "iris.csv", names = feat_names)

#通过观察前5行，了解数据每列（特征）的概况
df.head()

Unnamed: 0,sepal-length,sepal-width,petal-length,petal-width,species
0,5.1,3.5,1.4,0.2,Iris-setosa
1,4.9,3.0,1.4,0.2,Iris-setosa
2,4.7,3.2,1.3,0.2,Iris-setosa
3,4.6,3.1,1.5,0.2,Iris-setosa
4,5.0,3.6,1.4,0.2,Iris-setosa


In [3]:
#标签字符串映射为整数（在此并不一定需要）
target_map = {'Iris-setosa':0, 
              'Iris-versicolor':1,
              'Iris-virginica':2 }  #2

# Use the pandas apply method to numerically encode our attrition target variable
df['species'] = df['species'].apply(lambda x: target_map[x])

In [4]:
# 从原始数据中分离输入特征x和输出y
y = df['species']
X = df.drop('species', axis = 1)

In [5]:
#无需特征缩放
#将数据分割训练数据与测试数据
#分类数据，跳用分层采样
from sklearn.model_selection import train_test_split

# 随机采样20%的数据构建测试集，其余作为训练样本
X_train, X_test, y_train, y_test = train_test_split( X, y, test_size=0.2, random_state=4, stratify=y ) 

### 朴素贝叶斯

In [6]:
clf = GaussianNB()

clf.fit(X_train, y_train)

y_test_pred = clf.predict((X_test))
acc = accuracy_score(y_test, y_test_pred)
acc

1.0

In [7]:
print('类先验：')
print(clf.class_prior_ )                   

print('均值：')
print(clf.theta_)                           #各类在各个特征上的均值

print('标准差：')
print(clf.sigma_)                           #各类在各个特征上的标准差

类先验：
[ 0.33333333  0.33333333  0.33333333]
均值：
[[ 5.015   3.44    1.4525  0.2425]
 [ 5.9775  2.805   4.27    1.32  ]
 [ 6.5875  2.9625  5.5425  2.005 ]]
标准差：
[[ 0.136775    0.1509      0.03199375  0.01094375]
 [ 0.26524375  0.089475    0.2176      0.0401    ]
 [ 0.41859375  0.11384375  0.28644375  0.076475  ]]


### QDA

In [8]:
clf = QuadraticDiscriminantAnalysis()

clf.fit(X_train, y_train)

y_test_pred = clf.predict((X_test))
acc = accuracy_score(y_test, y_test_pred)
acc

0.93333333333333335

In [9]:
print('类先验：')
print(clf.priors_ )                   

print('均值：')
print(clf.means_)                           

print('协方差矩阵：')
print(clf.covariances_)

print('主轴方向：')
print(clf.rotations_)

print("主轴方向的方差:")
print(clf.scalings_)

类先验：
[ 0.33333333  0.33333333  0.33333333]
均值：
[[ 5.015   3.44    1.4525  0.2425]
 [ 5.9775  2.805   4.27    1.32  ]
 [ 6.5875  2.9625  5.5425  2.005 ]]
协方差矩阵：




AttributeError: 'QuadraticDiscriminantAnalysis' object has no attribute 'covariance_'