In [None]:
from sklearn import datasets
import seaborn as sns

In [None]:
iris = sns.load_dataset("iris")

In [None]:
type(iris)

In [None]:
iris.head()

In [None]:
iris.info()

In [None]:
iris.shape

In [None]:
iris.describe()

In [None]:
iris.species.value_counts()

In [None]:
sns.pairplot(data = iris, hue = "species")

In [None]:
# Work with the two petal measurements only: remove both sepal columns,
# keeping the species label alongside the remaining features.
iris_simple = iris.drop(columns=["sepal_length", "sepal_width"])
iris_simple.head()

In [None]:
from sklearn.preprocessing import LabelEncoder

# Encode the species names as integer class codes.
# The encoder is fitted on the untouched `iris` frame instead of on
# `iris_simple["species"]`: reading and overwriting the same column made this cell
# non-idempotent -- a second run re-fitted the encoder on the already-encoded
# integers, after which `encoder.inverse_transform` no longer returned species names.
# `iris_simple` was built from `iris` by dropping columns only, so rows line up 1:1.
encoder = LabelEncoder()
iris_simple["species"] = encoder.fit_transform(iris["species"])

In [None]:
iris_simple

In [None]:
from sklearn.preprocessing import StandardScaler
import pandas as pd

In [None]:
# Standardise the two petal features and wrap the resulting array back into a
# DataFrame so that describe() can summarise the scaled columns.
# NOTE: this scaled frame is only inspected here; the train/test split further
# down is taken from the unscaled `iris_simple`.
trans = StandardScaler()
_iris_simple = pd.DataFrame(
    trans.fit_transform(iris_simple[["petal_length", "petal_width"]]),
    columns=["petal_length", "petal_width"],
)
_iris_simple.describe()

In [None]:
from sklearn.model_selection import train_test_split
# Hold out 20% of the rows for testing.
# - random_state pins the shuffle so a "Restart & Run All" reproduces the same split,
#   which keeps the accuracies of the classifiers below comparable between runs.
# - stratify keeps the class proportions of the full data in both subsets, which
#   matters for a test set this small.
train_set, test_set = train_test_split(
    iris_simple,
    test_size=0.2,
    random_state=42,
    stratify=iris_simple["species"],
)
test_set.head()

In [None]:
# Training features: the two petal measurements of the training rows.
iris_x_train = train_set.loc[:, ["petal_length", "petal_width"]]
iris_x_train.head()

In [None]:
# Training target: an independent copy of the encoded species column.
iris_y_train = train_set.loc[:, "species"].copy()
iris_y_train.head()

In [None]:
# Test features: the two petal measurements of the held-out rows.
iris_x_test = test_set.loc[:, ["petal_length", "petal_width"]]
iris_x_test.head()

In [None]:
# Test target: an independent copy of the encoded species column.
iris_y_test = test_set.loc[:, "species"].copy()
iris_y_test.head()

# k近邻算法

In [None]:
from sklearn.neighbors import KNeighborsClassifier

In [None]:
clf = KNeighborsClassifier()
clf

In [None]:
clf.fit(iris_x_train, iris_y_train)

In [None]:
res = clf.predict(iris_x_test)
print(res)
print(iris_y_test.values)

In [None]:
encoder.inverse_transform(res)

In [None]:
accuracy = clf.score(iris_x_test, iris_y_test)
print("预测正确率:{:.0%}".format(accuracy))

In [None]:
# Put the test features side by side with the true label ("yy") and the model's
# prediction ("pre"). assign() returns a new frame, so iris_x_test is left untouched.
out = iris_x_test.assign(yy=iris_y_test, pre=res)
out

In [None]:
# Persist the comparison table built above (test features, true label, prediction).
# The path is relative, so the file is written to the kernel's current working
# directory; to_csv is called with its defaults, so the row index is written too.
out.to_csv("iris_predict.csv")

In [None]:
import numpy as np
import matplotlib as mpl
import matplotlib.pyplot as plt

In [None]:
def draw(clf):
    """Plot the decision regions of a fitted classifier on the petal feature plane.

    A 500 x 500 grid spanning the observed range of ``petal_length`` and
    ``petal_width`` in the notebook-level ``iris_simple`` frame is classified by
    ``clf``. The predictions are drawn as a coloured background and every row of
    ``iris_simple`` is scattered on top, coloured by its encoded species.

    Parameters
    ----------
    clf : estimator
        Classifier that has already been fitted on the two petal features.
        Only its ``predict`` method is called.

    Returns
    -------
    None
        The figure is drawn through the pyplot state machine.
    """
    features = ["petal_length", "petal_width"]
    # The integer codes 0, 1, 2 were produced by LabelEncoder, which numbers the
    # classes in sorted order. The legend names therefore have to be alphabetical;
    # the previous order ("setosa", "virginica", "versicolor") swapped the labels
    # of the red and blue points.
    species = ["setosa", "versicolor", "virginica"]
    color = ["g", "r", "b"]
    last_code = len(species) - 1

    M, N = 500, 500
    x1_min, x2_min = iris_simple[features].min(axis=0)
    x1_max, x2_max = iris_simple[features].max(axis=0)
    t1 = np.linspace(x1_min, x1_max, M)
    t2 = np.linspace(x2_min, x2_max, N)
    x1, x2 = np.meshgrid(t1, t2)

    # One grid point per row: (petal_length, petal_width).
    x_show = np.column_stack((x1.ravel(), x2.ravel()))
    if hasattr(clf, "feature_names_in_"):
        # The estimator was fitted on a DataFrame; hand it the same column names
        # so scikit-learn does not warn about missing feature names.
        x_show = pd.DataFrame(x_show, columns=features)
    y_predict = clf.predict(x_show)

    cm_light = mpl.colors.ListedColormap(['#0000FF', '#FFFF00', '#00FF00'])
    cm_dark = mpl.colors.ListedColormap(color)

    plt.figure(figsize=(10, 6))
    # vmin/vmax pin code k to colour k. Without them the colormap is normalised to
    # the values actually present, so a classifier that predicts fewer than three
    # classes on the grid would be painted with the wrong colours.
    plt.pcolormesh(t1, t2, y_predict.reshape(x1.shape), cmap=cm_light,
                   vmin=0, vmax=last_code)

    plt.scatter(iris_simple["petal_length"], iris_simple["petal_width"], label=None,
                c=iris_simple["species"], cmap=cm_dark, vmin=0, vmax=last_code,
                marker='o', edgecolors='k')
    plt.xlabel("petal_length")
    plt.ylabel("petal_width")

    # Empty scatters exist only to create one legend entry per class.
    for name, col in zip(species, color):
        plt.scatter([], [], c=col, s=40, label=name)
    plt.legend(loc="best")
    plt.title('iris_classfier')

In [None]:
draw(clf)

# 朴素贝叶斯算法

In [None]:
from sklearn.naive_bayes import GaussianNB

In [None]:
clf = GaussianNB()
clf

In [None]:
clf.fit(iris_x_train, iris_y_train)

In [None]:
res = clf.predict(iris_x_test)
print(res)
print(iris_y_test.values)

In [None]:
accuracy = clf.score(iris_x_test, iris_y_test)
print("预测正确率:{:.0%}".format(accuracy))

In [None]:
draw(clf)

# 决策树算法

In [None]:
from sklearn.tree import DecisionTreeClassifier

In [None]:
# Decision tree with a fixed seed: tie-breaking between equally good splits is
# randomised, so without random_state the fitted tree (and its accuracy and
# decision regions) can differ between runs.
clf = DecisionTreeClassifier(random_state=42)
clf

In [None]:
clf.fit(iris_x_train, iris_y_train)

In [None]:
res = clf.predict(iris_x_test)
print(res)
print(iris_y_test.values)

In [None]:
accuracy = clf.score(iris_x_test, iris_y_test)
print("预测正确率:{:.0%}".format(accuracy))

In [None]:
draw(clf)

# 逻辑回归算法

In [None]:
from sklearn.linear_model import LogisticRegression

In [None]:
# Logistic regression fitted with the stochastic 'saga' solver. The solver shuffles
# the data, so random_state is fixed to make the fitted coefficients reproducible.
clf = LogisticRegression(solver='saga', max_iter = 1000, random_state = 42)
clf

In [None]:
clf.fit(iris_x_train, iris_y_train)

In [None]:
res = clf.predict(iris_x_test)
print(res)
print(iris_y_test.values)

In [None]:
accuracy = clf.score(iris_x_test, iris_y_test)
print("预测正确率:{:.0%}".format(accuracy))

In [None]:
draw(clf)

# 支持向量机算法

In [None]:
from sklearn.svm import SVC

In [None]:
clf = SVC()
clf

In [None]:
clf.fit(iris_x_train, iris_y_train)

In [None]:
res = clf.predict(iris_x_test)
print(res)
print(iris_y_test.values)

In [None]:
accuracy = clf.score(iris_x_test, iris_y_test)
print("预测正确率:{:.0%}".format(accuracy))

In [None]:
draw(clf)

# 集成方法--随机森林

In [None]:
from sklearn.ensemble import RandomForestClassifier

In [None]:
# Random forest with a fixed seed: bootstrap sampling and per-split feature
# selection are random, so without random_state every run trains a different forest.
clf = RandomForestClassifier(random_state=42)
clf

In [None]:
clf.fit(iris_x_train, iris_y_train)

In [None]:
res = clf.predict(iris_x_test)
print(res)
print(iris_y_test.values)

In [None]:
accuracy = clf.score(iris_x_test, iris_y_test)
print("预测正确率:{:.0%}".format(accuracy))

In [None]:
draw(clf)

# 集成方法--Adaboost

In [None]:
from sklearn.ensemble import AdaBoostClassifier
from sklearn.naive_bayes import GaussianNB

In [None]:
# AdaBoost with its default base estimator; random_state is fixed so the boosted
# ensemble is reproducible between runs.
clf = AdaBoostClassifier(random_state=42)
clf

In [None]:
clf.fit(iris_x_train, iris_y_train)

In [None]:
res = clf.predict(iris_x_test)
print(res)
print(iris_y_test.values)

In [None]:
accuracy = clf.score(iris_x_test, iris_y_test)
print("预测正确率:{:.0%}".format(accuracy))

In [None]:
draw(clf)

# 集成方法--梯度提升树GBDT

In [None]:
from sklearn.ensemble import GradientBoostingClassifier

In [None]:
# Gradient-boosted trees; random_state is fixed so the individual trees, and with
# them the reported accuracy and decision regions, are reproducible between runs.
clf = GradientBoostingClassifier(random_state=42)
clf

In [None]:
clf.fit(iris_x_train, iris_y_train)

In [None]:
res = clf.predict(iris_x_test)
print(res)
print(iris_y_test.values)

In [None]:
accuracy = clf.score(iris_x_test, iris_y_test)
print("预测正确率:{:.0%}".format(accuracy))

In [None]:
draw(clf)