In [1]:
# 人体运动状态信息评级.py
import numpy as np
import pandas as pd
import time

from sklearn.impute import SimpleImputer as Imputer  # 预处理模块
from sklearn.model_selection import train_test_split  # 自动生成训练集和测试集模块
from sklearn.metrics import classification_report  # 预测结果评估模块

from sklearn.neighbors import KNeighborsClassifier  # K近邻分类器
from sklearn.tree import DecisionTreeClassifier  # 决策树分类器
from sklearn.naive_bayes import GaussianNB  # 高斯朴素贝叶斯函数
from sklearn.utils import shuffle


def load_dataset(data_csv_path, eu_dist=1, random_state=None):
    """Load one distance feature and the labels from a CSV file, in shuffled row order.

    Missing values in the ``EuDist`` and ``SimDist`` columns are replaced by the
    mean of their own column, then the rows are shuffled.

    Args:
        data_csv_path: Path of a CSV file containing ``EuDist``, ``SimDist`` and
            ``label`` columns.
        eu_dist: ``1`` selects ``EuDist`` as the feature column; any other value
            selects ``SimDist``.
        random_state: Optional seed forwarded to ``DataFrame.sample`` so the
            shuffle can be reproduced. ``None`` (the default) leaves the
            shuffle unseeded.

    Returns:
        A ``(features, labels)`` tuple. ``features`` is a list of one-element
        lists (one per row) and ``labels`` is a flat list, both in the same
        shuffled row order.
    """
    feature_columns = ["EuDist", "SimDist"]
    all_data = pd.read_csv(data_csv_path)

    # Average only the two feature columns: a frame-wide mean() raises on
    # non-numeric columns in recent pandas, and a label slice of its result
    # depends on the column order of the file.
    all_data = all_data.fillna(all_data[feature_columns].mean())

    # frac=1 returns every row exactly once, in random order.
    all_data = all_data.sample(frac=1, random_state=random_state)

    selected = "EuDist" if eu_dist == 1 else "SimDist"
    # Selecting with a list keeps the data 2-D, so tolist() yields [[v], [v], ...].
    features = all_data[[selected]].values.tolist()
    labels = all_data["label"].values.tolist()
    return features, labels


def _timed_fit_predict(estimator, train_label, timing_label, X_train, y_train, x_test):
    """Fit ``estimator``, predict ``x_test``, print progress and elapsed seconds, return the predictions."""
    started = time.perf_counter()
    print("Start training {}".format(train_label))
    fitted = estimator.fit(X_train, y_train)
    print("Training done!")
    predictions = fitted.predict(x_test)
    print("Prediction done!")
    print("{} 用时：{:.2f}".format(timing_label, time.perf_counter() - started))
    return predictions


def main(data_csv_path="train_data3.csv", ratio=0.9):
    """Train KNN, decision-tree and Gaussian naive Bayes classifiers and print their reports.

    The rows returned by ``load_dataset`` are already shuffled, so the leading
    ``ratio`` share is used for training and the remainder for testing.

    Args:
        data_csv_path: CSV file passed to ``load_dataset``.
        ratio: Fraction of the rows used for training.
    """
    X, y = load_dataset(data_csv_path)

    # One split index for both lists. Every training row is kept: the earlier
    # train_test_split(..., test_size=0.1) call was only meant to shuffle but
    # threw away 10% of the training rows.
    split_at = int(ratio * len(X))
    X_train, y_train = X[:split_at], y[:split_at]
    x_test, y_test = X[split_at:], y[split_at:]

    # (label used in the "Start training" line, short name, estimator)
    classifiers = [
        ("knn", "knn", KNeighborsClassifier()),
        ("DT", "dt", DecisionTreeClassifier()),
        ("Bayes", "gnb", GaussianNB()),
    ]

    # Train and predict with every classifier first, so all timing lines are
    # printed before any report.
    answers = {}
    for train_label, short_name, estimator in classifiers:
        answers[short_name] = _timed_fit_predict(
            estimator, train_label, short_name, X_train, y_train, x_test
        )

    # Precision / recall / f1 per classifier on the held-out rows.
    for short_name, answer in answers.items():
        print("\n\nThe classification report for {}:".format(short_name))
        print(classification_report(y_test, answer))


In [4]:
# Run the pipeline: train the KNN, decision-tree and Gaussian NB classifiers and print their classification reports.
main()

Start training knn
Training done!
Prediction done!
knn 用时：0.00
Start training DT
Training done!
Prediction done!
dt 用时：0.00
Start training Bayes
Training done!
Prediction done!
gnb 用时：0.00


The classification report for knn:
              precision    recall  f1-score   support

           0       0.71      0.91      0.80        33
           1       0.83      0.56      0.67        27

    accuracy                           0.75        60
   macro avg       0.77      0.73      0.73        60
weighted avg       0.77      0.75      0.74        60



The classification report for dt:
              precision    recall  f1-score   support

           0       0.66      0.88      0.75        33
           1       0.75      0.44      0.56        27

    accuracy                           0.68        60
   macro avg       0.70      0.66      0.66        60
weighted avg       0.70      0.68      0.67        60



The classification report for gnb:
              precision    recall  f1-score   s

In [8]:
# 人体运动状态信息评级.py
import numpy as np
import pandas as pd
import time

from sklearn.model_selection import train_test_split  # 自动生成训练集和测试集模块
from sklearn.metrics import classification_report  # 预测结果评估模块

from sklearn.neighbors import KNeighborsClassifier  # K近邻分类器
from sklearn.tree import DecisionTreeClassifier  # 决策树分类器
from sklearn.naive_bayes import GaussianNB  # 高斯朴素贝叶斯函数
from sklearn.utils import shuffle


def load_dataset(data_csv_path, eu_dist=1, random_state=None):
    """Load one distance feature and the labels from a CSV file, in shuffled row order.

    Missing values in the ``EuDist`` and ``SimDist`` columns are replaced by the
    mean of their own column, then the rows are shuffled.

    Args:
        data_csv_path: Path of a CSV file containing ``EuDist``, ``SimDist`` and
            ``label`` columns.
        eu_dist: ``1`` selects ``EuDist`` as the feature column; any other value
            selects ``SimDist``.
        random_state: Optional seed forwarded to ``DataFrame.sample`` so the
            shuffle can be reproduced. ``None`` (the default) leaves the
            shuffle unseeded.

    Returns:
        A ``(features, labels)`` tuple. ``features`` is a list of one-element
        lists (one per row) and ``labels`` is a flat list, both in the same
        shuffled row order.
    """
    feature_columns = ["EuDist", "SimDist"]
    all_data = pd.read_csv(data_csv_path)

    # Average only the two feature columns: a frame-wide mean() raises on
    # non-numeric columns in recent pandas, and a label slice of its result
    # depends on the column order of the file.
    all_data = all_data.fillna(all_data[feature_columns].mean())

    # frac=1 returns every row exactly once, in random order.
    all_data = all_data.sample(frac=1, random_state=random_state)

    selected = "EuDist" if eu_dist == 1 else "SimDist"
    # Selecting with a list keeps the data 2-D, so tolist() yields [[v], [v], ...].
    features = all_data[[selected]].values.tolist()
    labels = all_data["label"].values.tolist()
    return features, labels



ratio = 0.8
X, y = load_dataset("train_data3.csv")

# load_dataset already returns the rows in shuffled order, so a positional cut
# is a random hold-out split: the leading `ratio` share trains, the rest tests.
# Every training row is kept; the earlier train_test_split(..., test_size=0.1)
# call was only meant to shuffle but threw away 10% of the training rows.
split_at = int(ratio * len(X))
X_train, y_train = X[:split_at], y[:split_at]
x_test, y_test = X[split_at:], y[split_at:]

knn_start = time.perf_counter()
# K-nearest-neighbours: fit on the training rows, then predict the held-out rows.
print("Start training knn")
knn = KNeighborsClassifier().fit(X_train, y_train)
print("Training done!")
knn_predict = knn.predict(x_test)  # predicted class per test row
knn_predict_proba = knn.predict_proba(x_test)  # per-class probabilities per test row
print(knn_predict)
print(knn_predict_proba.max(axis=1))  # probability of the most likely class, per row
print("Prediction done!")
print("knn 用时：{:.2f}".format(time.perf_counter() - knn_start))

dt_start = time.perf_counter()
# Decision tree: fit on the training rows, then predict the held-out rows.
print("Start training DT")
dt = DecisionTreeClassifier().fit(X_train, y_train)
print("Training done!")
answer_dt = dt.predict(x_test)  # predicted class per test row
print("Prediction done!")
print("dt 用时：{:.2f}".format(time.perf_counter() - dt_start))

gnb_start = time.perf_counter()
# Gaussian naive Bayes: fit on the training rows, then predict the held-out rows.
print("Start training Bayes")
gnb = GaussianNB().fit(X_train, y_train)
print("Training done!")
answer_gnb = gnb.predict(x_test)  # predicted class per test row
print("Prediction done!")
print("gnb 用时：{:.2f}".format(time.perf_counter() - gnb_start))




Start training knn
Training done!
[0 1 0 1 0 0 1 1 0 0 1 1 1 1 0 1 1 0 0 0 0 1 0 1 1 1 0 1 1 1 0 1 1 0 0 1 0
 1 0 0 0 0 1 0 1 0 0 1 0 0 0 0 0 1 0 1 0 1 1 1 0 0 1 1 0 1 0 0 1 1 0 1 1 1
 1 1 1 1 0 1 0 0 0 1 1 1 1 0 0 0 0 0 0 1 1 0 1 0 1 1 1 0 0 0 1 0 0 1 1 1 0
 1 1 0 1 1 1 1 1 1]
1.0
Prediction done!
knn 用时：0.00
Start training DT
Training done!
Prediction done!
dt 用时：0.00
Start training Bayes
Training done!
Prediction done!
gnb 用时：0.00
