In [None]:
import pandas as pd
import numpy as np
import time
import warnings
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from xgboost import XGBClassifier
from lightgbm import LGBMClassifier
from sklearn.metrics import classification_report, accuracy_score, confusion_matrix
warnings.filterwarnings("ignore")

In [None]:
# Load the activity dataset from the CSV file into a DataFrame.
# (The split into train/test sets happens in a later cell.)
data = pd.read_csv(filepath_or_buffer='Activity_data.csv')

In [None]:
# Separate the table into features and target: every column except the last
# one becomes X, and the last column becomes y.
X, y = data.iloc[:, :-1], data.iloc[:, -1]

In [None]:
# Hold out 30% of the rows as the test set; the fixed random_state keeps the
# split identical across runs.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.3,
    random_state=42,
)

In [None]:
# Helper that trains a model, times training/prediction, and prints the results
def evaluate_model(model, X_train, X_test, y_train, y_test, model_name):
    """Fit ``model``, predict on the test set, and print timings and metrics.

    Args:
        model: Estimator object; only its ``fit(X, y)`` and ``predict(X)``
            methods are called. It is fitted in place.
        X_train: Training features passed to ``model.fit``.
        X_test: Test features passed to ``model.predict``.
        y_train: Training labels passed to ``model.fit``.
        y_test: True test labels that the predictions are scored against.
        model_name: Label printed in the first line of the report.

    Returns:
        None. All results are printed to stdout.
    """
    # perf_counter() is a monotonic, high-resolution clock intended for
    # measuring short durations; time.time() is a wall clock that can jump
    # (e.g. on clock adjustments) and may be too coarse for a prediction
    # step that takes only a few milliseconds.
    fit_started = time.perf_counter()
    model.fit(X_train, y_train)
    train_time = time.perf_counter() - fit_started

    predict_started = time.perf_counter()
    y_pred = model.predict(X_test)
    test_time = time.perf_counter() - predict_started

    # Build the classification report, accuracy, and confusion matrix
    report = classification_report(y_test, y_pred)
    accuracy = accuracy_score(y_test, y_pred)
    conf_matrix = confusion_matrix(y_test, y_pred)

    print(f"模型: {model_name}")
    print(f"训练用时: {train_time:.4f} seconds")
    print(f"测试用时: {test_time:.4f} seconds")
    print(f"准确率: {accuracy:.4f}")
    print("分类报告:\n", report)
    print("混淆矩阵:\n", conf_matrix)

In [None]:
# Random forest: build the classifier with a fixed seed, then train it and
# print its timing and metrics.
rf_model = RandomForestClassifier(random_state=42)
evaluate_model(
    model=rf_model,
    X_train=X_train,
    X_test=X_test,
    y_train=y_train,
    y_test=y_test,
    model_name="随机森林",
)

模型: 随机森林
训练用时: 2.1310 seconds
测试用时: 0.0237 seconds
准确率: 0.9600
分类报告:
               precision    recall  f1-score   support

           0       0.99      1.00      1.00       101
           1       1.00      1.00      1.00        90
           2       1.00      1.00      1.00        87
           3       1.00      1.00      1.00        96
           4       1.00      1.00      1.00        98
           5       1.00      1.00      1.00        92
           6       1.00      1.00      1.00        81
           7       1.00      1.00      1.00        90
           8       1.00      0.99      0.99        97
           9       1.00      0.99      0.99        90
          10       1.00      1.00      1.00        75
          11       0.99      1.00      0.99        80
          12       0.72      0.69      0.70        90
          13       0.70      0.73      0.71        89
          14       1.00      1.00      1.00        94

    accuracy                           0.96      1350
   macro a

In [None]:
# XGBoost: build the classifier with multiclass log-loss as the eval metric
# and a fixed seed, then train it and print its timing and metrics.
# NOTE(review): `use_label_encoder` appears to be deprecated in newer XGBoost
# releases — confirm against the installed version before removing it.
xgb_model = XGBClassifier(
    use_label_encoder=False,
    eval_metric='mlogloss',
    random_state=42,
)
evaluate_model(
    model=xgb_model,
    X_train=X_train,
    X_test=X_test,
    y_train=y_train,
    y_test=y_test,
    model_name="XGBoost",
)

模型: XGBoost
训练用时: 2.0463 seconds
测试用时: 0.0264 seconds
准确率: 0.9615
分类报告:
               precision    recall  f1-score   support

           0       0.99      1.00      1.00       101
           1       1.00      1.00      1.00        90
           2       1.00      1.00      1.00        87
           3       1.00      1.00      1.00        96
           4       1.00      1.00      1.00        98
           5       1.00      1.00      1.00        92
           6       1.00      1.00      1.00        81
           7       1.00      1.00      1.00        90
           8       1.00      0.99      0.99        97
           9       1.00      0.99      0.99        90
          10       1.00      1.00      1.00        75
          11       0.99      1.00      0.99        80
          12       0.74      0.68      0.71        90
          13       0.70      0.76      0.73        89
          14       1.00      1.00      1.00        94

    accuracy                           0.96      1350
   macr

In [None]:
# LightGBM: build the classifier with a fixed seed, then train it and print
# its timing and metrics.
# The "[LightGBM] [Info] ..." lines come from LightGBM's own logger, which the
# Python `warnings` filter does not control; `verbose=-1` silences them.
# Python warnings are already suppressed globally in the imports cell, so the
# filter is not repeated here.
lgbm_model = LGBMClassifier(random_state=42, verbose=-1)
evaluate_model(lgbm_model, X_train, X_test, y_train, y_test, "LightGBM")

[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.002594 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 15300
[LightGBM] [Info] Number of data points in the train set: 3150, number of used features: 60
[LightGBM] [Info] Start training from score -2.761853
[LightGBM] [Info] Start training from score -2.708050
[LightGBM] [Info] Start training from score -2.693866
[LightGBM] [Info] Start training from score -2.737038
[LightGBM] [Info] Start training from score -2.746890
[LightGBM] [Info] Start training from score -2.717620
[LightGBM] [Info] Start training from score -2.666086
[LightGBM] [Info] Start training from score -2.708050
[LightGBM] [Info] Start training from score -2.741952
[LightGBM] [Info] Start training from score -2.708050
[LightGBM] [Info] Start training from score -2.639057
[LightGBM] [Info] Start training from score -2.661530
[LightGBM] [Info] Start training from score -2.708050
[LightGBM