In [10]:
import numpy as np
import xgboost as xgb
import os
from tensorflow.keras.datasets import mnist
from sklearn.metrics import accuracy_score
from sklearn.preprocessing import LabelEncoder
import threading
import time
import subprocess
import psutil
import tempfile
from nyoka import xgboost_to_pmml
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler
import re

In [11]:
import xgboost as xgb
import onnxmltools
import onnxruntime as ort
from skl2onnx.common.data_types import FloatTensorType
import numpy as np
from scipy.sparse import issparse
from sklearn.metrics import accuracy_score

# Load the XGBoost classifier that was trained on MNIST and export it to ONNX.
loaded_model = xgb.XGBClassifier()
loaded_model.load_model('./models_train/mnist_xgboost_model.json')

# Declare the ONNX input signature: a float tensor of shape (batch, n_features).
# The width is read from the loaded booster instead of X_train.shape[1]:
# X_train is only assigned in a later cell (so a fresh kernel would raise
# NameError here), and it is the un-flattened image array, whose second axis
# is the image height rather than the number of features the model consumes.
n_features = loaded_model.get_booster().num_features()
initial_type = [('float_input', FloatTensorType([None, n_features]))]
# Convert the model
onnx_model = onnxmltools.convert_xgboost(loaded_model, initial_types=initial_type)
# NOTE(review): the file name says "imdb" although the model loaded above is
# the MNIST one — kept unchanged in case something outside this notebook reads it.
onnx_model_path = 'imdb_xgboost_model.onnx'
onnxmltools.utils.save_model(onnx_model, onnx_model_path)


In [12]:
# Fetch the MNIST train/test splits (images and their labels).
(X_train, y_train), (X_test, y_test) = mnist.load_data()

# Flatten every image into a single row of 28*28 pixel columns, cast the
# values to float32 and divide them by 255.
X_train_2d = X_train.reshape(-1, 28*28).astype('float32') / 255
X_test_2d = X_test.reshape(-1, 28*28).astype('float32') / 255

# Hyper-parameters of the XGBoost classifier.
xgb_params = {
    'objective': 'multi:softmax',  # objective function
    'num_class': 10,               # number of classes (one per MNIST label)
    'n_estimators': 100,           # number of trees
    'max_depth': 6,                # maximum tree depth
    'learning_rate': 0.1,          # learning rate
}
model = xgb.XGBClassifier(**xgb_params)

In [7]:


# Train the model on the flattened training images.
model.fit(X_train_2d, y_train)

# Predict the labels of the test set.
y_pred = model.predict(X_test_2d)

# Compute and report the test accuracy.
accuracy = accuracy_score(y_test, y_pred)
print("Test accuracy: {:.2f}%".format(accuracy * 100))

# Persist the trained model. The target directory is created first so that a
# fresh checkout does not fail on the save step after the training has run.
os.makedirs('./models_train', exist_ok=True)
model.save_model('./models_train/mnist_xgboost_model.json')

Test accuracy: 97.02%


## XGB Mnist JSON

In [13]:
import time
import numpy as np
import joblib
import threading
import psutil
import subprocess
import tempfile
import os
import re
import xgboost as xgb
from tensorflow.keras.datasets import mnist

# Shared module-level state: filled in by the worker threads, read afterwards.
thread_output = dict()
duration = inference_duration = 0

# Workload thread: save the model, run batched inference, time both phases.
def save_model(stop_event, model, X_test_2d):
    """Time model serialisation and batched inference, then release the monitors.

    Args:
        stop_event: ``threading.Event`` that is set when this function exits,
            whether it finished normally or raised.
        model: object exposing ``save_model(path)`` and ``predict(batch)``.
        X_test_2d: row-sliceable array; ``shape[0]`` is the number of samples.

    Side effects:
        Calls ``model.save_model('mnist_xgboost_model.json')`` five times,
        stores the elapsed times in the module-level ``duration`` and
        ``inference_duration`` and prints both.
    """
    global duration
    global inference_duration

    # The monitoring threads loop until stop_event is set. Setting it in a
    # ``finally`` guarantees they are released even when saving or predicting
    # raises; otherwise the joins in the calling cell would never return.
    try:
        start_time = time.time()
        for _ in range(5):
            model.save_model('mnist_xgboost_model.json')
        end_time = time.time()

        duration = end_time - start_time
        print(f'Time taken to save model: {duration:.4f} seconds')

        start_time_inference = time.time()

        # Batched inference: walk the rows in steps of batch_size; the final
        # slice is simply shorter when the sample count is not a multiple.
        batch_size = 128
        num_samples = X_test_2d.shape[0]
        for batch_start in range(0, num_samples, batch_size):
            batch = X_test_2d[batch_start:batch_start + batch_size]
            model.predict(batch)  # result intentionally discarded: timing only

        end_time_inference = time.time()

        inference_duration = end_time_inference - start_time_inference
        print(f'Time taken for inference on {num_samples} samples: {inference_duration:.4f} seconds')
    finally:
        stop_event.set()  # signal the other threads to stop

# Thread target: sample CPU and GPU utilisation until stop_event is set.
def monitor_resources_during_save(stop_event):
    """Collect utilisation samples while the workload thread is running.

    Each iteration appends one ``psutil.cpu_percent(interval=0.1)`` reading and
    one GPU reading obtained from ``nvidia-smi``; the GPU reading is ``None``
    whenever the command or the integer conversion fails. When ``stop_event``
    is set, both lists are stored in the module-level ``thread_output`` under
    ``'cpu_usage'`` and ``'gpu_usage'``.
    """
    gpu_query = ['nvidia-smi', '--query-gpu=utilization.gpu', '--format=csv,noheader,nounits']
    cpu_samples = []
    gpu_samples = []

    while True:
        if stop_event.is_set():
            break
        cpu_samples.append(psutil.cpu_percent(interval=0.1))
        try:
            raw_reading = subprocess.check_output(gpu_query)
            gpu_reading = int(raw_reading.strip())
        except Exception:
            # No GPU, or the nvidia-smi call failed: record a missing value.
            gpu_reading = None
        gpu_samples.append(gpu_reading)

    # Publish the collected samples.
    thread_output['cpu_usage'] = cpu_samples
    thread_output['gpu_usage'] = gpu_samples
    print("Resource monitoring finished.")

# Thread target: run the external power-measurement script and capture its output.
def run_script(stop_event, script_path='/Users/anelloyi/Desktop/run_powermetrics.sh', poll_interval=0.05):
    """Run ``script_path`` until ``stop_event`` is set and store what it printed.

    Args:
        stop_event: ``threading.Event``; once set, the script is terminated
            (and killed if it has not exited within 0.1 s).
        script_path: executable to launch. Defaults to the original
            machine-specific location, so existing callers are unaffected.
        poll_interval: seconds to sleep between liveness checks.

    Side effects:
        Stores the combined stdout/stderr of the script (or the text of any
        exception raised along the way) in ``thread_output['powermetrics']``.
    """
    tmp_path = None
    try:
        with tempfile.NamedTemporaryFile(delete=False) as tmp_file:
            tmp_path = tmp_file.name
            process = subprocess.Popen([script_path], stdout=tmp_file, stderr=subprocess.STDOUT, text=True)
            print("Subprocess started.")
            # Sleep on the event between checks. A tight poll() loop would keep
            # a CPU core busy and skew the very power readings being captured.
            while process.poll() is None:
                if stop_event.wait(timeout=poll_interval):
                    break

            if process.poll() is None:
                process.terminate()
                try:
                    process.wait(timeout=0.1)
                except subprocess.TimeoutExpired:
                    process.kill()
                    process.wait()

        with open(tmp_path, 'r') as f:
            thread_output['powermetrics'] = f.read()

        print("Subprocess finished.")
    except Exception as e:
        thread_output['powermetrics'] = str(e)
        print("Exception in subprocess:", str(e))
    finally:
        # Remove the temporary file on every path, including failures.
        if tmp_path is not None:
            try:
                os.remove(tmp_path)
            except OSError:
                pass

# Event the workload thread sets to tell the two monitoring threads to stop.
stop_event = threading.Event()

# Load the trained XGBoost model from its JSON file.
loaded_model = xgb.XGBClassifier()
loaded_model.load_model('./models_train/mnist_xgboost_model.json')

# One thread runs the workload, the other two observe it.
thread1 = threading.Thread(target=save_model, args=(stop_event, loaded_model, X_test_2d))
thread2 = threading.Thread(target=monitor_resources_during_save, args=(stop_event,))
thread3 = threading.Thread(target=run_script, args=(stop_event,))
for worker in (thread1, thread2, thread3):
    worker.start()

# Block until all three threads have finished.
for worker in (thread1, thread2, thread3):
    worker.join()

# Fetch the script output captured by the run_script thread.
content = thread_output.get('powermetrics', 'No output captured')
# Split the output into individual lines.
lines = content.split('\n')
# Keep only the lines that start with "GPU Power:" or "CPU Power:".
filtered_lines = [line for line in lines if line.startswith(('GPU Power:', 'CPU Power:'))]
# Join the kept lines back into one newline-separated string.
filtered_content = '\n'.join(filtered_lines)

output_file_name = './mnist_models/output_xgboost/output-mnist-xgboost-json.txt'
# open(..., 'w') does not create missing parent directories; create them first
# so that a finished measurement is not lost to a FileNotFoundError.
os.makedirs(os.path.dirname(output_file_name), exist_ok=True)
with open(output_file_name, 'w') as file:
    file.write(filtered_content)
    file.write(f'\nTotal Duration(s): {duration:.2f}')
    file.write(f'\nInference Duration(s): {inference_duration:.4f}')
print(f"Content saved to {output_file_name}")

filtered_lines_count = len(filtered_lines)

# From here on the estimate is scaled by the inference time only.
duration = inference_duration
# Guard against a division by zero when no power lines were captured.
if filtered_lines_count > 0:
    # Extract the first numeric token of every kept line (the power reading).
    numbers = []
    for line in filtered_lines:
        match = re.search(r'[\d.]+', line)
        if match:
            numbers.append(float(match.group()))

    # The factor 2 presumably reflects two lines (CPU + GPU) per sampling
    # instant, i.e. filtered_lines_count / 2 samples — TODO confirm against
    # the script's output format.
    delta_time = duration * 2 / filtered_lines_count
    numbers_scaled = [num * delta_time for num in numbers]
    total_energy_consumption = sum(numbers_scaled)
    # NOTE(review): the printed unit is "mV", but a power reading multiplied by
    # a time is an energy; verify the unit the script reports and relabel.
    print(f"Total energy consumption: {total_energy_consumption:.2f} mV")
else:
    print("No filtered lines to process.")

Subprocess started.
Time taken to save model: 0.9430 seconds
Time taken for inference on 10000 samples: 0.2236 seconds
Subprocess finished.
Resource monitoring finished.
Content saved to ./mnist_models/output_xgboost/output-mnist-xgboost-json.txt
Total energy consumption: 2980.24 mV


## XGB Mnist ONNX

In [14]:
import threading
import time
import subprocess
import tempfile
import os
import re
import psutil
import numpy as np
import xgboost as xgb
import onnxmltools
from onnxmltools.convert.xgboost import convert
from skl2onnx.common.data_types import FloatTensorType
from onnxruntime import InferenceSession
from tensorflow.keras.datasets import mnist

# Reset the shared state used by the worker threads of this benchmark run.
thread_output, duration, inference_duration = {}, 0, 0

# Workload thread: convert to ONNX, save, run batched inference, time both phases.
def save_model(stop_event, model, X_test_2d):
    """Export the model to ONNX, time saving and inference, release the monitors.

    Args:
        stop_event: ``threading.Event`` that is set when this function exits,
            whether it finished normally or raised.
        model: model handed to the ONNX ``convert`` function.
        X_test_2d: 2-D array; ``shape[1]`` gives the ONNX input width and
            ``shape[0]`` the number of samples to run inference on.

    Side effects:
        Writes ``mnist_xgboost_model.onnx``, stores the elapsed times in the
        module-level ``duration`` and ``inference_duration`` and prints both.
        Creating the inference session is part of the timed inference phase.
    """
    global duration
    global inference_duration

    # The monitoring threads loop until stop_event is set. Setting it in a
    # ``finally`` guarantees they are released even when conversion, saving or
    # inference raises; otherwise the joins in the calling cell never return.
    try:
        initial_type = [('float_input', FloatTensorType([None, X_test_2d.shape[1]]))]
        # Convert the model
        onnx_model = convert(model, initial_types=initial_type)
        onnx_model_path = 'mnist_xgboost_model.onnx'

        start_time = time.time()
        for _ in range(1):
            onnxmltools.utils.save_model(onnx_model, onnx_model_path)
        end_time = time.time()

        duration = end_time - start_time
        print(f'Time taken to save model: {duration:.4f} seconds')

        start_time_inference = time.time()
        # Batched inference through an ONNX Runtime session.
        session = InferenceSession(onnx_model_path)
        input_name = session.get_inputs()[0].name

        batch_size = 128
        num_samples = X_test_2d.shape[0]
        for batch_start in range(0, num_samples, batch_size):
            batch = X_test_2d[batch_start:batch_start + batch_size].astype(np.float32)
            session.run(None, {input_name: batch})  # result discarded: timing only

        end_time_inference = time.time()

        inference_duration = end_time_inference - start_time_inference
        print(f'Time taken for inference on {num_samples} samples: {inference_duration:.4f} seconds')
    finally:
        stop_event.set()  # signal the other threads to stop

# Thread target: record CPU and GPU utilisation for as long as the workload runs.
def monitor_resources_during_save(stop_event):
    """Sample utilisation until ``stop_event`` is set, then publish the samples.

    The CPU value comes from ``psutil.cpu_percent(interval=0.1)``; the GPU value
    is the integer printed by ``nvidia-smi``, or ``None`` if that call or the
    conversion fails. Results are written to the module-level ``thread_output``
    under ``'cpu_usage'`` and ``'gpu_usage'``.
    """
    def _gpu_utilisation():
        # Returns None when there is no GPU or the nvidia-smi call fails.
        try:
            reply = subprocess.check_output(['nvidia-smi', '--query-gpu=utilization.gpu', '--format=csv,noheader,nounits'])
            return int(reply.strip())
        except Exception:
            return None

    cpu_readings, gpu_readings = [], []
    while not stop_event.is_set():
        cpu_readings.append(psutil.cpu_percent(interval=0.1))
        gpu_readings.append(_gpu_utilisation())

    # Hand the samples over to the main thread.
    thread_output['cpu_usage'] = cpu_readings
    thread_output['gpu_usage'] = gpu_readings
    print("Resource monitoring finished.")

# Thread target: run the external power-measurement script and capture its output.
def run_script(stop_event, script_path='/Users/anelloyi/Desktop/run_powermetrics.sh', poll_interval=0.05):
    """Run ``script_path`` until ``stop_event`` is set and store what it printed.

    Args:
        stop_event: ``threading.Event``; once set, the script is terminated
            (and killed if it has not exited within 0.1 s).
        script_path: executable to launch. Defaults to the original
            machine-specific location, so existing callers are unaffected.
        poll_interval: seconds to sleep between liveness checks.

    Side effects:
        Stores the combined stdout/stderr of the script (or the text of any
        exception raised along the way) in ``thread_output['powermetrics']``.
    """
    tmp_path = None
    try:
        with tempfile.NamedTemporaryFile(delete=False) as tmp_file:
            tmp_path = tmp_file.name
            process = subprocess.Popen([script_path], stdout=tmp_file, stderr=subprocess.STDOUT, text=True)
            print("Subprocess started.")
            # Sleep on the event between checks. A tight poll() loop would keep
            # a CPU core busy and skew the very power readings being captured.
            while process.poll() is None:
                if stop_event.wait(timeout=poll_interval):
                    break

            if process.poll() is None:
                process.terminate()
                try:
                    process.wait(timeout=0.1)
                except subprocess.TimeoutExpired:
                    process.kill()
                    process.wait()

        with open(tmp_path, 'r') as f:
            thread_output['powermetrics'] = f.read()

        print("Subprocess finished.")
    except Exception as e:
        thread_output['powermetrics'] = str(e)
        print("Exception in subprocess:", str(e))
    finally:
        # Remove the temporary file on every path, including failures.
        if tmp_path is not None:
            try:
                os.remove(tmp_path)
            except OSError:
                pass

# Event the workload thread sets to tell the two monitoring threads to stop.
stop_event = threading.Event()

# Load the trained XGBoost model from its JSON file.
loaded_model = xgb.XGBClassifier()
loaded_model.load_model('./models_train/mnist_xgboost_model.json')

# One thread runs the workload, the other two observe it.
thread1 = threading.Thread(target=save_model, args=(stop_event, loaded_model, X_test_2d))
thread2 = threading.Thread(target=monitor_resources_during_save, args=(stop_event,))
thread3 = threading.Thread(target=run_script, args=(stop_event,))
for worker in (thread1, thread2, thread3):
    worker.start()

# Block until all three threads have finished.
for worker in (thread1, thread2, thread3):
    worker.join()

# Fetch the script output captured by the run_script thread.
content = thread_output.get('powermetrics', 'No output captured')
# Split the output into individual lines.
lines = content.split('\n')
# Keep only the lines that start with "GPU Power:" or "CPU Power:".
filtered_lines = [line for line in lines if line.startswith(('GPU Power:', 'CPU Power:'))]
# Join the kept lines back into one newline-separated string.
filtered_content = '\n'.join(filtered_lines)

output_file_name = './mnist_models/output_xgboost/output-mnist-xgboost-onnx.txt'
# open(..., 'w') does not create missing parent directories; create them first
# so that a finished measurement is not lost to a FileNotFoundError.
os.makedirs(os.path.dirname(output_file_name), exist_ok=True)
with open(output_file_name, 'w') as file:
    file.write(filtered_content)
    file.write(f'\nTotal Duration(s): {duration:.2f}')
    file.write(f'\nInference Duration(s): {inference_duration:.4f}')
print(f"Content saved to {output_file_name}")

filtered_lines_count = len(filtered_lines)

# From here on the estimate is scaled by the inference time only.
duration = inference_duration
# Guard against a division by zero when no power lines were captured.
if filtered_lines_count > 0:
    # Extract the first numeric token of every kept line (the power reading).
    numbers = []
    for line in filtered_lines:
        match = re.search(r'[\d.]+', line)
        if match:
            numbers.append(float(match.group()))

    # The factor 2 presumably reflects two lines (CPU + GPU) per sampling
    # instant, i.e. filtered_lines_count / 2 samples — TODO confirm against
    # the script's output format.
    delta_time = duration * 2 / filtered_lines_count
    numbers_scaled = [num * delta_time for num in numbers]
    total_energy_consumption = sum(numbers_scaled)
    # NOTE(review): the printed unit is "mV", but a power reading multiplied by
    # a time is an energy; verify the unit the script reports and relabel.
    print(f"Total energy consumption: {total_energy_consumption:.2f} mV")
else:
    print("No filtered lines to process.")


Subprocess started.
Time taken to save model: 0.0276 seconds
Time taken for inference on 10000 samples: 0.2116 seconds
Subprocess finished.
Resource monitoring finished.
Content saved to ./mnist_models/output_xgboost/output-mnist-xgboost-onnx.txt
Total energy consumption: 1272.85 mV


## XGB Mnist PyTorch

In [15]:
import threading
import time
import subprocess
import tempfile
import os
import re
import psutil
import numpy as np
import xgboost as xgb
import hummingbird.ml
import torch
from sklearn.metrics import accuracy_score
from tensorflow.keras.datasets import mnist  # 确保导入 MNIST 数据集

In [16]:
import threading
import time
import subprocess
import tempfile
import os
import re
import psutil
import numpy as np
import xgboost as xgb
import torch
import hummingbird.ml
from tensorflow.keras.datasets import mnist

# Fresh shared state for this benchmark run (written to by the worker threads).
thread_output = dict()
inference_duration = 0
duration = inference_duration

# Workload thread: convert to PyTorch, save, run batched inference, time both phases.
def save_model(stop_event, model, X_test_2d):
    """Convert the model with Hummingbird, time saving and inference, release the monitors.

    Args:
        stop_event: ``threading.Event`` that is set when this function exits,
            whether it finished normally or raised.
        model: model handed to ``hummingbird.ml.convert(model, 'pytorch')``.
        X_test_2d: data converted to a float32 tensor; its first dimension is
            the number of samples to run inference on.

    Side effects:
        Writes ``./mnist_xgboost_model.pth``, stores the elapsed times in the
        module-level ``duration`` and ``inference_duration`` and prints both.
        The conversion is part of the timed save phase and the tensor
        conversion is part of the timed inference phase.
    """
    global duration
    global inference_duration

    # The monitoring threads loop until stop_event is set. Setting it in a
    # ``finally`` guarantees they are released even when conversion, saving or
    # inference raises; otherwise the joins in the calling cell never return.
    try:
        # Convert the XGBoost model to a PyTorch model and save it.
        start_time = time.time()
        for _ in range(1):
            pytorch_model = hummingbird.ml.convert(model, 'pytorch')
            torch.save(pytorch_model.model, './mnist_xgboost_model.pth')
        end_time = time.time()

        duration = end_time - start_time
        print(f'Time taken to save model: {duration:.4f} seconds')

        start_time_inference = time.time()
        # Turn the test data into a PyTorch tensor.
        X_test_torch = torch.tensor(X_test_2d, dtype=torch.float32)

        # Batched inference: walk the rows in steps of batch_size; the final
        # slice is simply shorter when the sample count is not a multiple.
        batch_size = 128
        num_samples = X_test_torch.shape[0]
        for batch_start in range(0, num_samples, batch_size):
            batch = X_test_torch[batch_start:batch_start + batch_size]
            pytorch_model.predict(batch)  # result discarded: timing only

        end_time_inference = time.time()

        inference_duration = end_time_inference - start_time_inference
        print(f'Time taken for inference on {X_test_torch.shape[0]} samples: {inference_duration:.4f} seconds')
    finally:
        stop_event.set()  # signal the other threads to stop

# Thread target: keep sampling CPU/GPU utilisation until told to stop.
def monitor_resources_during_save(stop_event):
    """Gather utilisation samples until ``stop_event`` is set.

    Per iteration one ``psutil.cpu_percent(interval=0.1)`` value and one GPU
    value are recorded; the GPU value is parsed from ``nvidia-smi`` output and
    is ``None`` when the command or the parsing fails. On exit the two lists
    are stored in the module-level ``thread_output`` as ``'cpu_usage'`` and
    ``'gpu_usage'``.
    """
    samples = {'cpu_usage': [], 'gpu_usage': []}

    while not stop_event.is_set():
        samples['cpu_usage'].append(psutil.cpu_percent(interval=0.1))
        try:
            smi_stdout = subprocess.check_output(['nvidia-smi', '--query-gpu=utilization.gpu', '--format=csv,noheader,nounits'])
            gpu_percent = int(smi_stdout.strip())
        except Exception:
            # No GPU, or the nvidia-smi call failed: record a missing value.
            samples['gpu_usage'].append(None)
        else:
            samples['gpu_usage'].append(gpu_percent)

    # Store the monitoring results.
    thread_output['cpu_usage'] = samples['cpu_usage']
    thread_output['gpu_usage'] = samples['gpu_usage']
    print("Resource monitoring finished.")

# Thread target: run the external power-measurement script and capture its output.
def run_script(stop_event, script_path='/Users/anelloyi/Desktop/run_powermetrics.sh', poll_interval=0.05):
    """Run ``script_path`` until ``stop_event`` is set and store what it printed.

    Args:
        stop_event: ``threading.Event``; once set, the script is terminated
            (and killed if it has not exited within 0.1 s).
        script_path: executable to launch. Defaults to the original
            machine-specific location, so existing callers are unaffected.
        poll_interval: seconds to sleep between liveness checks.

    Side effects:
        Stores the combined stdout/stderr of the script (or the text of any
        exception raised along the way) in ``thread_output['powermetrics']``.
    """
    tmp_path = None
    try:
        with tempfile.NamedTemporaryFile(delete=False) as tmp_file:
            tmp_path = tmp_file.name
            process = subprocess.Popen([script_path], stdout=tmp_file, stderr=subprocess.STDOUT, text=True)
            print("Subprocess started.")
            # Sleep on the event between checks. A tight poll() loop would keep
            # a CPU core busy and skew the very power readings being captured.
            while process.poll() is None:
                if stop_event.wait(timeout=poll_interval):
                    break

            if process.poll() is None:
                process.terminate()
                try:
                    process.wait(timeout=0.1)
                except subprocess.TimeoutExpired:
                    process.kill()
                    process.wait()

        with open(tmp_path, 'r') as f:
            thread_output['powermetrics'] = f.read()

        print("Subprocess finished.")
    except Exception as e:
        thread_output['powermetrics'] = str(e)
        print("Exception in subprocess:", str(e))
    finally:
        # Remove the temporary file on every path, including failures.
        if tmp_path is not None:
            try:
                os.remove(tmp_path)
            except OSError:
                pass

# Event the workload thread sets to tell the two monitoring threads to stop.
stop_event = threading.Event()

# Load the trained XGBoost model from its JSON file.
loaded_model = xgb.XGBClassifier()
loaded_model.load_model('./models_train/mnist_xgboost_model.json')

# One thread runs the workload, the other two observe it.
thread1 = threading.Thread(target=save_model, args=(stop_event, loaded_model, X_test_2d))
thread2 = threading.Thread(target=monitor_resources_during_save, args=(stop_event,))
thread3 = threading.Thread(target=run_script, args=(stop_event,))
for worker in (thread1, thread2, thread3):
    worker.start()

# Block until all three threads have finished.
for worker in (thread1, thread2, thread3):
    worker.join()

# Fetch the script output captured by the run_script thread.
content = thread_output.get('powermetrics', 'No output captured')
# Split the output into individual lines.
lines = content.split('\n')
# Keep only the lines that start with "GPU Power:" or "CPU Power:".
filtered_lines = [line for line in lines if line.startswith(('GPU Power:', 'CPU Power:'))]
# Join the kept lines back into one newline-separated string.
filtered_content = '\n'.join(filtered_lines)

output_file_name = './mnist_models/output_xgboost/output-mnist-xgboost-pth.txt'
# open(..., 'w') does not create missing parent directories; create them first
# so that a finished measurement is not lost to a FileNotFoundError.
os.makedirs(os.path.dirname(output_file_name), exist_ok=True)
with open(output_file_name, 'w') as file:
    file.write(filtered_content)
    file.write(f'\nTotal Duration(s): {duration:.2f}')
    file.write(f'\nInference Duration(s): {inference_duration:.4f}')
print(f"Content saved to {output_file_name}")

filtered_lines_count = len(filtered_lines)

# From here on the estimate is scaled by the inference time only.
duration = inference_duration
# Guard against a division by zero when no power lines were captured.
if filtered_lines_count > 0:
    # Extract the first numeric token of every kept line (the power reading).
    numbers = []
    for line in filtered_lines:
        match = re.search(r'[\d.]+', line)
        if match:
            numbers.append(float(match.group()))

    # The factor 2 presumably reflects two lines (CPU + GPU) per sampling
    # instant, i.e. filtered_lines_count / 2 samples — TODO confirm against
    # the script's output format.
    delta_time = duration * 2 / filtered_lines_count
    numbers_scaled = [num * delta_time for num in numbers]
    total_energy_consumption = sum(numbers_scaled)
    # NOTE(review): the printed unit is "mV", but a power reading multiplied by
    # a time is an energy; verify the unit the script reports and relabel.
    print(f"Total energy consumption: {total_energy_consumption:.2f} mV")
else:
    print("No filtered lines to process.")


Subprocess started.
Time taken to save model: 3.7278 seconds
Time taken for inference on 10000 samples: 0.6469 seconds
Subprocess finished.
Resource monitoring finished.
Content saved to ./mnist_models/output_xgboost/output-mnist-xgboost-pth.txt
Total energy consumption: 3509.25 mV
