In [8]:
import threading
import numpy as np
import time
import subprocess
import os
import psutil
import tensorflow as tf
import tempfile
from tensorflow.keras.datasets import imdb
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Embedding, LSTM, Dense
from tensorflow.keras.callbacks import LambdaCallback
import xgboost as xgb
from sklearn.metrics import accuracy_score
from sklearn.feature_extraction.text import TfidfVectorizer
import re
import onnxmltools
import onnxruntime as ort
from skl2onnx.common.data_types import FloatTensorType
from scipy.sparse import issparse
import hummingbird.ml
import torch
from sklearn2pmml import PMMLPipeline, sklearn2pmml
from sklearn2pmml.pipeline import PMMLPipeline
from pypmml import Model
from sklearn.datasets import fetch_20newsgroups
from xgboost import XGBClassifier
import pandas as pd


In [9]:
# 加载 IMDB 数据集
max_features = 20000  # 使用的单词数量
maxlen = 100  # 每条评论的最大长度

(X_train, y_train), (X_test, y_test) = imdb.load_data(num_words=max_features)

# 将序列转换为文本
word_index = imdb.get_word_index()
index_word = {v: k for k, v in word_index.items()}

def sequences_to_texts(sequences):
    return [' '.join([index_word.get(i - 3, '?') for i in seq]) for seq in sequences]

X_train_text = sequences_to_texts(X_train)
X_test_text = sequences_to_texts(X_test)

# 使用TF-IDF向量化文本数据
vectorizer = TfidfVectorizer(max_features=max_features)
X_train_tfidf = vectorizer.fit_transform(X_train_text)
X_test_tfidf = vectorizer.transform(X_test_text)

In [3]:
## 模型训练
# XGBoost 对象
model = xgb.XGBClassifier(
    objective='binary:logistic',  # 目标函数，二分类问题使用'logistic'目标函数
    n_estimators=100,             # 树的个数
    max_depth=6,                  # 树的深度
    learning_rate=0.1             # 学习速率
)

# 训练模型
model.fit(X_train_tfidf, y_train)

# 预测测试集
y_pred = model.predict(X_test_tfidf)

# 计算准确率
accuracy = accuracy_score(y_test, y_pred)
print("Test accuracy: {:.2f}%".format(accuracy * 100))

# 保存模型到本地文件
model.save_model('./models_train/imdb_xgboost_model.json')

Test accuracy: 83.10%


### imdb xg json

In [6]:
# 初始化全局变量
thread_output = {}
duration = 0
inference_duration = 0

# 定义保存模型的函数
def save_model(stop_event, model, X_test_tfidf):
    global duration
    global inference_duration

    start_time = time.time()
    for i in range(5):
        model.save_model('imdb_xgboost_model.json')
    end_time = time.time()

    duration = end_time - start_time
    print(f'Time taken to save model: {duration:.4f} seconds')

    # 确保输入数据格式正确
    X_test_sample = X_test_tfidf[:10000]

    start_time_inference = time.time()
    # 进行推理
    for i in range(32): # 1*10
        model.predict(X_test_tfidf)
    end_time_inference = time.time()

    inference_duration = end_time_inference - start_time_inference
    print(f'Time taken for inference on {X_test_sample.shape[0]} samples: {inference_duration:.4f} seconds')

    stop_event.set()  # 触发停止其他线程

# 监控保存模型时的资源使用率的线程函数
def monitor_resources_during_save(stop_event):
    cpu_usage = []
    gpu_usage = []

    while not stop_event.is_set():
        cpu_usage.append(psutil.cpu_percent(interval=0.1))
        try:
            gpu_output = subprocess.check_output(['nvidia-smi', '--query-gpu=utilization.gpu', '--format=csv,noheader,nounits'])
            gpu_usage.append(int(gpu_output.strip()))
        except Exception as e:
            gpu_usage.append(None)  # 如果没有GPU或nvidia-smi命令失败，则记录None

    # 保存监测结果
    thread_output['cpu_usage'] = cpu_usage
    thread_output['gpu_usage'] = gpu_usage
    print("Resource monitoring finished.")

# 运行外部脚本并捕获输出
def run_script(stop_event):
    try:
        with tempfile.NamedTemporaryFile(delete=False) as tmp_file:
            process = subprocess.Popen(['/Users/anelloyi/Desktop/run_powermetrics.sh'], stdout=tmp_file, stderr=subprocess.STDOUT, text=True)
            print("Subprocess started.")
            while not stop_event.is_set():
                if process.poll() is not None:  # 检查进程是否已经结束
                    break
            
            if process.poll() is None:
                process.terminate()
                try:
                    process.wait(timeout=0.1)
                except subprocess.TimeoutExpired:
                    process.kill()
                    process.wait()
        
        with open(tmp_file.name, 'r') as f:
            thread_output['powermetrics'] = f.read()
        
        os.remove(tmp_file.name)  # 删除临时文件
        print("Subprocess finished.")
    except Exception as e:
        thread_output['powermetrics'] = str(e)
        print("Exception in subprocess:", str(e))

# 创建和启动线程
stop_event = threading.Event()

# 加载XGBoost模型
loaded_model = xgb.XGBClassifier()
loaded_model.load_model('./models_train/imdb_xgboost_model.json')

# save_model(stop_event, loaded_model, X_test_tfidf)

thread1 = threading.Thread(target=save_model, args=(stop_event, loaded_model, X_test_tfidf))
thread2 = threading.Thread(target=monitor_resources_during_save, args=(stop_event,))
thread3 = threading.Thread(target=run_script, args=(stop_event,))
thread1.start()
thread2.start()
thread3.start()

# 等待线程完成
thread1.join()
thread2.join()
thread3.join()

# 输出从线程收集的数据
content = thread_output.get('powermetrics', 'No output captured')
# 将内容按行拆分
lines = content.split('\n')
# 筛选出以 "CPU consume" 和 "GPU Power" 开头的行
filtered_lines = [line for line in lines if line.startswith('GPU Power:') or line.startswith('CPU Power:')]
# 将筛选后的行合并为一个字符串，每行之间用换行符分隔
filtered_content = '\n'.join(filtered_lines)

output_file_name = './imbd_models/output_xgboost/output-imdb-xgboost-json.txt'
with open(output_file_name, 'w') as file:
    file.write(filtered_content)
    file.write(f'\nTotal Duration(s): {duration:.2f}')
    file.write(f'\nInference Duration(s): {inference_duration:.4f}')
print(f"Content saved to {output_file_name}")

filtered_lines_count = len(filtered_lines)
#print(filtered_lines_count)
#print(filtered_lines)

# 确保 filtered_lines_count 不为零
if filtered_lines_count > 0:
    # 提取每一个采样点的数字，即CPU和GPU的具体mV
    numbers = []
    for line in filtered_lines:
        match = re.search(r'[\d.]+', line)
        if match:
            numbers.append(float(match.group()))

    delta_time = duration * 2 / filtered_lines_count
    numbers_scaled = [num * delta_time for num in numbers]
    total_energy_consumption = sum(numbers_scaled)
    print(f"Total energy consumption: {total_energy_consumption:.2f} mV")
else:
    print("No filtered lines to process.")

Subprocess started.
Time taken to save model: 1.3039 seconds
Time taken for inference on 10000 samples: 2.0365 seconds
Subprocess finished.
Resource monitoring finished.
Content saved to ./imbd_models/output_xgboost/output-imdb-xgboost-json.txt
328
['CPU Power: 15688 mW', 'GPU Power: 0 mW', 'CPU Power: 13204 mW', 'GPU Power: 65 mW', 'CPU Power: 15592 mW', 'GPU Power: 0 mW', 'CPU Power: 17213 mW', 'GPU Power: 0 mW', 'CPU Power: 17244 mW', 'GPU Power: 0 mW', 'CPU Power: 20059 mW', 'GPU Power: 0 mW', 'CPU Power: 21972 mW', 'GPU Power: 0 mW', 'CPU Power: 20838 mW', 'GPU Power: 67 mW', 'CPU Power: 19929 mW', 'GPU Power: 68 mW', 'CPU Power: 19641 mW', 'GPU Power: 0 mW', 'CPU Power: 21909 mW', 'GPU Power: 0 mW', 'CPU Power: 23434 mW', 'GPU Power: 0 mW', 'CPU Power: 14535 mW', 'GPU Power: 129 mW', 'CPU Power: 8179 mW', 'GPU Power: 190 mW', 'CPU Power: 10152 mW', 'GPU Power: 68 mW', 'CPU Power: 7763 mW', 'GPU Power: 62 mW', 'CPU Power: 6515 mW', 'GPU Power: 0 mW', 'CPU Power: 11506 mW', 'GPU Po

In [18]:
import threading
import time
import subprocess
import tempfile
import os
import re
import psutil
import numpy as np
import xgboost as xgb
from sklearn.feature_extraction.text import TfidfVectorizer
from tensorflow.keras.datasets import imdb

# 初始化全局变量
thread_output = {}
duration = 0
inference_duration = 0

# 定义保存模型的函数
def save_model(stop_event, model, X_test_tfidf):
    global duration
    global inference_duration

    start_time = time.time()
    for i in range(50):
        model.save_model('imdb_xgboost_model.json')
    end_time = time.time()

    duration = end_time - start_time
    print(f'Time taken to save model: {duration:.4f} seconds')

    # 确保输入数据格式正确
    X_test_sample = X_test_tfidf[:10000]

    start_time_inference = time.time()
    # 批量推理部分
    batch_size = 128
    num_samples = X_test_sample.shape[0]
    num_batches = (num_samples + batch_size - 1) // batch_size

    for _ in range(1):  # 推理
        for batch_idx in range(num_batches):
            batch_start = batch_idx * batch_size
            batch_end = min(batch_start + batch_size, num_samples)
            batch = X_test_sample[batch_start:batch_end]
            predictions = model.predict(batch)
            # 在此处处理预测结果，例如保存或输出

    end_time_inference = time.time()

    inference_duration = end_time_inference - start_time_inference
    print(f'Time taken for inference on {X_test_sample.shape[0]} samples: {inference_duration:.4f} seconds')

    stop_event.set()  # 触发停止其他线程

# 监控保存模型时的资源使用率的线程函数
def monitor_resources_during_save(stop_event):
    cpu_usage = []
    gpu_usage = []

    while not stop_event.is_set():
        cpu_usage.append(psutil.cpu_percent(interval=0.1))
        try:
            gpu_output = subprocess.check_output(['nvidia-smi', '--query-gpu=utilization.gpu', '--format=csv,noheader,nounits'])
            gpu_usage.append(int(gpu_output.strip()))
        except Exception as e:
            gpu_usage.append(None)  # 如果没有GPU或nvidia-smi命令失败，则记录None

    # 保存监测结果
    thread_output['cpu_usage'] = cpu_usage
    thread_output['gpu_usage'] = gpu_usage
    print("Resource monitoring finished.")

# 运行外部脚本并捕获输出
def run_script(stop_event):
    try:
        with tempfile.NamedTemporaryFile(delete=False) as tmp_file:
            process = subprocess.Popen(['/Users/anelloyi/Desktop/run_powermetrics.sh'], stdout=tmp_file, stderr=subprocess.STDOUT, text=True)
            print("Subprocess started.")
            while not stop_event.is_set():
                if process.poll() is not None:  # 检查进程是否已经结束
                    break
            
            if process.poll() is None:
                process.terminate()
                try:
                    process.wait(timeout=0.1)
                except subprocess.TimeoutExpired:
                    process.kill()
                    process.wait()
        
        with open(tmp_file.name, 'r') as f:
            thread_output['powermetrics'] = f.read()
        
        os.remove(tmp_file.name)  # 删除临时文件
        print("Subprocess finished.")
    except Exception as e:
        thread_output['powermetrics'] = str(e)
        print("Exception in subprocess:", str(e))

# 创建和启动线程
stop_event = threading.Event()

# 加载IMDB数据集并进行TF-IDF向量化
max_features = 20000  # 使用的单词数量
(X_train, y_train), (X_test, y_test) = imdb.load_data(num_words=max_features)

# 将序列转换为文本
word_index = imdb.get_word_index()
index_word = {v: k for k, v in word_index.items()}

def sequences_to_texts(sequences):
    return [' '.join([index_word.get(i - 3, '?') for i in seq]) for seq in sequences]

X_train_text = sequences_to_texts(X_train)
X_test_text = sequences_to_texts(X_test)

# 使用TF-IDF向量化文本数据
vectorizer = TfidfVectorizer(max_features=max_features)
X_train_tfidf = vectorizer.fit_transform(X_train_text)
X_test_tfidf = vectorizer.transform(X_test_text)

# 加载XGBoost模型
loaded_model = xgb.XGBClassifier()
loaded_model.load_model('./models_train/imdb_xgboost_model.json')

# save_model(stop_event, loaded_model, X_test_tfidf)

thread1 = threading.Thread(target=save_model, args=(stop_event, loaded_model, X_test_tfidf))
thread2 = threading.Thread(target=monitor_resources_during_save, args=(stop_event,))
thread3 = threading.Thread(target=run_script, args=(stop_event,))
thread1.start()
thread2.start()
thread3.start()

# 等待线程完成
thread1.join()
thread2.join()
thread3.join()

# 输出从线程收集的数据
content = thread_output.get('powermetrics', 'No output captured')
# 将内容按行拆分
lines = content.split('\n')
# 筛选出以 "CPU consume" 和 "GPU Power" 开头的行
filtered_lines = [line for line in lines if line.startswith('GPU Power:') or line.startswith('CPU Power:')]
# 将筛选后的行合并为一个字符串，每行之间用换行符分隔
filtered_content = '\n'.join(filtered_lines)

output_file_name = './imbd_models/output_xgboost/output-imdb-xgboost-json.txt'
with open(output_file_name, 'w') as file:
    file.write(filtered_content)
    file.write(f'\nTotal Duration(s): {duration:.2f}')
    file.write(f'\nInference Duration(s): {inference_duration:.4f}')
print(f"Content saved to {output_file_name}")

filtered_lines_count = len(filtered_lines)
#print(filtered_lines_count)
#print(filtered_lines)

duration = inference_duration
# 确保 filtered_lines_count 不为零
if filtered_lines_count > 0:
    # 提取每一个采样点的数字，即CPU和GPU的具体mV
    numbers = []
    for line in filtered_lines:
        match = re.search(r'[\d.]+', line)
        if match:
            numbers.append(float(match.group()))

    delta_time = duration * 2 / filtered_lines_count
    numbers_scaled = [num * delta_time for num in numbers]
    total_energy_consumption = sum(numbers_scaled)
    print(f"Total energy consumption: {total_energy_consumption:.2f} mV")
else:
    print("No filtered lines to process.")


Subprocess started.
Time taken to save model: 0.9856 seconds
Time taken for inference on 10000 samples: 0.1916 seconds
Subprocess finished.
Resource monitoring finished.
Content saved to ./imbd_models/output_xgboost/output-imdb-xgboost-json.txt
Total energy consumption: 2074.66 mV


### imdb xg onnx

In [19]:
import threading
import time
import subprocess
import tempfile
import os
import re
import psutil
import numpy as np
import xgboost as xgb
import onnxruntime as ort
import onnxmltools
from skl2onnx.common.data_types import FloatTensorType
from scipy.sparse import issparse
from sklearn.feature_extraction.text import TfidfVectorizer
from tensorflow.keras.datasets import imdb

# 初始化全局变量
thread_output = {}
duration = 0
inference_duration = 0

# 定义保存模型的函数
def save_model(stop_event, model, X_test_tfidf):
    global duration
    global inference_duration

    # 定义ONNX转换器的输入类型
    initial_type = [('float_input', FloatTensorType([None, max_features]))]

    start_time = time.time()
    # 将 XGBoost 模型转换为 ONNX 格式
    for i in range(1):
        onnx_model = onnxmltools.convert_xgboost(model, initial_types=initial_type)
        onnx_model_path = 'imdb_xgboost_model.onnx'
        onnxmltools.utils.save_model(onnx_model, onnx_model_path)
    end_time = time.time()

    duration = end_time - start_time
    print(f'Time taken to save model: {duration:.4f} seconds')

    ort_session = ort.InferenceSession(onnx_model_path)
    # 准备输入数据
    def to_numpy(matrix):
        if issparse(matrix):
            return matrix.todense().astype(np.float32)
        return matrix.astype(np.float32)
    
    X_test_numpy = to_numpy(X_test_tfidf)
    
    # 确保输入数据的形状与模型期望的一致
    if X_test_numpy.shape[1] < max_features:
        padding = np.zeros((X_test_numpy.shape[0], max_features - X_test_numpy.shape[1]), dtype=np.float32)
        X_test_numpy = np.hstack((X_test_numpy, padding))
    elif X_test_numpy.shape[1] > max_features:
        X_test_numpy = X_test_numpy[:, :max_features]

    input_name = ort_session.get_inputs()[0].name
    output_name = ort_session.get_outputs()[0].name

    # 确保输入数据格式正确
    X_test_sample = X_test_numpy[:10000]

    start_time_inference = time.time()
    
    # 批量推理部分
    batch_size = 128
    num_samples = X_test_sample.shape[0]
    num_batches = (num_samples + batch_size - 1) // batch_size

    for _ in range(1):  # 推理循环次数为 1
        for batch_idx in range(num_batches):
            batch_start = batch_idx * batch_size
            batch_end = min(batch_start + batch_size, num_samples)
            batch = X_test_sample[batch_start:batch_end]
            input_data = {input_name: batch}
            pred_onnx = ort_session.run([output_name], input_data)[0]

    end_time_inference = time.time()

    inference_duration = end_time_inference - start_time_inference
    print(f'Time taken for inference on {X_test_sample.shape[0]} samples: {inference_duration:.4f} seconds')

    stop_event.set()  # 触发停止其他线程

# 监控保存模型时的资源使用率的线程函数
def monitor_resources_during_save(stop_event):
    cpu_usage = []
    gpu_usage = []

    while not stop_event.is_set():
        cpu_usage.append(psutil.cpu_percent(interval=0.1))
        try:
            gpu_output = subprocess.check_output(['nvidia-smi', '--query-gpu=utilization.gpu', '--format=csv,noheader,nounits'])
            gpu_usage.append(int(gpu_output.strip()))
        except Exception as e:
            gpu_usage.append(None)  # 如果没有GPU或nvidia-smi命令失败，则记录None

    # 保存监测结果
    thread_output['cpu_usage'] = cpu_usage
    thread_output['gpu_usage'] = gpu_usage
    print("Resource monitoring finished.")

# 运行外部脚本并捕获输出
def run_script(stop_event):
    try:
        with tempfile.NamedTemporaryFile(delete=False) as tmp_file:
            process = subprocess.Popen(['/Users/anelloyi/Desktop/run_powermetrics.sh'], stdout=tmp_file, stderr=subprocess.STDOUT, text=True)
            print("Subprocess started.")
            while not stop_event.is_set():
                if process.poll() is not None:  # 检查进程是否已经结束
                    break
            
            if process.poll() is None:
                process.terminate()
                try:
                    process.wait(timeout=0.1)
                except subprocess.TimeoutExpired:
                    process.kill()
                    process.wait()
        
        with open(tmp_file.name, 'r') as f:
            thread_output['powermetrics'] = f.read()
        
        os.remove(tmp_file.name)  # 删除临时文件
        print("Subprocess finished.")
    except Exception as e:
        thread_output['powermetrics'] = str(e)
        print("Exception in subprocess:", str(e))

# 创建和启动线程
stop_event = threading.Event()


# 加载XGBoost模型
model = xgb.XGBClassifier()
model.load_model('./models_train/imdb_xgboost_model.json')

thread1 = threading.Thread(target=save_model, args=(stop_event, model, X_test_tfidf))
thread2 = threading.Thread(target=monitor_resources_during_save, args=(stop_event,))
thread3 = threading.Thread(target=run_script, args=(stop_event,))
thread1.start()
thread2.start()
thread3.start()

# 等待线程完成
thread1.join()
thread2.join()
thread3.join()

# 输出从线程收集的数据
content = thread_output.get('powermetrics', 'No output captured')
# 将内容按行拆分
lines = content.split('\n')
# 筛选出以 "CPU consume" 和 "GPU Power" 开头的行
filtered_lines = [line for line in lines if line.startswith('GPU Power:') or line.startswith('CPU Power:')]
# 将筛选后的行合并为一个字符串，每行之间用换行符分隔
filtered_content = '\n'.join(filtered_lines)

output_file_name = './imbd_models/output_xgboost/output-imdb-xgboost-onnx.txt'
with open(output_file_name, 'w') as file:
    file.write(filtered_content)
    file.write(f'\nTotal Duration(s): {duration:.2f}')
    file.write(f'\nInference Duration(s): {inference_duration:.4f}')
print(f"Content saved to {output_file_name}")

filtered_lines_count = len(filtered_lines)
# print(filtered_lines_count)
# print(filtered_lines)

duration = inference_duration

# 确保 filtered_lines_count 不为零
if filtered_lines_count > 0:
    # 提取每一个采样点的数字，即CPU和GPU的具体mV
    numbers = []
    for line in filtered_lines:
        match = re.search(r'[\d.]+', line)
        if match:
            numbers.append(float(match.group()))

    delta_time = duration * 2 / filtered_lines_count
    numbers_scaled = [num * delta_time for num in numbers]
    total_energy_consumption = sum(numbers_scaled)
    print(f"Total energy consumption: {total_energy_consumption:.2f} mV")
else:
    print("No filtered lines to process.")


Subprocess started.
Time taken to save model: 0.5587 seconds
Time taken for inference on 10000 samples: 0.1426 seconds
Subprocess finished.
Resource monitoring finished.
Content saved to ./imbd_models/output_xgboost/output-imdb-xgboost-onnx.txt
Total energy consumption: 1084.31 mV


### imdb xg pytorch

In [20]:
import threading
import time
import subprocess
import tempfile
import os
import re
import psutil
import numpy as np
import xgboost as xgb
import torch
import hummingbird.ml
from tensorflow.keras.datasets import imdb
from sklearn.feature_extraction.text import TfidfVectorizer

# 初始化全局变量
thread_output = {}
duration = 0
inference_duration = 0

# 定义保存模型的函数
def save_model(stop_event, model, X_test_tfidf):
    global duration
    global inference_duration

    # 将XGBoost模型转换为PyTorch模型
    start_time = time.time()
    for i in range(1):
        pytorch_model = hummingbird.ml.convert(model, 'pytorch')
        # 保存PyTorch模型
        torch.save(pytorch_model.model, './imdb_xgboost_model.pth')
    end_time = time.time()

    duration = end_time - start_time
    print(f'Time taken to save model: {duration:.4f} seconds')

    # 选择前10000个样本
    X_test_sample = X_test_tfidf[:10000]

    # 准备输入数据
    start_time_inference = time.time()
    # 将测试集数据转换为PyTorch张量
    X_test_torch = torch.tensor(X_test_sample.toarray(), dtype=torch.float32)

    # 批量推理部分
    batch_size = 128
    num_samples = X_test_torch.shape[0]
    num_batches = (num_samples + batch_size - 1) // batch_size

    for _ in range(1):  # 推理循环次数为 1
        for batch_idx in range(num_batches):
            batch_start = batch_idx * batch_size
            batch_end = min(batch_start + batch_size, num_samples)
            batch = X_test_torch[batch_start:batch_end]
            y_pred_torch = pytorch_model.predict(batch)

    end_time_inference = time.time()

    inference_duration = end_time_inference - start_time_inference
    print(f'Time taken for inference on {X_test_torch.shape[0]} samples: {inference_duration:.4f} seconds')

    stop_event.set()  # 触发停止其他线程

# 监控保存模型时的资源使用率的线程函数
def monitor_resources_during_save(stop_event):
    cpu_usage = []
    gpu_usage = []

    while not stop_event.is_set():
        cpu_usage.append(psutil.cpu_percent(interval=0.1))
        try:
            gpu_output = subprocess.check_output(['nvidia-smi', '--query-gpu=utilization.gpu', '--format=csv,noheader,nounits'])
            gpu_usage.append(int(gpu_output.strip()))
        except Exception as e:
            gpu_usage.append(None)  # 如果没有GPU或nvidia-smi命令失败，则记录None

    # 保存监测结果
    thread_output['cpu_usage'] = cpu_usage
    thread_output['gpu_usage'] = gpu_usage
    print("Resource monitoring finished.")

# 运行外部脚本并捕获输出
def run_script(stop_event):
    try:
        with tempfile.NamedTemporaryFile(delete=False) as tmp_file:
            process = subprocess.Popen(['/Users/anelloyi/Desktop/run_powermetrics.sh'], stdout=tmp_file, stderr=subprocess.STDOUT, text=True)
            print("Subprocess started.")
            while not stop_event.is_set():
                if process.poll() is not None:  # 检查进程是否已经结束
                    break
            
            if process.poll() is None:
                process.terminate()
                try:
                    process.wait(timeout=0.1)
                except subprocess.TimeoutExpired:
                    process.kill()
                    process.wait()
        
        with open(tmp_file.name, 'r') as f:
            thread_output['powermetrics'] = f.read()
        
        os.remove(tmp_file.name)  # 删除临时文件
        print("Subprocess finished.")
    except Exception as e:
        thread_output['powermetrics'] = str(e)
        print("Exception in subprocess:", str(e))

# 创建和启动线程
stop_event = threading.Event()

# 加载XGBoost模型
model = xgb.XGBClassifier()
model.load_model('./models_train/imdb_xgboost_model.json')

thread1 = threading.Thread(target=save_model, args=(stop_event, model, X_test_tfidf))
thread2 = threading.Thread(target=monitor_resources_during_save, args=(stop_event,))
thread3 = threading.Thread(target=run_script, args=(stop_event,))
thread1.start()
thread2.start()
thread3.start()

# 等待线程完成
thread1.join()
thread2.join()
thread3.join()

# 输出从线程收集的数据
content = thread_output.get('powermetrics', 'No output captured')
# 将内容按行拆分
lines = content.split('\n')
# 筛选出以 "CPU consume" 和 "GPU Power" 开头的行
filtered_lines = [line for line in lines if line.startswith('GPU Power:') or line.startswith('CPU Power:')]
# 将筛选后的行合并为一个字符串，每行之间用换行符分隔
filtered_content = '\n'.join(filtered_lines)

output_file_name = './imbd_models/output_xgboost/output-imdb-xgboost-pth.txt'
with open(output_file_name, 'w') as file:
    file.write(filtered_content)
    file.write(f'\nTotal Duration(s): {duration:.2f}')
    file.write(f'\nInference Duration(s): {inference_duration:.4f}')
print(f"Content saved to {output_file_name}")

filtered_lines_count = len(filtered_lines)
#print(filtered_lines_count)
#print(filtered_lines)

duration = inference_duration
# 确保 filtered_lines_count 不为零
if filtered_lines_count > 0:
    # 提取每一个采样点的数字，即CPU和GPU的具体mV
    numbers = []
    for line in filtered_lines:
        match = re.search(r'[\d.]+', line)
        if match:
            numbers.append(float(match.group()))

    delta_time = duration * 2 / filtered_lines_count
    numbers_scaled = [num * delta_time for num in numbers]
    total_energy_consumption = sum(numbers_scaled)
    print(f"Total energy consumption: {total_energy_consumption:.2f} mV")
else:
    print("No filtered lines to process.")


Subprocess started.
Time taken to save model: 0.3324 seconds
Time taken for inference on 10000 samples: 1.4757 seconds
Subprocess finished.
Resource monitoring finished.
Content saved to ./imbd_models/output_xgboost/output-imdb-xgboost-pth.txt
Total energy consumption: 11274.51 mV
