In [7]:
import threading
import numpy as np
import time
import subprocess
import os
import psutil
import tensorflow as tf
import tempfile
from tensorflow.keras.datasets import imdb
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Embedding, LSTM, Dense
from tensorflow.keras.callbacks import LambdaCallback
import numpy as np
from sklearn.metrics import accuracy_score
from sklearn.feature_extraction.text import TfidfVectorizer
import re
import onnxmltools
import onnxruntime as ort
from skl2onnx.common.data_types import FloatTensorType
from scipy.sparse import issparse
import hummingbird.ml
import torch
import pickle
import joblib
from sklearn.ensemble import RandomForestClassifier
from joblib import dump
import onnx
from skl2onnx import convert_sklearn
import onnxruntime as rt
from scipy.sparse import hstack
from nyoka import PMML44 as pml
from pypmml import Model
import joblib
from nyoka import skl_to_pmml
from sklearn.pipeline import Pipeline


In [8]:
# 加载 IMDB 数据集
max_features = 20000  # 使用的单词数量
maxlen = 100  # 每条评论的最大长度

(X_train, y_train), (X_test, y_test) = imdb.load_data(num_words=max_features)

# 将序列转换为文本
word_index = imdb.get_word_index()
index_word = {v: k for k, v in word_index.items()}

def sequences_to_texts(sequences):
    return [' '.join([index_word.get(i - 3, '?') for i in seq]) for seq in sequences]

X_train_text = sequences_to_texts(X_train)
X_test_text = sequences_to_texts(X_test)

# 使用TF-IDF向量化文本数据
vectorizer = TfidfVectorizer(max_features=max_features)
X_train_tfidf = vectorizer.fit_transform(X_train_text)
X_test_tfidf = vectorizer.transform(X_test_text)
X_test_tfidf_1 = vectorizer.transform(X_test_text[:1])
X_test_tfidf_10 = vectorizer.transform(X_test_text[:10])

# 创建随机森林分类器
clf = RandomForestClassifier(n_estimators=100, random_state=42)

# 训练模型
clf.fit(X_train_tfidf, y_train)

# 预测测试集
y_pred = clf.predict(X_test_tfidf)

# 计算准确率
accuracy = accuracy_score(y_test, y_pred)
print(f'Test accuracy: {accuracy * 100:.2f}%')

Test accuracy: 83.77%


In [3]:
# 保存训练好的模型到文件
model_filename = ('./models_train/imdb_random_forest_model.joblib')
dump(clf, model_filename)
vectorizer_filename = './models_train/imdb_random_forest_tfidf_vectorizer.joblib'
dump(vectorizer, vectorizer_filename)
print(f"Model saved to {model_filename}")

Model saved to ./models_train/imdb_random_forest_model.joblib


### IMBD RF joblib

In [9]:
# 初始化全局变量
thread_output = {}
duration = 0
inference_duration = 0

# 定义保存模型的函数
def save_model(stop_event, model, X_test_tfidf):
    global duration
    global inference_duration

    # model = joblib.load('./models_train/imdb_random_forest_model.joblib')

    start_time = time.time()
    for i in range(10):
        joblib.dump(model, 'imdb_random_forest_model.joblib')
    end_time = time.time()

    duration = end_time - start_time
    print(f'Time taken to save model: {duration:.4f} seconds')

    start_time_inference = time.time()
    for i in range(32): # 1*10
        model.predict(X_test_tfidf)
    end_time_inference = time.time()

    inference_duration = end_time_inference - start_time_inference
    print(f'Time taken for inference on samples: {inference_duration:.4f} seconds')

    stop_event.set()  # 触发停止其他线程

# 监控保存模型时的资源使用率的线程函数
def monitor_resources_during_save(stop_event):
    cpu_usage = []
    gpu_usage = []

    while not stop_event.is_set():
        cpu_usage.append(psutil.cpu_percent(interval=0.1))
        try:
            gpu_output = subprocess.check_output(['nvidia-smi', '--query-gpu=utilization.gpu', '--format=csv,noheader,nounits'])
            gpu_usage.append(int(gpu_output.strip()))
        except Exception as e:
            gpu_usage.append(None)  # 如果没有GPU或nvidia-smi命令失败，则记录None

    # 保存监测结果
    thread_output['cpu_usage'] = cpu_usage
    thread_output['gpu_usage'] = gpu_usage
    print("Resource monitoring finished.")

# 运行外部脚本并捕获输出
def run_script(stop_event):
    try:
        with tempfile.NamedTemporaryFile(delete=False) as tmp_file:
            process = subprocess.Popen(['/Users/anelloyi/Desktop/run_powermetrics.sh'], stdout=tmp_file, stderr=subprocess.STDOUT, text=True)
            print("Subprocess started.")
            while not stop_event.is_set():
                if process.poll() is not None:  # 检查进程是否已经结束
                    break
            
            if process.poll() is None:
                process.terminate()
                try:
                    process.wait(timeout=0.1)
                except subprocess.TimeoutExpired:
                    process.kill()
                    process.wait()
        
        with open(tmp_file.name, 'r') as f:
            thread_output['powermetrics'] = f.read()
        
        os.remove(tmp_file.name)  # 删除临时文件
        print("Subprocess finished.")
    except Exception as e:
        thread_output['powermetrics'] = str(e)
        print("Exception in subprocess:", str(e))

# 创建和启动线程
stop_event = threading.Event()

# 加载保存的模型
model_filename = './models_train/imdb_random_forest_model.joblib'
clf = joblib.load(model_filename)

thread1 = threading.Thread(target=save_model, args=(stop_event, clf, X_test_tfidf_1))
#thread1 = threading.Thread(target=save_model, args=(stop_event, clf, X_test_tfidf_10))
thread2 = threading.Thread(target=monitor_resources_during_save, args=(stop_event,))
thread3 = threading.Thread(target=run_script, args=(stop_event,))
thread1.start()
thread2.start()
thread3.start()

# 等待线程完成
thread1.join()
thread2.join()
thread3.join()

# 输出从线程收集的数据
content = thread_output.get('powermetrics', 'No output captured')
# 将内容按行拆分
lines = content.split('\n')
# 筛选出以 "CPU consume" 和 "GPU Power" 开头的行
filtered_lines = [line for line in lines if line.startswith('GPU Power:') or line.startswith('CPU Power:')]
# 将筛选后的行合并为一个字符串，每行之间用换行符分隔
filtered_content = '\n'.join(filtered_lines)

output_file_name = './imbd_models/output_random_forest/output-imdb-random_forest-joblib.txt'
with open(output_file_name, 'w') as file:
    file.write(filtered_content)
    file.write(f'\nTotal Duration(s): {duration:.2f}')
    file.write(f'\nInference Duration(s): {inference_duration:.4f}')
print(f"Content saved to {output_file_name}")

filtered_lines_count = len(filtered_lines)
print(filtered_lines_count)
print(filtered_lines)

# 确保 filtered_lines_count 不为零
if filtered_lines_count > 0:
    # 提取每一个采样点的数字，即CPU和GPU的具体mV
    numbers = []
    for line in filtered_lines:
        match = re.search(r'[\d.]+', line)
        if match:
            numbers.append(float(match.group()))

    delta_time = duration * 2 / filtered_lines_count
    numbers_scaled = [num * delta_time for num in numbers]
    total_energy_consumption = sum(numbers_scaled)
    print(f"Total energy consumption: {total_energy_consumption:.2f} mV")
else:
    print("No filtered lines to process.")


Subprocess started.
Time taken to save model: 7.7831 seconds
Time taken for inference on samples: 1.3939 seconds
Subprocess finished.
Resource monitoring finished.
Content saved to ./imbd_models/output_random_forest/output-imdb-random_forest-joblib.txt
1034
['CPU Power: 3520 mW', 'GPU Power: 0 mW', 'CPU Power: 3270 mW', 'GPU Power: 0 mW', 'CPU Power: 7367 mW', 'GPU Power: 0 mW', 'CPU Power: 9356 mW', 'GPU Power: 131 mW', 'CPU Power: 6856 mW', 'GPU Power: 136 mW', 'CPU Power: 6547 mW', 'GPU Power: 257 mW', 'CPU Power: 4242 mW', 'GPU Power: 253 mW', 'CPU Power: 4538 mW', 'GPU Power: 320 mW', 'CPU Power: 5396 mW', 'GPU Power: 0 mW', 'CPU Power: 4268 mW', 'GPU Power: 63 mW', 'CPU Power: 6432 mW', 'GPU Power: 0 mW', 'CPU Power: 9272 mW', 'GPU Power: 187 mW', 'CPU Power: 6641 mW', 'GPU Power: 0 mW', 'CPU Power: 4356 mW', 'GPU Power: 184 mW', 'CPU Power: 4108 mW', 'GPU Power: 245 mW', 'CPU Power: 5936 mW', 'GPU Power: 0 mW', 'CPU Power: 3479 mW', 'GPU Power: 0 mW', 'CPU Power: 4535 mW', 'GPU 

### imdb RF ONNX

In [10]:

# 初始化全局变量
thread_output = {}
duration = 0
inference_duration = 0

# 定义保存模型的函数
def save_model(stop_event, model, X_test_tfidf):
    global duration
    global inference_duration

    # 将random_forest模型转换为onnx模型
    start_time = time.time()
    for i in range(1):
        # 转换为ONNX格式
        initial_type = [('float_input', FloatTensorType([None, max_features]))]
        onnx_model = convert_sklearn(clf, initial_types=initial_type,  target_opset=9)

        # 保存ONNX模型
        onnx_model_filename = 'imdb_random_forest_model.onnx'
        with open(onnx_model_filename, "wb") as f:
            f.write(onnx_model.SerializeToString())
        print(f"ONNX model saved to {onnx_model_filename}")
    end_time = time.time()

    duration = end_time - start_time
    print(f'Time taken to save model: {duration:.4f} seconds')

    start_time_inference = time.time()
    # 准备输入数据
    # 如果特征数量少于max_features，则补充空的特征
    if X_test_tfidf.shape[1] < max_features:
        padding = np.zeros((X_test_tfidf.shape[0], max_features - X_test_tfidf.shape[1]))
        X_test_tfidf = hstack([X_test_tfidf, padding])
    X_test_tfidf_array = X_test_tfidf.toarray().astype(np.float32)

    # 使用ONNX runtime进行推理
    sess = rt.InferenceSession(onnx_model.SerializeToString())
    input_name = sess.get_inputs()[0].name
    label_name = sess.get_outputs()[0].name

    # 进行推理
    for i in range(32): # 1*10
        predictions = sess.run([label_name], {input_name: X_test_tfidf_array})[0]
    # 进行推理
    end_time_inference = time.time()

    inference_duration = end_time_inference - start_time_inference
    print(f'Time taken for inference on {X_test_tfidf_array.shape[0]} samples: {inference_duration:.4f} seconds')

    stop_event.set()  # 触发停止其他线程

# 监控保存模型时的资源使用率的线程函数
def monitor_resources_during_save(stop_event):
    cpu_usage = []
    gpu_usage = []

    while not stop_event.is_set():
        cpu_usage.append(psutil.cpu_percent(interval=0.1))
        try:
            gpu_output = subprocess.check_output(['nvidia-smi', '--query-gpu=utilization.gpu', '--format=csv,noheader,nounits'])
            gpu_usage.append(int(gpu_output.strip()))
        except Exception as e:
            gpu_usage.append(None)  # 如果没有GPU或nvidia-smi命令失败，则记录None

    # 保存监测结果
    thread_output['cpu_usage'] = cpu_usage
    thread_output['gpu_usage'] = gpu_usage
    print("Resource monitoring finished.")

# 运行外部脚本并捕获输出
def run_script(stop_event):
    try:
        with tempfile.NamedTemporaryFile(delete=False) as tmp_file:
            process = subprocess.Popen(['/Users/anelloyi/Desktop/run_powermetrics.sh'], stdout=tmp_file, stderr=subprocess.STDOUT, text=True)
            print("Subprocess started.")
            while not stop_event.is_set():
                if process.poll() is not None:  # 检查进程是否已经结束
                    break
            
            if process.poll() is None:
                process.terminate()
                try:
                    process.wait(timeout=0.1)
                except subprocess.TimeoutExpired:
                    process.kill()
                    process.wait()
        
        with open(tmp_file.name, 'r') as f:
            thread_output['powermetrics'] = f.read()
        
        os.remove(tmp_file.name)  # 删除临时文件
        print("Subprocess finished.")
    except Exception as e:
        thread_output['powermetrics'] = str(e)
        print("Exception in subprocess:", str(e))

# 创建和启动线程
stop_event = threading.Event()

# 加载保存的模型
model_filename = './models_train/imdb_random_forest_model.joblib'
clf = joblib.load(model_filename)

thread1 = threading.Thread(target=save_model, args=(stop_event, clf, X_test_tfidf_1))
#thread1 = threading.Thread(target=save_model, args=(stop_event, clf, X_test_tfidf_10))
thread1 = threading.Thread(target=save_model, args=(stop_event, clf, X_test_tfidf))
thread2 = threading.Thread(target=monitor_resources_during_save, args=(stop_event,))
thread3 = threading.Thread(target=run_script, args=(stop_event,))
thread1.start()
thread2.start()
thread3.start()

# 等待线程完成
thread1.join()
thread2.join()
thread3.join()

# 输出从线程收集的数据
content = thread_output.get('powermetrics', 'No output captured')
# 将内容按行拆分
lines = content.split('\n')
# 筛选出以 "CPU consume" 和 "GPU Power" 开头的行
filtered_lines = [line for line in lines if line.startswith('GPU Power:') or line.startswith('CPU Power:')]
# 将筛选后的行合并为一个字符串，每行之间用换行符分隔
filtered_content = '\n'.join(filtered_lines)

output_file_name = './imbd_models/output_random_forest/output-imdb-random_forest-onnx.txt'
with open(output_file_name, 'w') as file:
    file.write(filtered_content)
    file.write(f'\nTotal Duration(s): {duration:.2f}')
    file.write(f'\nInference Duration(s): {inference_duration:.4f}')
print(f"Content saved to {output_file_name}")

filtered_lines_count = len(filtered_lines)
print(filtered_lines_count)
print(filtered_lines)

# 确保 filtered_lines_count 不为零
if filtered_lines_count > 0:
    # 提取每一个采样点的数字，即CPU和GPU的具体mV
    numbers = []
    for line in filtered_lines:
        match = re.search(r'[\d.]+', line)
        if match:
            numbers.append(float(match.group()))

    delta_time = duration * 2 / filtered_lines_count
    numbers_scaled = [num * delta_time for num in numbers]
    total_energy_consumption = sum(numbers_scaled)
    print(f"Total energy consumption: {total_energy_consumption:.2f} mV")
else:
    print("No filtered lines to process.")


Subprocess started.
ONNX model saved to imdb_random_forest_model.onnx
Time taken to save model: 20.7219 seconds
Time taken for inference on 25000 samples: 17.5161 seconds
Resource monitoring finished.
Subprocess finished.
Content saved to ./imbd_models/output_random_forest/output-imdb-random_forest-onnx.txt
4804
['CPU Power: 13896 mW', 'GPU Power: 65 mW', 'CPU Power: 19431 mW', 'GPU Power: 63 mW', 'CPU Power: 21304 mW', 'GPU Power: 66 mW', 'CPU Power: 19479 mW', 'GPU Power: 63 mW', 'CPU Power: 17781 mW', 'GPU Power: 62 mW', 'CPU Power: 15010 mW', 'GPU Power: 67 mW', 'CPU Power: 14975 mW', 'GPU Power: 66 mW', 'CPU Power: 16367 mW', 'GPU Power: 64 mW', 'CPU Power: 15705 mW', 'GPU Power: 62 mW', 'CPU Power: 18520 mW', 'GPU Power: 63 mW', 'CPU Power: 15679 mW', 'GPU Power: 0 mW', 'CPU Power: 14657 mW', 'GPU Power: 63 mW', 'CPU Power: 15231 mW', 'GPU Power: 63 mW', 'CPU Power: 14109 mW', 'GPU Power: 62 mW', 'CPU Power: 17212 mW', 'GPU Power: 63 mW', 'CPU Power: 14717 mW', 'GPU Power: 65 mW'

### imdb RF PMML

In [22]:
pip install numpy scikit-learn joblib psutil nyoka pypmml

Note: you may need to restart the kernel to use updated packages.


In [2]:
from nyoka import PMML44 as pml
from pypmml import Model
import joblib
from nyoka import skl_to_pmml
from sklearn.pipeline import Pipeline

In [3]:

# 初始化全局变量
thread_output = {}
duration = 0
inference_duration = 0

# 定义保存模型的函数
def save_model(stop_event, model):
    global duration
    global inference_duration

    # 将random_forest模型转换为pmmL模型
    start_time = time.time()
    for i in range(1):
        # 创建一个Pipeline
        pipeline = Pipeline([("vectorizer", vectorizer), ("classifier", model)])

        # 导出为PMML
        pmml_filename = 'imdb_random_forest_model.pmml'
        skl_to_pmml(pipeline, col_names=vectorizer.get_feature_names_out(), target_name="sentiment", pmml_f_name=pmml_filename)
    print(f"Model exported to {pmml_filename}")
    end_time = time.time()

    duration = end_time - start_time
    print(f'Time taken to save model: {duration:.4f} seconds')

    start_time_inference = time.time()
    # 准备输入数据
    # 加载PMML模型
    model = Model.load(pmml_filename)

    # 准备输入数据
    input_text = "This movie was fantastic! I really enjoyed it."
    input_vector = vectorizer.transform([input_text]*10)

    # 进行推理
    y_pred = model.predict(input_vector.toarray())
    # 进行推理
    end_time_inference = time.time()

    inference_duration = end_time_inference - start_time_inference
    print(f'Time taken for inference on {input_vector.shape[0]} samples: {inference_duration:.4f} seconds')

    stop_event.set()  # 触发停止其他线程

# 监控保存模型时的资源使用率的线程函数
def monitor_resources_during_save(stop_event):
    cpu_usage = []
    gpu_usage = []

    while not stop_event.is_set():
        cpu_usage.append(psutil.cpu_percent(interval=0.1))
        try:
            gpu_output = subprocess.check_output(['nvidia-smi', '--query-gpu=utilization.gpu', '--format=csv,noheader,nounits'])
            gpu_usage.append(int(gpu_output.strip()))
        except Exception as e:
            gpu_usage.append(None)  # 如果没有GPU或nvidia-smi命令失败，则记录None

    # 保存监测结果
    thread_output['cpu_usage'] = cpu_usage
    thread_output['gpu_usage'] = gpu_usage
    print("Resource monitoring finished.")

# 运行外部脚本并捕获输出
def run_script(stop_event):
    try:
        with tempfile.NamedTemporaryFile(delete=False) as tmp_file:
            process = subprocess.Popen(['/Users/anelloyi/Desktop/run_powermetrics.sh'], stdout=tmp_file, stderr=subprocess.STDOUT, text=True)
            print("Subprocess started.")
            while not stop_event.is_set():
                if process.poll() is not None:  # 检查进程是否已经结束
                    break
            
            if process.poll() is None:
                process.terminate()
                try:
                    process.wait(timeout=0.1)
                except subprocess.TimeoutExpired:
                    process.kill()
                    process.wait()
        
        with open(tmp_file.name, 'r') as f:
            thread_output['powermetrics'] = f.read()
        
        os.remove(tmp_file.name)  # 删除临时文件
        print("Subprocess finished.")
    except Exception as e:
        thread_output['powermetrics'] = str(e)
        print("Exception in subprocess:", str(e))

# 创建和启动线程
stop_event = threading.Event()

# 加载保存的模型
model_filename = './models_train/imdb_random_forest_model.joblib'
clf = joblib.load(model_filename)

thread1 = threading.Thread(target=save_model, args=(stop_event, clf))
thread2 = threading.Thread(target=monitor_resources_during_save, args=(stop_event,))
thread3 = threading.Thread(target=run_script, args=(stop_event,))
thread1.start()
thread2.start()
thread3.start()

# 等待线程完成
thread1.join()
thread2.join()
thread3.join()

# 输出从线程收集的数据
content = thread_output.get('powermetrics', 'No output captured')
# 将内容按行拆分
lines = content.split('\n')
# 筛选出以 "CPU consume" 和 "GPU Power" 开头的行
filtered_lines = [line for line in lines if line.startswith('GPU Power:') or line.startswith('CPU Power:')]
# 将筛选后的行合并为一个字符串，每行之间用换行符分隔
filtered_content = '\n'.join(filtered_lines)

output_file_name = './imbd_models/output_random_forest/output-imdb-random_forest-pmml.txt'
with open(output_file_name, 'w') as file:
    file.write(filtered_content)
    file.write(f'\nTotal Duration(s): {duration:.2f}')
    file.write(f'\nInference Duration(s): {inference_duration:.4f}')
print(f"Content saved to {output_file_name}")

filtered_lines_count = len(filtered_lines)
print(filtered_lines_count)
print(filtered_lines)

# 确保 filtered_lines_count 不为零
if filtered_lines_count > 0:
    # 提取每一个采样点的数字，即CPU和GPU的具体mV
    numbers = []
    for line in filtered_lines:
        match = re.search(r'[\d.]+', line)
        if match:
            numbers.append(float(match.group()))

    delta_time = duration * 2 / filtered_lines_count
    numbers_scaled = [num * delta_time for num in numbers]
    total_energy_consumption = sum(numbers_scaled)
    print(f"Total energy consumption: {total_energy_consumption:.2f} mV")
else:
    print("No filtered lines to process.")


Exception in thread Thread-5:
Traceback (most recent call last):
  File "/opt/anaconda3/envs/env_name/lib/python3.8/threading.py", line 932, in _bootstrap_inner
    self.run()
  File "/opt/anaconda3/envs/env_name/lib/python3.8/threading.py", line 870, in run
    self._target(*self._args, **self._kwargs)
  File "/var/folders/tm/w_n0tvhd2jlds4ml_jrhf3yw0000gn/T/ipykernel_7772/3650548247.py", line 15, in save_model
NameError: name 'vectorizer' is not defined


Subprocess started.


KeyboardInterrupt: 

In [5]:
import threading
import time
import subprocess
import tempfile
import os
import re
import psutil
import joblib
from sklearn.pipeline import Pipeline
from nyoka import skl_to_pmml
from pypmml import Model
from sklearn.feature_extraction.text import TfidfVectorizer

# 初始化全局变量
thread_output = {}
duration = 0
inference_duration = 0

# 定义保存模型的函数
def save_model(stop_event, model):
    global duration
    global inference_duration

    # 将模型转换为PMML模型
    start_time = time.time()
    for i in range(1):
        try:
            # 创建一个Pipeline
            pipeline = Pipeline([("vectorizer", vectorizer), ("classifier", model)])

            # 导出为PMML
            pmml_filename = 'imdb_random_forest_model.pmml'
            skl_to_pmml(pipeline, col_names=vectorizer.get_feature_names_out(), target_name="sentiment", pmml_f_name=pmml_filename)
            print(f"Model exported to {pmml_filename}")
        except Exception as e:
            print(f"Error exporting model: {e}")
    
    end_time = time.time()
    duration = end_time - start_time
    print(f'Time taken to save model: {duration:.4f} seconds')

    start_time_inference = time.time()
    # 准备输入数据
    # 加载PMML模型
    model = Model.load(pmml_filename)

    # 准备输入数据
    input_text = "This movie was fantastic! I really enjoyed it."
    input_vector = vectorizer.transform([input_text]*10)

    # 进行推理
    for _ in range(32):
        y_pred = model.predict(input_vector.toarray())
    # 进行推理
    end_time_inference = time.time()

    inference_duration = end_time_inference - start_time_inference
    print(f'Time taken for inference on {input_vector.shape[0]} samples: {inference_duration:.4f} seconds')

    stop_event.set()  # 触发停止其他线程

# 监控保存模型时的资源使用率的线程函数
def monitor_resources_during_save(stop_event):
    cpu_usage = []
    gpu_usage = []

    while not stop_event.is_set():
        cpu_usage.append(psutil.cpu_percent(interval=0.1))
        try:
            gpu_output = subprocess.check_output(['nvidia-smi', '--query-gpu=utilization.gpu', '--format=csv,noheader,nounits'])
            gpu_usage.append(int(gpu_output.strip()))
        except Exception as e:
            gpu_usage.append(None)  # 如果没有GPU或nvidia-smi命令失败，则记录None

    # 保存监测结果
    thread_output['cpu_usage'] = cpu_usage
    thread_output['gpu_usage'] = gpu_usage
    print("Resource monitoring finished.")

# 运行外部脚本并捕获输出
def run_script(stop_event):
    try:
        with tempfile.NamedTemporaryFile(delete=False) as tmp_file:
            process = subprocess.Popen(['/Users/anelloyi/Desktop/run_powermetrics.sh'], stdout=tmp_file, stderr=subprocess.STDOUT, text=True)
            print("Subprocess started.")
            while not stop_event.is_set():
                if process.poll() is not None:  # 检查进程是否已经结束
                    break
            
            if process.poll() is None:
                process.terminate()
                try:
                    process.wait(timeout=0.1)
                except subprocess.TimeoutExpired:
                    process.kill()
                    process.wait()
        
        with open(tmp_file.name, 'r') as f:
            thread_output['powermetrics'] = f.read()
        
        os.remove(tmp_file.name)  # 删除临时文件
        print("Subprocess finished.")
    except Exception as e:
        thread_output['powermetrics'] = str(e)
        print("Exception in subprocess:", str(e))

# 创建和启动线程
stop_event = threading.Event()

# 加载保存的模型
model_filename = './models_train/imdb_random_forest_model.joblib'
clf = joblib.load(model_filename)

# 加载向量化器
vectorizer_filename = './models_train/imdb_random_forest_model.joblib'
vectorizer = joblib.load(vectorizer_filename)

thread1 = threading.Thread(target=save_model, args=(stop_event, clf))
thread2 = threading.Thread(target=monitor_resources_during_save, args=(stop_event,))
thread3 = threading.Thread(target=run_script, args=(stop_event,))
thread1.start()
thread2.start()
thread3.start()

# 等待线程完成
thread1.join()
thread2.join()
thread3.join()

# 输出从线程收集的数据
content = thread_output.get('powermetrics', 'No output captured')
# 将内容按行拆分
lines = content.split('\n')
# 筛选出以 "GPU Power" 和 "CPU Power" 开头的行
filtered_lines = [line for line in lines if line.startswith('GPU Power:') or line.startswith('CPU Power:')]
# 将筛选后的行合并为一个字符串，每行之间用换行符分隔
filtered_content = '\n'.join(filtered_lines)

output_file_name = './imbd_models/output_random_forest/output-imdb-random_forest-pmml.txt'
with open(output_file_name, 'w') as file:
    file.write(filtered_content)
    file.write(f'\nTotal Duration(s): {duration:.2f}')
    file.write(f'\nInference Duration(s): {inference_duration:.4f}')
print(f"Content saved to {output_file_name}")

filtered_lines_count = len(filtered_lines)
print(filtered_lines_count)
print(filtered_lines)

# 确保 filtered_lines_count 不为零
if filtered_lines_count > 0:
    # 提取每一个采样点的数字，即CPU和GPU的具体mV
    numbers = []
    for line in filtered_lines:
        match = re.search(r'[\d.]+', line)
        if match:
            numbers.append(float(match.group()))

    delta_time = duration * 2 / filtered_lines_count
    numbers_scaled = [num * delta_time for num in numbers]
    total_energy_consumption = sum(numbers_scaled)
    print(f"Total energy consumption: {total_energy_consumption:.2f} mV")
else:
    print("No filtered lines to process.")

Error exporting model: 'RandomForestClassifier' object has no attribute 'get_feature_names_out'
Time taken to save model: 0.0007 seconds
Subprocess started.


KeyboardInterrupt: 