In [1]:
import threading
import numpy as np
import time
import subprocess
import os
import psutil
import tensorflow as tf
import tempfile
from tensorflow.keras.datasets import imdb
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Embedding, LSTM, Dense
from tensorflow.keras.callbacks import LambdaCallback
import numpy as np
from sklearn.metrics import accuracy_score
from sklearn.feature_extraction.text import TfidfVectorizer
import re
import onnxmltools
import onnxruntime as ort
from skl2onnx.common.data_types import FloatTensorType
from scipy.sparse import issparse
import hummingbird.ml
import torch
import pickle
import joblib
from sklearn.ensemble import RandomForestClassifier
from joblib import dump
import onnx
from skl2onnx import convert_sklearn
import onnxruntime as rt
from scipy.sparse import hstack
from nyoka import PMML44 as pml
from pypmml import Model
import joblib
from nyoka import skl_to_pmml
from sklearn.pipeline import Pipeline


2024-08-08 18:07:13.059422: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  SSE4.1 SSE4.2
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.


In [2]:
# 加载 IMDB 数据集
max_features = 20000  # 使用的单词数量
maxlen = 100  # 每条评论的最大长度

(X_train, y_train), (X_test, y_test) = imdb.load_data(num_words=max_features)

# 将序列转换为文本
word_index = imdb.get_word_index()
index_word = {v: k for k, v in word_index.items()}

def sequences_to_texts(sequences):
    return [' '.join([index_word.get(i - 3, '?') for i in seq]) for seq in sequences]

X_train_text = sequences_to_texts(X_train)
X_test_text = sequences_to_texts(X_test)

# 使用TF-IDF向量化文本数据
vectorizer = TfidfVectorizer(max_features=max_features)
X_train_tfidf = vectorizer.fit_transform(X_train_text)
X_test_tfidf = vectorizer.transform(X_test_text)
X_test_tfidf_1 = vectorizer.transform(X_test_text[:1])
X_test_tfidf_200 = vectorizer.transform(X_test_text[:200])
X_test_tfidf_500 = vectorizer.transform(X_test_text[:500])
X_test_tfidf_1000 = vectorizer.transform(X_test_text[:1000])
X_test_tfidf_10000 = vectorizer.transform(X_test_text[:10000])

In [11]:

# 创建随机森林分类器
clf = RandomForestClassifier(n_estimators=100, random_state=42)

# 训练模型
clf.fit(X_train_tfidf, y_train)

# 预测测试集
y_pred = clf.predict(X_test_tfidf)

# 计算准确率
accuracy = accuracy_score(y_test, y_pred)
print(f'Test accuracy: {accuracy * 100:.2f}%')

Test accuracy: 83.77%


In [12]:
# 保存训练好的模型到文件
model_filename = ('./models_train/imdb_random_forest_model.joblib')
dump(clf, model_filename)
vectorizer_filename = './models_train/imdb_random_forest_tfidf_vectorizer.joblib'
dump(vectorizer, vectorizer_filename)
print(f"Model saved to {model_filename}")

Model saved to ./models_train/imdb_random_forest_model.joblib


### IMBD RF joblib

In [3]:
# 初始化全局变量
thread_output = {}
duration = 0
inference_duration = 0

# 定义保存模型的函数
def save_model(stop_event, model, X_test_tfidf):
    global duration
    global inference_duration

    # model = joblib.load('./models_train/imdb_random_forest_model.joblib')

    start_time = time.time()
    for i in range(10):
        joblib.dump(model, 'imdb_random_forest_model.joblib')
    end_time = time.time()

    duration = end_time - start_time
    print(f'Time taken to save model: {duration:.4f} seconds')

    X_test_tfidf_array = X_test_tfidf.astype(np.float32)

    batch_size = 128
    num_samples = X_test_tfidf_array.shape[0]
    num_batches = (num_samples + batch_size - 1) // batch_size  # 计算批次数量

    start_time_inference = time.time()
    for _ in range(1):  # 推理循环次数为 1
        for batch_idx in range(num_batches):
            batch_start = batch_idx * batch_size
            batch_end = min(batch_start + batch_size, num_samples)
            batch = X_test_tfidf_array[batch_start:batch_end]
            predictions = model.predict(batch)
            # 在此处处理预测结果，例如保存或输出

    end_time_inference = time.time()

    inference_duration = end_time_inference - start_time_inference
    print(f'Time taken for inference on {num_samples} samples: {inference_duration:.4f} seconds')
    stop_event.set()  # 触发停止其他线程


# 监控保存模型时的资源使用率的线程函数
def monitor_resources_during_save(stop_event):
    cpu_usage = []
    gpu_usage = []

    while not stop_event.is_set():
        cpu_usage.append(psutil.cpu_percent(interval=0.1))
        try:
            gpu_output = subprocess.check_output(['nvidia-smi', '--query-gpu=utilization.gpu', '--format=csv,noheader,nounits'])
            gpu_usage.append(int(gpu_output.strip()))
        except Exception as e:
            gpu_usage.append(None)  # 如果没有GPU或nvidia-smi命令失败，则记录None

    # 保存监测结果
    thread_output['cpu_usage'] = cpu_usage
    thread_output['gpu_usage'] = gpu_usage
    print("Resource monitoring finished.")

# 运行外部脚本并捕获输出
def run_script(stop_event):
    try:
        with tempfile.NamedTemporaryFile(delete=False) as tmp_file:
            process = subprocess.Popen(['/Users/anelloyi/Desktop/run_powermetrics.sh'], stdout=tmp_file, stderr=subprocess.STDOUT, text=True)
            print("Subprocess started.")
            while not stop_event.is_set():
                if process.poll() is not None:  # 检查进程是否已经结束
                    break
            
            if process.poll() is None:
                process.terminate()
                try:
                    process.wait(timeout=0.1)
                except subprocess.TimeoutExpired:
                    process.kill()
                    process.wait()
        
        with open(tmp_file.name, 'r') as f:
            thread_output['powermetrics'] = f.read()
        
        os.remove(tmp_file.name)  # 删除临时文件
        print("Subprocess finished.")
    except Exception as e:
        thread_output['powermetrics'] = str(e)
        print("Exception in subprocess:", str(e))

# 创建和启动线程
stop_event = threading.Event()

# 加载保存的模型
model_filename = './models_train/imdb_random_forest_model.joblib'
clf = joblib.load(model_filename)

#thread1 = threading.Thread(target=save_model, args=(stop_event, clf, X_test_tfidf))
thread1 = threading.Thread(target=save_model, args=(stop_event, clf, X_test_tfidf_10000))
thread2 = threading.Thread(target=monitor_resources_during_save, args=(stop_event,))
thread3 = threading.Thread(target=run_script, args=(stop_event,))
thread1.start()
thread2.start()
thread3.start()

# 等待线程完成
thread1.join()
thread2.join()
thread3.join()

# 输出从线程收集的数据
content = thread_output.get('powermetrics', 'No output captured')
# 将内容按行拆分
lines = content.split('\n')
# 筛选出以 "CPU consume" 和 "GPU Power" 开头的行
filtered_lines = [line for line in lines if line.startswith('GPU Power:') or line.startswith('CPU Power:')]
# 将筛选后的行合并为一个字符串，每行之间用换行符分隔
filtered_content = '\n'.join(filtered_lines)

output_file_name = './imbd_models/output_random_forest/output-imdb-random_forest-joblib.txt'
with open(output_file_name, 'w') as file:
    file.write(filtered_content)
    file.write(f'\nTotal Duration(s): {duration:.2f}')
    file.write(f'\nInference Duration(s): {inference_duration:.4f}')
print(f"Content saved to {output_file_name}")

filtered_lines_count = len(filtered_lines)
#print(filtered_lines_count)
#print(filtered_lines)


duration = inference_duration
# 确保 filtered_lines_count 不为零
if filtered_lines_count > 0:
    # 提取每一个采样点的数字，即CPU和GPU的具体mV
    numbers = []
    for line in filtered_lines:
        match = re.search(r'[\d.]+', line)
        if match:
            numbers.append(float(match.group()))

    delta_time = duration * 2 / filtered_lines_count
    numbers_scaled = [num * delta_time for num in numbers]
    total_energy_consumption = sum(numbers_scaled)
    print(f"Total energy consumption: {total_energy_consumption:.2f} mV")
else:
    print("No filtered lines to process.")


Subprocess started.
Time taken to save model: 0.9118 seconds
Time taken for inference on 10000 samples: 1.7299 seconds
Subprocess finished.
Resource monitoring finished.
Content saved to ./imbd_models/output_random_forest/output-imdb-random_forest-joblib.txt
Total energy consumption: 8141.84 mV


### imdb RF ONNX

In [8]:
import time
import numpy as np
import joblib
import threading
import psutil
import subprocess
import tempfile
import os
import re
from scipy.sparse import hstack
from sklearn.feature_extraction.text import TfidfVectorizer
from skl2onnx import convert_sklearn
from skl2onnx.common.data_types import FloatTensorType
import onnxruntime as rt
from tensorflow.keras.datasets import imdb

# 初始化全局变量
thread_output = {}
duration = 0
inference_duration = 0

# 定义保存模型的函数
def save_model(stop_event, model, X_test_tfidf):
    global duration
    global inference_duration

    # 将random_forest模型转换为onnx模型
    start_time = time.time()
    for i in range(5):
        # 转换为ONNX格式
        initial_type = [('float_input', FloatTensorType([None, max_features]))]
        #onnx_model = convert_sklearn(model, initial_types=initial_type, target_opset=9)

        # 保存ONNX模型
        onnx_model_filename = 'imdb_random_forest_model.onnx'
        # with open(onnx_model_filename, "wb") as f:
        #     f.write(onnx_model.SerializeToString())
        # print(f"ONNX model saved to {onnx_model_filename}")
    end_time = time.time()

    duration = end_time - start_time
    print(f'Time taken to save model: {duration:.4f} seconds')

    start_time_inference = time.time()
    # 准备输入数据
    # 如果特征数量少于max_features，则补充空的特征
    if X_test_tfidf.shape[1] < max_features:
        padding = np.zeros((X_test_tfidf.shape[0], max_features - X_test_tfidf.shape[1]))
        X_test_tfidf = hstack([X_test_tfidf, padding])
    X_test_tfidf_array = X_test_tfidf.toarray().astype(np.float32)

    # 使用ONNX runtime进行推理
    sess = rt.InferenceSession(onnx_model_filename)
    # sess = rt.InferenceSession(onnx_model.SerializeToString())
    input_name = sess.get_inputs()[0].name
    label_name = sess.get_outputs()[0].name

    # 批量推理部分
    batch_size = 128
    num_samples = X_test_tfidf_array.shape[0]
    num_batches = (num_samples + batch_size - 1) // batch_size  # 计算批次数量

    for _ in range(1):  # 推理循环次数为 1
        for batch_idx in range(num_batches):
            batch_start = batch_idx * batch_size
            batch_end = min(batch_start + batch_size, num_samples)
            batch = X_test_tfidf_array[batch_start:batch_end]
            predictions = sess.run([label_name], {input_name: batch})[0]
            # 在此处处理预测结果，例如保存或输出

    end_time_inference = time.time()

    inference_duration = end_time_inference - start_time_inference
    print(f'Time taken for inference on {num_samples} samples: {inference_duration:.4f} seconds')

    stop_event.set()  # 触发停止其他线程

# 监控保存模型时的资源使用率的线程函数
def monitor_resources_during_save(stop_event):
    cpu_usage = []
    gpu_usage = []

    while not stop_event.is_set():
        cpu_usage.append(psutil.cpu_percent(interval=0.1))
        try:
            gpu_output = subprocess.check_output(['nvidia-smi', '--query-gpu=utilization.gpu', '--format=csv,noheader,nounits'])
            gpu_usage.append(int(gpu_output.strip()))
        except Exception as e:
            gpu_usage.append(None)  # 如果没有GPU或nvidia-smi命令失败，则记录None

    # 保存监测结果
    thread_output['cpu_usage'] = cpu_usage
    thread_output['gpu_usage'] = gpu_usage
    print("Resource monitoring finished.")

# 运行外部脚本并捕获输出
def run_script(stop_event):
    try:
        with tempfile.NamedTemporaryFile(delete=False) as tmp_file:
            process = subprocess.Popen(['/Users/anelloyi/Desktop/run_powermetrics.sh'], stdout=tmp_file, stderr=subprocess.STDOUT, text=True)
            print("Subprocess started.")
            while not stop_event.is_set():
                if process.poll() is not None:  # 检查进程是否已经结束
                    break
            
            if process.poll() is None:
                process.terminate()
                try:
                    process.wait(timeout=0.1)
                except subprocess.TimeoutExpired:
                    process.kill()
                    process.wait()
        
        with open(tmp_file.name, 'r') as f:
            thread_output['powermetrics'] = f.read()
        
        os.remove(tmp_file.name)  # 删除临时文件
        print("Subprocess finished.")
    except Exception as e:
        thread_output['powermetrics'] = str(e)
        print("Exception in subprocess:", str(e))

# 创建和启动线程
stop_event = threading.Event()


# 加载保存的模型
model_filename = './models_train/imdb_random_forest_model.joblib'
clf = joblib.load(model_filename)

# 创建并启动线程
#thread1 = threading.Thread(target=save_model, args=(stop_event, clf, X_test_tfidf))
thread1 = threading.Thread(target=save_model, args=(stop_event, clf, X_test_tfidf_10000))
thread2 = threading.Thread(target=monitor_resources_during_save, args=(stop_event,))
thread3 = threading.Thread(target=run_script, args=(stop_event,))
thread1.start()
thread2.start()
thread3.start()

# 等待线程完成
thread1.join()
thread2.join()
thread3.join()

# 输出从线程收集的数据
content = thread_output.get('powermetrics', 'No output captured')
# 将内容按行拆分
lines = content.split('\n')
# 筛选出以 "CPU consume" 和 "GPU Power" 开头的行
filtered_lines = [line for line in lines if line.startswith('GPU Power:') or line.startswith('CPU Power:')]
# 将筛选后的行合并为一个字符串，每行之间用换行符分隔
filtered_content = '\n'.join(filtered_lines)

output_file_name = './imbd_models/output_random_forest/output-imdb-random_forest-onnx.txt'
with open(output_file_name, 'w') as file:
    file.write(filtered_content)
    file.write(f'\nTotal Duration(s): {duration:.2f}')
    file.write(f'\nInference Duration(s): {inference_duration:.4f}')
print(f"Content saved to {output_file_name}")

filtered_lines_count = len(filtered_lines)
#print(filtered_lines_count)
#print(filtered_lines)

duration = inference_duration

# 确保 filtered_lines_count 不为零
if filtered_lines_count > 0:
    # 提取每一个采样点的数字，即CPU和GPU的具体mV
    numbers = []
    for line in filtered_lines:
        match = re.search(r'[\d.]+', line)
        if match:
            numbers.append(float(match.group()))

    delta_time = duration * 2 / filtered_lines_count
    numbers_scaled = [num * delta_time for num in numbers]
    total_energy_consumption = sum(numbers_scaled)
    print(f"Total energy consumption: {total_energy_consumption:.2f} mV")
else:
    print("No filtered lines to process.")


Time taken to save model: 0.0000 seconds
Subprocess started.
Time taken for inference on 10000 samples: 3.1971 seconds
Subprocess finished.
Resource monitoring finished.
Content saved to ./imbd_models/output_random_forest/output-imdb-random_forest-onnx.txt
Total energy consumption: 17335.55 mV


### RF IMDB pth

In [27]:
import threading
import time
import subprocess
import tempfile
import os
import re
import psutil
import numpy as np
import joblib
import torch
from hummingbird.ml import convert
from sklearn.feature_extraction.text import TfidfVectorizer

# Global variables
thread_output = {}
duration = 0
inference_duration = 0

# Function to save the model and perform inference
def save_model(stop_event, model, X_test_tfidf):
    global duration
    global inference_duration

    start_time = time.time()
    # Convert the sklearn model to a PyTorch model using Hummingbird
    pytorch_model = convert(model, 'pytorch')
    torch.save(pytorch_model, './imdb_random_forest_model.pth')  # Save the model
    end_time = time.time()
    duration = end_time - start_time
    print(f'Time taken to save model: {duration:.4f} seconds')

    # Perform inference
    start_time_inference = time.time()
    X_test_torch = torch.from_numpy(X_test_tfidf.toarray().astype(np.float32))  # Convert to tensor
    batch_size = 128
    num_samples = X_test_torch.shape[0]
    num_batches = (num_samples + batch_size - 1) // batch_size

    # No need to switch to evaluation mode
    with torch.no_grad():
        for batch_idx in range(num_batches):
            batch_start = batch_idx * batch_size
            batch_end = min(batch_start + batch_size, num_samples)
            batch = X_test_torch[batch_start:batch_end]
            predictions = pytorch_model.predict(batch)  # Use predict directly

    end_time_inference = time.time()
    inference_duration = end_time_inference - start_time_inference
    print(f'Time taken for inference on {num_samples} samples: {inference_duration:.4f} seconds')
    stop_event.set()

# Function to monitor resource usage
def monitor_resources_during_save(stop_event):
    cpu_usage = []
    gpu_usage = []
    while not stop_event.is_set():
        cpu_usage.append(psutil.cpu_percent(interval=0.1))
        try:
            gpu_output = subprocess.check_output(['nvidia-smi', '--query-gpu=utilization.gpu', '--format=csv,noheader,nounits'])
            gpu_usage.append(int(gpu_output.strip()))
        except Exception as e:
            gpu_usage.append(None)

    thread_output['cpu_usage'] = cpu_usage
    thread_output['gpu_usage'] = gpu_usage
    print("Resource monitoring finished.")

# Function to run external script and capture output
def run_script(stop_event):
    try:
        with tempfile.NamedTemporaryFile(delete=False) as tmp_file:
            process = subprocess.Popen(['/Users/anelloyi/Desktop/run_powermetrics.sh'], stdout=tmp_file, stderr=subprocess.STDOUT, text=True)
            print("Subprocess started.")
            while not stop_event.is_set():
                if process.poll() is not None:
                    break
            if process.poll() is None:
                process.terminate()
                try:
                    process.wait(timeout=0.1)
                except subprocess.TimeoutExpired:
                    process.kill()
                    process.wait()

        with open(tmp_file.name, 'r') as f:
            thread_output['powermetrics'] = f.read()
        os.remove(tmp_file.name)
        print("Subprocess finished.")
    except Exception as e:
        thread_output['powermetrics'] = str(e)
        print("Exception in subprocess:", str(e))

# Load the model and data
model_filename = './models_train/imdb_random_forest_model.joblib'
clf = joblib.load(model_filename)

# Start threads
stop_event = threading.Event()


thread1 = threading.Thread(target=save_model, args=(stop_event, clf, X_test_tfidf_10000))
#thread1 = threading.Thread(target=save_model, args=(stop_event, clf, X_test_tfidf_1000))
thread2 = threading.Thread(target=monitor_resources_during_save, args=(stop_event,))
thread3 = threading.Thread(target=run_script, args=(stop_event,))
thread1.start()
thread2.start()
thread3.start()

# Wait for threads to complete
thread1.join()
thread2.join()
thread3.join()

# 输出从线程收集的数据
content = thread_output.get('powermetrics', 'No output captured')
# 将内容按行拆分
lines = content.split('\n')
# 筛选出以 "CPU consume" 和 "GPU Power" 开头的行
filtered_lines = [line for line in lines if line.startswith('GPU Power:') or line.startswith('CPU Power:')]
# 将筛选后的行合并为一个字符串，每行之间用换行符分隔
filtered_content = '\n'.join(filtered_lines)

output_file_name = './imbd_models/output_random_forest/output-imdb-random_forest-pth.txt'
with open(output_file_name, 'w') as file:
    file.write(filtered_content)
    file.write(f'\nTotal Duration(s): {duration:.2f}')
    file.write(f'\nInference Duration(s): {inference_duration:.4f}')
print(f"Content saved to {output_file_name}")

filtered_lines_count = len(filtered_lines)
#print(filtered_lines_count)
#print(filtered_lines)

duration = inference_duration

# 确保 filtered_lines_count 不为零
if filtered_lines_count > 0:
    # 提取每一个采样点的数字，即CPU和GPU的具体mV
    numbers = []
    for line in filtered_lines:
        match = re.search(r'[\d.]+', line)
        if match:
            numbers.append(float(match.group()))

    delta_time = duration * 2 / filtered_lines_count
    numbers_scaled = [num * delta_time for num in numbers]
    total_energy_consumption = sum(numbers_scaled)
    print(f"Total energy consumption: {total_energy_consumption:.2f} mV")
else:
    print("No filtered lines to process.")


Subprocess started.
Time taken to save model: 18.4107 seconds
Time taken for inference on 10000 samples: 24.2019 seconds
Resource monitoring finished.
Subprocess finished.
Content saved to ./imbd_models/output_random_forest/output-imdb-random_forest-pth.txt
Total energy consumption: 111257.33 mV


### imdb RF PMML

In [33]:
from nyoka import PMML44 as pml
from pypmml import Model
import joblib
from nyoka import skl_to_pmml
from sklearn.pipeline import Pipeline
from sklearn2pmml import PMMLPipeline

In [39]:
import threading
import time
import numpy as np
import joblib
import psutil
import subprocess
import tempfile
import os
from sklearn.pipeline import Pipeline
from sklearn2pmml import sklearn2pmml
from pypmml import Model

# Initialize global variables
thread_output = {}
duration = 0
inference_duration = 0

def save_model(stop_event, model, X_test_2d):
    global duration
    global inference_duration

    start_time = time.time()
    # Create a PMMLPipeline with the classifier
    pipeline = PMMLPipeline([("classifier", model)])
    
    # Export to PMML
    pmml_filename = 'imdb_random_forest_model.pmml'
    sklearn2pmml(pipeline, pmml_filename)
    print(f"Model exported to {pmml_filename}")
    
    end_time = time.time()
    duration = end_time - start_time
    print(f'Time taken to save model: {duration:.4f} seconds')

    # Load PMML model
    model = Model.load(pmml_filename)

    # Prepare input data
    X_test_2d_array = X_test_2d.astype(np.float32)

    # Batch inference
    batch_size = 128
    num_samples = len(X_test_2d_array)
    num_batches = (num_samples + batch_size - 1) // batch_size

    start_time_inference = time.time()
    predictions = []
    for _ in range(1):  # Inference loop count as 1
        for batch_idx in range(num_batches):
            batch_start = batch_idx * batch_size
            batch_end = min(batch_start + batch_size, num_samples)
            batch = X_test_2d_array[batch_start:batch_end]
            predictions.extend(model.predict(batch))

    end_time_inference = time.time()
    inference_duration = end_time_inference - start_time_inference
    print(f'Time taken for inference on {num_samples} samples: {inference_duration:.4f} seconds')

    stop_event.set()  # Signal to stop other threads

def monitor_resources_during_save(stop_event):
    cpu_usage = []
    gpu_usage = []
    while not stop_event.is_set():
        cpu_usage.append(psutil.cpu_percent(interval=0.1))
        try:
            gpu_output = subprocess.check_output(['nvidia-smi', '--query-gpu=utilization.gpu', '--format=csv,noheader,nounits'])
            gpu_usage.append(int(gpu_output.strip()))
        except Exception as e:
            gpu_usage.append(None)

    thread_output['cpu_usage'] = cpu_usage
    thread_output['gpu_usage'] = gpu_usage
    print("Resource monitoring finished.")

def run_script(stop_event):
    try:
        with tempfile.NamedTemporaryFile(delete=False) as tmp_file:
            process = subprocess.Popen(['/Users/anelloyi/Desktop/run_powermetrics.sh'], stdout=tmp_file, stderr=subprocess.STDOUT, text=True)
            print("Subprocess started.")
            while not stop_event.is_set():
                if process.poll() is not None:
                    break
            if process.poll() is None:
                process.terminate()
                process.wait(timeout=0.1)

        with open(tmp_file.name, 'r') as f:
            thread_output['powermetrics'] = f.read()

        os.remove(tmp_file.name)
        print("Subprocess finished.")
    except Exception as e:
        thread_output['powermetrics'] = str(e)
        print("Exception in subprocess:", str(e))

# Create and start threads
stop_event = threading.Event()
loaded_model = joblib.load('./models_train/imdb_random_forest_model.joblib')
X_test_2d = np.random.rand(1000, 28*28)  # Example data

thread1 = threading.Thread(target=save_model, args=(stop_event, loaded_model, X_test_2d))
thread2 = threading.Thread(target=monitor_resources_during_save, args=(stop_event,))
thread3 = threading.Thread(target=run_script, args=(stop_event,))
thread1.start()
thread2.start()
thread3.start()

# Wait for threads to complete
thread1.join()
thread2.join()
thread3.join()

# Output collected data
content = thread_output.get('powermetrics', 'No output captured')
lines = content.split('\n')
filtered_lines = [line for line in lines if line.startswith('GPU Power:') or line.startswith('CPU Power:')]
filtered_content = '\n'.join(filtered_lines)

output_file_name = './mnist_models/output_rf/output-imdb-rf-pmml.txt'
with open(output_file_name, 'w') as file:
    file.write(filtered_content)
    file.write(f'\nTotal Duration(s): {duration:.2f}')
    file.write(f'\nInference Duration(s): {inference_duration:.4f}')
print(f"Content saved to {output_file_name}")

filtered_lines_count = len(filtered_lines)

duration = inference_duration

# 确保 filtered_lines_count 不为零
if filtered_lines_count > 0:
    # 提取每一个采样点的数字，即CPU和GPU的具体mV
    numbers = []
    for line in filtered_lines:
        match = re.search(r'[\d.]+', line)
        if match:
            numbers.append(float(match.group()))

    delta_time = duration * 2 / filtered_lines_count
    numbers_scaled = [num * delta_time for num in numbers]
    total_energy_consumption = sum(numbers_scaled)
    print(f"Total energy consumption: {total_energy_consumption:.2f} mV")
else:
    print("No filtered lines to process.")


Subprocess started.
Model exported to imdb_random_forest_model.pmml
Time taken to save model: 51.3694 seconds
Time taken for inference on 1000 samples: 281.7404 seconds
Resource monitoring finished.
Subprocess finished.
Content saved to ./mnist_models/output_rf/output-imdb-rf-pmml.txt
Total energy consumption: 1577398.50 mV
