In [1]:
import threading
import numpy as np
import time
import subprocess
import os
import psutil
import tensorflow as tf
import tempfile
from tensorflow.keras.datasets import imdb
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Embedding, LSTM, Dense
from tensorflow.keras.callbacks import LambdaCallback

2024-07-30 20:31:17.462772: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  SSE4.1 SSE4.2
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.


In [2]:
# 全局变量用于存储子线程的输出
thread_output = {}

# 加载 IMDB 数据集
max_features = 20000  # 使用的单词数量
maxlen = 100  # 每条评论的最大长度

(X_train, y_train), (X_test, y_test) = imdb.load_data(num_words=max_features)

# 数据预处理和填充
X_train = pad_sequences(X_train, maxlen=maxlen)
X_test = pad_sequences(X_test, maxlen=maxlen)

# 构建 LSTM 模型
model = Sequential([
    Embedding(input_dim=max_features, output_dim=128, input_length=maxlen),
    LSTM(units=128, return_sequences=True),  # 第一层LSTM，返回序列以叠加另一层LSTM
    LSTM(units=128),  # 第二层LSTM
    Dense(64, activation='relu'),
    Dense(1, activation='sigmoid')  # 二分类问题，使用sigmoid激活函数
])

# 编译模型
model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])

# 自定义批次生成器
def batch_generator(X, y, batch_size):
    while True:
        for start in range(0, len(X), batch_size):
            end = min(start + batch_size, len(X))
            yield X[start:end], y[start:end]

batch_size = 128
train_gen = batch_generator(X_train, y_train, batch_size)

# 监控资源使用情况
def monitor_resources():
    memory_info = psutil.virtual_memory()
    print(f"Memory usage: {memory_info.percent}%")
    cpu_usage = psutil.cpu_percent(interval=1)
    print(f"CPU usage: {cpu_usage}%")

# 训练模型并监控资源使用情况
steps_per_epoch = len(X_train) // batch_size
model.fit(train_gen, epochs=2, steps_per_epoch=steps_per_epoch, verbose=2, 
          callbacks=[LambdaCallback(on_epoch_end=lambda epoch, logs: monitor_resources())])

# 评估模型
score = model.evaluate(X_test, y_test, verbose=0)
print(f'Test accuracy: {score[1]}')

model.save('./models_train/imdb_lstm_model.h5')

2024-07-30 20:31:25.140554: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  SSE4.1 SSE4.2
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.


Epoch 1/2
Memory usage: 89.1%
CPU usage: 72.6%
195/195 - 43s - loss: 0.4145 - accuracy: 0.8007 - 43s/epoch - 220ms/step
Epoch 2/2
Memory usage: 89.4%
CPU usage: 35.5%
195/195 - 41s - loss: 0.2285 - accuracy: 0.9114 - 41s/epoch - 208ms/step
Test accuracy: 0.8392400145530701


In [3]:
import threading
import time
import subprocess
import os
import psutil
import tensorflow as tf
from tensorflow.keras.datasets import imdb
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Embedding, LSTM, Dense
from tensorflow.keras.callbacks import LambdaCallback
import tempfile
import re
import tf2onnx
import coremltools as ct


scikit-learn version 1.3.2 is not supported. Minimum required version: 0.17. Maximum required version: 1.1.2. Disabling scikit-learn conversion API.
XGBoost version 2.1.0 has not been tested with coremltools. You may run into unexpected errors. XGBoost 1.4.2 is the most recent version that has been tested.
Torch version 2.2.2 has not been tested with coremltools. You may run into unexpected errors. Torch 2.2.0 is the most recent version that has been tested.
  from .autonotebook import tqdm as notebook_tqdm


LSTM-IMDB-H5

In [8]:
thread_output = {}
duration = 0
# 定义监控保存模型时的资源使用率的线程函数
def monitor_resources_during_save(stop_event):
    cpu_usage = []
    gpu_usage = []

    while not stop_event.is_set():
        cpu_usage.append(psutil.cpu_percent(interval=0.1))
        try:
            gpu_output = subprocess.check_output(['nvidia-smi', 
                                                  '--query-gpu=utilization.gpu', '--format=csv,noheader,nounits'])
            gpu_usage.append(int(gpu_output.strip()))
        except Exception as e:
            gpu_usage.append(None)  # 如果没有GPU或nvidia-smi命令失败，则记录None

    # 保存监测结果
    thread_output['cpu_usage'] = cpu_usage
    thread_output['gpu_usage'] = gpu_usage
    print("Resource monitoring finished.")

# 定义保存模型的函数
def save_model(stop_event, model):
    global duration
    global inference_duration
    start_time = time.time()

    for i in range(0, 1):
        model.save("imdb_lstm_model.h5")

    end_time = time.time()
    duration = end_time - start_time
    print(f'Time taken to save TensorFlow SavedModel: {duration} seconds')
    
    # 测量推理时间
    X_test_sample = X_test[:10]  # 选择前10个样本进行推理 10*1
    # X_test_sample = X_test[:1]  # 选择前10个样本进行推理 10*1
    start_time_inference = time.time()
    for i in range(32): # 1*10
        model.predict(X_test_sample)
    end_time_inference = time.time()
    inference_duration = end_time_inference - start_time_inference
    print(f'Time taken for inference on {len(X_test_sample)} samples: {inference_duration:.4f} seconds')

    stop_event.set()  # 触发停止其他线程

# 运行外部脚本并捕获输出
def run_script(stop_event):
    try:
        with tempfile.NamedTemporaryFile(delete=False) as tmp_file:
            process = subprocess.Popen(['/Users/anelloyi/Desktop/run_powermetrics.sh'], 
                                       stdout=tmp_file, stderr=subprocess.STDOUT, text=True)
            print("Subprocess started.")
            while not stop_event.is_set():
                if process.poll() is not None:  # 检查进程是否已经结束
                    break
            
            if process.poll() is None:
                process.terminate()
                try:
                    process.wait(timeout=0.1)
                except subprocess.TimeoutExpired:
                    process.kill()
                    process.wait()
        
        with open(tmp_file.name, 'r') as f:
            thread_output['powermetrics'] = f.read()
        
        os.remove(tmp_file.name)  # 删除临时文件
        print("Subprocess finished.")
    except Exception as e:
        thread_output['powermetrics'] = str(e)
        print("Exception in subprocess:", str(e))

# 创建和启动线程
stop_event = threading.Event()
# model = tf.keras.models.load_model("./mnist_models/imdb_lstm_model.h5")  # 重新加载模型以进行保存性能监控
model = tf.keras.models.load_model("./models_train/imdb_lstm_model.h5")  # 重新加载模型以进行保存性能监控


thread1 = threading.Thread(target=save_model, args=(stop_event, model))
thread2 = threading.Thread(target=monitor_resources_during_save, args=(stop_event,))
thread3 = threading.Thread(target=run_script, args=(stop_event,))
thread1.start()
thread2.start()
thread3.start()

# 等待线程完成
thread1.join()
thread2.join()
thread3.join()

# 输出从线程收集的数据
content = thread_output.get('powermetrics', 'No output captured')
# 将内容按行拆分
lines = content.split('\n')
# 筛选出以 "CPU consume" 和 "GPU Power" 开头的行
filtered_lines = [line for line in lines if line.startswith('GPU Power:') or 
                  line.startswith('CPU Power:')]
# 将筛选后的行合并为一个字符串，每行之间用换行符分隔
filtered_content = '\n'.join(filtered_lines)

# 保存过滤后的内容到文件
output_file_name = './imbd_models/output_lstm/output-imdb-lstm-h5.txt'
with open(output_file_name, 'w') as file:
    file.write(filtered_content)
    file.write(f'\nTotal Duration(s): {duration:.2f}')
    file.write(f'\nInference Duration(s): {inference_duration:.4f}')
print(f"Content saved to {output_file_name}")

# 提取每一个采样点的数字，即CPU和GPU的具体mV
numbers = []
for line in filtered_lines:
    match = re.search(r'[\d.]+', line)
    if match:
        numbers.append(float(match.group())) 

delta_time = duration * 2 / len(filtered_lines)
numbers_scaled = [num * delta_time for num in numbers]
total_energy_consumption = sum(numbers_scaled)
print(f"Total energy consumption: {total_energy_consumption:.2f} mV")

Subprocess started.
Time taken to save TensorFlow SavedModel: 0.18689298629760742 seconds
Time taken for inference on 10 samples: 5.3661 seconds
Subprocess finished.
Resource monitoring finished.
Content saved to ./imbd_models/output_lstm/output-imdb-lstm-h5.txt
Total energy consumption: 1624.90 mV


lstm-imbd-onnx

In [5]:
import onnxruntime as ort

In [7]:
thread_output = {}
duration = 0
inference_duration = 0
# X_test = None  # 确保在测试时能够使用到

# 定义监控资源使用率的线程函数
def monitor_resources_during_save(stop_event):
    cpu_usage = []
    gpu_usage = []

    while not stop_event.is_set():
        cpu_usage.append(psutil.cpu_percent(interval=0.1))
        try:
            gpu_output = subprocess.check_output(['nvidia-smi', 
                                                  '--query-gpu=utilization.gpu', '--format=csv,noheader,nounits'])
            gpu_usage.append(int(gpu_output.strip()))
        except Exception as e:
            gpu_usage.append(None)  # 如果没有GPU或nvidia-smi命令失败，则记录None

    # 保存监测结果
    thread_output['cpu_usage'] = cpu_usage
    thread_output['gpu_usage'] = gpu_usage
    print("Resource monitoring finished.")

# 定义保存模型的函数
def save_model(stop_event, model):
    global duration
    global inference_duration
    global X_test  # 确保引用全局变量

    start_time = time.time()
    
    # 使用 tf2onnx 保存模型为ONNX格式
    spec = (tf.TensorSpec((None, 100), tf.float32, name="input"),)
    output_path = "imdb_lstm_model.onnx"
    
    for i in range(1):
        model_proto, _ = tf2onnx.convert.from_keras(model, input_signature=spec, output_path=output_path)
    
    end_time = time.time()
    duration = end_time - start_time
    print(f'Time taken to save ONNX model: {duration} seconds')
    
    # 测量推理时间
    # 使用 ONNX Runtime 加载模型
    ort_session = ort.InferenceSession(output_path)
    
    # 准备 ONNX 输入
    X_test_sample = X_test[:10].astype(np.float32)  # 选择前10个样本进行推理
    
    # 测量推理时间
    start_time_inference = time.time()
    
    # 获取模型输入名称
    input_name = ort_session.get_inputs()[0].name
    
    # 进行推理
    for i in range(32): # 1*10
        ort_outs = ort_session.run(None, {input_name: X_test_sample})
    
    end_time_inference = time.time()
    inference_duration = end_time_inference - start_time_inference
    print(f'Time taken for inference on {len(X_test_sample)} samples: {inference_duration:.4f} seconds')


    stop_event.set()  # 触发停止其他线程

# 运行外部脚本并捕获输出
def run_script(stop_event):
    try:
        with tempfile.NamedTemporaryFile(delete=False) as tmp_file:
            process = subprocess.Popen(['/Users/anelloyi/Desktop/run_powermetrics.sh'], 
                                       stdout=tmp_file, stderr=subprocess.STDOUT, text=True)
            print("Subprocess started.")
            while not stop_event.is_set():
                if process.poll() is not None:  # 检查进程是否已经结束
                    break
            
            if process.poll() is None:
                process.terminate()
                try:
                    process.wait(timeout=0.1)
                except subprocess.TimeoutExpired:
                    process.kill()
                    process.wait()
        
        with open(tmp_file.name, 'r') as f:
            thread_output['powermetrics'] = f.read()
        
        os.remove(tmp_file.name)  # 删除临时文件
        print("Subprocess finished.")
    except Exception as e:
        thread_output['powermetrics'] = str(e)
        print("Exception in subprocess:", str(e))

# 创建和启动线程
stop_event = threading.Event()
model = tf.keras.models.load_model("./models_train/imdb_lstm_model.h5")  # 重新加载模型以进行保存性能监控

# 确保 X_test 被加载以便进行推理
(X_train, y_train), (X_test, y_test) = imdb.load_data(num_words=20000)
X_test = pad_sequences(X_test, maxlen=100)

thread1 = threading.Thread(target=save_model, args=(stop_event, model))
thread2 = threading.Thread(target=monitor_resources_during_save, args=(stop_event,))
thread3 = threading.Thread(target=run_script, args=(stop_event,))
thread1.start()
thread2.start()
thread3.start()

# 等待线程完成
thread1.join()
thread2.join()
thread3.join()

# 输出从线程收集的数据
content = thread_output.get('powermetrics', 'No output captured')
# 将内容按行拆分
lines = content.split('\n')
# 筛选出以 "GPU Power" 和 "CPU Power" 开头的行
filtered_lines = [line for line in lines if line.startswith('GPU Power:') or 
                  line.startswith('CPU Power:')]
# 将筛选后的行合并为一个字符串，每行之间用换行符分隔
filtered_content = '\n'.join(filtered_lines)

# 保存过滤后的内容到文件
output_file_name = './imbd_models/output_lstm/output-imdb-lstm-onnx.txt'
with open(output_file_name, 'w') as file:
    file.write(filtered_content)
    file.write(f'\nTotal Duration(s): {duration:.2f}')
    file.write(f'\nInference Duration(s): {inference_duration:.4f}')
print(f"Content saved to {output_file_name}")

# 提取每一个采样点的数字，即CPU和GPU的具体mV
numbers = []
for line in filtered_lines:
    match = re.search(r'[\d.]+', line)
    if match:
        numbers.append(float(match.group())) 

delta_time = duration * 2 / len(filtered_lines) if len(filtered_lines) > 0 else 0
numbers_scaled = [num * delta_time for num in numbers]
total_energy_consumption = sum(numbers_scaled)
print(f"Total energy consumption: {total_energy_consumption:.2f} mV")


Subprocess started.


2024-07-30 20:41:44.323676: I tensorflow/core/grappler/devices.cc:75] Number of eligible GPUs (core count >= 8, compute capability >= 0.0): 0 (Note: TensorFlow was not compiled with CUDA or ROCm support)
2024-07-30 20:41:44.810865: I tensorflow/core/grappler/devices.cc:75] Number of eligible GPUs (core count >= 8, compute capability >= 0.0): 0 (Note: TensorFlow was not compiled with CUDA or ROCm support)


Time taken to save ONNX model: 2.2595322132110596 seconds
Time taken for inference on 10 samples: 0.2019 seconds
Subprocess finished.
Resource monitoring finished.
Content saved to ./imbd_models/output_lstm/output-imdb-lstm-onnx.txt
Total energy consumption: 20411.95 mV


lstm-core ml

In [9]:
import coremltools as ct
from coremltools.models import MLModel

In [10]:
import coremltools as ct  
thread_output = {}
duration = 0
inference_duration = 0
# X_test = None  # 确保在测试时能够使用到

# 定义监控资源使用率的线程函数
def monitor_resources_during_save(stop_event):
    cpu_usage = []
    gpu_usage = []

    while not stop_event.is_set():
        cpu_usage.append(psutil.cpu_percent(interval=0.1))
        try:
            gpu_output = subprocess.check_output(['nvidia-smi', 
                                                  '--query-gpu=utilization.gpu', '--format=csv,noheader,nounits'])
            gpu_usage.append(int(gpu_output.strip()))
        except Exception as e:
            gpu_usage.append(None)  # 如果没有GPU或nvidia-smi命令失败，则记录None

    # 保存监测结果
    thread_output['cpu_usage'] = cpu_usage
    thread_output['gpu_usage'] = gpu_usage
    print("Resource monitoring finished.")

# 定义保存模型的函数
def save_model(stop_event, model):
    global duration
    global inference_duration
    global X_test  # 确保引用全局变量

    start_time = time.time()
    
    # 将TensorFlow Keras模型转换为Core ML模型
    for i in range(2):
        try:
        # 使用实际的输入层名称和适当的形状
            input_feature = ct.TensorType(name="embedding_input", shape=(1, 100))
            mlmodel = ct.convert(model, inputs=[input_feature])
            mlmodel.save("imdb_lstm_model.mlpackage")
        except Exception as e:
            print(f"Error saving model: {e}")
    
    end_time = time.time()
    duration = end_time - start_time
    print(f'Time taken to save Core ML model: {duration} seconds')
    
    # 测量推理时间
    # 使用 Core ML 加载模型
    coreml_model = MLModel("imdb_lstm_model.mlpackage")
    
    # 准备 Core ML 输入
    X_test_sample = X_test[:10].astype(np.float32)  # 选择前10个样本进行推理
    X_test_sample = X_test_sample.reshape((10, 100))  # 调整形状以匹配输入

    # 测量推理时间
    start_time_inference = time.time()

    for i in range(10):
        input_data = {"embedding_input": X_test_sample[i:i+1]}
        coreml_out = coreml_model.predict(input_data)

    end_time_inference = time.time()
    inference_duration = end_time_inference - start_time_inference
    print(f'Time taken for inference on {len(X_test_sample)} samples: {inference_duration:.4f} seconds')

    stop_event.set()  # 触发停止其他线程


# 运行外部脚本并捕获输出
def run_script(stop_event):
    try:
        with tempfile.NamedTemporaryFile(delete=False) as tmp_file:
            process = subprocess.Popen(['/Users/anelloyi/Desktop/run_powermetrics.sh'], 
                                       stdout=tmp_file, stderr=subprocess.STDOUT, text=True)
            print("Subprocess started.")
            while not stop_event.is_set():
                if process.poll() is not None:  # 检查进程是否已经结束
                    break
            
            if process.poll() is None:
                process.terminate()
                try:
                    process.wait(timeout=0.1)
                except subprocess.TimeoutExpired:
                    process.kill()
                    process.wait()
        
        with open(tmp_file.name, 'r') as f:
            thread_output['powermetrics'] = f.read()
        
        os.remove(tmp_file.name)  # 删除临时文件
        print("Subprocess finished.")
    except Exception as e:
        thread_output['powermetrics'] = str(e)
        print("Exception in subprocess:", str(e))

# 创建和启动线程
stop_event = threading.Event()
model = tf.keras.models.load_model("./models_train/imdb_lstm_model.h5") 

# 确保 X_test 被加载以便进行推理
(X_train, y_train), (X_test, y_test) = imdb.load_data(num_words=20000)
X_test = pad_sequences(X_test, maxlen=100)

thread1 = threading.Thread(target=save_model, args=(stop_event, model))
thread2 = threading.Thread(target=monitor_resources_during_save, args=(stop_event,))
thread3 = threading.Thread(target=run_script, args=(stop_event,))
thread1.start()
thread2.start()
thread3.start()

# 等待线程完成
thread1.join()
thread2.join()
thread3.join()

# 输出从线程收集的数据
content = thread_output.get('powermetrics', 'No output captured')
# 将内容按行拆分
lines = content.split('\n')
# 筛选出以 "GPU Power" 和 "CPU Power" 开头的行
filtered_lines = [line for line in lines if line.startswith('GPU Power:') or 
                  line.startswith('CPU Power:')]
# 将筛选后的行合并为一个字符串，每行之间用换行符分隔
filtered_content = '\n'.join(filtered_lines)

# 保存过滤后的内容到文件
output_file_name = './imbd_models/output_lstm/output-imdb-lstm-coreml.txt'
with open(output_file_name, 'w') as file:
    file.write(filtered_content)
    file.write(f'\nTotal Duration(s): {duration:.2f}')
    file.write(f'\nInference Duration(s): {inference_duration:.4f}')
print(f"Content saved to {output_file_name}")

# 提取每一个采样点的数字，即CPU和GPU的具体mV
numbers = []
for line in filtered_lines:
    match = re.search(r'[\d.]+', line)
    if match:
        numbers.append(float(match.group())) 

delta_time = duration * 2 / len(filtered_lines) if len(filtered_lines) > 0 else 0
numbers_scaled = [num * delta_time for num in numbers]
total_energy_consumption = sum(numbers_scaled)
print(f"Total energy consumption: {total_energy_consumption:.2f} mV")


Subprocess started.


2024-07-30 20:43:49.920057: I tensorflow/core/grappler/devices.cc:75] Number of eligible GPUs (core count >= 8, compute capability >= 0.0): 0 (Note: TensorFlow was not compiled with CUDA or ROCm support)
2024-07-30 20:43:50.327184: I tensorflow/core/grappler/devices.cc:75] Number of eligible GPUs (core count >= 8, compute capability >= 0.0): 0 (Note: TensorFlow was not compiled with CUDA or ROCm support)
When both 'convert_to' and 'minimum_deployment_target' not specified, 'convert_to' is set to "mlprogram" and 'minimum_deployment_target' is set to ct.target.iOS15 (which is same as ct.target.macOS12). Note: the model will not run on systems older than iOS15/macOS12/watchOS8/tvOS15. In order to make your model run on older system, please set the 'minimum_deployment_target' to iOS14/iOS13. Details please see the link: https://apple.github.io/coremltools/docs-guides/source/target-conversion-formats.html
2024-07-30 20:43:51.084367: I tensorflow/core/grappler/devices.cc:75] Number of eligib

Time taken to save Core ML model: 11.221569061279297 seconds
Time taken for inference on 10 samples: 2.2902 seconds
Subprocess finished.
Resource monitoring finished.
Content saved to ./imbd_models/output_lstm/output-imdb-lstm-coreml.txt
Total energy consumption: 68004.46 mV


### pytorch

In [11]:
# 加载 IMDB 数据集 （确保不重新训练模型也能推理）
max_features = 20000  # 使用的单词数量
maxlen = 100  # 每条评论的最大长度

(X_train, y_train), (X_test, y_test) = imdb.load_data(num_words=max_features)

# 数据预处理和填充
X_train = pad_sequences(X_train, maxlen=maxlen)
X_test = pad_sequences(X_test, maxlen=maxlen)

In [12]:
import tensorflow as tf
import torch
import torch.nn as nn
import torch
import h5py

# 用pytorch定义LSTM模型

class LSTMModel(nn.Module):
    def __init__(self, vocab_size, embed_dim, hidden_dim):
        super(LSTMModel, self).__init__()
        self.embedding = nn.Embedding(vocab_size, embed_dim)
        self.lstm1 = nn.LSTM(embed_dim, hidden_dim, batch_first=True)
        self.lstm2 = nn.LSTM(hidden_dim, hidden_dim, batch_first=True)
        self.fc1 = nn.Linear(hidden_dim, 64)
        self.relu = nn.ReLU()
        self.fc2 = nn.Linear(64, 1)
        self.sigmoid = nn.Sigmoid()
        
    def forward(self, x):
        x = self.embedding(x)
        x, _ = self.lstm1(x)
        x, (h_n, c_n) = self.lstm2(x)  # 仅获取最后一个时间步的输出
        x = h_n[-1]  # 取最后一个层的隐藏状态
        x = self.fc1(x)
        x = self.relu(x)
        x = self.fc2(x)
        x = self.sigmoid(x)
        return x

def save_keras_lstm_to_pytorch(modelpath):
    # 加载已经训练的TensorFlow模型，保存参数
    model = tf.keras.models.load_model(modelpath)
    model.save_weights('imdb_lstm_weights.h5')

    # 加载 Keras 权重
    # 打开 HDF5 文件
    weights = h5py.File('imdb_lstm_weights.h5', 'r')

    # 创建 PyTorch 模型实例
    pytorch_model = LSTMModel(max_features, 128, 128)

    # 加载 Embedding 层权重
    pytorch_model.embedding.weight.data.copy_(torch.from_numpy(weights['embedding/embedding/embeddings:0'][()]))

    # 加载第一个 LSTM 层权重
    # LSTM层包括kernel（即weights_ih）和recurrent_kernel（即weights_hh）以及bias
    pytorch_model.lstm1.weight_ih_l0.data.copy_(torch.from_numpy(weights['lstm/lstm/lstm_cell/kernel:0'][()].transpose()))
    pytorch_model.lstm1.weight_hh_l0.data.copy_(torch.from_numpy(weights['lstm/lstm/lstm_cell/recurrent_kernel:0'][()].transpose()))
    pytorch_model.lstm1.bias_ih_l0.data.copy_(torch.from_numpy(weights['lstm/lstm/lstm_cell/bias:0'][()]))
    pytorch_model.lstm1.bias_hh_l0.data.fill_(0)  # 清零，因为Keras中的偏置是合并的

    # 加载第二个 LSTM 层权重
    pytorch_model.lstm2.weight_ih_l0.data.copy_(torch.from_numpy(weights['lstm_1/lstm_1/lstm_cell_1/kernel:0'][()].transpose()))
    pytorch_model.lstm2.weight_hh_l0.data.copy_(torch.from_numpy(weights['lstm_1/lstm_1/lstm_cell_1/recurrent_kernel:0'][()].transpose()))
    pytorch_model.lstm2.bias_ih_l0.data.copy_(torch.from_numpy(weights['lstm_1/lstm_1/lstm_cell_1/bias:0'][()]))
    pytorch_model.lstm2.bias_hh_l0.data.fill_(0)  # 清零

    # 加载第一个 Dense 层权重和偏置
    pytorch_model.fc1.weight.data.copy_(torch.from_numpy(weights['dense/dense/kernel:0'][()].transpose()))
    pytorch_model.fc1.bias.data.copy_(torch.from_numpy(weights['dense/dense/bias:0'][()]))

    # 加载第二个 Dense 层权重和偏置
    pytorch_model.fc2.weight.data.copy_(torch.from_numpy(weights['dense_1/dense_1/kernel:0'][()].transpose()))
    pytorch_model.fc2.bias.data.copy_(torch.from_numpy(weights['dense_1/dense_1/bias:0'][()]))

    weights.close()  # 关闭文件

    # 现在你的 PyTorch 模型已经加载了 Keras 模型的所有权重

    return pytorch_model

    # 将模型设置为评估模式

# pytorch_model = save_keras_lstm_to_pytorch("./models_train/imdb_lstm_model.h5")

In [14]:
thread_output = {}
duration = 0
inference_duration = 0  # 添加用于存储推理时间的变量

# 定义监控保存模型时的资源使用率的线程函数
def monitor_resources_during_save(stop_event):
    cpu_usage = []
    gpu_usage = []

    while not stop_event.is_set():
        cpu_usage.append(psutil.cpu_percent(interval=0.1))
        try:
            gpu_output = subprocess.check_output(['nvidia-smi', 
                                                  '--query-gpu=utilization.gpu', '--format=csv,noheader,nounits'])
            gpu_usage.append(int(gpu_output.strip()))
        except Exception as e:
            gpu_usage.append(None)  # 如果没有GPU或nvidia-smi命令失败，则记录None

    # 保存监测结果
    thread_output['cpu_usage'] = cpu_usage
    thread_output['gpu_usage'] = gpu_usage
    print("Resource monitoring finished.")

# 定义保存模型的函数
def save_model(stop_event, model):
    global duration
    global inference_duration
    print("Saving model...")
    start_time = time.time()

    # 保存整个模型为 PyTorch 格式
    pytorch_model = save_keras_lstm_to_pytorch("./models_train/imdb_lstm_model.h5")
    for i in range(1):
        torch.save(pytorch_model, 'imdb_lstm_model.pth')

    end_time = time.time()
    duration = end_time - start_time
    print(f'Time taken to save PyTorch model: {duration:.2f} seconds')

    
    # 准备输入数据
    X_test_sample = X_test[:10].astype(np.float32)  # 选择前10个样本进行推理
    X_test_sample_torch = torch.from_numpy(X_test_sample).long()  # 转换为整数张量

    # 测量推理时间
    start_time_inference = time.time()
    # 加载模型
    pytorch_model.eval()

    # 假设 X_test 是你要用于推理的输入数据
    for i in range(32):
        with torch.no_grad():
            output = pytorch_model(X_test_sample_torch)
            print("Inference output:", output)

    end_time_inference = time.time()
    inference_duration = end_time_inference - start_time_inference
    print(f'Time taken for inference on {len(X_test_sample)} samples: {inference_duration:.4f} seconds')


    stop_event.set()  # 触发停止其他线程

# 运行外部脚本并捕获输出
def run_script(stop_event):
    try:
        # 使用临时文件来存储子进程输出
        with tempfile.NamedTemporaryFile(delete=False) as tmp_file:
            process = subprocess.Popen(['/Users/anelloyi/Desktop/run_powermetrics.sh'], 
                                       stdout=tmp_file, stderr=subprocess.STDOUT, text=True)
            print("Subprocess started.")
            while not stop_event.is_set():
                if process.poll() is not None:  # 检查进程是否已经结束
                    break
            
            if process.poll() is None:
                # 尝试终止进程
                process.terminate()
                try:
                    process.wait(timeout=0.1)
                except subprocess.TimeoutExpired:
                    process.kill()  # 如果超时，则强制终止
                    process.wait()
        
        # 读取临时文件的内容
        with open(tmp_file.name, 'r') as f:
            thread_output['powermetrics'] = f.read()
        
        os.remove(tmp_file.name)  # 删除临时文件
        print("Subprocess finished.")
    except Exception as e:
        thread_output['powermetrics'] = str(e)
        print("Exception in subprocess:", str(e))


stop_event = threading.Event()
model = tf.keras.models.load_model("./models_train/imdb_lstm_model.h5") 

# 创建和启动线程
thread1 = threading.Thread(target=save_model, args=(stop_event, model))
thread2 = threading.Thread(target=monitor_resources_during_save, args=(stop_event,))
thread3 = threading.Thread(target=run_script, args=(stop_event,))


thread1.start()
thread2.start()
thread3.start()

# 等待线程完成
thread1.join()
thread2.join()
thread3.join()

# 输出从线程收集的数据
content = thread_output.get('powermetrics', 'No output captured')
lines = content.split('\n')
filtered_lines = [line for line in lines if line.startswith('GPU Power:') or 
                  line.startswith('CPU Power:')]

filtered_content = '\n'.join(filtered_lines)

output_file_name = './imbd_models/output_lstm/output-imdb-lstm-pth.txt'
with open(output_file_name, 'w') as file:
    file.write(filtered_content)
    file.write(f'\nTotal Duration(s): {duration:.2f}')
    file.write(f'\nInference Duration(s): {inference_duration:.4f}')
print(f"Content saved to {output_file_name}")

filtered_lines_count = len(filtered_lines)
print(filtered_lines_count)
print(filtered_lines)

# 确保 filtered_lines_count 不为零
if filtered_lines_count > 0:
    # 提取每一个采样点的数字，即CPU和GPU的具体mV
    numbers = []
    for line in filtered_lines:
        match = re.search(r'[\d.]+', line)
        if match:
            numbers.append(float(match.group()))

    delta_time = duration * 2 / filtered_lines_count
    numbers_scaled = [num * delta_time for num in numbers]
    total_energy_consumption = sum(numbers_scaled)
    print(f"Total energy consumption: {total_energy_consumption:.2f} mV")
else:
    print("No filtered lines to process.")

Saving model...
Subprocess started.
Time taken to save PyTorch model: 1.11 seconds
Inference output: tensor([[0.1299],
        [0.9645],
        [0.9288],
        [0.1741],
        [0.9976],
        [0.9200],
        [0.9720],
        [0.0078],
        [0.7594],
        [0.9720]])
Inference output: tensor([[0.1299],
        [0.9645],
        [0.9288],
        [0.1741],
        [0.9976],
        [0.9200],
        [0.9720],
        [0.0078],
        [0.7594],
        [0.9720]])
Inference output: tensor([[0.1299],
        [0.9645],
        [0.9288],
        [0.1741],
        [0.9976],
        [0.9200],
        [0.9720],
        [0.0078],
        [0.7594],
        [0.9720]])
Inference output: tensor([[0.1299],
        [0.9645],
        [0.9288],
        [0.1741],
        [0.9976],
        [0.9200],
        [0.9720],
        [0.0078],
        [0.7594],
        [0.9720]])
Inference output: tensor([[0.1299],
        [0.9645],
        [0.9288],
        [0.1741],
        [0.9976],
        [0.92

# XGBOOST-IMDB

### JSON

In [1]:
import numpy as np
import xgboost as xgb
from tensorflow.keras.datasets import imdb
from sklearn.metrics import accuracy_score
from sklearn.feature_extraction.text import TfidfVectorizer
import threading
import time
import subprocess
import os
import psutil
import tempfile
import re

2024-07-27 15:10:18.691966: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  SSE4.1 SSE4.2
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.


In [26]:
# 加载 IMDB 数据集
max_features = 20000  # 使用的单词数量
maxlen = 100  # 每条评论的最大长度

(X_train, y_train), (X_test, y_test) = imdb.load_data(num_words=max_features)

# 将序列转换为文本
word_index = imdb.get_word_index()
index_word = {v: k for k, v in word_index.items()}

def sequences_to_texts(sequences):
    return [' '.join([index_word.get(i - 3, '?') for i in seq]) for seq in sequences]

X_train_text = sequences_to_texts(X_train)
X_test_text = sequences_to_texts(X_test)

# 使用TF-IDF向量化文本数据
vectorizer = TfidfVectorizer(max_features=max_features)
X_train_tfidf = vectorizer.fit_transform(X_train_text)
X_test_tfidf = vectorizer.transform(X_test_text)

In [15]:
## 模型训练
# XGBoost 对象
model = xgb.XGBClassifier(
    objective='binary:logistic',  # 目标函数，二分类问题使用'logistic'目标函数
    n_estimators=100,             # 树的个数
    max_depth=6,                  # 树的深度
    learning_rate=0.1             # 学习速率
)

# 训练模型
model.fit(X_train_tfidf, y_train)

# 预测测试集
y_pred = model.predict(X_test_tfidf)

# 计算准确率
accuracy = accuracy_score(y_test, y_pred)
print("Test accuracy: {:.2f}%".format(accuracy * 100))

# 保存模型到本地文件
model.save_model('./models_train/imdb_xgboost_model.json')

Test accuracy: 83.10%


In [3]:
# 初始化全局变量
thread_output = {}
duration = 0
inference_duration = 0

# 定义保存模型的函数
def save_model(stop_event, model, X_test_tfidf):
    global duration
    global inference_duration

    start_time = time.time()
    for i in range(50):
        model.save_model('imdb_xgboost_model.json')
    end_time = time.time()

    duration = end_time - start_time
    print(f'Time taken to save model: {duration:.4f} seconds')

    # 确保输入数据格式正确
    X_test_sample = X_test_tfidf[:10]

    start_time_inference = time.time()
    # 进行推理
    for i in range(10): # 1*10
        model.predict(X_test_tfidf)
    end_time_inference = time.time()

    inference_duration = end_time_inference - start_time_inference
    print(f'Time taken for inference on {X_test_sample.shape[0]} samples: {inference_duration:.4f} seconds')

    stop_event.set()  # 触发停止其他线程

# 监控保存模型时的资源使用率的线程函数
def monitor_resources_during_save(stop_event):
    cpu_usage = []
    gpu_usage = []

    while not stop_event.is_set():
        cpu_usage.append(psutil.cpu_percent(interval=0.1))
        try:
            gpu_output = subprocess.check_output(['nvidia-smi', '--query-gpu=utilization.gpu', '--format=csv,noheader,nounits'])
            gpu_usage.append(int(gpu_output.strip()))
        except Exception as e:
            gpu_usage.append(None)  # 如果没有GPU或nvidia-smi命令失败，则记录None

    # 保存监测结果
    thread_output['cpu_usage'] = cpu_usage
    thread_output['gpu_usage'] = gpu_usage
    print("Resource monitoring finished.")

# 运行外部脚本并捕获输出
def run_script(stop_event):
    try:
        with tempfile.NamedTemporaryFile(delete=False) as tmp_file:
            process = subprocess.Popen(['/Users/anelloyi/Desktop/run_powermetrics.sh'], stdout=tmp_file, stderr=subprocess.STDOUT, text=True)
            print("Subprocess started.")
            while not stop_event.is_set():
                if process.poll() is not None:  # 检查进程是否已经结束
                    break
            
            if process.poll() is None:
                process.terminate()
                try:
                    process.wait(timeout=0.1)
                except subprocess.TimeoutExpired:
                    process.kill()
                    process.wait()
        
        with open(tmp_file.name, 'r') as f:
            thread_output['powermetrics'] = f.read()
        
        os.remove(tmp_file.name)  # 删除临时文件
        print("Subprocess finished.")
    except Exception as e:
        thread_output['powermetrics'] = str(e)
        print("Exception in subprocess:", str(e))

# 创建和启动线程
stop_event = threading.Event()

# 加载XGBoost模型
loaded_model = xgb.XGBClassifier()
loaded_model.load_model('./models_train/imdb_xgboost_model.json')

# save_model(stop_event, loaded_model, X_test_tfidf)

thread1 = threading.Thread(target=save_model, args=(stop_event, loaded_model, X_test_tfidf))
thread2 = threading.Thread(target=monitor_resources_during_save, args=(stop_event,))
thread3 = threading.Thread(target=run_script, args=(stop_event,))
thread1.start()
thread2.start()
thread3.start()

# 等待线程完成
thread1.join()
thread2.join()
thread3.join()

# 输出从线程收集的数据
content = thread_output.get('powermetrics', 'No output captured')
# 将内容按行拆分
lines = content.split('\n')
# 筛选出以 "CPU consume" 和 "GPU Power" 开头的行
filtered_lines = [line for line in lines if line.startswith('GPU Power:') or line.startswith('CPU Power:')]
# 将筛选后的行合并为一个字符串，每行之间用换行符分隔
filtered_content = '\n'.join(filtered_lines)

output_file_name = './imbd_models/output_xgboost/output-imdb-xgboost-json.txt'
with open(output_file_name, 'w') as file:
    file.write(filtered_content)
    file.write(f'\nTotal Duration(s): {duration:.2f}')
    file.write(f'\nInference Duration(s): {inference_duration:.4f}')
print(f"Content saved to {output_file_name}")

filtered_lines_count = len(filtered_lines)
print(filtered_lines_count)
print(filtered_lines)

# 确保 filtered_lines_count 不为零
if filtered_lines_count > 0:
    # 提取每一个采样点的数字，即CPU和GPU的具体mV
    numbers = []
    for line in filtered_lines:
        match = re.search(r'[\d.]+', line)
        if match:
            numbers.append(float(match.group()))

    delta_time = duration * 2 / filtered_lines_count
    numbers_scaled = [num * delta_time for num in numbers]
    total_energy_consumption = sum(numbers_scaled)
    print(f"Total energy consumption: {total_energy_consumption:.2f} mV")
else:
    print("No filtered lines to process.")

Subprocess started.
Time taken to save model: 0.9407 seconds
Time taken for inference on 10 samples: 0.0781 seconds
Subprocess finished.
Resource monitoring finished.
Content saved to ./imbd_models/output_xgboost/output-imdb-xgboost-json.txt
26
['CPU Power: 15847 mW', 'GPU Power: 209 mW', 'CPU Power: 7463 mW', 'GPU Power: 65 mW', 'CPU Power: 4381 mW', 'GPU Power: 64 mW', 'CPU Power: 11787 mW', 'GPU Power: 0 mW', 'CPU Power: 15744 mW', 'GPU Power: 65 mW', 'CPU Power: 16762 mW', 'GPU Power: 67 mW', 'CPU Power: 17007 mW', 'GPU Power: 194 mW', 'CPU Power: 17433 mW', 'GPU Power: 0 mW', 'CPU Power: 14861 mW', 'GPU Power: 0 mW', 'CPU Power: 24441 mW', 'GPU Power: 64 mW', 'CPU Power: 21598 mW', 'GPU Power: 0 mW', 'CPU Power: 3503 mW', 'GPU Power: 135 mW', 'CPU Power: 4417 mW', 'GPU Power: 0 mW']
Total energy consumption: 12743.47 mV


XGBOOST-onnx

In [6]:
import xgboost as xgb
import onnxmltools
import onnxruntime as ort
from skl2onnx.common.data_types import FloatTensorType
import numpy as np
from scipy.sparse import issparse
from sklearn.metrics import accuracy_score

# 初始化全局变量
thread_output = {}
duration = 0
inference_duration = 0

# 定义保存模型的函数
def save_model(stop_event, model, X_test_tfidf):
    global duration
    global inference_duration

    # 定义ONNX转换器的输入类型
    initial_type = [('float_input', FloatTensorType([None, max_features]))]

    start_time = time.time()
    # 将 XGBoost 模型转换为 ONNX 格式
    for i in range(1):
        onnx_model = onnxmltools.convert_xgboost(model, initial_types=initial_type)
        onnx_model_path = 'imdb_xgboost_model.onnx'
        onnxmltools.utils.save_model(onnx_model, onnx_model_path)
        initial_type = [('float_input', FloatTensorType([None, X_test_tfidf.shape[1]]))]
    end_time = time.time()

    duration = end_time - start_time
    print(f'Time taken to save model: {duration:.4f} seconds')

    ort_session = ort.InferenceSession(onnx_model_path)
    # 准备输入数据
    def to_numpy(matrix):
        if issparse(matrix):
            return matrix.todense().astype(np.float32)
        return matrix.astype(np.float32)
    
    X_test_numpy = to_numpy(X_test_tfidf)
    
    # 确保输入数据的形状与模型期望的一致
    if X_test_numpy.shape[1] < max_features:
        padding = np.zeros((X_test_numpy.shape[0], max_features - X_test_numpy.shape[1]), dtype=np.float32)
        X_test_numpy = np.hstack((X_test_numpy, padding))
    elif X_test_numpy.shape[1] > max_features:
        X_test_numpy = X_test_numpy[:, :max_features]

    input_name = ort_session.get_inputs()[0].name
    output_name = ort_session.get_outputs()[0].name

    # 确保输入数据格式正确
    X_test_sample = X_test_numpy[:10]
    input_data = {input_name: X_test_sample}

    start_time_inference = time.time()
    # 进行推理
    for i in range(10): # 1*10
        pred_onnx = ort_session.run([output_name], input_data)[0]
    end_time_inference = time.time()

    inference_duration = end_time_inference - start_time_inference
    print(f'Time taken for inference on {X_test_sample.shape[0]} samples: {inference_duration:.4f} seconds')

    stop_event.set()  # 触发停止其他线程

# 监控保存模型时的资源使用率的线程函数
def monitor_resources_during_save(stop_event):
    cpu_usage = []
    gpu_usage = []

    while not stop_event.is_set():
        cpu_usage.append(psutil.cpu_percent(interval=0.1))
        try:
            gpu_output = subprocess.check_output(['nvidia-smi', '--query-gpu=utilization.gpu', '--format=csv,noheader,nounits'])
            gpu_usage.append(int(gpu_output.strip()))
        except Exception as e:
            gpu_usage.append(None)  # 如果没有GPU或nvidia-smi命令失败，则记录None

    # 保存监测结果
    thread_output['cpu_usage'] = cpu_usage
    thread_output['gpu_usage'] = gpu_usage
    print("Resource monitoring finished.")

# 运行外部脚本并捕获输出
def run_script(stop_event):
    try:
        with tempfile.NamedTemporaryFile(delete=False) as tmp_file:
            process = subprocess.Popen(['/Users/anelloyi/Desktop/run_powermetrics.sh'], stdout=tmp_file, stderr=subprocess.STDOUT, text=True)
            print("Subprocess started.")
            while not stop_event.is_set():
                if process.poll() is not None:  # 检查进程是否已经结束
                    break
            
            if process.poll() is None:
                process.terminate()
                try:
                    process.wait(timeout=0.1)
                except subprocess.TimeoutExpired:
                    process.kill()
                    process.wait()
        
        with open(tmp_file.name, 'r') as f:
            thread_output['powermetrics'] = f.read()
        
        os.remove(tmp_file.name)  # 删除临时文件
        print("Subprocess finished.")
    except Exception as e:
        thread_output['powermetrics'] = str(e)
        print("Exception in subprocess:", str(e))

# 创建和启动线程
stop_event = threading.Event()

# 加载XGBoost模型
model = xgb.XGBClassifier()
model.load_model('./models_train/imdb_xgboost_model.json')


thread1 = threading.Thread(target=save_model, args=(stop_event, model, X_test_tfidf))
thread2 = threading.Thread(target=monitor_resources_during_save, args=(stop_event,))
thread3 = threading.Thread(target=run_script, args=(stop_event,))
thread1.start()
thread2.start()
thread3.start()

# 等待线程完成
thread1.join()
thread2.join()
thread3.join()

# 输出从线程收集的数据
content = thread_output.get('powermetrics', 'No output captured')
# 将内容按行拆分
lines = content.split('\n')
# 筛选出以 "CPU consume" 和 "GPU Power" 开头的行
filtered_lines = [line for line in lines if line.startswith('GPU Power:') or line.startswith('CPU Power:')]
# 将筛选后的行合并为一个字符串，每行之间用换行符分隔
filtered_content = '\n'.join(filtered_lines)

output_file_name = './imbd_models/output_xgboost/output-imdb-xgboost-onnx.txt'
with open(output_file_name, 'w') as file:
    file.write(filtered_content)
    file.write(f'\nTotal Duration(s): {duration:.2f}')
    file.write(f'\nInference Duration(s): {inference_duration:.4f}')
print(f"Content saved to {output_file_name}")

filtered_lines_count = len(filtered_lines)
print(filtered_lines_count)
print(filtered_lines)

# 确保 filtered_lines_count 不为零
if filtered_lines_count > 0:
    # 提取每一个采样点的数字，即CPU和GPU的具体mV
    numbers = []
    for line in filtered_lines:
        match = re.search(r'[\d.]+', line)
        if match:
            numbers.append(float(match.group()))

    delta_time = duration * 2 / filtered_lines_count
    numbers_scaled = [num * delta_time for num in numbers]
    total_energy_consumption = sum(numbers_scaled)
    print(f"Total energy consumption: {total_energy_consumption:.2f} mV")
else:
    print("No filtered lines to process.")


Subprocess started.
Time taken to save model: 0.2102 seconds
Time taken for inference on 10 samples: 0.0180 seconds
Subprocess finished.
Resource monitoring finished.
Content saved to ./imbd_models/output_xgboost/output-imdb-xgboost-onnx.txt
1046
['CPU Power: 6075 mW', 'GPU Power: 48 mW', 'CPU Power: 7894 mW', 'GPU Power: 95 mW', 'CPU Power: 8757 mW', 'GPU Power: 97 mW', 'CPU Power: 8425 mW', 'GPU Power: 49 mW', 'CPU Power: 8920 mW', 'GPU Power: 53 mW', 'CPU Power: 6722 mW', 'GPU Power: 0 mW', 'CPU Power: 5661 mW', 'GPU Power: 124 mW', 'CPU Power: 5309 mW', 'GPU Power: 0 mW', 'CPU Power: 5847 mW', 'GPU Power: 0 mW', 'CPU Power: 8971 mW', 'GPU Power: 58 mW', 'CPU Power: 8534 mW', 'GPU Power: 0 mW', 'CPU Power: 9563 mW', 'GPU Power: 55 mW', 'CPU Power: 8655 mW', 'GPU Power: 94 mW', 'CPU Power: 5608 mW', 'GPU Power: 48 mW', 'CPU Power: 5015 mW', 'GPU Power: 0 mW', 'CPU Power: 5925 mW', 'GPU Power: 52 mW', 'CPU Power: 6221 mW', 'GPU Power: 52 mW', 'CPU Power: 9415 mW', 'GPU Power: 109 mW',

## xgoost - PMML

In [1]:
import pandas as pd
from keras.datasets import imdb
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.pipeline import Pipeline
from sklearn.metrics import accuracy_score
from xgboost import XGBClassifier
from nyoka import xgboost_to_pmml
from pypmml import Model

# 加载IMDB数据集
max_features = 20000  # 使用的单词数量
maxlen = 100  # 每条评论的最大长度

(X_train, y_train), (X_test, y_test) = imdb.load_data(num_words=max_features)

# 将序列转换为文本
word_index = imdb.get_word_index()
index_word = {v: k for k, v in word_index.items()}

def sequences_to_texts(sequences):
    return [' '.join([index_word.get(i - 3, '?') for i in seq]) for seq in sequences]

X_train_text = sequences_to_texts(X_train)
X_test_text = sequences_to_texts(X_test)

# 使用TF-IDF向量化文本数据
vectorizer = TfidfVectorizer(max_features=max_features)

# 创建Pipeline对象
pipeline_obj = Pipeline([
    ('tfidf', vectorizer),
    ('xgbc', XGBClassifier(objective='binary:logistic', n_estimators=100, max_depth=6, learning_rate=0.1))
])

# 训练模型
pipeline_obj.fit(X_train_text, y_train)

# 手动创建特征名称列表
col_names = ['tfidf_' + str(i) for i in range(max_features)]

# 保存模型为PMML格式
xgboost_to_pmml(pipeline_obj, col_names=col_names, target_name='sentiment', pmml_f_name='imdb_xgboost_model.pmml')


2024-07-27 17:09:41.809756: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  SSE4.1 SSE4.2
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.


AttributeError: Coefficients are not defined for Booster type None

In [None]:
from nyoka.skl import pre_process

In [None]:
# scikit-learn            1.3.0
# 降级0.22

## xgboost-pytorch

In [6]:
import threading
import numpy as np
import time
import subprocess
import os
import psutil
import tensorflow as tf
import tempfile
import re

In [7]:
import xgboost as xgb
import hummingbird.ml
import torch
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics import accuracy_score

# 初始化全局变量
thread_output = {}
duration = 0
inference_duration = 0

# 定义保存模型的函数
def save_model(stop_event, model, X_test_tfidf):
    global duration
    global inference_duration

    # 将XGBoost模型转换为PyTorch模型
    start_time = time.time()
    for i in range(1):
        pytorch_model = hummingbird.ml.convert(model, 'pytorch')
        # 保存PyTorch模型
        torch.save(pytorch_model.model, './imdb_xgboost_model.pth')
    end_time = time.time()

    duration = end_time - start_time
    print(f'Time taken to save model: {duration:.4f} seconds')

    # 准备输入数据
    start_time_inference = time.time()
    # 将测试集数据转换为PyTorch张量
    X_test_torch = torch.tensor(X_test_tfidf.toarray(), dtype=torch.float32)

    # 使用转换后的PyTorch模型进行预测
    # 进行推理
    for i in range(10): # 1*10
        y_pred_torch = pytorch_model.predict(X_test_torch)

    end_time_inference = time.time()

    inference_duration = end_time_inference - start_time_inference
    print(f'Time taken for inference on {X_test_torch.shape[0]} samples: {inference_duration:.4f} seconds')

    stop_event.set()  # 触发停止其他线程

# 监控保存模型时的资源使用率的线程函数
def monitor_resources_during_save(stop_event):
    cpu_usage = []
    gpu_usage = []

    while not stop_event.is_set():
        cpu_usage.append(psutil.cpu_percent(interval=0.1))
        try:
            gpu_output = subprocess.check_output(['nvidia-smi', '--query-gpu=utilization.gpu', '--format=csv,noheader,nounits'])
            gpu_usage.append(int(gpu_output.strip()))
        except Exception as e:
            gpu_usage.append(None)  # 如果没有GPU或nvidia-smi命令失败，则记录None

    # 保存监测结果
    thread_output['cpu_usage'] = cpu_usage
    thread_output['gpu_usage'] = gpu_usage
    print("Resource monitoring finished.")

# 运行外部脚本并捕获输出
def run_script(stop_event):
    try:
        with tempfile.NamedTemporaryFile(delete=False) as tmp_file:
            process = subprocess.Popen(['/Users/anelloyi/Desktop/run_powermetrics.sh'], stdout=tmp_file, stderr=subprocess.STDOUT, text=True)
            print("Subprocess started.")
            while not stop_event.is_set():
                if process.poll() is not None:  # 检查进程是否已经结束
                    break
            
            if process.poll() is None:
                process.terminate()
                try:
                    process.wait(timeout=0.1)
                except subprocess.TimeoutExpired:
                    process.kill()
                    process.wait()
        
        with open(tmp_file.name, 'r') as f:
            thread_output['powermetrics'] = f.read()
        
        os.remove(tmp_file.name)  # 删除临时文件
        print("Subprocess finished.")
    except Exception as e:
        thread_output['powermetrics'] = str(e)
        print("Exception in subprocess:", str(e))

# 创建和启动线程
stop_event = threading.Event()

# 加载XGBoost模型
model = xgb.XGBClassifier()
model.load_model('./models_train/imdb_xgboost_model.json')


thread1 = threading.Thread(target=save_model, args=(stop_event, model, X_test_tfidf))
thread2 = threading.Thread(target=monitor_resources_during_save, args=(stop_event,))
thread3 = threading.Thread(target=run_script, args=(stop_event,))
thread1.start()
thread2.start()
thread3.start()

# 等待线程完成
thread1.join()
thread2.join()
thread3.join()

# 输出从线程收集的数据
content = thread_output.get('powermetrics', 'No output captured')
# 将内容按行拆分
lines = content.split('\n')
# 筛选出以 "CPU consume" 和 "GPU Power" 开头的行
filtered_lines = [line for line in lines if line.startswith('GPU Power:') or line.startswith('CPU Power:')]
# 将筛选后的行合并为一个字符串，每行之间用换行符分隔
filtered_content = '\n'.join(filtered_lines)

output_file_name = './imbd_models/output_xgboost/output-imdb-xgboost-pth.txt'
with open(output_file_name, 'w') as file:
    file.write(filtered_content)
    file.write(f'\nTotal Duration(s): {duration:.2f}')
    file.write(f'\nInference Duration(s): {inference_duration:.4f}')
print(f"Content saved to {output_file_name}")

filtered_lines_count = len(filtered_lines)
print(filtered_lines_count)
print(filtered_lines)

# 确保 filtered_lines_count 不为零
if filtered_lines_count > 0:
    # 提取每一个采样点的数字，即CPU和GPU的具体mV
    numbers = []
    for line in filtered_lines:
        match = re.search(r'[\d.]+', line)
        if match:
            numbers.append(float(match.group()))

    delta_time = duration * 2 / filtered_lines_count
    numbers_scaled = [num * delta_time for num in numbers]
    total_energy_consumption = sum(numbers_scaled)
    print(f"Total energy consumption: {total_energy_consumption:.2f} mV")
else:
    print("No filtered lines to process.")


Subprocess started.
Time taken to save model: 0.3478 seconds
Time taken for inference on 25000 samples: 0.0000 seconds
Subprocess finished.
Resource monitoring finished.
Content saved to ./imbd_models/output_xgboost/output-imdb-xgboost-pth.txt
550
['CPU Power: 15874 mW', 'GPU Power: 961 mW', 'CPU Power: 13247 mW', 'GPU Power: 445 mW', 'CPU Power: 15187 mW', 'GPU Power: 458 mW', 'CPU Power: 14894 mW', 'GPU Power: 532 mW', 'CPU Power: 14486 mW', 'GPU Power: 414 mW', 'CPU Power: 15647 mW', 'GPU Power: 231 mW', 'CPU Power: 14354 mW', 'GPU Power: 321 mW', 'CPU Power: 15096 mW', 'GPU Power: 491 mW', 'CPU Power: 14550 mW', 'GPU Power: 331 mW', 'CPU Power: 13604 mW', 'GPU Power: 589 mW', 'CPU Power: 11953 mW', 'GPU Power: 455 mW', 'CPU Power: 15910 mW', 'GPU Power: 234 mW', 'CPU Power: 16035 mW', 'GPU Power: 529 mW', 'CPU Power: 14750 mW', 'GPU Power: 554 mW', 'CPU Power: 13947 mW', 'GPU Power: 911 mW', 'CPU Power: 15672 mW', 'GPU Power: 546 mW', 'CPU Power: 13245 mW', 'GPU Power: 646 mW', 'CP

# Random forest - IMDB

In [3]:
import numpy as np
import pickle
import joblib
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score
from tensorflow.keras.datasets import imdb
from sklearn.feature_extraction.text import TfidfVectorizer
from joblib import dump

In [4]:
# 加载 IMDB 数据集
max_features = 20000  # 使用的单词数量
maxlen = 100  # 每条评论的最大长度

(X_train, y_train), (X_test, y_test) = imdb.load_data(num_words=max_features)

# 将序列转换为文本
word_index = imdb.get_word_index()
index_word = {v: k for k, v in word_index.items()}

def sequences_to_texts(sequences):
    return [' '.join([index_word.get(i - 3, '?') for i in seq]) for seq in sequences]

X_train_text = sequences_to_texts(X_train)
X_test_text = sequences_to_texts(X_test)

# 使用TF-IDF向量化文本数据
vectorizer = TfidfVectorizer(max_features=max_features)
X_train_tfidf = vectorizer.fit_transform(X_train_text)
X_test_tfidf = vectorizer.transform(X_test_text)
X_test_tfidf_1 = vectorizer.transform(X_test_text[:1])
X_test_tfidf_10 = vectorizer.transform(X_test_text[:10])

# 创建随机森林分类器
clf = RandomForestClassifier(n_estimators=100, random_state=42)

# 训练模型
clf.fit(X_train_tfidf, y_train)

# 预测测试集
y_pred = clf.predict(X_test_tfidf)

# 计算准确率
accuracy = accuracy_score(y_test, y_pred)
print(f'Test accuracy: {accuracy * 100:.2f}%')

Test accuracy: 83.77%


In [15]:
# 保存训练好的模型到文件
model_filename = ('./models_train/imdb_random_forest_model.joblib')
dump(clf, model_filename)
vectorizer_filename = './models_train/imdb_random_forest_tfidf_vectorizer.joblib'
dump(vectorizer, vectorizer_filename)
print(f"Model saved to {model_filename}")
# 保存训练好的模型和向量化器到文件

Model saved to ./models_train/imdb_random_forest_model.joblib


In [6]:
import joblib
import time
from sklearn.ensemble import RandomForestClassifier 
import threading
import time
import subprocess
import psutil
import tempfile
import re
import os

# 初始化全局变量
thread_output = {}
duration = 0
inference_duration = 0

# 定义保存模型的函数
def save_model(stop_event, model, X_test_tfidf):
    global duration
    global inference_duration

    # model = joblib.load('./models_train/imdb_random_forest_model.joblib')

    start_time = time.time()
    for i in range(10):
        joblib.dump(model, 'imdb_random_forest_model.joblib')
    end_time = time.time()

    duration = end_time - start_time
    print(f'Time taken to save model: {duration:.4f} seconds')

    start_time_inference = time.time()
    for i in range(32): # 1*10
        model.predict(X_test_tfidf)
    end_time_inference = time.time()

    inference_duration = end_time_inference - start_time_inference
    print(f'Time taken for inference on samples: {inference_duration:.4f} seconds')

    stop_event.set()  # 触发停止其他线程

# 监控保存模型时的资源使用率的线程函数
def monitor_resources_during_save(stop_event):
    cpu_usage = []
    gpu_usage = []

    while not stop_event.is_set():
        cpu_usage.append(psutil.cpu_percent(interval=0.1))
        try:
            gpu_output = subprocess.check_output(['nvidia-smi', '--query-gpu=utilization.gpu', '--format=csv,noheader,nounits'])
            gpu_usage.append(int(gpu_output.strip()))
        except Exception as e:
            gpu_usage.append(None)  # 如果没有GPU或nvidia-smi命令失败，则记录None

    # 保存监测结果
    thread_output['cpu_usage'] = cpu_usage
    thread_output['gpu_usage'] = gpu_usage
    print("Resource monitoring finished.")

# 运行外部脚本并捕获输出
def run_script(stop_event):
    try:
        with tempfile.NamedTemporaryFile(delete=False) as tmp_file:
            process = subprocess.Popen(['/Users/anelloyi/Desktop/run_powermetrics.sh'], stdout=tmp_file, stderr=subprocess.STDOUT, text=True)
            print("Subprocess started.")
            while not stop_event.is_set():
                if process.poll() is not None:  # 检查进程是否已经结束
                    break
            
            if process.poll() is None:
                process.terminate()
                try:
                    process.wait(timeout=0.1)
                except subprocess.TimeoutExpired:
                    process.kill()
                    process.wait()
        
        with open(tmp_file.name, 'r') as f:
            thread_output['powermetrics'] = f.read()
        
        os.remove(tmp_file.name)  # 删除临时文件
        print("Subprocess finished.")
    except Exception as e:
        thread_output['powermetrics'] = str(e)
        print("Exception in subprocess:", str(e))

# 创建和启动线程
stop_event = threading.Event()

# 加载保存的模型
model_filename = './models_train/imdb_random_forest_model.joblib'
clf = joblib.load(model_filename)

thread1 = threading.Thread(target=save_model, args=(stop_event, clf, X_test_tfidf_1))
#thread1 = threading.Thread(target=save_model, args=(stop_event, clf, X_test_tfidf_10))
thread2 = threading.Thread(target=monitor_resources_during_save, args=(stop_event,))
thread3 = threading.Thread(target=run_script, args=(stop_event,))
thread1.start()
thread2.start()
thread3.start()

# 等待线程完成
thread1.join()
thread2.join()
thread3.join()

# 输出从线程收集的数据
content = thread_output.get('powermetrics', 'No output captured')
# 将内容按行拆分
lines = content.split('\n')
# 筛选出以 "CPU consume" 和 "GPU Power" 开头的行
filtered_lines = [line for line in lines if line.startswith('GPU Power:') or line.startswith('CPU Power:')]
# 将筛选后的行合并为一个字符串，每行之间用换行符分隔
filtered_content = '\n'.join(filtered_lines)

output_file_name = './imbd_models/output_random_forest/output-imdb-random_forest-onnx.txt'
with open(output_file_name, 'w') as file:
    file.write(filtered_content)
    file.write(f'\nTotal Duration(s): {duration:.2f}')
    file.write(f'\nInference Duration(s): {inference_duration:.4f}')
print(f"Content saved to {output_file_name}")

filtered_lines_count = len(filtered_lines)
print(filtered_lines_count)
print(filtered_lines)

# 确保 filtered_lines_count 不为零
if filtered_lines_count > 0:
    # 提取每一个采样点的数字，即CPU和GPU的具体mV
    numbers = []
    for line in filtered_lines:
        match = re.search(r'[\d.]+', line)
        if match:
            numbers.append(float(match.group()))

    delta_time = duration * 2 / filtered_lines_count
    numbers_scaled = [num * delta_time for num in numbers]
    total_energy_consumption = sum(numbers_scaled)
    print(f"Total energy consumption: {total_energy_consumption:.2f} mV")
else:
    print("No filtered lines to process.")


Subprocess started.
Time taken to save model: 1.8490 seconds
Time taken for inference on samples: 0.3361 seconds
Subprocess finished.
Resource monitoring finished.
Content saved to ./imbd_models/output_random_forest/output-imdb-random_forest-onnx.txt
138
['CPU Power: 11120 mW', 'GPU Power: 147 mW', 'CPU Power: 13589 mW', 'GPU Power: 296 mW', 'CPU Power: 10731 mW', 'GPU Power: 160 mW', 'CPU Power: 13908 mW', 'GPU Power: 151 mW', 'CPU Power: 19089 mW', 'GPU Power: 61 mW', 'CPU Power: 14804 mW', 'GPU Power: 86 mW', 'CPU Power: 12327 mW', 'GPU Power: 44 mW', 'CPU Power: 12014 mW', 'GPU Power: 49 mW', 'CPU Power: 14506 mW', 'GPU Power: 55 mW', 'CPU Power: 17299 mW', 'GPU Power: 55 mW', 'CPU Power: 14956 mW', 'GPU Power: 146 mW', 'CPU Power: 16730 mW', 'GPU Power: 50 mW', 'CPU Power: 16060 mW', 'GPU Power: 110 mW', 'CPU Power: 16373 mW', 'GPU Power: 53 mW', 'CPU Power: 16951 mW', 'GPU Power: 56 mW', 'CPU Power: 16146 mW', 'GPU Power: 54 mW', 'CPU Power: 18363 mW', 'GPU Power: 55 mW', 'CPU Po

### onnx

In [7]:
import joblib
import onnx
from skl2onnx import convert_sklearn
from skl2onnx.common.data_types import FloatTensorType
import onnxruntime as rt
from sklearn.feature_extraction.text import TfidfVectorizer
from tensorflow.keras.datasets import imdb

In [8]:
from scipy.sparse import hstack

In [11]:

# 初始化全局变量
thread_output = {}
duration = 0
inference_duration = 0

# 定义保存模型的函数
def save_model(stop_event, model, X_test_tfidf):
    global duration
    global inference_duration

    # 将random_forest模型转换为PyTorch模型
    start_time = time.time()
    for i in range(1):
        # 转换为ONNX格式
        initial_type = [('float_input', FloatTensorType([None, max_features]))]
        onnx_model = convert_sklearn(clf, initial_types=initial_type,  target_opset=9)

        # 保存ONNX模型
        onnx_model_filename = 'imdb_random_forest_model.onnx'
        with open(onnx_model_filename, "wb") as f:
            f.write(onnx_model.SerializeToString())
        print(f"ONNX model saved to {onnx_model_filename}")
    end_time = time.time()

    duration = end_time - start_time
    print(f'Time taken to save model: {duration:.4f} seconds')

    start_time_inference = time.time()
    # 准备输入数据
    # 如果特征数量少于max_features，则补充空的特征
    if X_test_tfidf.shape[1] < max_features:
        padding = np.zeros((X_test_tfidf.shape[0], max_features - X_test_tfidf.shape[1]))
        X_test_tfidf = hstack([X_test_tfidf, padding])
    X_test_tfidf_array = X_test_tfidf.toarray().astype(np.float32)

    # 使用ONNX runtime进行推理
    sess = rt.InferenceSession(onnx_model.SerializeToString())
    input_name = sess.get_inputs()[0].name
    label_name = sess.get_outputs()[0].name

    # 进行推理
    for i in range(32): # 1*10
        predictions = sess.run([label_name], {input_name: X_test_tfidf_array})[0]
    # 进行推理
    end_time_inference = time.time()

    inference_duration = end_time_inference - start_time_inference
    print(f'Time taken for inference on {X_test_tfidf_array.shape[0]} samples: {inference_duration:.4f} seconds')

    stop_event.set()  # 触发停止其他线程

# 监控保存模型时的资源使用率的线程函数
def monitor_resources_during_save(stop_event):
    cpu_usage = []
    gpu_usage = []

    while not stop_event.is_set():
        cpu_usage.append(psutil.cpu_percent(interval=0.1))
        try:
            gpu_output = subprocess.check_output(['nvidia-smi', '--query-gpu=utilization.gpu', '--format=csv,noheader,nounits'])
            gpu_usage.append(int(gpu_output.strip()))
        except Exception as e:
            gpu_usage.append(None)  # 如果没有GPU或nvidia-smi命令失败，则记录None

    # 保存监测结果
    thread_output['cpu_usage'] = cpu_usage
    thread_output['gpu_usage'] = gpu_usage
    print("Resource monitoring finished.")

# 运行外部脚本并捕获输出
def run_script(stop_event):
    try:
        with tempfile.NamedTemporaryFile(delete=False) as tmp_file:
            process = subprocess.Popen(['/Users/anelloyi/Desktop/run_powermetrics.sh'], stdout=tmp_file, stderr=subprocess.STDOUT, text=True)
            print("Subprocess started.")
            while not stop_event.is_set():
                if process.poll() is not None:  # 检查进程是否已经结束
                    break
            
            if process.poll() is None:
                process.terminate()
                try:
                    process.wait(timeout=0.1)
                except subprocess.TimeoutExpired:
                    process.kill()
                    process.wait()
        
        with open(tmp_file.name, 'r') as f:
            thread_output['powermetrics'] = f.read()
        
        os.remove(tmp_file.name)  # 删除临时文件
        print("Subprocess finished.")
    except Exception as e:
        thread_output['powermetrics'] = str(e)
        print("Exception in subprocess:", str(e))

# 创建和启动线程
stop_event = threading.Event()

# 加载保存的模型
model_filename = './models_train/imdb_random_forest_model.joblib'
clf = joblib.load(model_filename)

thread1 = threading.Thread(target=save_model, args=(stop_event, clf, X_test_tfidf_1))
#thread1 = threading.Thread(target=save_model, args=(stop_event, clf, X_test_tfidf_10))
thread1 = threading.Thread(target=save_model, args=(stop_event, clf, X_test_tfidf))
thread2 = threading.Thread(target=monitor_resources_during_save, args=(stop_event,))
thread3 = threading.Thread(target=run_script, args=(stop_event,))
thread1.start()
thread2.start()
thread3.start()

# 等待线程完成
thread1.join()
thread2.join()
thread3.join()

# 输出从线程收集的数据
content = thread_output.get('powermetrics', 'No output captured')
# 将内容按行拆分
lines = content.split('\n')
# 筛选出以 "CPU consume" 和 "GPU Power" 开头的行
filtered_lines = [line for line in lines if line.startswith('GPU Power:') or line.startswith('CPU Power:')]
# 将筛选后的行合并为一个字符串，每行之间用换行符分隔
filtered_content = '\n'.join(filtered_lines)

output_file_name = './imbd_models/output_random_forest/output-imdb-random_forest-onnx.txt'
with open(output_file_name, 'w') as file:
    file.write(filtered_content)
    file.write(f'\nTotal Duration(s): {duration:.2f}')
    file.write(f'\nInference Duration(s): {inference_duration:.4f}')
print(f"Content saved to {output_file_name}")

filtered_lines_count = len(filtered_lines)
print(filtered_lines_count)
print(filtered_lines)

# 确保 filtered_lines_count 不为零
if filtered_lines_count > 0:
    # 提取每一个采样点的数字，即CPU和GPU的具体mV
    numbers = []
    for line in filtered_lines:
        match = re.search(r'[\d.]+', line)
        if match:
            numbers.append(float(match.group()))

    delta_time = duration * 2 / filtered_lines_count
    numbers_scaled = [num * delta_time for num in numbers]
    total_energy_consumption = sum(numbers_scaled)
    print(f"Total energy consumption: {total_energy_consumption:.2f} mV")
else:
    print("No filtered lines to process.")


ONNX model saved to imdb_random_forest_model.onnx
Time taken to save model: 39.3827 seconds
Subprocess started.
Time taken for inference on 25000 samples: 22.7815 seconds
Resource monitoring finished.
ONNX model saved to imdb_random_forest_model.onnx
Time taken to save model: 29.3909 seconds
Time taken for inference on 25000 samples: 18.0317 seconds
Resource monitoring finished.
Subprocess finished.
Content saved to ./imbd_models/output_random_forest/output-imdb-random_forest-onnx.txt
5908
['CPU Power: 10864 mW', 'GPU Power: 65 mW', 'CPU Power: 10449 mW', 'GPU Power: 63 mW', 'CPU Power: 7635 mW', 'GPU Power: 62 mW', 'CPU Power: 6738 mW', 'GPU Power: 63 mW', 'CPU Power: 3004 mW', 'GPU Power: 63 mW', 'CPU Power: 5210 mW', 'GPU Power: 0 mW', 'CPU Power: 3496 mW', 'GPU Power: 127 mW', 'CPU Power: 4663 mW', 'GPU Power: 65 mW', 'CPU Power: 7941 mW', 'GPU Power: 64 mW', 'CPU Power: 9179 mW', 'GPU Power: 63 mW', 'CPU Power: 8578 mW', 'GPU Power: 63 mW', 'CPU Power: 7674 mW', 'GPU Power: 62 mW'

## pmml

In [16]:
from nyoka import PMML44 as pml
from pypmml import Model
import joblib
from nyoka import skl_to_pmml
from sklearn.pipeline import Pipeline

In [17]:

# 初始化全局变量
thread_output = {}
duration = 0
inference_duration = 0

# 定义保存模型的函数
def save_model(stop_event, model):
    global duration
    global inference_duration

    # 将random_forest模型转换为PyTorch模型
    start_time = time.time()
    for i in range(1):
        # 创建一个Pipeline
        pipeline = Pipeline([("vectorizer", vectorizer), ("classifier", model)])

        # 导出为PMML
        pmml_filename = 'imdb_random_forest_model.pmml'
        skl_to_pmml(pipeline, col_names=vectorizer.get_feature_names_out(), target_name="sentiment", pmml_f_name=pmml_filename)
    print(f"Model exported to {pmml_filename}")
    end_time = time.time()

    duration = end_time - start_time
    print(f'Time taken to save model: {duration:.4f} seconds')

    start_time_inference = time.time()
    # 准备输入数据
    # 加载PMML模型
    model = Model.load(pmml_filename)

    # 准备输入数据
    input_text = "This movie was fantastic! I really enjoyed it."
    input_vector = vectorizer.transform([input_text]*10)

    # 进行推理
    y_pred = model.predict(input_vector.toarray())
    # 进行推理
    end_time_inference = time.time()

    inference_duration = end_time_inference - start_time_inference
    print(f'Time taken for inference on {input_vector.shape[0]} samples: {inference_duration:.4f} seconds')

    stop_event.set()  # 触发停止其他线程

# 监控保存模型时的资源使用率的线程函数
def monitor_resources_during_save(stop_event):
    cpu_usage = []
    gpu_usage = []

    while not stop_event.is_set():
        cpu_usage.append(psutil.cpu_percent(interval=0.1))
        try:
            gpu_output = subprocess.check_output(['nvidia-smi', '--query-gpu=utilization.gpu', '--format=csv,noheader,nounits'])
            gpu_usage.append(int(gpu_output.strip()))
        except Exception as e:
            gpu_usage.append(None)  # 如果没有GPU或nvidia-smi命令失败，则记录None

    # 保存监测结果
    thread_output['cpu_usage'] = cpu_usage
    thread_output['gpu_usage'] = gpu_usage
    print("Resource monitoring finished.")

# 运行外部脚本并捕获输出
def run_script(stop_event):
    try:
        with tempfile.NamedTemporaryFile(delete=False) as tmp_file:
            process = subprocess.Popen(['/Users/anelloyi/Desktop/run_powermetrics.sh'], stdout=tmp_file, stderr=subprocess.STDOUT, text=True)
            print("Subprocess started.")
            while not stop_event.is_set():
                if process.poll() is not None:  # 检查进程是否已经结束
                    break
            
            if process.poll() is None:
                process.terminate()
                try:
                    process.wait(timeout=0.1)
                except subprocess.TimeoutExpired:
                    process.kill()
                    process.wait()
        
        with open(tmp_file.name, 'r') as f:
            thread_output['powermetrics'] = f.read()
        
        os.remove(tmp_file.name)  # 删除临时文件
        print("Subprocess finished.")
    except Exception as e:
        thread_output['powermetrics'] = str(e)
        print("Exception in subprocess:", str(e))

# 创建和启动线程
stop_event = threading.Event()

# 加载保存的模型
model_filename = './models_train/imdb_random_forest_model.joblib'
clf = joblib.load(model_filename)

thread1 = threading.Thread(target=save_model, args=(stop_event, clf))
thread2 = threading.Thread(target=monitor_resources_during_save, args=(stop_event,))
thread3 = threading.Thread(target=run_script, args=(stop_event,))
thread1.start()
thread2.start()
thread3.start()

# 等待线程完成
thread1.join()
thread2.join()
thread3.join()

# 输出从线程收集的数据
content = thread_output.get('powermetrics', 'No output captured')
# 将内容按行拆分
lines = content.split('\n')
# 筛选出以 "CPU consume" 和 "GPU Power" 开头的行
filtered_lines = [line for line in lines if line.startswith('GPU Power:') or line.startswith('CPU Power:')]
# 将筛选后的行合并为一个字符串，每行之间用换行符分隔
filtered_content = '\n'.join(filtered_lines)

output_file_name = './imbd_models/output_random_forest/output-imdb-random_forest-pmml.txt'
with open(output_file_name, 'w') as file:
    file.write(filtered_content)
    file.write(f'\nTotal Duration(s): {duration:.2f}')
    file.write(f'\nInference Duration(s): {inference_duration:.4f}')
print(f"Content saved to {output_file_name}")

filtered_lines_count = len(filtered_lines)
print(filtered_lines_count)
print(filtered_lines)

# 确保 filtered_lines_count 不为零
if filtered_lines_count > 0:
    # 提取每一个采样点的数字，即CPU和GPU的具体mV
    numbers = []
    for line in filtered_lines:
        match = re.search(r'[\d.]+', line)
        if match:
            numbers.append(float(match.group()))

    delta_time = duration * 2 / filtered_lines_count
    numbers_scaled = [num * delta_time for num in numbers]
    total_energy_consumption = sum(numbers_scaled)
    print(f"Total energy consumption: {total_energy_consumption:.2f} mV")
else:
    print("No filtered lines to process.")


Exception in thread Thread-27:
Traceback (most recent call last):
  File "/opt/anaconda3/envs/env_name/lib/python3.8/threading.py", line 932, in _bootstrap_inner
    self.run()
  File "/opt/anaconda3/envs/env_name/lib/python3.8/threading.py", line 870, in run
    self._target(*self._args, **self._kwargs)
  File "/var/folders/tm/w_n0tvhd2jlds4ml_jrhf3yw0000gn/T/ipykernel_87542/3650548247.py", line 19, in save_model
  File "/opt/anaconda3/envs/env_name/lib/python3.8/site-packages/nyoka/skl/skl_to_pmml.py", line 71, in skl_to_pmml
    pml_pp = pp.get_preprocess_val(ppln_sans_predictor, col_names, model)
  File "/opt/anaconda3/envs/env_name/lib/python3.8/site-packages/nyoka/skl/pre_process.py", line 112, in get_preprocess_val
    pp_dict = get_pml_derived_flds(trfm, updated_colnames, model=model)
  File "/opt/anaconda3/envs/env_name/lib/python3.8/site-packages/nyoka/skl/pre_process.py", line 185, in get_pml_derived_flds
    return tfidf_vectorizer(trfm, col_names)
  File "/opt/anaconda3/en

Subprocess started.


KeyboardInterrupt: 

# SVM-IMDB

## onnx

In [6]:
import joblib
import numpy as np
from onnxmltools import convert_sklearn
from onnxmltools.convert.common.data_types import FloatTensorType
import onnxruntime as rt
from sklearn.datasets import load_files
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn import svm
from sklearn.metrics import accuracy_score
from tensorflow.keras.datasets import imdb
from scipy.sparse import hstack
import re

In [58]:
# # 定义输入数据的类型
# initial_type = [('float_input', FloatTensorType([None, max_features]))]
# # 将模型转换为 ONNX 格式
# onnx_model = convert_sklearn(clf, initial_types=initial_type, target_opset=9)

# # 保存 ONNX 模型
# with open("imdb_svm_model.onnx", "wb") as f:
#     f.write(onnx_model.SerializeToString())

# # 使用 ONNX 进行推理
# sess = rt.InferenceSession("imdb_svm_model.onnx")

# # 获取 ONNX 模型输入输出名字
# input_name = sess.get_inputs()[0].name
# label_name = sess.get_outputs()[0].name

# 转换测试数据为 ONNX 需要的格式
# if X_test_tfidf.shape[1] < max_features:
#     padding = np.zeros((X_test_tfidf.shape[0], max_features - X_test_tfidf.shape[1]))
#     X_test_tfidf = hstack([X_test_tfidf, padding])
# X_test_tfidf_array = X_test_tfidf.toarray().astype(np.float32)

# # 进行推理
# pred_onnx = sess.run([label_name], {input_name: X_test_tfidf_array})[0]

# # 计算准确率
# accuracy_onnx = accuracy_score(y_test, pred_onnx)
# print(f'Test accuracy (ONNX): {accuracy_onnx * 100:.2f}%')

Test accuracy (ONNX): 40.00%


In [5]:
# 初始化全局变量
thread_output = {}
duration = 0
inference_duration = 0

# 定义保存模型的函数
def save_model(stop_event, clf, X_test_tfidf):
    global duration
    global inference_duration

    # 将svm模型转换为onnx模型
    start_time = time.time()
    for i in range(1):
        # 将模型转换为 ONNX 格式
        initial_type = [('float_input', FloatTensorType([None, max_features]))]
        # 将模型转换为 ONNX 格式
        onnx_model = convert_sklearn(clf, initial_types=initial_type, target_opset=9)

        # 保存 ONNX 模型
        with open("imdb_svm_model.onnx", "wb") as f:
            f.write(onnx_model.SerializeToString())
    print(f"Model exported to imdb_svm_model.onnx")
    end_time = time.time()

    duration = end_time - start_time
    print(f'Time taken to save model: {duration:.4f} seconds')

    # 准备输入数据
    # 使用 ONNX 进行推理
    sess = rt.InferenceSession("imdb_svm_model.onnx")

    # 获取 ONNX 模型输入输出名字
    input_name = sess.get_inputs()[0].name
    label_name = sess.get_outputs()[0].name

    # 转换测试数据为 ONNX 需要的格式
    if X_test_tfidf.shape[1] < max_features:
        padding = np.zeros((X_test_tfidf.shape[0], max_features - X_test_tfidf.shape[1]))
        X_test_tfidf = hstack([X_test_tfidf, padding])
    X_test_tfidf_array = X_test_tfidf.toarray().astype(np.float32)

    start_time_inference = time.time()
    # 进行推理
    pred_onnx = sess.run([label_name], {input_name: X_test_tfidf_array})[0]
    # 进行推理
    end_time_inference = time.time()

    inference_duration = end_time_inference - start_time_inference
    print(f'Time taken for inference on {X_test_tfidf_array.shape[0]} samples: {inference_duration:.4f} seconds')

    stop_event.set()  # 触发停止其他线程

# 监控保存模型时的资源使用率的线程函数
def monitor_resources_during_save(stop_event):
    cpu_usage = []
    gpu_usage = []

    while not stop_event.is_set():
        cpu_usage.append(psutil.cpu_percent(interval=0.1))
        try:
            gpu_output = subprocess.check_output(['nvidia-smi', '--query-gpu=utilization.gpu', '--format=csv,noheader,nounits'])
            gpu_usage.append(int(gpu_output.strip()))
        except Exception as e:
            gpu_usage.append(None)  # 如果没有GPU或nvidia-smi命令失败，则记录None

    # 保存监测结果
    thread_output['cpu_usage'] = cpu_usage
    thread_output['gpu_usage'] = gpu_usage
    print("Resource monitoring finished.")

# 运行外部脚本并捕获输出
def run_script(stop_event):
    try:
        with tempfile.NamedTemporaryFile(delete=False) as tmp_file:
            process = subprocess.Popen(['/Users/anelloyi/Desktop/run_powermetrics.sh'], stdout=tmp_file, stderr=subprocess.STDOUT, text=True)
            print("Subprocess started.")
            while not stop_event.is_set():
                if process.poll() is not None:  # 检查进程是否已经结束
                    break
            
            if process.poll() is None:
                process.terminate()
                try:
                    process.wait(timeout=0.1)
                except subprocess.TimeoutExpired:
                    process.kill()
                    process.wait()
        
        with open(tmp_file.name, 'r') as f:
            thread_output['powermetrics'] = f.read()
        
        os.remove(tmp_file.name)  # 删除临时文件
        print("Subprocess finished.")
    except Exception as e:
        thread_output['powermetrics'] = str(e)
        print("Exception in subprocess:", str(e))

# 创建和启动线程
stop_event = threading.Event()

# 加载保存的模型
clf = joblib.load('./models_train/imdb_svm_model.pkl')
vectorizer = joblib.load('tfidf_vectorizer.pkl')

thread1 = threading.Thread(target=save_model, args=(stop_event, clf, X_test_tfidf_tmp))
thread2 = threading.Thread(target=monitor_resources_during_save, args=(stop_event,))
thread3 = threading.Thread(target=run_script, args=(stop_event,))
thread1.start()
thread2.start()
thread3.start()

# 等待线程完成
thread1.join()
thread2.join()
thread3.join()

# 输出从线程收集的数据
content = thread_output.get('powermetrics', 'No output captured')
# 将内容按行拆分
lines = content.split('\n')
# 筛选出以 "CPU consume" 和 "GPU Power" 开头的行
filtered_lines = [line for line in lines if line.startswith('GPU Power:') or line.startswith('CPU Power:')]
# 将筛选后的行合并为一个字符串，每行之间用换行符分隔
filtered_content = '\n'.join(filtered_lines)

output_file_name = './imbd_models/output_svm/output-imdb-svm-onnx.txt'
with open(output_file_name, 'w') as file:
    file.write(filtered_content)
    file.write(f'\nTotal Duration(s): {duration:.2f}')
    file.write(f'\nInference Duration(s): {inference_duration:.4f}')
print(f"Content saved to {output_file_name}")

filtered_lines_count = len(filtered_lines)
print(filtered_lines_count)
print(filtered_lines)

# 确保 filtered_lines_count 不为零
if filtered_lines_count > 0:
    # 提取每一个采样点的数字，即CPU和GPU的具体mV
    numbers = []
    for line in filtered_lines:
        match = re.search(r'[\d.]+', line)
        if match:
            numbers.append(float(match.group()))

    delta_time = duration * 2 / filtered_lines_count
    numbers_scaled = [num * delta_time for num in numbers]
    total_energy_consumption = sum(numbers_scaled)
    print(f"Total energy consumption: {total_energy_consumption:.2f} mV")
else:
    print("No filtered lines to process.")


Subprocess started.
Model exported to imdb_svm_model.onnx
Time taken to save model: 75.9331 seconds
Time taken for inference on 10 samples: 0.0505 seconds
Resource monitoring finished.
Subprocess finished.
Content saved to ./imbd_models/output_svm/output-imdb-svm-onnx.txt
10176
['CPU Power: 13629 mW', 'GPU Power: 584 mW', 'CPU Power: 14898 mW', 'GPU Power: 318 mW', 'CPU Power: 13757 mW', 'GPU Power: 268 mW', 'CPU Power: 14250 mW', 'GPU Power: 318 mW', 'CPU Power: 14010 mW', 'GPU Power: 136 mW', 'CPU Power: 8773 mW', 'GPU Power: 256 mW', 'CPU Power: 7744 mW', 'GPU Power: 411 mW', 'CPU Power: 8320 mW', 'GPU Power: 262 mW', 'CPU Power: 13218 mW', 'GPU Power: 260 mW', 'CPU Power: 14866 mW', 'GPU Power: 250 mW', 'CPU Power: 14295 mW', 'GPU Power: 322 mW', 'CPU Power: 15094 mW', 'GPU Power: 129 mW', 'CPU Power: 13150 mW', 'GPU Power: 383 mW', 'CPU Power: 13775 mW', 'GPU Power: 319 mW', 'CPU Power: 11577 mW', 'GPU Power: 254 mW', 'CPU Power: 7869 mW', 'GPU Power: 271 mW', 'CPU Power: 8399 mW'

NameError: name 're' is not defined

In [7]:

# 确保 filtered_lines_count 不为零
if filtered_lines_count > 0:
    # 提取每一个采样点的数字，即CPU和GPU的具体mV
    numbers = []
    for line in filtered_lines:
        match = re.search(r'[\d.]+', line)
        if match:
            numbers.append(float(match.group()))

    delta_time = duration * 2 / filtered_lines_count
    numbers_scaled = [num * delta_time for num in numbers]
    total_energy_consumption = sum(numbers_scaled)
    print(f"Total energy consumption: {total_energy_consumption:.2f} mV")
else:
    print("No filtered lines to process.")


Total energy consumption: 456172.21 mV
