From 9c55721d3edbb14abb39ed8676a41ed6dad4e781 Mon Sep 17 00:00:00 2001
From: yumemiso <mengjianzou@qq.com>
Date: Wed, 27 May 2026 20:31:08 +0800
Subject: [PATCH] add common method for gpu virtualization in cloud

---
 GPU-Virtual-Service/gpu-remoting/README.md    |   2 +
 .../gpu-remoting/include/chunkStructure.h     |  75 ++
 .../gpu-remoting/include/configure.h          | 112 +++
 .../gpu-remoting/include/constVar.h           | 372 +++++++++
 .../gpu-remoting/include/define.h             | 552 +++++++++++++
 .../gpu-remoting/src/common/configure.cc      |  43 +
 .../gpu-remoting/src/common/elfHandle.cc      | 732 ++++++++++++++++++
 .../xpu-pool-service/.gitmodules              |   7 -
 .../xpu-pool-service/ci/VersionSet.xml        |   1 -
 .../xpu-pool-service/ci/app_define.json       |  13 -
 .../xpu-pool-service/ci/at/at_deploy.sh       |  31 -
 .../xpu-pool-service/ci/at/at_deploy.yml      |  53 --
 .../xpu-pool-service/ci/build.sh              | 101 ---
 .../xpu-pool-service/ci/build.yml             |  48 --
 .../xpu-pool-service/ci/buildinfo.sh          |  16 -
 .../ci/cmc/openSource_x86.xml                 |  24 -
 .../xpu-pool-service/ci/cmc/upload_cmc.xml    |  25 -
 .../xpu-pool-service/ci/cms_signature.sh      |  63 --
 .../xpu-pool-service/ci/dependency.xml        |   8 -
 .../xpu-pool-service/ci/hwp7s_signature.sh    |  47 --
 .../xpu-pool-service/ci/opensource.xml        |   6 -
 .../xpu-pool-service/ci/third_party           |   1 -
 .../ci/xpu_pool/build_x86.yml                 |  70 --
 .../ci/xpu_pool/build_xpu_package.sh          | 131 ----
 .../xpu_docker_build/acl_client/Dockerfile    |  10 -
 .../xpu_docker_build/cuda_client/Dockerfile   |  10 -
 .../xpu_docker_build/exporter/Dockerfile      |  30 -
 .../gpu-device-plugin/Dockerfile              |  19 -
 .../npu-device-plugin/Dockerfile              |  19 -
 29 files changed, 1888 insertions(+), 733 deletions(-)
 create mode 100644 GPU-Virtual-Service/gpu-remoting/README.md
 create mode 100644 GPU-Virtual-Service/gpu-remoting/include/chunkStructure.h
 create mode 100644 GPU-Virtual-Service/gpu-remoting/include/configure.h
 create mode 100644 GPU-Virtual-Service/gpu-remoting/include/constVar.h
 create mode 100644 GPU-Virtual-Service/gpu-remoting/include/define.h
 create mode 100644 GPU-Virtual-Service/gpu-remoting/src/common/configure.cc
 create mode 100644 GPU-Virtual-Service/gpu-remoting/src/common/elfHandle.cc
 delete mode 100644 GPU-Virtual-Service/xpu-pool-service/.gitmodules
 delete mode 100644 GPU-Virtual-Service/xpu-pool-service/ci/VersionSet.xml
 delete mode 100644 GPU-Virtual-Service/xpu-pool-service/ci/app_define.json
 delete mode 100644 GPU-Virtual-Service/xpu-pool-service/ci/at/at_deploy.sh
 delete mode 100644 GPU-Virtual-Service/xpu-pool-service/ci/at/at_deploy.yml
 delete mode 100644 GPU-Virtual-Service/xpu-pool-service/ci/build.sh
 delete mode 100644 GPU-Virtual-Service/xpu-pool-service/ci/build.yml
 delete mode 100644 GPU-Virtual-Service/xpu-pool-service/ci/buildinfo.sh
 delete mode 100644 GPU-Virtual-Service/xpu-pool-service/ci/cmc/openSource_x86.xml
 delete mode 100644 GPU-Virtual-Service/xpu-pool-service/ci/cmc/upload_cmc.xml
 delete mode 100644 GPU-Virtual-Service/xpu-pool-service/ci/cms_signature.sh
 delete mode 100644 GPU-Virtual-Service/xpu-pool-service/ci/dependency.xml
 delete mode 100644 GPU-Virtual-Service/xpu-pool-service/ci/hwp7s_signature.sh
 delete mode 100644 GPU-Virtual-Service/xpu-pool-service/ci/opensource.xml
 delete mode 100644 GPU-Virtual-Service/xpu-pool-service/ci/third_party
 delete mode 100644 GPU-Virtual-Service/xpu-pool-service/ci/xpu_pool/build_x86.yml
 delete mode 100644 GPU-Virtual-Service/xpu-pool-service/ci/xpu_pool/build_xpu_package.sh
 delete mode 100644 GPU-Virtual-Service/xpu-pool-service/ci/xpu_pool/xpu_docker_build/acl_client/Dockerfile
 delete mode 100644 GPU-Virtual-Service/xpu-pool-service/ci/xpu_pool/xpu_docker_build/cuda_client/Dockerfile
 delete mode 100644 GPU-Virtual-Service/xpu-pool-service/ci/xpu_pool/xpu_docker_build/exporter/Dockerfile
 delete mode 100644 GPU-Virtual-Service/xpu-pool-service/ci/xpu_pool/xpu_docker_build/gpu-device-plugin/Dockerfile
 delete mode 100644 GPU-Virtual-Service/xpu-pool-service/ci/xpu_pool/xpu_docker_build/npu-device-plugin/Dockerfile

diff --git a/GPU-Virtual-Service/gpu-remoting/README.md b/GPU-Virtual-Service/gpu-remoting/README.md
new file mode 100644
index 0000000..12b3362
--- /dev/null
+++ b/GPU-Virtual-Service/gpu-remoting/README.md
@@ -0,0 +1,2 @@
+# Flexible GPU Virtualization in Cloud (FlexGV)
+
diff --git a/GPU-Virtual-Service/gpu-remoting/include/chunkStructure.h b/GPU-Virtual-Service/gpu-remoting/include/chunkStructure.h
new file mode 100644
index 0000000..b49e4d6
--- /dev/null
+++ b/GPU-Virtual-Service/gpu-remoting/include/chunkStructure.h
@@ -0,0 +1,75 @@
+#ifndef CHUNK_STRUCTURE_H
+#define CHUNK_STRUCTURE_H
+
+#include "constVar.h"
+#include <boost/thread/mutex.hpp>
+#include <boost/thread/thread.hpp>
+#include <stdint.h>
+
+typedef struct {
+    char *name;             // raw pointer; this struct has no destructor, so it never frees it
+    size_t paramSize;       // presumably the total byte size of all parameters -- TODO confirm
+    size_t paramNum;        // presumably the entry count of paramOffsets/paramSizes -- TODO confirm
+    uint16_t *paramOffsets; // array of 16-bit entries; raw pointer, never freed by this struct
+    uint16_t *paramSizes;   // array of 16-bit entries; raw pointer, never freed by this struct
+    void *host_fun;         // opaque pointer; presumably the host-side stub of the kernel -- TODO confirm
+} KernelInfo_t; // used to store kernel parameters from client fatCubin
+
+struct KernelPtx_t {
+    std::string name; // owning copy of the first name_len bytes at n
+    std::string body; // owning copy of the first body_len bytes at b
+
+    KernelPtx_t(const char* n, size_t name_len, const char* b, size_t body_len) // lengths are explicit, so the inputs need no NUL terminator
+        : name(n, name_len), body(b, body_len) {}
+}; // used to store kernel body from PTX codes
+
+struct LdParamInfo_t {
+    bool isUsed;   // always starts out false (set by the constructor)
+    size_t index;  // copied from the constructor's idx argument
+    size_t offset; // copied from the constructor's off argument
+
+    LdParamInfo_t(size_t idx, size_t off) : isUsed(false), index(idx), offset(off) {}
+}; // NOTE(review): the name suggests bookkeeping for PTX ld.param accesses -- confirm
+
+struct BatchInfo_t { // no default initializers: both fields are indeterminate until assigned
+    uint8_t curType;     // NOTE(review): possibly a MEMCPY_DATA_TYPE value -- confirm
+    size_t curBatchSize;
+};
+
+struct HostBuffer_t { // pointer + length pair; no default initializers and no destructor
+    uint8_t* hostPtr; // raw pointer; this struct never allocates or frees it
+    size_t size;      // presumably the byte length of the memory at hostPtr -- TODO confirm
+};
+
+struct TensorInfo_t { // pointer + length pair; no default initializers and no destructor
+    void* devPtr; // opaque pointer; presumably a device address -- TODO confirm
+    size_t size;  // presumably the byte length of the memory at devPtr -- TODO confirm
+};
+
+struct Block_t{ // every field now has a default, so a default-constructed block is zeroed and invalid
+    uint64_t start = 0;     // was the only field without a default, i.e. indeterminate when read
+    uint64_t devPtr = 0;
+    size_t size = 0;
+    bool valid = false;
+    bool essential = false;
+};
+
+struct Handle_t { // one record per API handle; all fields except `type` have defaults
+    uint64_t handlePtr = 0;
+    enum API_REQUEST_CODE_SET type; // no default initializer: indeterminate until assigned
+    bool valid = false;
+    uint64_t stream = 0;
+};
+
+struct Sync_t { // bundles a mutex with a condition variable; both are default-constructed
+    boost::mutex mutex;
+    boost::condition_variable cv; // NOTE(review): presumably waited on while holding `mutex` -- confirm
+};
+
+// struct GpuInform{
+//     int GpuId;
+//     char IpAddr [IP_STRING_LEN];
+//     int Port;
+// };
+
+#endif //CHUNK_STRUCTURE_H
\ No newline at end of file
diff --git a/GPU-Virtual-Service/gpu-remoting/include/configure.h b/GPU-Virtual-Service/gpu-remoting/include/configure.h
new file mode 100644
index 0000000..cf445b6
--- /dev/null
+++ b/GPU-Virtual-Service/gpu-remoting/include/configure.h
@@ -0,0 +1,112 @@
+#ifndef BASICDEDUP_CONFIGURE_h
+#define BASICDEDUP_CONFIGURE_h
+
+#include <boost/property_tree/json_parser.hpp>
+#include <boost/property_tree/ptree.hpp>
+#include <boost/atomic.hpp>
+#include <boost/lockfree/queue.hpp>
+#include <boost/interprocess/file_mapping.hpp>
+#include <boost/interprocess/mapped_region.hpp>
+#include <boost/interprocess/exceptions.hpp>
+#include <boost/regex.hpp>
+#include <boost/intrusive/list.hpp>
+#include <sys/epoll.h>
+#include "define.h"
+#include "constVar.h"
+#include "chunkStructure.h"
+using namespace std;
+
+class Configure {
+    // Read-only view of the configuration; presumably populated by ReadConf() -- TODO confirm.
+private:
+    string serverIp_;
+    uint16_t serverPort_;
+
+    uint64_t clientID_;
+    bool isClient_;
+    size_t reqGPUnum_;
+    size_t priority_;
+    string proxyIp_;
+    uint16_t proxyPort_;
+
+    string dpcIp_;
+    uint16_t dpcPort_;
+
+    string monIp_;
+    uint16_t monPort_;
+
+    size_t DDPreqGPUnum_;
+
+    string model_;
+    size_t batchSize_;
+
+    void ReadConf(std::string path); // declared only; the definition is not in this header
+
+public:
+    Configure(std::string path, bool isClient = false);
+
+    ~Configure();
+
+    inline const string& GetServerIp() const noexcept {
+        return serverIp_;
+    }
+
+    inline uint16_t GetServerPort() const noexcept {
+        return serverPort_;
+    }
+
+    inline uint64_t GetClientID() const noexcept {
+        return clientID_;
+    }
+
+    // DDPreqGPUnum_ takes precedence over reqGPUnum_ whenever it is greater than 1.
+    inline size_t GetReqGPUnum() const noexcept {
+        if (DDPreqGPUnum_ > 1) {
+            return DDPreqGPUnum_;
+        }
+        return reqGPUnum_;
+    }
+
+    inline size_t GetPriority() const noexcept {
+        return priority_;
+    }
+
+    inline const string& GetProxyIp() const noexcept {
+        return proxyIp_;
+    }
+
+    inline uint16_t GetProxyPort() const noexcept {
+        return proxyPort_;
+    }
+
+    inline const string& GetDpcIp() const noexcept {
+        return dpcIp_;
+    }
+
+    inline uint16_t GetDpcPort() const noexcept {
+        return dpcPort_;
+    }
+
+    inline const string& GetMonIp() const noexcept {
+        return monIp_;
+    }
+
+    inline uint16_t GetMonPort() const noexcept {
+        return monPort_;
+    }
+
+    inline size_t GetDDPreqGPUnum() const noexcept {
+        return DDPreqGPUnum_;
+    }
+
+    inline const string& GetModel() const noexcept {
+        return model_;
+    }
+
+    inline size_t GetBatchSize() const noexcept {
+        return batchSize_;
+    }
+
+};
+
+#endif
\ No newline at end of file
diff --git a/GPU-Virtual-Service/gpu-remoting/include/constVar.h b/GPU-Virtual-Service/gpu-remoting/include/constVar.h
new file mode 100644
index 0000000..d0ccbb1
--- /dev/null
+++ b/GPU-Virtual-Service/gpu-remoting/include/constVar.h
@@ -0,0 +1,372 @@
+#ifndef CONST_VAR_H
+#define CONST_VAR_H
+
+#include "define.h"
+
+#define IP_STRING_LEN          40
+#define PORT_STRING_LEN        8
+// Multi-token values are parenthesised so they expand as one operand (e.g. under / or %).
+#define THREAD_NUM_PER_CLIENT  100
+#define IOV_MAX_NUM            1000
+#define PARAM_MAX_NUM          50
+#define REG_PARAM_MAX_NUM      (128 * 1024)
+#define TEST_STRING_LEN        50
+#define REQUEST_BUFFER_SIZE    (3LL * 1024 * 1024 * 1024)
+#define RECV_AM_SHIFT_BIT      8
+
+#define BLOCK_ID_BIT                17
+#define BLOCKS_MAX_NUM              (1LL << BLOCK_ID_BIT)                // 2^17 = 131072 block ids
+#define BLOCK_SHIFT_BIT             (64 - BLOCK_ID_BIT)                  // 47 offset bits: up to 2^47 B (128 TiB) per block
+
+#define HANDLE_MAX_NUM              800730
+#define HANDLE_PREFIX               ((1LL << 54) | (1LL << 52) | (1LL << 50) | (1LL << 48)) // prefix: 0x55000000000000
+#define HANDLE_MASK                 ((1LL << 48) - 1)                    // low 48 bits
+
+#define BACKUP_STREAM_NUM           4
+#define BACKUP_PERIOD               20
+#define BACKUP_API_MAX_NUM          10000
+#define COMM_EVENT_TIMEOUT          50
+
+// static const char* BACKUP_FILE_DIR = "/mnt/nvme0/FlexGV_Test/Bak/";
+static const char* BACKUP_FILE_DIR = "/mnt/nvme0/Bak163/"; // hard-coded absolute path; the pointer itself is not const
+
+static const size_t CONN_RESERVED_NUM = 1024;
+static const uint32_t THREAD_STACK_SIZE = 8*1024*1024; // 8 MiB
+
+static const size_t DEVICE_POINTER_SIZE = sizeof(void *) * 2 + 3; // 19 when pointers are 8 bytes
+static const size_t HOST_POINTER_SIZE   = sizeof(void *) * 2 + 3; // presumably 2 hex digits per pointer byte plus 3 extra chars -- TODO confirm
+static const size_t DEVICE_FUNC_INFO_SMALL_SIZE   = 1 * 1024 + 512 + 256; // 1792
+static const size_t DEVICE_FUNC_INFO_LARGE_SIZE   = 4 * 1024; // 4096
+
+enum SERVER_STATUS_SET { // implicit values: NORMAL_STATUS = 0, EP_CLOSE = 1, COMM_ABORT = 2
+    NORMAL_STATUS,
+    // NODE_FAILURE, // cudaError or process failure
+    EP_CLOSE,   // current endpoint ready to close (migration)
+    COMM_ABORT  // communication abort
+};
+
+enum API_REQUEST_CODE_SET { // only the first two values are explicit; every later code is previous + 1, so inserting or reordering entries renumbers all codes after it
+    /* CUDA Runtime API */
+    SERVER_STATUS = 1,
+    __CUDA_REGISTER = 2, // NOTE(review): identifiers containing "__" are reserved to the implementation in C++
+    __CUDA_REGISTER_FAT_BINARY, 
+    __CUDA_REGISTER_FAT_BINARY_END, 
+    __CUDA_UNREGISTER_FAT_BINARY,
+    __CUDA_REGISTER_FUNCTION, 
+    __CUDA_REGISTER_VAR, 
+    CUDA_MALLOC, 
+    CUDA_MEMCPY,
+    CUDA_MEMCPY_H2D,
+    CUDA_MEMCPY_D2H,
+    CUDA_MEMCPY_D2D, 
+    NEW_ITERATION_REQ, // used for identifying the new iteration
+    CUDA_MEMCPY_ASYNC_H2D,
+    CUDA_MEMCPY_ASYNC_D2H,
+    CUDA_MEMCPY_ASYNC_D2D,
+    CUDA_MEMCPY_TO_SYMBOL, 
+    CUDA_MEM_GET_INFO, 
+    CUDA_FREE, 
+    CUDA_STREAM_CREATE,
+    CUDA_STREAM_CREATE_WITH_FLAGS,
+    CUDA_STREAM_CREATE_WITH_PRIORITY,
+    CUDA_STREAM_DESTROY,
+    CUDA_STREAM_WAIT_EVENT,
+    CUDA_STREAM_SYNCHRONIZE,
+    CUDA_STREAM_IS_CAPTURING,
+    CUDA_STREAM_GET_CAPTURE_INFO,
+    CUDA_EVENT_CREATE,
+    CUDA_EVENT_CREATE_WITH_FLAGS,
+    CUDA_EVENT_RECORD,
+    CUDA_EVENT_QUERY,
+    CUDA_EVENT_DESTROY,
+    CUDA_EVENT_ELAPSED_TIME,
+    CUDA_LAUNCH_KERNEL, 
+    CUDA_FUNC_GET_ATTRIBUTES,
+    CUDA_DEVICE_SYNCHRONIZE, 
+    CUDA_MEMSET, 
+    CUDA_MEMSET_ASYNC,
+    CUDA_SET_DEVICE, 
+    CUDA_SET_MAIN_DEVICE,
+    CUDA_GET_DEVICE_COUNT, 
+    CUDA_GET_DEVICE,
+    CUDA_GET_DEVICE_PROPERTIES, 
+    CUDA_DEVICE_GET_ATTRIBUTE,
+    CUDA_OCCUPANCY_MAX_ACTIVE_BLOCKS_PER_MULTIPROCESSOR,
+    CUDA_OCCUPANCY_MAX_ACTIVE_BLOCKS_PER_MULTIPROCESSOR_WITH_FLAGS,
+
+    /* cuBlas API */
+    CUBLAS_CREATE_V2,
+    CUBLAS_SGEMM_V2,
+    CUBLAS_SGEMM_STRIDED_BATCHED,
+    CUBLAS_DESTROY_V2,
+    CUBLAS_SET_STREAM_V2,
+    CUBLAS_SET_WORKSPACE_V2,
+    CUBLAS_SET_MATH_MODE,
+    CUBLAS_GET_MATH_MODE,
+
+    /* cuBlasLt API */
+    CUBLASLT_CREATE,
+    CUBLASLT_DESTROY,
+    CUBLASLT_MATMULDESC_CREATE,
+    CUBLASLT_MATMULDESC_DESTROY,
+    CUBLASLT_MATMULDESC_SETATTRIBUTE,
+    CUBLASLT_MATRIX_LAYOUT_CREATE,
+    CUBLASLT_MATRIX_LAYOUT_DESTROY,
+    CUBLASLT_MATRIX_LAYOUT_SETATTRIBUTE,
+    CUBLASLT_MATMULPREFERENCE_CREATE,
+    CUBLASLT_MATMULPREFERENCE_DESTROY,
+    CUBLASLT_MATMULPREFERENCE_SETATTRIBUTE,
+    CUBLASLT_MATMULALGO_GETHEURISTIC,
+    CUBLASLT_MATMUL,
+
+    /* cuDNN API */
+    CUDNN_CREATE,
+    CUDNN_DESTROY,
+    CUDNN_CREATE_TENSOR_DESCRIPTOR,
+    CUDNN_DESTROY_TENSOR_DESCRIPTOR,
+    CUDNN_GET_TENSOR_SIZE_IN_BYTES,
+    CUDNN_SET_TENSOR_4D_DESCRIPTOR,
+    CUDNN_SET_TENSOR_ND_DESCRIPTOR,
+    CUDNN_SET_TENSOR_ND_DESCRIPTOR_EX,
+    CUDNN_CREATE_TENSOR_TRANSFORM_DESCRIPTOR,
+    CUDNN_SET_TENSOR_TRANSFORM_DESCRIPTOR,
+    CUDNN_DESTROY_TENSOR_TRANSFORM_DESCRIPTOR,
+    CUDNN_INIT_TRANSFORM_DEST,
+    CUDNN_TRANSFORM_TENSOR_EX,
+    CUDNN_TRANSFORM_FILTER,
+    CUDNN_CREATE_FILTER_DESCRIPTOR,
+    CUDNN_SET_FILTER_ND_DESCRIPTOR,
+    CUDNN_DESTROY_FILTER_DESCRIPTOR,
+    CUDNN_GET_FILTER_SIZE_IN_BYTES,
+    CUDNN_GET_FOLDED_CONV_BACKWARD_DATA_DESCRIPTORS,
+    CUDNN_SET_STREAM,
+    CUDNN_BATCH_NORMALIZATION_BACKWARD_EX,
+    CUDNN_BATCH_NORMALIZATION_FORWARD_TRAINING_EX,
+    CUDNN_BATCH_NORMALIZATION_FORWARD_INFERENCE,
+    CUDNN_BACKEND_CREATE_DESCRIPTOR,
+    CUDNN_BACKEND_DESTROY_DESCRIPTOR,
+    CUDNN_BACKEND_SET_ATTRIBUTE,
+    CUDNN_BACKEND_GET_ATTRIBUTE,
+    CUDNN_BACKEND_EXECUTE,
+    CUDNN_BACKEND_FINALIZE, 
+    CUDNN_GET_BATCH_NORMALIZATION_BACKWARD_EX_WORKSPACE_SIZE,
+    CUDNN_GET_BATCH_NORMALIZATION_FORWARD_TRAINING_EX_WORKSPACE_SIZE,
+    CUDNN_GET_BATCH_NORMALIZATION_TRAINING_EX_RESERVE_SPACE_SIZE,
+    CUDNN_CREATE_CONVOLUTION_DESCRIPTOR,
+    CUDNN_DESTROY_CONVOLUTION_DESCRIPTOR,
+    CUDNN_SET_CONVOLUTION_GROUP_COUNT,
+    CUDNN_SET_CONVOLUTION_MATH_TYPE,
+    CUDNN_SET_CONVOLUTION_ND_DESCRIPTOR,
+    CUDNN_SET_CONVOLUTION_REORDER_TYPE,
+    CUDNN_GET_CONVOLUTION_FORWARD_ALGORITHM_V7,
+    CUDNN_GET_CONVOLUTION_BACKWARD_FILTER_ALGORITHM_V7,
+    CUDNN_GET_CONVOLUTION_BACKWARD_DATA_ALGORITHM_V7,
+    CUDNN_GET_CONVOLUTION_FORWARD_WORKSPACE_SIZE,
+    CUDNN_CONVOLUTION_FORWARD,
+    CUDNN_GET_CONVOLUTION_BACKWARD_DATA_WORKSPACE_SIZE,
+    CUDNN_CONVOLUTION_BACKWARD_FILTER,
+    CUDNN_GET_CONVOLUTION_BACKWARD_FILTER_WORKSPACE_SIZE,
+    CUDNN_CONVOLUTION_BACKWARD_DATA,
+
+    /* NCCL API */
+    NCCL_GROUP_START,
+    NCCL_GROUP_END,
+    NCCL_COMM_INIT_RANK,
+    NCCL_COMM_DESTROY,
+    NCCL_COMM_GET_ASYNC_ERROR, 
+    NCCL_GET_UNIQUE_ID,
+    NCCL_GET_VERSION,
+    NCCL_ALL_REDUCE,
+    NCCL_REDUCE,
+    NCCL_REDUCE_SCATTER,
+    NCCL_ALL_GATHER,
+    NCCL_BROADCAST,
+    NCCL_SEND,
+    NCCL_RECV,
+    NCCL_COMM_COUNT, 
+    NCCL_COMM_USER_RANK,
+    NCCL_COMM_CU_DEVICE,
+    NCCL_COMM_ABORT,
+    NCCL_COMM_INIT_ALL, 
+    NCCL_COMM_INIT_RANK_CONFIG, 
+    NCCL_COMM_SPLIT,
+    NCCL_COMM_FINALIZE,
+    NCCL_COMM_REGISTER,
+    NCCL_COMM_DEREGISTER,
+    NCCL_MEM_ALLOC,
+    NCCL_MEM_FREE,
+    NCCL_RED_OP_CREATE_PRE_MUL_SUM,
+    NCCL_RED_OP_DESTROY
+};
+
+enum MEMCPY_DATA_TYPE { // four explicit values 0..3, i.e. representable in 2 bits
+    MEMCPY_OTHER = 0,
+    MEMCPY_TRAIN = 1,
+    MEMCPY_VALID = 2,
+    MEMCPY_MODEL = 3
+};
+
+const int NotNeedRecordAPIs[] = { // table of API_REQUEST_CODE_SET values; NOTE(review): presumably the requests skipped when API calls are recorded -- confirm against the code that scans it
+    /* CUDA Runtime API */
+    // CUDA_MALLOC, // The validation phase of training sometimes needs to malloc memory
+    // CUDA_MEMCPY_H2D,
+    CUDA_MEMCPY_D2H,
+    CUDA_MEMCPY_D2D,
+    // CUDA_MEMCPY_ASYNC_H2D,
+    CUDA_MEMCPY_ASYNC_D2H,
+    // CUDA_MEMCPY_ASYNC_D2D,
+    CUDA_MEMCPY_TO_SYMBOL,
+    CUDA_MEM_GET_INFO,
+    CUDA_FREE,
+    CUDA_STREAM_IS_CAPTURING,
+    CUDA_STREAM_GET_CAPTURE_INFO,
+    CUDA_EVENT_ELAPSED_TIME, 
+    CUDA_FUNC_GET_ATTRIBUTES,
+    // CUDA_MEMSET,
+    // CUDA_MEMSET_ASYNC,
+    CUDA_SET_DEVICE,
+    CUDA_SET_MAIN_DEVICE,
+    CUDA_GET_DEVICE_COUNT,
+    CUDA_GET_DEVICE,
+    CUDA_GET_DEVICE_PROPERTIES,
+    CUDA_DEVICE_GET_ATTRIBUTE,
+    CUDA_OCCUPANCY_MAX_ACTIVE_BLOCKS_PER_MULTIPROCESSOR,
+    CUDA_OCCUPANCY_MAX_ACTIVE_BLOCKS_PER_MULTIPROCESSOR_WITH_FLAGS,
+
+    /* cuBlas API */
+    CUBLAS_GET_MATH_MODE, 
+
+    /* cuBlasLt API */
+    CUBLASLT_MATMULALGO_GETHEURISTIC,
+
+    /* cuDNN API */
+    CUDNN_GET_TENSOR_SIZE_IN_BYTES,
+    CUDNN_GET_FILTER_SIZE_IN_BYTES,
+    //todo: backend APIs
+    CUDNN_GET_BATCH_NORMALIZATION_BACKWARD_EX_WORKSPACE_SIZE,
+    CUDNN_GET_BATCH_NORMALIZATION_FORWARD_TRAINING_EX_WORKSPACE_SIZE,
+    CUDNN_GET_BATCH_NORMALIZATION_TRAINING_EX_RESERVE_SPACE_SIZE,
+    CUDNN_GET_CONVOLUTION_FORWARD_ALGORITHM_V7,
+    CUDNN_GET_CONVOLUTION_BACKWARD_FILTER_ALGORITHM_V7,
+    CUDNN_GET_CONVOLUTION_BACKWARD_DATA_ALGORITHM_V7,
+    CUDNN_GET_CONVOLUTION_FORWARD_WORKSPACE_SIZE,
+    CUDNN_GET_CONVOLUTION_BACKWARD_DATA_WORKSPACE_SIZE,
+    CUDNN_GET_CONVOLUTION_BACKWARD_FILTER_WORKSPACE_SIZE,
+
+    /* NCCL API */
+    // NCCL_GET_UNIQUE_ID,
+    NCCL_GET_VERSION,
+    NCCL_COMM_COUNT,
+    NCCL_COMM_USER_RANK,
+    NCCL_COMM_CU_DEVICE,
+    NCCL_MEM_ALLOC,
+    NCCL_MEM_FREE
+};
+
+const int ComputeAPIs[] = { // table of API_REQUEST_CODE_SET values; NOTE(review): presumably the requests classified as compute/synchronisation work -- confirm against the code that scans it
+    /* CUDA Runtime API */
+    NEW_ITERATION_REQ,
+    CUDA_STREAM_WAIT_EVENT, // just a temporary state
+    CUDA_STREAM_SYNCHRONIZE,
+    CUDA_EVENT_RECORD, // just a temporary state
+    CUDA_LAUNCH_KERNEL,
+    CUDA_DEVICE_SYNCHRONIZE,
+
+    /* cuBlas API */
+    CUBLAS_SGEMM_V2,
+    CUBLAS_SGEMM_STRIDED_BATCHED,
+
+    /* cuBlasLt API */
+    CUBLASLT_MATMUL,
+
+    /* cuDNN API */
+    CUDNN_BATCH_NORMALIZATION_BACKWARD_EX,
+    CUDNN_BATCH_NORMALIZATION_FORWARD_TRAINING_EX,
+    CUDNN_BATCH_NORMALIZATION_FORWARD_INFERENCE,
+    //todo: backend APIs
+    CUDNN_CONVOLUTION_FORWARD,
+    CUDNN_CONVOLUTION_BACKWARD_FILTER,
+    CUDNN_CONVOLUTION_BACKWARD_DATA,
+
+    /* NCCL API */
+    NCCL_GROUP_START,
+    NCCL_GROUP_END,
+    NCCL_COMM_GET_ASYNC_ERROR, 
+    NCCL_ALL_REDUCE,
+    NCCL_REDUCE,
+    NCCL_REDUCE_SCATTER,
+    NCCL_ALL_GATHER,
+    NCCL_BROADCAST,
+    NCCL_SEND,
+    NCCL_RECV,
+    // NCCL_COMM_FINALIZE
+};
+
+const int DestroyAPIs[] = { // table of API_REQUEST_CODE_SET values whose names are all *_DESTROY* / *_DEREGISTER; NOTE(review): presumably the counterpart of CreateAPIs -- confirm
+    /* CUDA Runtime API */
+    CUDA_STREAM_DESTROY,
+    CUDA_EVENT_DESTROY,
+    // CUDA_FREE,
+
+    /* cuBlas API */
+    CUBLAS_DESTROY_V2,
+
+    /* cuBlasLt API */
+    CUBLASLT_DESTROY,
+    CUBLASLT_MATMULDESC_DESTROY,
+    CUBLASLT_MATRIX_LAYOUT_DESTROY,
+    CUBLASLT_MATMULPREFERENCE_DESTROY,
+
+
+    /* cuDNN API */
+    CUDNN_DESTROY,
+    CUDNN_DESTROY_TENSOR_DESCRIPTOR,
+    CUDNN_DESTROY_TENSOR_TRANSFORM_DESCRIPTOR,
+    CUDNN_DESTROY_FILTER_DESCRIPTOR,
+    // CUDNN_BACKEND_DESTROY_DESCRIPTOR,
+    CUDNN_DESTROY_CONVOLUTION_DESCRIPTOR,
+
+    /* NCCL API */
+    NCCL_COMM_DESTROY,
+    NCCL_COMM_DEREGISTER,
+    // NCCL_MEM_FREE,
+    NCCL_RED_OP_DESTROY
+};
+
+const int CreateAPIs[] = { // table of API_REQUEST_CODE_SET values; NOTE(review): presumably the requests that create a handle/object on the server -- confirm against the code that scans it
+    /* CUDA Runtime API */
+    CUDA_STREAM_CREATE,
+    CUDA_STREAM_CREATE_WITH_FLAGS,
+    CUDA_STREAM_CREATE_WITH_PRIORITY,
+    CUDA_EVENT_CREATE,
+    CUDA_EVENT_CREATE_WITH_FLAGS,   
+
+    /* cuBlas API */
+    CUBLAS_CREATE_V2,
+
+    /* cuBlasLt API */
+    CUBLASLT_CREATE,
+    CUBLASLT_MATMULDESC_CREATE,
+    CUBLASLT_MATRIX_LAYOUT_CREATE,
+    CUBLASLT_MATMULPREFERENCE_CREATE,
+
+    /* cuDNN API */
+    CUDNN_CREATE,
+    CUDNN_CREATE_TENSOR_DESCRIPTOR,
+    CUDNN_CREATE_TENSOR_TRANSFORM_DESCRIPTOR,
+    CUDNN_CREATE_FILTER_DESCRIPTOR,
+    // CUDNN_BACKEND_CREATE_DESCRIPTOR,
+    CUDNN_CREATE_CONVOLUTION_DESCRIPTOR,
+
+    /* NCCL API */
+    NCCL_COMM_INIT_RANK,
+    NCCL_GET_UNIQUE_ID,
+    NCCL_COMM_INIT_ALL,
+    NCCL_COMM_INIT_RANK_CONFIG,
+    NCCL_COMM_SPLIT, 
+    NCCL_COMM_REGISTER,
+    NCCL_RED_OP_CREATE_PRE_MUL_SUM 
+
+};
+
+#endif
\ No newline at end of file
diff --git a/GPU-Virtual-Service/gpu-remoting/include/define.h b/GPU-Virtual-Service/gpu-remoting/include/define.h
new file mode 100644
index 0000000..c65def3
--- /dev/null
+++ b/GPU-Virtual-Service/gpu-remoting/include/define.h
@@ -0,0 +1,552 @@
+#ifndef MY_DEFINE_H
+#define MY_DEFINE_H
+
+#include <sys/time.h>
+#include <sys/resource.h>
+#include <sys/stat.h>
+#include <sys/syscall.h>
+#include <iomanip>
+#include <bits/stdc++.h>
+#include <stdint.h>
+#include <getopt.h>
+#include <unistd.h>
+#include <pthread.h>
+#include <arpa/inet.h>
+#include <execinfo.h>
+#include <Python.h>
+#include <frameobject.h>
+
+#define LOG_INFO    (1 << 0)
+#define LOG_COMM    (1 << 1)
+#define LOG_ERROR   (1 << 2)
+#define LOG_DEBUG   (1 << 3)
+#define LOG_REGS    (1 << 4)
+// One bit per level, parenthesised so each expands as a single operand (~, +, * ... at the use site).
+#define LOG_CURR    (LOG_INFO | LOG_ERROR)
+// #define GV_Monitor
+#define GV_GPUMAP
+// #define GV_Scheduler
+// #define GV_eScheduler
+// #define GV_MSGHANDLER
+
+// #define GV_MEMORY
+// #define GV_MEMORY_PTX
+// #define GV_HANDLE 
+// #define GV_BACKUP
+
+static const uint64_t MB_2_B = 1000000;       // bytes per decimal megabyte (10^6)
+static const uint64_t MiB_2_B = 1024 * 1024;  // bytes per mebibyte (2^20)
+static const uint64_t KB_2_B = 1000;          // bytes per decimal kilobyte (10^3)
+static const uint64_t KiB_2_B = 1024;         // bytes per kibibyte (2^10)
+static const uint64_t SEC_2_US = 1000000;     // microseconds per second
+
+#define PAGE_SIZE 4096
+#define ALIGN_UP(size) (((size) + PAGE_SIZE - 1) & ~(PAGE_SIZE - 1))
+// Branch-prediction hints built on the GCC/Clang __builtin_expect builtin.
+#define likely(x) __builtin_expect(!!(x), 1)
+#define unlikely(x) __builtin_expect(!!(x), 0)
+// Macro arguments are parenthesised so compound arguments (a | b, a + b, ...) group as intended.
+#define GET_BLOCK_ID(x)             ((x) >> BLOCK_SHIFT_BIT)               // address to block id
+#define GET_BLOCK_INTER_OFFSET(x)   ((x) & ((1LL << BLOCK_SHIFT_BIT) - 1)) // address to block internal offset
+// CHECK_HANDLE_PREFIX is non-zero when ANY prefix bit is set; it is not an exact-match test.
+#define GET_HANDLE_ID(x)            ((x) & HANDLE_MASK)                    // get the api handle id
+#define CHECK_HANDLE_PREFIX(x)      ((x) & HANDLE_PREFIX)                  // check the api handle id (prefix)
+
+
+#define CHKERR_ACTION(_cond, _msg, _action) \
+    do { \
+        if (_cond) { \
+            fprintf(stderr, "Failed to %s\n", _msg); \
+            _action; \
+        } \
+    } while (0)
+// CHKERR_ACTION: when _cond is true, print "Failed to <_msg>" to stderr, then run _action.
+// CHKERR_JUMP: the same check with the action fixed to `goto _label`.
+#define CHKERR_JUMP(_cond, _msg, _label) \
+    CHKERR_ACTION(_cond, _msg, goto _label)
+// CHKERR_JUMP_RETVAL: like CHKERR_JUMP, but the message also prints _retval through %d,
+// so _retval must be an int-compatible value.
+#define CHKERR_JUMP_RETVAL(_cond, _msg, _label, _retval) \
+    do { \
+        if (_cond) { \
+            fprintf(stderr, "Failed to %s, return value %d\n", _msg, _retval); \
+            goto _label; \
+        } \
+    } while (0)
+
+#define GENERATE_KEY(clientID, dataType) (((clientID) << 2) | (dataType)) // clientID shifted left by 2, OR-ed with dataType
+#define GET_CLIENT_ID_FROM_KEY(key) ((key) >> 2) // drops the low 2 bits
+#define GET_DATA_TYPE_FROM_KEY(key) ((key) & 0x3) // keeps only the low 2 bits, so dataType must be 0..3 to round-trip
+
+
+namespace tool {
+    /**
+     * @brief Compute the time elapsed between two timestamps.
+     *
+     * @param start_time the earlier timestamp
+     * @param end_time the later timestamp
+     * @return double end_time minus start_time, in seconds
+     */
+    inline double GetTimeDiff(struct timeval start_time, struct timeval end_time) {
+        // Work in microseconds first, then scale the total back down to seconds.
+        const double elapsedUs =
+            static_cast<double>(end_time.tv_sec - start_time.tv_sec) * SEC_2_US +
+            end_time.tv_usec - start_time.tv_usec;
+        return elapsedUs / SEC_2_US;
+    }
+    
+    /**
+     * @brief Clamp a value into the range [lower, upper].
+     *
+     * @param input the value to clamp
+     * @param lower returned when input <= lower (tested first, so it wins if lower >= upper)
+     * @param upper returned when input >= upper
+     * @return uint32_t the clamped value
+     */
+    inline uint32_t CompareLimit(uint32_t input, uint32_t lower, uint32_t upper) {
+        // Guard clause for the lower bound.
+        if (input <= lower) {
+            return lower;
+        }
+        // Otherwise cap at upper; anything strictly in between passes through.
+        const uint32_t capped = (input >= upper) ? upper : input;
+        return capped;
+    }
+    
+    /**
+     * @brief Integer division rounded up.
+     *
+     * @param a the dividend
+     * @param b the divisor (not checked against zero)
+     * @return uint32_t a / b, plus one when the division leaves a remainder
+     */
+    inline uint32_t DivCeil(uint32_t a, uint32_t b) {
+        // Both operations below divide by b without any zero check.
+        const uint32_t quotient = a / b;
+        const bool hasRemainder = (a % b) != 0;
+        // Bump the truncated quotient by one when something is left over.
+        const uint32_t bump = hasRemainder ? 1u : 0u;
+        return quotient + bump;
+    }
+    
+    /**
+     * @brief Print a byte buffer to stdout as one line of lowercase hex.
+     *
+     * @param buffer the pointer to the buffer
+     * @param buffer_size the number of bytes to print
+     */
+    inline void PrintBinaryArray(const uint8_t* buffer, size_t buffer_size) {
+        // Two hex digits per byte, no separators, then a single newline.
+        for (const uint8_t* cur = buffer; cur != buffer + buffer_size; ++cur) {
+            fprintf(stdout, "%02x", *cur);
+        }
+        fprintf(stdout, "\n");
+    }
+
+    inline void HexDump(const uint8_t* data, size_t size){
+        // One output row per 16 input bytes: offset, hex pane, " | ", text pane.
+        for (size_t rowStart = 0; rowStart < size; rowStart += 16) {
+            printf("%#05zx: ", rowStart);
+            // Hex pane: cells past the end of the data are padded with two spaces,
+            // and an extra space follows every group of four cells.
+            for (int col = 0; col < 16; col++) {
+                const size_t idx = rowStart + col;
+                if (idx >= size) {
+                    printf("  ");
+                } else {
+                    printf("%02x", data[idx]);
+                }
+                if ((col & 3) == 3) {
+                    printf(" ");
+                }
+            }
+            printf(" | ");
+            // Text pane: bytes outside 0x20..0x7e are shown as '.'.
+            for (int col = 0; col < 16; col++) {
+                const size_t idx = rowStart + col;
+                if (idx >= size) {
+                    printf(" ");
+                    continue;
+                }
+                printf("%c", (data[idx] >= 0x20 && data[idx] <= 0x7e) ? data[idx] : '.');
+            }
+            printf("\n");
+        }
+    }
+
+    inline void PrintStackTrace(const std::string& filename, bool append = false) {
+        // Writes the native call stack of the calling thread to `filename`
+        // (appending when `append` is true, truncating otherwise) and echoes
+        // every frame to stdout as well.
+        const int kMaxFrames = 128; // deeper stacks are cut off at this many frames
+        void* frames[kMaxFrames];
+        const int depth = backtrace(frames, kMaxFrames);
+
+        // backtrace_symbols() hands back a single malloc'ed array that we must free.
+        char** names = backtrace_symbols(frames, depth);
+        if (names == nullptr) {
+            fprintf(stderr, "Failed to get the backtrace symbols\n");
+            return ;
+        }
+
+        const std::ios::openmode mode = append ? std::ios::app : std::ios::out;
+        std::ofstream outFile;
+        outFile.open(filename, mode);
+        if (!outFile.is_open()) {
+            std::cerr << "Failed to open file " << filename << std::endl;
+            free(names);
+            return;
+        }
+
+        outFile << "Call stack:" << std::endl;
+        for (int idx = 0; idx < depth; ++idx) {
+            const char* line = names[idx];
+            outFile << line << std::endl;
+            std::cout << line << std::endl;
+        }
+        outFile << std::endl;
+
+        outFile.close();
+        free(names);
+    }
+
+    inline bool CheckStackTrace(const std::string& target) {
+        // True when any symbol string of the current native call stack contains `target`.
+        bool found = false;
+        const int maxFrames = 128;
+        void* buffer[maxFrames];
+        int frameCount = backtrace(buffer, maxFrames);
+        char** symbols = backtrace_symbols(buffer, frameCount);
+        if (symbols == nullptr) {
+            return found;
+        }
+        const char* target_cstr = target.c_str();
+        // symbols[] has exactly frameCount entries: start at the last valid index, not one past it.
+        for (int i = frameCount - 1; i >= 0; i--) {
+            if (strstr(symbols[i], target_cstr) != NULL) {
+                found = true;
+                break;
+            }
+        }
+        free(symbols);
+        return found;
+    }
+
+    inline void PrintPyStackTrace(const std::string& filename, bool append = false) {
+        // Dumps the Python call stack of the current thread to `filename` and echoes it to stdout.
+        // NOTE(review): tstate->frame / frame->f_code are direct struct accesses -- confirm the CPython version.
+        std::ofstream outFile;
+        if (append) {
+            outFile.open(filename, std::ios::app);
+        }
+        else {
+            outFile.open(filename);
+        }
+        if (!outFile.is_open()) {
+            std::cerr << "Failed to open file " << filename << std::endl;
+            return;
+        }
+        if (!Py_IsInitialized()) {
+            return;
+        }
+        PyGILState_STATE gstate = PyGILState_Ensure();
+        PyThreadState *tstate = PyThreadState_Get();
+        if (!tstate) {
+            outFile << "Failed to get the thread state" << std::endl;
+            PyGILState_Release(gstate);
+            return;
+        }
+        PyFrameObject *frame = tstate->frame;
+        if (!frame) {
+            outFile << "Failed to get the frame" << std::endl;
+            PyGILState_Release(gstate);
+            return;
+        }
+        outFile << "Python call stack:" << std::endl;
+        while (frame) {
+            PyCodeObject *code = (PyCodeObject *)frame->f_code;
+            const char *filename_str = PyUnicode_AsUTF8(code->co_filename);
+            const char *funcname = PyUnicode_AsUTF8(code->co_name);
+            if (!filename_str || !funcname) {
+                // NULL means an exception is now set; streaming a null char* is undefined behaviour.
+                PyErr_Clear();
+                if (!filename_str) filename_str = "<unknown>";
+                if (!funcname) funcname = "<unknown>";
+            }
+            int lineno = PyFrame_GetLineNumber(frame);
+            outFile << "  File \"" << filename_str << "\", line " << lineno << ", in " << funcname << std::endl;
+            std::cout << "  File \"" << filename_str << "\", line " << lineno << ", in " << funcname << std::endl;
+            frame = frame->f_back;
+        }
+        outFile << std::endl;
+        outFile.close();
+        PyGILState_Release(gstate);
+    }
+
+    inline bool CheckPyStackTrace(const std::string& target) {
+        // True when the name of any function on the current Python call stack contains `target`.
+        if (!Py_IsInitialized()) {
+            return false;
+        }
+        PyGILState_STATE gstate = PyGILState_Ensure();
+        PyThreadState *tstate = PyThreadState_Get();
+        if (!tstate) {
+            PyGILState_Release(gstate);
+            return false;
+        }
+
+        PyFrameObject *frame = tstate->frame;
+        if (!frame) {
+            PyGILState_Release(gstate);
+            return false;
+        }
+
+        const char* target_cstr = target.c_str();
+        while (frame) {
+            PyCodeObject *code = (PyCodeObject *)frame->f_code;
+            const char *funcname = PyUnicode_AsUTF8(code->co_name);
+            if (funcname == NULL) {
+                PyErr_Clear(); // conversion failed and set an exception; skip this frame instead of passing NULL to strstr
+            } else if (strstr(funcname, target_cstr) != NULL) {
+                PyGILState_Release(gstate);
+                return true;
+            }
+            frame = frame->f_back;
+        }
+        PyGILState_Release(gstate);
+        return false;
+    }
+    
+    /**
+     * @brief a simple logger: prints "YYYY-MM-DD HH:MM:SS <logger>: message" to stderr
+     * @param loglevel LOG_* bit(s); the message is emitted only if LOG_CURR has one of them set
+     * @param logger the logger name
+     * @param fmt printf-style format of the message, followed by its arguments
+     */
+    inline void Logging(int loglevel, const char* logger, const char* fmt, ...) {
+        if (LOG_CURR & loglevel) {
+            char buf[BUFSIZ] = {'\0'}; // longer messages are truncated by vsnprintf
+            va_list ap;
+            va_start(ap, fmt);
+            vsnprintf(buf, BUFSIZ, fmt, ap);
+            va_end(ap);
+            time_t t = std::time(nullptr);
+            // std::localtime() returns a shared static struct (a race across threads); use a local one.
+            struct tm tmBuf = {};
+            localtime_r(&t, &tmBuf);
+            std::stringstream output;
+            output << std::put_time(&tmBuf, "%F %T ") << "<" << logger << ">: " << buf;
+            std::cerr << output.str();
+        }
+    }
+
+    inline void Logging(const char* logger, const char* fmt, ...) {
+        // Level-less overload: treated as LOG_DEBUG, so it prints only when LOG_CURR includes LOG_DEBUG.
+        if (LOG_CURR & LOG_DEBUG) {
+            char buf[BUFSIZ] = {'\0'}; // longer messages are truncated by vsnprintf
+            va_list ap;
+            va_start(ap, fmt);
+            vsnprintf(buf, BUFSIZ, fmt, ap);
+            va_end(ap);
+            time_t t = std::time(nullptr);
+            struct tm tmBuf = {}; // local buffer: std::localtime() shares one static struct across threads
+            localtime_r(&t, &tmBuf);
+            std::stringstream output;
+            output << std::put_time(&tmBuf, "%F %T ") << "<" << logger << ">: " << buf;
+            std::cerr << output.str();
+        }
+    }
+
+    inline uint64_t ProcessMemUsage() {
+        // Returns the resident set size of the calling process in KiB, read from
+        // /proc/self/stat, or 0 when the file cannot be read or parsed.
+        std::ifstream stat_stream("/proc/self/stat", std::ios_base::in);
+        std::string line;
+        if (!std::getline(stat_stream, line)) {
+            return 0;
+        }
+        stat_stream.close();
+
+        // Field 2 (comm) is the executable name in parentheses and may itself
+        // contain spaces or ')'. Whitespace-splitting the whole line would then
+        // shift every later field, so resume parsing after the LAST ')'.
+        const std::string::size_type commEnd = line.rfind(')');
+        if (commEnd == std::string::npos) {
+            return 0;
+        }
+        std::istringstream fields(line.substr(commEnd + 1));
+
+        // Skip fields 3 (state) through 22 (starttime); we only need the next two.
+        std::string skipped;
+        for (int field = 3; field <= 22; ++field) {
+            fields >> skipped;
+        }
+
+        // Zero-initialised so a failed extraction can never leak stack garbage.
+        unsigned long vsize = 0; // field 23: virtual memory size in bytes (read, not returned)
+        long rss = 0;            // field 24: resident set size in pages
+        fields >> vsize >> rss;
+        if (!fields || rss < 0) {
+            return 0;
+        }
+
+        long page_size_kb = sysconf(_SC_PAGE_SIZE) / 1024; // page size in KiB
+        return static_cast<uint64_t>(rss) * page_size_kb;
+    }
+
+    // Peak resident set size of this process (ru_maxrss from getrusage); 0 if the call fails.
+    inline uint64_t GetMaxMemoryUsage() {
+        struct rusage currentUsage = {};  // zeroed so a failed call cannot expose stack garbage
+        return getrusage(RUSAGE_SELF, &currentUsage) == 0 ? static_cast<uint64_t>(currentUsage.ru_maxrss) : 0;
+    }
+
+    /**
+     * @brief Builds a random version-4 UUID in 8-4-4-4-12 lowercase hexadecimal form.
+     * @return a 36-character string; the generator state is static and not synchronised
+     */
+    inline std::string GenerateUUID() {
+        static std::random_device rd;
+        static std::mt19937_64 gen(rd());
+        static std::uniform_int_distribution<uint64_t> dis;
+
+        // Draw 16 random bytes. They are copied with memcpy because writing through a
+        // uint64_t* that aliases the byte array breaks strict aliasing and alignment rules.
+        const uint64_t words[2] = {dis(gen), dis(gen)};
+        std::array<uint8_t, 16> data;
+        memcpy(data.data(), words, sizeof(words));
+
+        // Stamp the version (4) and the variant bits.
+        data[6] = (data[6] & 0x0F) | 0x40;
+        data[8] = (data[8] & 0x3F) | 0x80;
+
+        // Hex-encode, inserting '-' before bytes 4, 6, 8 and 10.
+        static const char* hex_chars = "0123456789abcdef";
+        std::string uuid;
+        for (int i = 0; i < 16; ++i) {
+            if (i == 4 || i == 6 || i == 8 || i == 10) {
+                uuid.push_back('-');
+            }
+            uuid.push_back(hex_chars[data[i] >> 4]);
+            uuid.push_back(hex_chars[data[i] & 0x0F]);
+        }
+        return uuid;
+    }
+    // Reports whether filePath names an existing regular file.
+    inline bool FileExist(std::string filePath) {
+        return std::filesystem::is_regular_file(std::filesystem::path(filePath));
+    }
+    // Derives a 64-bit seed by mixing clock(), the gettimeofday time and the process id.
+    inline uint64_t GetStrongSeed() {
+        uint64_t a = clock();
+        struct timeval currentTime;
+        gettimeofday(&currentTime, NULL);
+        uint64_t b = currentTime.tv_sec * SEC_2_US + currentTime.tv_usec; // presumably microseconds; confirm SEC_2_US
+        uint64_t c = getpid();
+
+        // Robert Jenkins' 96 bit Mix Function
+        a = a - b;  a = a - c;  a = a ^ (c >> 13);
+        b = b - c;  b = b - a;  b = b ^ (a << 8);
+        c = c - a;  c = c - b;  c = c ^ (b >> 13);
+        a = a - b;  a = a - c;  a = a ^ (c >> 12);
+        b = b - c;  b = b - a;  b = b ^ (a << 16);
+        c = c - a;  c = c - b;  c = c ^ (b >> 5);
+        a = a - b;  a = a - c;  a = a ^ (c >> 3);
+        b = b - c;  b = b - a;  b = b ^ (a << 10);
+        c = c - a;  c = c - b;  c = c ^ (b >> 15);
+
+        return c; // only the last word updated by the mix is returned
+    }
+
+    // Writes the numeric IPv4/IPv6 address held in sock_addr into ip_str (at most max_size bytes).
+    // ip_str becomes "" for a NULL sock_addr, an unsupported family or a failed conversion.
+    inline void GetIpStrFromSockaddr(const struct sockaddr_storage *sock_addr,
+                                    char *ip_str, size_t max_size) {
+        if (!ip_str || max_size == 0) {
+            return;  // nowhere to write, not even the terminator
+        }
+        const char *text = NULL;  // stays NULL unless inet_ntop succeeds
+        if (sock_addr && sock_addr->ss_family == AF_INET) {
+            const struct sockaddr_in *addr_in = reinterpret_cast<const struct sockaddr_in *>(sock_addr);
+            text = inet_ntop(AF_INET, &(addr_in->sin_addr), ip_str, max_size);
+        } else if (sock_addr && sock_addr->ss_family == AF_INET6) {
+            const struct sockaddr_in6 *addr_in6 = reinterpret_cast<const struct sockaddr_in6 *>(sock_addr);
+            text = inet_ntop(AF_INET6, &(addr_in6->sin6_addr), ip_str, max_size);
+        }
+        if (!text) { ip_str[0] = '\0'; }
+    }
+
+    // Writes the port of sock_addr (host byte order, decimal) into port_str, truncated to max_size bytes.
+    // port_str becomes "" when sock_addr is NULL or its family is neither AF_INET nor AF_INET6.
+    inline void GetPortStrFromSockaddr(const struct sockaddr_storage *sock_addr,
+                                    char *port_str, size_t max_size) {
+        if (!port_str || max_size == 0) {
+            return;  // nowhere to write, not even the terminator
+        }
+        if (sock_addr && sock_addr->ss_family == AF_INET) {
+            const struct sockaddr_in *addr_in = reinterpret_cast<const struct sockaddr_in *>(sock_addr);
+            snprintf(port_str, max_size, "%d", ntohs(addr_in->sin_port));
+        } else if (sock_addr && sock_addr->ss_family == AF_INET6) {
+            const struct sockaddr_in6 *addr_in6 = reinterpret_cast<const struct sockaddr_in6 *>(sock_addr);
+            snprintf(port_str, max_size, "%d", ntohs(addr_in6->sin6_port));
+        } else {
+            port_str[0] = '\0';  // NULL input or unsupported address family
+        }
+    }
+
+    // Fills saddr (zeroed first) with address_str and server_port for ai_family AF_INET or AF_INET6.
+    // A NULL address_str selects the wildcard address. The function has no error return, so an
+    // address string that inet_pton rejects is reported on stderr instead of being ignored silently.
+    inline void SetSockAddr(const char *address_str, uint16_t server_port,
+                    struct sockaddr_storage *saddr, sa_family_t ai_family){
+        struct sockaddr_in *sa_in;
+        struct sockaddr_in6 *sa_in6;
+        memset(saddr, 0, sizeof(*saddr));
+        switch (ai_family) {
+        case AF_INET:
+            sa_in = (struct sockaddr_in*)saddr;
+            if (address_str == NULL) {
+                sa_in->sin_addr.s_addr = htonl(INADDR_ANY);
+            } else if (inet_pton(AF_INET, address_str, &sa_in->sin_addr) != 1) {
+                fprintf(stderr, "Invalid IPv4 address: %s\n", address_str);
+            }
+            sa_in->sin_family = AF_INET;
+            sa_in->sin_port   = htons(server_port);
+            break;
+        case AF_INET6:
+            sa_in6 = (struct sockaddr_in6*)saddr;
+            if (address_str == NULL) {
+                sa_in6->sin6_addr = in6addr_any;
+            } else if (inet_pton(AF_INET6, address_str, &sa_in6->sin6_addr) != 1) {
+                fprintf(stderr, "Invalid IPv6 address: %s\n", address_str);
+            }
+            sa_in6->sin6_family = AF_INET6;
+            sa_in6->sin6_port   = htons(server_port);
+            break;
+        default:
+            fprintf(stderr, "Invalid address family");
+            break;
+        }
+    }
+
+    inline bool ReadSocketMessage(int sock, uint8_t* buffer, size_t buffer_size) {
+        if (sock < 0 || !buffer) {
+            tool::Logging(LOG_ERROR, "ReadSocketMessage", "Invalid socket or buffer\n");
+            return false;
+        }
+        size_t bytesRead = 0;
+        while (bytesRead < buffer_size) {
+            int ret = read(sock, buffer + bytesRead, buffer_size - bytesRead);
+            if (ret < 0) {
+                tool::Logging(LOG_ERROR, "ReadSocketMessage", "Failed to read from socket\n");
+                return false;
+            } else if (ret == 0) {
+                tool::Logging(LOG_ERROR, "ReadSocketMessage", "Socket closed\n");
+                return false;
+            }
+            bytesRead += ret;
+        }
+        return true;
+    }
+
+} // namespace tool
+#endif 
\ No newline at end of file
diff --git a/GPU-Virtual-Service/gpu-remoting/src/common/configure.cc b/GPU-Virtual-Service/gpu-remoting/src/common/configure.cc
new file mode 100644
index 0000000..8318a6e
--- /dev/null
+++ b/GPU-Virtual-Service/gpu-remoting/src/common/configure.cc
@@ -0,0 +1,43 @@
+#include "../../include/configure.h"
+
+// Nothing to release by hand: the destructor body is empty.
+Configure::~Configure() = default;
+// Builds a configuration from the JSON file at path; isClient selects whether client settings load too.
+Configure::Configure(std::string path, bool isClient) {
+    this->isClient_ = isClient;  // set first: ReadConf branches on it
+    ReadConf(path);
+}
+
+// Loads the server, dispatcher and monitor endpoints from the JSON file at path. A client instance
+// also loads ClientConfig; FLEXGV_* environment variables replace JSON values or built-in defaults.
+void Configure::ReadConf(std::string path) {
+    using namespace boost;
+    using namespace boost::property_tree;
+
+    ptree root;
+    read_json<ptree>(path, root);
+
+    serverIp_ = root.get<string>("ServerConfig.serverIp_");
+    serverPort_ = root.get<uint16_t>("ServerConfig.serverPort_");
+    dpcIp_ = root.get<string>("DispatcherConfig.dpcIp_");
+    dpcPort_ = root.get<uint16_t>("DispatcherConfig.dpcPort_");
+    monIp_ = root.get<string>("MonitorConfig.monitorIp_");
+    monPort_ = root.get<uint16_t>("MonitorConfig.monitorPort_");
+    if (!isClient_) {
+        return;
+    }
+
+    const char* idEnv = std::getenv("FLEXGV_CLIENT_ID");
+    const char* priorityEnv = std::getenv("FLEXGV_PRIORITY");
+    const char* gpuCountEnv = std::getenv("FLEXGV_REQ_NUM");
+    const char* modelEnv = std::getenv("FLEXGV_MODEL");
+    const char* batchEnv = std::getenv("FLEXGV_BATCH_SIZE");
+    clientID_ = idEnv ? std::stoull(idEnv) : root.get<uint64_t>("ClientConfig.clientID_");
+    reqGPUnum_ = root.get<size_t>("ClientConfig.requestGPUnum_");
+    priority_ = priorityEnv ? std::stoull(priorityEnv) : root.get<size_t>("ClientConfig.priority_");
+    proxyIp_ = root.get<string>("ClientConfig.proxyIp_");
+    proxyPort_ = root.get<uint16_t>("ClientConfig.proxyPort_");
+    DDPreqGPUnum_ = gpuCountEnv ? std::stoull(gpuCountEnv) : 1;
+    model_ = modelEnv ? modelEnv : "resnet18";
+    batchSize_ = batchEnv ? std::stoull(batchEnv) : 32;
+}
diff --git a/GPU-Virtual-Service/gpu-remoting/src/common/elfHandle.cc b/GPU-Virtual-Service/gpu-remoting/src/common/elfHandle.cc
new file mode 100644
index 0000000..ca06d2b
--- /dev/null
+++ b/GPU-Virtual-Service/gpu-remoting/src/common/elfHandle.cc
@@ -0,0 +1,732 @@
+#include "../../include/hook/elfHandle.h"
+
+static const char* myName = "elfHandle";
+// Selects libelf API version EV_CURRENT. Returns 0 on success, -1 after logging elf_errmsg on failure.
+int InitElf2(void){
+    if (elf_version(EV_CURRENT) != EV_NONE) {
+        return 0;
+    }
+    tool::Logging(LOG_ERROR, myName, "ELF library initialization failed: %s\n", elf_errmsg(-1));
+    return -1;
+}
+// Renders the 64-bit / debug / Linux / compress bits of a fatbin flag word as yes-no text.
+// The string is allocated by asprintf (the caller frees it); asprintf's return value is passed on.
+static int GetStrByElfFlag(char** str, uint64_t flag)
+{
+    auto yesNo = [flag](uint64_t bit) { return (flag & bit) ? "yes" : "no"; };
+    return asprintf(str, "64Bit: %s, Debug: %s, Linux: %s, Compress %s",
+        yesNo(FATBIN_FLAG_64BIT), yesNo(FATBIN_FLAG_DEBUG),
+        yesNo(FATBIN_FLAG_LINUX), yesNo(FATBIN_FLAG_COMPRESS));
+}
+
+// Logs every field of a fatbin text header at LOG_REGS level.
+static void PrintFatTextHeader(FatTextHeader_t *th)
+{
+    char* flagstr = NULL;
+    // asprintf leaves its output pointer unspecified on failure, so it must not be printed or freed then.
+    if (GetStrByElfFlag(&flagstr, th->flags) < 0) {
+        flagstr = NULL;
+    }
+    tool::Logging(LOG_REGS, myName, "text_header: fatbin_kind: %#x, header_size %#x, size %#zx, compressed_size %#x,\
+ minor %#x, major %#x, arch %d, decompressed_size %#zx\n\tflags: %s\n",
+        th->kind,
+        th->header_size,
+        th->size,
+        th->compressed_size,
+        th->minor,
+        th->major,
+        th->arch,
+        th->decompressed_size,
+        flagstr ? flagstr : "(unavailable)");
+    tool::Logging(LOG_REGS, myName, "\tunknown fields: unknown1: %#x, unknown2: %#x, zeros: %#zx\n",
+        th->unknown1, th->unknown2, th->zero);
+    free(flagstr);
+}
+
+/** Check the header of a fatbin
+ * Performs some integrity checks and returns the elf header
+ * @param fatbin_data Pointer to the fatbin data
+ * @param fatbin_size Number of readable bytes at fatbin_data
+ * @param elf_header Set to point at the validated header inside fatbin_data
+ * @return 0 on success, 1 when an argument is NULL, the buffer is too small or a check fails
+*/
+static int GetFatElfHeader(const uint8_t* fatbin_data, size_t fatbin_size, FatElfHeader_t **elf_header)
+{
+    FatElfHeader_t *eh = NULL;
+
+    if (fatbin_data == NULL || elf_header == NULL) {
+        tool::Logging(LOG_ERROR, myName, "fatbin_data is NULL\n");
+        return 1;
+    }
+
+    // Never read header fields from a buffer that cannot hold a complete header.
+    if (fatbin_size < sizeof(FatElfHeader_t)) {
+        tool::Logging(LOG_ERROR, myName, "fatbin_size is too small\n");
+        return 1;
+    }
+
+    eh = (FatElfHeader_t*) fatbin_data;
+    if (eh->magic != FATBIN_TEXT_MAGIC) {
+        tool::Logging(LOG_ERROR, myName, "Invalid magic  number: expected %#x but got %#x\n", FATBIN_TEXT_MAGIC, eh->magic);
+        return 1;
+    }
+
+    if (eh->version != 1 || eh->header_size != sizeof(FatElfHeader_t)) {
+        tool::Logging(LOG_ERROR, myName, "fatbin text version is wrong or header size is inconsistent.\
+            This is a sanity check to avoid reading a new fatbinary format\n");
+        return 1;
+    }
+    *elf_header = eh;
+    return 0;
+}
+
+/** Locate the text header at the start of a fatbin section
+ * Logs the embedded object name, if there is one, and returns the header.
+ * No size validation is performed.
+ * @param fatbin_data Pointer to the first byte of the text header
+ * @param fatbin_size Accepted but not consulted
+ * @param text_header Set to fatbin_data reinterpreted as a text header
+ * @return 0 on success, 1 when fatbin_data or text_header is NULL
+*/
+static int GetFatTextHeader(const uint8_t* fatbin_data, size_t fatbin_size, FatTextHeader_t **text_header)
+{
+    if (fatbin_data == NULL || text_header == NULL) {
+        tool::Logging(LOG_ERROR, myName, "fatbin_data is NULL\n");
+        return 1;
+    }
+
+    // The header is used in place: no copy is made and its fields are trusted as they are.
+    FatTextHeader_t *header = (FatTextHeader_t*)fatbin_data;
+    const char *bytes = (const char*)header;
+
+    // obj_name_offset is relative to the start of the header; zero means "no object name".
+    if (header->obj_name_offset != 0) {
+        // The byte right after the name must be the terminator before the name is printed with %s.
+        const bool terminated = bytes[header->obj_name_offset + header->obj_name_len] == '\0';
+        if (terminated) {
+            const char *obj_name = bytes + header->obj_name_offset;
+            tool::Logging(LOG_REGS, myName, "Fatbin object name: %s (len:%#x)\n", obj_name, header->obj_name_len);
+        } else {
+            tool::Logging(LOG_REGS, myName, "Fatbin object name is not null terminated\n");
+        }
+    }
+
+    *text_header = header;
+    return 0;
+}
+
+/** Decompresses a fatbin file
+ * Token byte: high nibble = literal run length, low nibble + 4 = back-reference length (0xf extends).
+ * @param input Pointer compressed input data
+ * @param input_size Size of compressed data
+ * @param output preallocated memory where decompressed output should be stored
+ * @param output_size size of output buffer. Should be equal to the size of the decompressed data
+ * @return bytes written to output, or 0 when the stream would read or write out of bounds
+ */
+static size_t DecompressFatbin(const uint8_t* input, size_t input_size, uint8_t* output, size_t output_size)
+{
+    size_t ipos = 0, opos = 0;
+    uint64_t next_nclen;  // length of next non-compressed segment
+    uint64_t next_clen;   // length of next compressed segment
+    uint64_t back_offset; // negative offset where redundant data is located, relative to current opos
+
+    while (ipos < input_size) {
+        next_nclen = (input[ipos] & 0xf0) >> 4;
+        next_clen = 4 + (input[ipos] & 0xf);
+        if (next_nclen == 0xf) {
+            do {
+                if (ipos + 1 >= input_size) { goto malformed; }
+                next_nclen += input[++ipos];
+            } while (input[ipos] == 0xff);
+        }
+        ++ipos;  // now at the first literal byte
+        if (next_nclen > input_size - ipos || next_nclen > output_size - opos) { goto malformed; }
+        memcpy(output + opos, input + ipos, next_nclen);
+#ifdef FATBIN_DECOMPRESS_DEBUG
+        printf("%#04zx nocompress (len:%#llx):\n", opos, (unsigned long long)next_nclen);
+        tool::HexDump(output + opos, next_nclen);
+#endif
+        ipos += next_nclen;
+        opos += next_nclen;
+        if (ipos >= input_size || opos >= output_size) { break; }
+        if (input_size - ipos < 2) { goto malformed; }
+        back_offset = input[ipos] + (input[ipos + 1] << 8);
+        ipos += 2;
+        if (next_clen == 0xf+4) {
+            do {
+                if (ipos >= input_size) { goto malformed; }
+                next_clen += input[ipos++];
+            } while (input[ipos - 1] == 0xff);
+        }
+#ifdef FATBIN_DECOMPRESS_DEBUG
+        printf("%#04zx compress (decompressed len: %#llx, back_offset %#llx):\n", opos, (unsigned long long)next_clen, (unsigned long long)back_offset);
+#endif
+        // A back-reference must start inside the bytes already produced and must fit the output.
+        if (back_offset == 0 || back_offset > opos || next_clen > output_size - opos) { goto malformed; }
+        if (next_clen <= back_offset) {
+            memcpy(output + opos, output + opos - back_offset, next_clen);
+        } else {
+            // Source and destination overlap: copy forwards byte by byte so the pattern repeats.
+            for (size_t i = 0; i < next_clen; i++) {
+                output[opos + i] = output[opos + i - back_offset];
+            }
+        }
+#ifdef FATBIN_DECOMPRESS_DEBUG
+        tool::HexDump(output + opos, next_clen);
+#endif
+        opos += next_clen;
+    }
+    tool::Logging(LOG_REGS, myName, "ipos: %#zx, opos: %#zx, ilen: %#zx, olen: %#zx\n", ipos, opos, input_size, output_size);
+    return opos;
+malformed:
+    tool::Logging(LOG_ERROR, myName, "compressed fatbin data is truncated or malformed\n");
+    return 0;
+}
+// Decompresses one compressed text section into a newly malloc'ed buffer that is zero-padded
+// to a multiple of 8 bytes. Returns the number of input bytes consumed (compressed payload plus
+// its alignment padding) or -1 on failure; on success the caller owns *output.
+static ssize_t DecompressSingleSection(const uint8_t *input, uint8_t **output, size_t *output_size,
+                                         FatElfHeader_t *eh, FatTextHeader_t *th)
+{
+    size_t padding;
+    size_t input_read = 0;
+    size_t output_written = 0;
+    size_t decompress_ret = 0;
+    const uint8_t zeroes[8] = {0};
+
+    if (input == NULL || output == NULL || output_size == NULL || eh == NULL || th == NULL) {
+        tool::Logging(LOG_ERROR, myName, "invalid parameters\n");
+        return -1;  // callers test for "< 0"; a positive value would be taken as bytes consumed
+    }
+
+    // add max padding of 7 bytes
+    if ((*output = (uint8_t*)malloc(th->decompressed_size + 7)) == NULL) {
+        tool::Logging(LOG_ERROR, myName, "Error allocating memory of size %#zx for output buffer: %s\n", 
+                th->decompressed_size, strerror(errno));
+        goto error;
+    }
+    PrintFatTextHeader(th);
+
+    if ((decompress_ret = DecompressFatbin(input, th->compressed_size, *output, th->decompressed_size)) != th->decompressed_size) {
+        tool::Logging(LOG_ERROR, myName, "Decompression failed: decompressed size is %#zx, but header says %#zx\n", 
+                decompress_ret, th->decompressed_size);
+        tool::Logging(LOG_ERROR, myName, "input pos: %#zx, output pos: %#zx\n", (size_t)(input - (const uint8_t*)eh), (size_t)(uintptr_t)*output);
+        tool::HexDump(input, 0x160);
+        if (decompress_ret >= 0x60)
+            tool::HexDump((*output) + decompress_ret - 0x60, 0x60);
+        goto error;
+    }
+    input_read += th->compressed_size;
+    output_written += th->decompressed_size;
+
+    padding = ((8 - (size_t)(input + input_read)) % 8);
+    if (memcmp(input + input_read, zeroes, padding) != 0) {
+        tool::Logging(LOG_ERROR, myName, "expected %#zx zero bytes, got:\n", padding);
+        tool::HexDump(input + input_read, 0x60);
+        goto error;
+    }
+    input_read += padding;
+
+    padding = ((8 - (size_t)th->decompressed_size) % 8);
+    // The 7 spare bytes allocated above always hold the padding; it goes after the data, not over it.
+    memset(*output + output_written, 0, padding);
+    output_written += padding;
+
+    *output_size = output_written;
+    return input_read;
+ error:
+    free(*output);
+    *output = NULL;
+    return -1;
+}
+
+// Walks the text sections of a fatbin and hands every device-code section, decompressed first
+// when flagged as compressed, to GetParameterInfo together with kernel_list.
+// Returns 0 on success and -1 on failure. *fatbin_mem and *fatbin_size are outputs (the fatbin
+// text pointer and header_size + size from its header) and are written only on success.
+int GetFatbinInfo(FatHeader_t *fatbin, std::vector<KernelInfo_t*> *kernel_list, uint8_t** fatbin_mem, size_t* fatbin_size) {
+    FatElfHeader_t* eh;
+    FatTextHeader_t* th;
+    const uint8_t *input_pos = NULL;
+    const uint8_t *fatbin_end = NULL;
+    uint8_t *text_data = NULL;
+    size_t text_data_size = 0;
+    size_t fatbin_total_size = 0;
+    int ret = -1;
+    if (fatbin == NULL || fatbin_mem == NULL || fatbin_size == NULL) {
+        tool::Logging(LOG_ERROR, myName, "at least one parameter is NULL\n");
+        goto error;
+    }
+
+    input_pos = (const uint8_t*)fatbin->text;
+    if (fatbin->magic != FATBIN_STRUCT_MAGIC) {
+        tool::Logging(LOG_ERROR, myName, "fatbin struct magic number is wrong. Got %llx, expected %llx.\n", fatbin->magic, FATBIN_STRUCT_MAGIC);
+        goto error;
+    }
+    tool::Logging(LOG_REGS, myName, "Fatbin: magic: %x, version: %x, text: %lx, data: %lx, ptr: %lx, ptr2: %lx, zero: %lx\n",
+           fatbin->magic, fatbin->version, fatbin->text, fatbin->data, fatbin->unknown, fatbin->text2, fatbin->zero);
+
+    if (GetFatElfHeader((uint8_t*)fatbin->text, sizeof(FatElfHeader_t), &eh) != 0) {
+        tool::Logging(LOG_ERROR, myName, "Something went wrong while checking the elf header.\n");
+        goto error;
+    }
+
+    input_pos += eh->header_size;
+    fatbin_total_size = eh->header_size + eh->size;
+    fatbin_end = (const uint8_t*)eh + fatbin_total_size;
+    do {
+        // Offer the bytes that remain in the fatbin. *fatbin_size is an output that holds no
+        // meaningful value yet, so it must not be read here.
+        if (GetFatTextHeader(input_pos, (size_t)(fatbin_end - input_pos), &th) != 0) {
+            tool::Logging(LOG_ERROR, myName, "Something went wrong while checking the text header.\n");
+            goto error;
+        }
+        input_pos += th->header_size;
+        if (th->kind != 2) { // section does not contain device code (but e.g. PTX)
+            input_pos += th->size;
+            continue;
+        }
+        if (th->flags & FATBIN_FLAG_DEBUG) {
+            tool::Logging(LOG_REGS, myName, "fatbin contains debug information.\n");
+        }
+
+        if (th->flags & FATBIN_FLAG_COMPRESS) {
+            ssize_t input_read;
+
+            tool::Logging(LOG_REGS, myName, "fatbin contains compressed device code. Decompressing...\n");
+            if ((input_read = DecompressSingleSection(input_pos, &text_data, &text_data_size, eh, th)) < 0) {
+                tool::Logging(LOG_ERROR, myName, "Something went wrong while decompressing text section.\n");
+                goto error;
+            }
+            input_pos += input_read;
+        } else {
+            text_data = (uint8_t*)input_pos;
+            text_data_size = th->size;
+            input_pos += th->size;
+        }
+        if (GetParameterInfo(kernel_list, text_data , text_data_size) != 0) {
+            tool::Logging(LOG_ERROR, myName, "error getting parameter info\n");
+            if (th->flags & FATBIN_FLAG_COMPRESS) {
+                free(text_data);  // the decompressed copy is owned here: do not leak it on failure
+            }
+            goto error;
+        }
+        if (th->flags & FATBIN_FLAG_COMPRESS) {
+            free(text_data);
+        }
+    } while (input_pos < fatbin_end);
+
+    *fatbin_mem = (uint8_t*)fatbin->text;
+    *fatbin_size = fatbin_total_size;
+    ret = 0;
+ error:
+    return ret;
+}
+// Finds the section called `name` in `elf` and stores it in *section.
+// Returns 0 when found; -1 on bad arguments, on a libelf failure or when no section matches.
+static int GetSectionByName(Elf *elf, const char *name, Elf_Scn **section)
+{
+    if (elf == NULL || name == NULL || section == NULL) {
+        tool::Logging(LOG_ERROR, myName, "invalid argument\n");
+        return -1;
+    }
+
+    size_t strtabIndex;
+    if (elf_getshdrstrndx(elf, &strtabIndex) != 0) {
+        tool::Logging(LOG_ERROR, myName, "elf_getshstrndx failed\n");
+        return -1;
+    }
+
+    for (Elf_Scn *cur = elf_nextscn(elf, NULL); cur != NULL; cur = elf_nextscn(elf, cur)) {
+        GElf_Shdr header;
+        if (gelf_getshdr(cur, &header) != &header) {
+            tool::Logging(LOG_ERROR, myName, "gelf_getshdr failed\n");
+            return -1;
+        }
+        const char *curName = elf_strptr(elf, strtabIndex, header.sh_name);
+        if (curName == NULL) {
+            tool::Logging(LOG_ERROR, myName, "elf_strptr failed\n");
+            return -1;
+        }
+        if (strcmp(curName, name) != 0) {
+            continue;
+        }
+        *section = cur;
+        return 0;
+    }
+    return -1;
+}
+// Builds the per-kernel section name ".nv.info.<kernel>" as an asprintf-allocated string (caller frees).
+// For a name shaped like "$<inner>$...", only <inner> is used. Returns NULL on failure.
+static char* GetKernelSectionFromKernelName(const char *kernel_name)
+{
+    if (kernel_name == NULL) {
+        tool::Logging(LOG_ERROR, myName, "invalid argument\n");
+        return NULL;
+    }
+
+    char *section_name = NULL;
+    int written;
+    if (kernel_name[0] != '$') {
+        written = asprintf(&section_name, ".nv.info.%s", kernel_name);
+    } else {
+        const char *closing = strchr(kernel_name+1, '$');
+        if (closing == NULL) {
+            tool::Logging(LOG_ERROR, myName, "invalid kernel name\n");
+            return NULL;
+        }
+        int len = (closing - kernel_name) - 1;
+        written = asprintf(&section_name, ".nv.info.%.*s", len, kernel_name+1);
+    }
+    if (written == -1) {
+        tool::Logging(LOG_ERROR, myName, "asprintf failed\n");
+        return NULL;
+    }
+    return section_name;
+}
+
+// Parses the kernel's ".nv.info.<name>" section into kernel->paramNum, paramSize, paramOffsets and
+// paramSizes (arrays indexed by ordinal). Returns 0 or -1; memory is only NULL-checked, memsize unused.
+static int GetParaForKernel(Elf *elf, KernelInfo_t *kernel, void* memory, size_t memsize)
+{
+    struct __attribute__((__packed__)) nv_info_kernel_entry {
+        uint8_t format;
+        uint8_t attribute;
+        uint16_t values_size;
+        uint32_t values;
+    };
+    struct __attribute__((__packed__)) nv_info_kparam_info {
+        uint32_t index;
+        uint16_t ordinal;
+        uint16_t offset;
+        uint16_t unknown : 12;
+        uint8_t  cbank : 6;
+        uint16_t size : 14;
+        // missing are "space" (possible padding info?), and "Pointee's logAlignment"
+        // these were always 0 in the kernels I tested
+    };
+    const size_t header_len = sizeof(struct nv_info_kernel_entry) - 4;  // entry without its payload word
+    int ret = -1;
+    char *section_name = NULL;
+    Elf_Scn *section = NULL;
+    Elf_Data *data = NULL;
+    size_t secpos=0;
+    if (kernel == NULL || kernel->name == NULL || memory == NULL) {
+        tool::Logging(LOG_ERROR, myName, "at least one parameter is NULL\n");
+        goto cleanup;
+    }
+    kernel->paramNum = 0;
+    kernel->paramSize = 0;
+    kernel->paramOffsets = NULL;
+    kernel->paramSizes = NULL;
+    if ((section_name = GetKernelSectionFromKernelName(kernel->name)) == NULL) {
+        tool::Logging(LOG_ERROR, myName, "GetKernelSectionFromKernelName failed\n");
+        goto cleanup;
+    }
+    if (GetSectionByName(elf, section_name, &section) != 0) {
+        tool::Logging(LOG_ERROR, myName, "section %s not found\n", section_name);
+        goto cleanup;
+    }
+    if ((data = elf_getdata(section, NULL)) == NULL) {
+        tool::Logging(LOG_ERROR, myName, "error getting section data\n");
+        goto cleanup;
+    }
+
+    while (secpos < data->d_size) {
+        if (data->d_size - secpos < header_len) { break; }  // trailing bytes cannot hold another entry
+        struct nv_info_kernel_entry *entry = (struct nv_info_kernel_entry*)((uint8_t*)data->d_buf+secpos);
+        if (entry->format == EIFMT_SVAL && entry->attribute == EIATTR_KPARAM_INFO) {
+            if (entry->values_size != 0xc) {
+                tool::Logging(LOG_ERROR, myName, "EIATTR_KPARAM_INFO values size has not the expected value of 0xc\n");
+                goto cleanup;
+            }
+            if (data->d_size - secpos < header_len + entry->values_size) {
+                tool::Logging(LOG_ERROR, myName, "EIATTR_KPARAM_INFO entry is truncated\n");
+                goto cleanup;
+            }
+            struct nv_info_kparam_info *kparam = (struct nv_info_kparam_info*)&entry->values;
+            tool::Logging(LOG_REGS, myName, "param %d: offset: %#x, size: %#x\n", kparam->ordinal, kparam->offset, kparam->size);
+            if (kparam->ordinal >= kernel->paramNum) {
+                const size_t wanted = (size_t)kparam->ordinal + 1;
+                // Grow through temporaries so a failed realloc cannot replace a valid array with NULL.
+                uint16_t *offsets = (uint16_t*)realloc(kernel->paramOffsets, wanted * sizeof(uint16_t));
+                uint16_t *sizes = (uint16_t*)realloc(kernel->paramSizes, wanted * sizeof(uint16_t));
+                if (offsets != NULL) { kernel->paramOffsets = offsets; }
+                if (sizes != NULL) { kernel->paramSizes = sizes; }
+                if (offsets == NULL || sizes == NULL) {
+                    tool::Logging(LOG_ERROR, myName, "out of memory while growing the parameter arrays\n");
+                    goto cleanup;
+                }
+                for (size_t k = kernel->paramNum; k + 1 < wanted; k++) {  // zero any slots skipped over
+                    offsets[k] = 0;
+                    sizes[k] = 0;
+                }
+                kernel->paramNum = kparam->ordinal+1;
+            }
+            kernel->paramOffsets[kparam->ordinal] = kparam->offset;
+            kernel->paramSizes[kparam->ordinal] = kparam->size;
+            secpos += header_len + entry->values_size;
+        } else if (entry->format == EIFMT_HVAL && entry->attribute == EIATTR_CBANK_PARAM_SIZE) {
+            kernel->paramSize = entry->values_size;
+            tool::Logging(LOG_REGS, myName, "cbank_param_size: %#0x\n", entry->values_size);
+            secpos += header_len;
+        } else if (entry->format == EIFMT_SVAL) {
+            secpos += header_len + entry->values_size;
+        } else {
+            if (entry->format != EIFMT_HVAL && entry->format != EIFMT_NVAL) {
+                tool::Logging(LOG_REGS, myName, "unknown format: %#x\n", entry->format);
+            }
+            secpos += header_len;
+        }
+    }
+    ret = 0;
+ cleanup:
+    free(section_name);
+    return ret;
+}
+
+// Looks up ".symtab" and returns its data and entry count (and, if requested, its section header).
+// Returns 0 on success, -1 on failure.
+static int GetSymbolTable(Elf *elf, Elf_Data **symbol_table_data, size_t *symbol_table_size, GElf_Shdr *symbol_table_shdr)
+{
+    GElf_Shdr shdr;
+    Elf_Scn *section = NULL;
+
+    if (elf == NULL || symbol_table_data == NULL || symbol_table_size == NULL) {
+        tool::Logging(LOG_ERROR, myName, "invalid argument\n");
+        return -1;
+    }
+    if (GetSectionByName(elf, ".symtab", &section) != 0) {
+        tool::Logging(LOG_ERROR, myName, "could not find .symtab section\n");
+        return -1;
+    }
+    if (gelf_getshdr(section, &shdr) == NULL) {
+        tool::Logging(LOG_ERROR, myName, "gelf_getshdr failed\n");
+        return -1;
+    }
+    if (symbol_table_shdr != NULL) {
+        *symbol_table_shdr = shdr;  // handed back even if one of the checks below fails
+    }
+    if(shdr.sh_type != SHT_SYMTAB) {
+        tool::Logging(LOG_ERROR, myName, "not a symbol table: %d\n", shdr.sh_type);
+        return -1;
+    }
+    if (shdr.sh_entsize == 0) {  // malformed header: the division below would trap
+        tool::Logging(LOG_ERROR, myName, "symbol table has an entry size of zero\n");
+        return -1;
+    }
+    if ((*symbol_table_data = elf_getdata(section, NULL)) == NULL) {
+        tool::Logging(LOG_ERROR, myName, "elf_getdata failed\n");
+        return -1;
+    }
+
+    *symbol_table_size = shdr.sh_size / shdr.sh_entsize;
+    return 0;
+}
+
+// Verifies that `elf` is a readable ELF object and logs its class, ident bytes and header counts.
+// Returns 0 when every libelf query succeeds, -1 otherwise.
+static int CheckElf(Elf *elf)
+{
+    Elf_Kind ek;
+    GElf_Ehdr ehdr;
+    int elfclass;
+    char *id;
+    size_t program_header_num;
+    size_t sections_num;
+    size_t section_str_num;
+    int ret = -1;
+
+    if ((ek = elf_kind(elf)) != ELF_K_ELF) {
+        tool::Logging(LOG_ERROR, myName, "elf_kind is not ELF_K_ELF, but %d\n", ek);
+        goto cleanup;
+    }
+
+    if (gelf_getehdr(elf, &ehdr) == NULL) {
+        tool::Logging(LOG_ERROR, myName, "gelf_getehdr failed\n");
+        goto cleanup;
+    }
+
+    if ((elfclass = gelf_getclass(elf)) == ELFCLASSNONE) {
+        tool::Logging(LOG_ERROR, myName, "gelf_getclass failed\n");
+        goto cleanup;
+    }
+
+    if ((id = elf_getident(elf, NULL)) == NULL) {
+        tool::Logging(LOG_ERROR, myName, "elf_getident failed\n");
+        goto cleanup;
+    }
+
+    tool::Logging(LOG_REGS, myName, "elfclass: %d-bit; elf ident[0..%d]: %7s\n",
+        (elfclass == ELFCLASS32) ? 32 : 64,
+        EI_ABIVERSION, id);
+
+    if (elf_getshdrnum(elf, &sections_num) != 0) {
+        tool::Logging(LOG_ERROR, myName, "elf_getshdrnum failed\n");
+        goto cleanup;
+    }
+
+    if (elf_getphdrnum(elf, &program_header_num) != 0) {
+        tool::Logging(LOG_ERROR, myName, "elf_getphdrnum failed\n");
+        goto cleanup;
+    }
+
+    if (elf_getshdrstrndx(elf, &section_str_num) != 0) {
+        tool::Logging(LOG_ERROR, myName, "elf_getshdrstrndx failed\n");
+        goto cleanup;
+    }
+
+    // The three counters are size_t, so they need %zu rather than %d.
+    tool::Logging(LOG_REGS, myName, "elf contains %zu sections, %zu program_headers, string table section: %zu\n",
+        sections_num, program_header_num, section_str_num);
+    ret = 0;
+cleanup:
+    return ret;
+}
+
+/**
+ * Scan the ".nv.info" section of an in-memory ELF image and append a
+ * heap-allocated KernelInfo_t to kernel_list for each kernel (an
+ * EIATTR_FRAME_SIZE entry naming a non-"$__internal_" symbol) not yet listed.
+ * @param kernel_list  list receiving the new entries; must not be NULL.
+ * @param memory       start of the ELF image; must not be NULL.
+ * @param memsize      size of the ELF image in bytes; must not be 0.
+ * @return 0 on success (also when ".nv.info" is absent), -1 on failure.
+ *         Entries appended before a failure stay in the list.
+ */
+int GetParameterInfo(std::vector<KernelInfo_t*> *kernel_list, void* memory, size_t memsize){
+    struct __attribute__((__packed__)) nv_info_entry{
+        uint8_t format;
+        uint8_t attribute;
+        uint16_t values_size;
+        uint32_t kernel_id;
+        uint32_t value;
+    };
+    Elf *elf = NULL;
+    Elf_Scn *section = NULL;
+    Elf_Data *data = NULL, *symbol_table_data = NULL;
+    GElf_Shdr symtab_shdr;
+    size_t symnum;
+    GElf_Sym sym;
+    int ret = -1;
+    KernelInfo_t *ki = NULL;
+    const char *kernel_str;
+
+    if (memory == NULL || memsize == 0) {
+        tool::Logging(LOG_ERROR, myName, "memory was NULL or memsize was 0\n");
+        return -1;
+    }
+    // push_back() below dereferences the list, so reject NULL up front.
+    if (kernel_list == NULL) {
+        tool::Logging(LOG_ERROR, myName, "kernel_list is NULL\n");
+        return -1;
+    }
+
+    if ((elf = elf_memory((char*)memory, memsize)) == NULL) {
+        tool::Logging(LOG_ERROR, myName, "elf_memory failed\n");
+        goto cleanup;
+    }
+
+    if (CheckElf(elf) != 0) {
+        tool::Logging(LOG_ERROR, myName, "check_elf failed\n");
+        goto cleanup;
+    }
+
+    if (GetSymbolTable(elf, &symbol_table_data, &symnum, &symtab_shdr) != 0) {
+        tool::Logging(LOG_ERROR, myName, "could not get symbol table\n");
+        goto cleanup;
+    }
+
+    if (GetSectionByName(elf, ".nv.info", &section) != 0) {
+        tool::Logging(LOG_REGS, myName, "could not find .nv.info section. This means this binary does not contain any kernels.\n");
+        ret = 0;    // This is not an error.
+        goto cleanup;
+    }
+
+    if ((data = elf_getdata(section, NULL)) == NULL) {
+        tool::Logging(LOG_ERROR, myName, "elf_getdata failed\n");
+        goto cleanup;
+    }
+
+    // Only visit complete records: a trailing partial entry must never be read.
+    for (size_t secpos = 0; secpos + sizeof(struct nv_info_entry) <= data->d_size; secpos += sizeof(struct nv_info_entry)) {
+        struct nv_info_entry *entry = (struct nv_info_entry *)((uint8_t*)data->d_buf+secpos);
+
+        if (entry->values_size != 8) {
+            tool::Logging(LOG_ERROR, myName, "unexpected values_size: %#x\n", entry->values_size);
+            continue;
+        }
+
+        if (entry->attribute != EIATTR_FRAME_SIZE) {
+            continue;
+        }
+
+        if (entry->kernel_id >= symnum) {
+            tool::Logging(LOG_ERROR, myName, "kernel_id out of bounds: %#x\n", entry->kernel_id);
+            continue;
+        }
+
+        if (gelf_getsym(symbol_table_data, entry->kernel_id, &sym) == NULL) {
+            tool::Logging(LOG_ERROR, myName, "gelf_getsym failed for entry %d\n", entry->kernel_id);
+            continue;
+        }
+
+        if ((kernel_str = elf_strptr(elf, symtab_shdr.sh_link, sym.st_name) ) == NULL) {
+            tool::Logging(LOG_ERROR, myName, "strptr failed for entry %d\n", entry->kernel_id);
+            continue;
+        }
+
+        /* nvcc adds symbols for (some?) intrinsics to the .nv.info table. They are
+         * prefixed with "$__internal_" and are not kernels, so skip them. */
+        const char *intrinsics_prefix = "$__internal_";
+        if (strncmp(kernel_str, intrinsics_prefix, strlen(intrinsics_prefix)) == 0) {
+            continue;
+        }
+
+        if (GetKernelInfoByKernelName(kernel_list, kernel_str) != NULL) {
+            continue;
+        }
+
+        tool::Logging(LOG_REGS, myName, "found new kernel: %s (symbol table id: %#x)\n", kernel_str, entry->kernel_id);
+
+        // Zero-initialise the entry and set its name before publishing it:
+        // list lookups strcmp() against name, so it must never be garbage.
+        size_t buflen = strlen(kernel_str)+1;
+        ki = (KernelInfo_t*)calloc(1, sizeof(KernelInfo_t));
+        if (ki == NULL || (ki->name = (char*)malloc(buflen)) == NULL) {
+            tool::Logging(LOG_ERROR, myName, "malloc failed\n");
+            free(ki);   // free(NULL) is a no-op
+            goto cleanup;
+        }
+        memcpy(ki->name, kernel_str, buflen);   // buflen includes the NUL
+        kernel_list->push_back(ki);
+
+        if (GetParaForKernel(elf, ki, memory, memsize) != 0) {
+            tool::Logging(LOG_ERROR, myName, "GetParaForKernel failed for kernel %s\n", kernel_str);
+            goto cleanup;
+        }
+    }
+
+    ret = 0;
+ cleanup:
+    if (elf != NULL) {
+        elf_end(elf);
+    }
+    return ret;
+}
+
+// Return the first entry of kernel_list whose name equals kernelName, or NULL
+// when there is none or either argument is NULL. The list is not modified.
+KernelInfo_t* GetKernelInfoByKernelName(std::vector<KernelInfo_t*> *kernel_list, const char* kernelName) {
+    if (kernel_list == NULL || kernelName == NULL) {
+        tool::Logging(LOG_ERROR, myName, "kernel_list or kernelName is NULL\n");
+        return NULL;
+    }
+    for (KernelInfo_t *ki : *kernel_list) {
+        // Skip entries that cannot be compared instead of crashing in strcmp().
+        if (ki != NULL && ki->name != NULL && strcmp(ki->name, kernelName) == 0) {
+            return ki;
+        }
+    }
+    return NULL;
+}
\ No newline at end of file
diff --git a/GPU-Virtual-Service/xpu-pool-service/.gitmodules b/GPU-Virtual-Service/xpu-pool-service/.gitmodules
deleted file mode 100644
index 2e03ba5..0000000
--- a/GPU-Virtual-Service/xpu-pool-service/.gitmodules
+++ /dev/null
@@ -1,7 +0,0 @@
-[submodule "third_party/kubernetes"]
-	path = third_party/kubernetes
-	url = https://szv-open.codehub.huawei.com/OpenSourceCenter/kubernetes/kubernetes.git
-
-[submodule "third_party/runtime"]
-	path = third_party/runtime
-	url = https://szv-open.codehub.huawei.com/OpenBaize/Ascend/runtime.git
\ No newline at end of file
diff --git a/GPU-Virtual-Service/xpu-pool-service/ci/VersionSet.xml b/GPU-Virtual-Service/xpu-pool-service/ci/VersionSet.xml
deleted file mode 100644
index 4ba8653..0000000
--- a/GPU-Virtual-Service/xpu-pool-service/ci/VersionSet.xml
+++ /dev/null
@@ -1 +0,0 @@
-<version_set type="microService"></version_set>
\ No newline at end of file
diff --git a/GPU-Virtual-Service/xpu-pool-service/ci/app_define.json b/GPU-Virtual-Service/xpu-pool-service/ci/app_define.json
deleted file mode 100644
index 6626ee2..0000000
--- a/GPU-Virtual-Service/xpu-pool-service/ci/app_define.json
+++ /dev/null
@@ -1,13 +0,0 @@
-{
-    "fileVersion": "1",
-    "name": "XPUPoolService",
-    "serviceId": "b6a16627d7cd405697786962a38457d5",
-    "description": "",
-    "version": "1.0.0",
-    "type": "microService",
-    "processes": {
-      "XPUPoolService": {
-        "subscribes": []
-      }
-    }
-  }
\ No newline at end of file
diff --git a/GPU-Virtual-Service/xpu-pool-service/ci/at/at_deploy.sh b/GPU-Virtual-Service/xpu-pool-service/ci/at/at_deploy.sh
deleted file mode 100644
index 22440bc..0000000
--- a/GPU-Virtual-Service/xpu-pool-service/ci/at/at_deploy.sh
+++ /dev/null
@@ -1,31 +0,0 @@
-#!/bin/bash
-# Copyright (C) Huawei Technologies Co., Ltd. 2024-2025. All rights reserved.
-set -e
-
-function arch_config() {
-    arch=$(uname -m)
-    if [[ ${arch} == "x86_64" ]]; then
-        platform="x86"
-    elif [[ ${arch} == "aarch64" ]]; then
-        platform="arm"
-    else
-        echo "incorrect arch mode"
-        exit 1
-    fi
-}
-
-build_version=$(cat buildInfo.properties | sed -n 's/.*=//p')
-echo ${build_version}
-
-# get product from CI pipeline for this project:
-artget pull 56e9abca9a9045a98c283fd0cc958ffc ${build_version} -ca snapshot -at cloudartifact -ap deploy
-
-ssh -o "StrictHostKeyChecking no" ${execute_environment} "rm -rf /data/ci/at"
-cd ${WORKSPACE}/${branch}/test
-scp -r at ${execute_environment}:/data/ci/
-
-arch_config
-upload_arch=$(echo ${arch} | sed 's/_/-/g')
-cd ${WORKSPACE}/deploy/software
-scp ${upload_version}_${upload_arch}.zip ${execute_environment}:/data/ci/at/
-ssh ${execute_environment} "cd /data/ci/at && sh runtest.sh --artifact ${upload_version}_${upload_arch}.zip"
\ No newline at end of file
diff --git a/GPU-Virtual-Service/xpu-pool-service/ci/at/at_deploy.yml b/GPU-Virtual-Service/xpu-pool-service/ci/at/at_deploy.yml
deleted file mode 100644
index c8df6a8..0000000
--- a/GPU-Virtual-Service/xpu-pool-service/ci/at/at_deploy.yml
+++ /dev/null
@@ -1,53 +0,0 @@
----
-version: 2.0
-
-buildspace:
-  fixed: true
-  path: /usr1/workspace
-
-envs:
-  - condition: env_type == 'docker'
-    resource:
-      type: docker
-      image: ${image_name_x86}
-      resource_class: 16U32G
-      mode: toolbox
-  - condition: env_type == "vpc"
-    resource:
-      type: docker
-      image: ${image_name_x86}
-      pool: ${img_pool_x86}
-      resource_class: 16U32G
-
-buildspace:
-  fixed: true
-  path: /usr1/workspace
-  
-params:
-  - name: product
-    value: cloudbuild2.0
-  - name: CB_AUTO_CHECK_VERSION
-    value: 2.0
-  - name: CB_META_ENABLE_SWBOM
-    value: true
-  - name: CB_META_ENABLE_FILE_SWBOM
-    value: true
-  - name: CB_META_CMC_DEPENDENCY_V2
-    value: true
-
-steps:
-  PRE_BUILD:
-    - checkout:
-        path: ${branch}
-  BUILD:
-    - build_execute:
-        command: |
-          sh ${branch}/ci/buildinfo.sh
-          sh ${branch}/ci/at/at_deploy.sh
-        accelerate: false
-        enhance:
-          - feature: md5_source_tracement
-            build_tools: [maven]
-        check:
-          auto: true
-          mode: sync
\ No newline at end of file
diff --git a/GPU-Virtual-Service/xpu-pool-service/ci/build.sh b/GPU-Virtual-Service/xpu-pool-service/ci/build.sh
deleted file mode 100644
index 60273c5..0000000
--- a/GPU-Virtual-Service/xpu-pool-service/ci/build.sh
+++ /dev/null
@@ -1,101 +0,0 @@
-#!/bin/bash
-# Copyright (C) Huawei Technologies Co., Ltd. 2024-2025. All rights reserved.
-set -e
-
-WORK_DIR=$(cd $(dirname $0); pwd)
-DEST_DIR=$WORK_DIR/../xpu_pool/xpu_docker_build/
-
-function prepare() {
-    mkdir -p ${DEST_DIR}/cuda_client/GPU_client/
-    mkdir -p ${DEST_DIR}/acl_client/NPU_client/
-    mkdir -p ${WORK_DIR}/../XPU_symbols/
-}
-
-function handle_spdlog() {
-    mkdir -m 750 -p /usr/local/include
-    cd third_party/spdlog
-    cp -P --remove-destination -rf include/spdlog /usr/local/include
-    chmod 750 -R /usr/local/include
-    cd ${WORK_DIR}
-}
-
-function compile_client() {
-    cd ${WORK_DIR} && rm -rf build && mkdir build && cd build
-    cmake -DCMAKE_BUILD_TYPE=Release ../../ && make -j
-}
-
-function strip_gotest_codes() {
-    if [ ! -d "$1" ]; then
-        echo "Error: Directory '$1' does not exist."
-        return
-    fi
-    cd "$1"
-    find . -name *_test.go | xargs rm -rf
-    sed -i '/gomonkey/d' go.mod
-    go mod tidy
-}
-
-function compile_device_plugin() {
-    # strip gomoney related codes to make SwInfoTree happy
-    strip_gotest_codes "${WORK_DIR}/../GPU-device-plugin/"
-    cd ${WORK_DIR}/../GPU-device-plugin/ && make -j
-}
-
-function compile_xpu_exporter() {
-    # strip gomoney related codes to make SwInfoTree happy
-    strip_gotest_codes "${WORK_DIR}/../xpu-exporter/"
-    cd ${WORK_DIR}/../xpu-exporter/ && make clean && make -j
-}
-
-function strip_symbols() {
-    cd ${WORK_DIR}/build/direct/cuda
-    objcopy --only-keep-debug libcuda_direct.so libcuda_direct.sym
-    objcopy --only-keep-debug gpu-monitor gpu-monitor.sym
-    objcopy --strip-all libcuda_direct.so
-    objcopy --strip-all gpu-monitor
-
-    cd ${WORK_DIR}/build/direct/acl
-    objcopy --only-keep-debug libruntime_direct.so libruntime_direct.sym
-    objcopy --only-keep-debug npu-monitor npu-monitor.sym
-    objcopy --strip-all libruntime_direct.so
-    objcopy --strip-all npu-monitor
-}
-
-function copy_to_build_dir() {
-    cd ${WORK_DIR}/build
-    cp -P --remove-destination -r direct/cuda/libcuda_direct.so ${DEST_DIR}/cuda_client/GPU_client/
-    cp -P --remove-destination -r direct/cuda/gpu-monitor.so ${DEST_DIR}/cuda_client/GPU_client/
-    cp -P --remove-destination -r $WORK_DIR/../client_update/cuda-client-update.sh ${DEST_DIR}/cuda_client/GPU_client/
-
-    cp -P --remove-destination -r direct/cuda/*.sym ${WORK_DIR}/../XPU_symbols/
-
-    cp -P --remove-destination -r direct/acl/libruntime_direct.so ${DEST_DIR}/acl_client/NPU_client/
-    cp -P --remove-destination -r direct/acl/npu-monitor ${DEST_DIR}/acl_client/NPU_client/
-    cp -P --remove-destination -r $WORK_DIR/../client_update/acl-client-update.sh ${DEST_DIR}/acl_client/NPU_client/
-
-    cp -P --remove-destination -r direct/acl/*.sym ${WORK_DIR}/../XPU_symbols/
-
-    cd ${WORK_DIR}/../GPU-device-plugin/
-    cp -P --remove-destination -r gpu-device-plugin ${DEST_DIR}/gpu-device-plugin
-    cp -P --remove-destination -r npu-device-plugin ${DEST_DIR}/npu-device-plugin
-    cp -P --remove-destination -r xpu-client-tool ${DEST_DIR}/cuda_client/GPU_client/
-    cp -P --remove-destination -r xpu-client-tool ${DEST_DIR}/acl_client/NPU_client/
-
-
-    cd ${WORK_DIR}/../xpu-exporter/
-    cp -P --remove-destination -r xpu-exporter ${DEST_DIR}/exporter
-
-    cd ${WORK_DIR}/../XPU_symbols && tar -czvf XPU_symbols.tar.gz XPU_symbols
-}
-
-function main() {
-    prepare
-    handle_spdlog
-    compile_client
-    compile_device_plugin
-    compile_xpu_exporter
-    strip_symbols
-    copy_to_build_dir
-}
-
-main "$@"
\ No newline at end of file
diff --git a/GPU-Virtual-Service/xpu-pool-service/ci/build.yml b/GPU-Virtual-Service/xpu-pool-service/ci/build.yml
deleted file mode 100644
index 48665ef..0000000
--- a/GPU-Virtual-Service/xpu-pool-service/ci/build.yml
+++ /dev/null
@@ -1,48 +0,0 @@
----
-version: 2.0
-
-buildspace:
-  fixed: true
-  path: /usr1/workspace
-
-params:
-  - name: image_name_x86
-    value: szvecr02.his.huawei.com:80/ecr-build/modelengine_xpupool:x86_v6
-  - name: image_name_arm
-    value: szvecr02.his.huawei.com:80/ecr-build/acs_xpupool:arm_v03
-  - name: env_type
-    value: docker
-  - name: img_pool_x86
-    value: docker-sz-service-x86-ondocker-16u-01
-  - name: img_pool_arm
-    value: docker-sz-service-arm-ondocker-64u-02
-
-envs:
-  - condition: env_type == 'label'
-    label: ${eulerx86_label}
-  - condition: env_type == 'docker'
-    resource:
-      type: docker
-      image: ${image_name_x86}
-      resource_class: 16U32G
-      mode: toolbox
-  - condition: env_type == "vpc"
-    resource:
-      type: docker
-      image: ${image_name_x86}
-      pool: ${img_pool_x86}
-      resource_class: 16U32G
-
-buildflow:
-  strategy: Eager
-  flow_metadata:
-    from: job_xpu_pool_build_x86
-  attach_workspace:
-    path: pub_dir
-    resource: efs
-  jobs:
-    - job: job_xpu_pool_build_x86
-      params:
-        - name: key1
-          value: value1
-      build_ref: ci/xpu_pool/build_x86.yml
\ No newline at end of file
diff --git a/GPU-Virtual-Service/xpu-pool-service/ci/buildinfo.sh b/GPU-Virtual-Service/xpu-pool-service/ci/buildinfo.sh
deleted file mode 100644
index 15149ee..0000000
--- a/GPU-Virtual-Service/xpu-pool-service/ci/buildinfo.sh
+++ /dev/null
@@ -1,16 +0,0 @@
-#!/bin/bash
-# Copyright (C) Huawei Technologies Co., Ltd. 2024-2024. All rights reserved.
-set -e
-
-echo "Release is ${ENV_IS_RELEASE}"
-
-# 判断当前构建是否为版本构建，以决定构建变量
-if [ "${ENV_IS_RELEASE}" == "false" ]; then
-    SERVICE_VERSION='1.0.0-SNAPSHOT'
-    echo "buildVersion=${SERVICE_VERSION}.${ENV_PIPELINE_STARTTIME}">"${WORKSPACE}"/buildInfo.properties
-else
-    if [ "${ENV_IS_RELEASE}" == "true" ]; then
-        SERVICE_VERSION=${ENV_RELEASE_VERSION}
-        echo "buildVersion=${ENV_RELEASE_VERSION}">"${WORKSPACE}"/buildInfo.properties
-    fi
-fi
\ No newline at end of file
diff --git a/GPU-Virtual-Service/xpu-pool-service/ci/cmc/openSource_x86.xml b/GPU-Virtual-Service/xpu-pool-service/ci/cmc/openSource_x86.xml
deleted file mode 100644
index b7c59c4..0000000
--- a/GPU-Virtual-Service/xpu-pool-service/ci/cmc/openSource_x86.xml
+++ /dev/null
@@ -1,24 +0,0 @@
-<?xml version="1.0" encoding="UTF-8"?>
-<project>
-  <dependencies>
-    <dependency>
-      <versionType>Component</versionType>
-      <repoType>Generic</repoType>
-      <id>
-        <offering>EulerOS Server</offering>
-        <name>EulerOSServerV200R013C00X86</name>
-        <version>2024.07.04.103000</version>
-      </id>
-      <copies>
-        <copy>
-          <source>Software/x86_64/DockerStack/EulerOS_Server_*.-docker.x86_64.tar.xz</source>
-          <dest>EulerOS_Server/x86</dest>
-        </copy>
-        <copy>
-          <source>Software/x86_64/EulerOS-*-x86_64-dvd.iso</source>
-          <dest>EulerOS_Server/x86</dest>
-        </copy>
-      </copies>
-    </dependency>
-  </dependencies>
-</project>
\ No newline at end of file
diff --git a/GPU-Virtual-Service/xpu-pool-service/ci/cmc/upload_cmc.xml b/GPU-Virtual-Service/xpu-pool-service/ci/cmc/upload_cmc.xml
deleted file mode 100644
index 88fe24a..0000000
--- a/GPU-Virtual-Service/xpu-pool-service/ci/cmc/upload_cmc.xml
+++ /dev/null
@@ -1,25 +0,0 @@
-<?xml version='1.0' encoding='UTF-8'?>
-<project>
-  <artifact>
-    <versionType>BVersion</versionType>
-    <repoType>Generic</repoType>
-    <id>
-      <offering>eContainer</offering>
-      <version>${CMC_VERSION}</version>
-    </id>
-    <isClear>N</isClear>
-    <copies>
-      <copy>
-        <source>output/software/*</source>
-        <dest></dest>
-      </copy>
-      <copy>
-        <repoUsage>inner</repoUsage>
-        <source>output/inner/*</source>
-        <dest>symbol</dest>
-      </copy>
-    </copies>
-  </artifact>
-  <dependencies>
-  <dependencies/>
-</project>
\ No newline at end of file
diff --git a/GPU-Virtual-Service/xpu-pool-service/ci/cms_signature.sh b/GPU-Virtual-Service/xpu-pool-service/ci/cms_signature.sh
deleted file mode 100644
index 13bedaa..0000000
--- a/GPU-Virtual-Service/xpu-pool-service/ci/cms_signature.sh
+++ /dev/null
@@ -1,63 +0,0 @@
-#!/bin/bash
-# Copyright Huawei Technologies Co., Ltd. 2025-2025. All rights reserved.
-# 构建签名脚本
-set -e
-
-current_dir=$(
-    cd "$(dirname "$0")" || exit 1
-    pwd
-)
-workspace=$(dirname "${current_dir}")
-pkg_path=$1
-signature_jar=$(find /opt/buildtools/ -name signature.jar)
-
-if [ ! -d "${workspace}"/CI ]; then
-    mkdir -p "${workspace}"/CI
-fi
-
-function gen_list() {
-    for file in "$1"/*; do
-        if [ -d "${file}" ]; then
-            gen_list "$file"
-        else
-            echo "$file" is file
-            if [ "$(basename "$file")"x != listx ]; then
-                cat <<EOF >> "${pkg_path}"/list
-Name: ${file##*$pkg_path/}
-SHA256-Digest: $(sha256sum "${file}" | awk '{print $1}')
-EOF
-            fi
-        fi
-    done 
-}
-
-function gen_signature_xml() {
-    cat << SIG_CONF > "${workspace}"/CI/signconf_cms.xml
-<?xml version="1.0" encoding="UTF-8"?>
-<!-- 由产品CI配置此文件，供私有构建、团队构建、发布构建等各级工程共享 -->
-<signtasks>
-  <signtask name="linux_single">
-    <alias>CMS_Computing_RSA2048_CN_20220810_Huawei</alias>
-    <fileset path="${pkg_path}">
-      <include>**/list</include>
-    </fileset>
-    <crlfile>${pkg_path}/list.cms.crl/</crlfile>
-    <hashtype>2</hashtype>
-    <signaturestandard>5</signaturestandard>
-    <proxylist>10.29.154.209:12056</proxylist>
-    <productlineid>049944</productlineid>
-    <versionid>260185123</versionid>
-  </signtask>
-</signtasks>
-SIG_CONF
-}
-
-cd "${pkg_path}"
-cat <<EOF >"${pkg_path}"/list
-Manifest Version: 1.0
-Create By: Huawei Technology Inc.
-EOF
-
-gen_list "${pkg_path}"
-gen_signature_xml
-java -jar "${signature_jar}" "${workspace}"/CI/signconf_cms.xml
\ No newline at end of file
diff --git a/GPU-Virtual-Service/xpu-pool-service/ci/dependency.xml b/GPU-Virtual-Service/xpu-pool-service/ci/dependency.xml
deleted file mode 100644
index 7a849d1..0000000
--- a/GPU-Virtual-Service/xpu-pool-service/ci/dependency.xml
+++ /dev/null
@@ -1,8 +0,0 @@
-<?xml version="1.0" encoding="UTF-8"?>
-<manifest>
-  <remote name="codehub" fetch="https://codehub-y.huawei.com/" />
-  <remote name="open-codehub" fetch="https://open.codehub.huawei.com/" />
-  <default remote="codehub" revision="master" />
-  <!-- includes for self -->
-  <include name="ci/opensource.xml" />
-</manifest>
\ No newline at end of file
diff --git a/GPU-Virtual-Service/xpu-pool-service/ci/hwp7s_signature.sh b/GPU-Virtual-Service/xpu-pool-service/ci/hwp7s_signature.sh
deleted file mode 100644
index 6562fb5..0000000
--- a/GPU-Virtual-Service/xpu-pool-service/ci/hwp7s_signature.sh
+++ /dev/null
@@ -1,47 +0,0 @@
-#!/bin/bash
-# Copyright Huawei Technologies Co., Ltd. 2025-2025. All rights reserved.
-# hwp7s签名用于CMC B版本发布
-set -e
-
-pkg_path=$1
-current_dir=$(
-    cd "$(dirname "$0")" || exit 1
-    pwd
-)
-workspace=$(dirname "${current_dir}")
-signature_jar=$(find /opt/buildtools/ -name signature.jar)
-
-function gen_signature_xml() {
-    cat << EOF > "${workspace}"/CIConfig.xml
-<?xml version="1.0" encoding="UTF-8"?>
-<!-- 由产品CI配置此文件，供私有构建、团队构建、发布构建等各级工程共享 -->
-<signtasks>
-  <signtask name="cms_sign">
-    <alias>CMS_G5_Test_Sign_RSA3072PSS_CN_20220505_HUAWEI</alias>
-    <timestampalias>CMS_G5_Test_TSA_RSA3072PSS_CN_20220505_HUAWEI</timestampalias>
-    <fileset path="${pkg_path}">
-      <include>**/*.zip</include>
-      <include>**/*.iso</include>
-      <include>**/*.tar</include>
-      <include>**/*.tar.gz</include>
-      <include>**/*.tgz</include>
-    </fileset>
-    <crlfile>${pkg_path}/crldata.crl/</crlfile>
-    <hashtype>2</hashtype>
-    <proxylist>10.29.154.209:12056</proxylist>
-    <signaturestandard>5</signaturestandard>
-    <productlineid>049944</productlineid>
-    <versionid>261181132</versionid>
-    <padmode>1</padmode>
-  </signtask>
-</signtasks>
-EOF
-}
-
-gen_signature_xml
-
-# sign
-if ! java -jar "${signature_jar}" "${workspace}"/CIConfig.xml; then
-    echo "signature execute failed. exit."
-    exit 1
-fi
\ No newline at end of file
diff --git a/GPU-Virtual-Service/xpu-pool-service/ci/opensource.xml b/GPU-Virtual-Service/xpu-pool-service/ci/opensource.xml
deleted file mode 100644
index 8c8a184..0000000
--- a/GPU-Virtual-Service/xpu-pool-service/ci/opensource.xml
+++ /dev/null
@@ -1,6 +0,0 @@
-<?xml version="1.0" encoding="UTF-8"?>
-<manifest>
-  <project remote="open-codehub" groups="spilog" path="${branch}/ci/third_party/spilog" name="OpenSourceCenter/gabime/spdlog.git" revision="refs/tags/v1.12.0" />
-  <project remote="open-codehub" groups="kubernetes" path="${branch}/ci/third_party/kubernetes" name="OpenSourceCenter/kubernetes/kubernetes.git" revision="refs/tags/v1.31.1-h2" />
-  <project remote="open-codehub" groups="ascend-runtime" path="${branch}/ci/third_party/runtime" name="OpenBaize/Ascend/runtime.git" revision="5f02d026a1c8adc5b1d003b694f2ae6e1cca9a7f" />
-</manifest>
\ No newline at end of file
diff --git a/GPU-Virtual-Service/xpu-pool-service/ci/third_party b/GPU-Virtual-Service/xpu-pool-service/ci/third_party
deleted file mode 100644
index b0a49d0..0000000
--- a/GPU-Virtual-Service/xpu-pool-service/ci/third_party
+++ /dev/null
@@ -1 +0,0 @@
-../third_party
diff --git a/GPU-Virtual-Service/xpu-pool-service/ci/xpu_pool/build_x86.yml b/GPU-Virtual-Service/xpu-pool-service/ci/xpu_pool/build_x86.yml
deleted file mode 100644
index 23322cf..0000000
--- a/GPU-Virtual-Service/xpu-pool-service/ci/xpu_pool/build_x86.yml
+++ /dev/null
@@ -1,70 +0,0 @@
-version: 2.0
-
-buildspace:
-  fixed: true
-  path: /usr1/workspace
-
-envs:
-  - condition: env_type == 'docker'
-    resource:
-        type: docker
-        image: ${image_name_x86}
-        resource_class: 16U32G
-        mode: toolbox
-  - condition: env_type == "vpc"
-    resource:
-        type: docker
-        image: ${image_name_x86}
-        pool: ${img_pool_x86}
-        resource_class: 16U32G
-
-steps:
-  PRE_BUILD:
-    - checkout:
-        path: ${branch}  # 下载子路径，可选，如果配置，则会把代码下载到子子路径下，如果不配置，则会下载到当前路径
-    - manifest_checkout:
-        manifest_file: ci/dependency.xml
-        groups: spdlog,kubernetes,ascend-runtime
-        repo_depth: 0
-    - artget:
-        artifact_type: cmcbinary
-        action: pull
-        dependency: ${branch}/ci/cmc/copenSource_x86.xml
-        version_output_path: ./
-        agent: ./
-        username: ${CMC_USERNAME}
-        password: ${CMC_PASSWORD}
-
-  BUILD:
-    - build_execute:
-        command: |
-          sh ${branch}/ci/buildinfo.sh
-          sh ${branch}/ci/xpu_pool/build_xpu_package.sh
-        accelerate: false
-        check:
-          buildcheck: true
-          auto: true
-          exclude_dir: ${branch}/manager-b/deploy/agent/
-
-  POST_BUILD:
-    - artget:
-        artifact_type: cloudartifact
-        file_path: output
-        version_output_path: ./
-    - version_set: # 记录version set
-        metadata: true # 开启元数据采集，结合元数据时必要
-        isKiaScan: false
-    - when:
-        condition: upload_cmc == 'true'
-        steps:
-          - artget:
-              artifact_type: cmcbinary
-              action: push
-              params: {"CMC_VERSION": "${CMC_VERSION}"}
-              dependency: ${branch}/ci/cmc/upload_cmc.xml
-              agent: .
-              version_output_path: .
-              add_source_code: push
-              add_env_image: push
-              username: ${CMC_USERNAME}
-              password: ${CMC_PASSWORD}
\ No newline at end of file
diff --git a/GPU-Virtual-Service/xpu-pool-service/ci/xpu_pool/build_xpu_package.sh b/GPU-Virtual-Service/xpu-pool-service/ci/xpu_pool/build_xpu_package.sh
deleted file mode 100644
index b823854..0000000
--- a/GPU-Virtual-Service/xpu-pool-service/ci/xpu_pool/build_xpu_package.sh
+++ /dev/null
@@ -1,131 +0,0 @@
-#!/bin/bash
-# Copyright (C) Huawei Technologies Co., Ltd. 2024-2025. All rights reserved.
-set -e
-
-build_target=$1
-image_name=$2
-current_dir=$(
-    cd "$(dirname "$0")" || exit 1
-    pwd
-)
-top_dir=$(dirname "$(dirname "${current_dir}")")
-pkg_dir=${top_dir}/pkg
-host_scripts_dir=${current_dir}/host_scripts
-
-echo "current_dir=${current_dir}"
-echo "top_dir=${top_dir}"
-echo "pkg_dir=${pkg_dir}"
-echo "build_target=${build_target}"
-echo "image_name=${image_name}"
-
-function arch_config() {
-    arch=$(uname -m)
-    if [[ ${arch} == "x86_64" ]]; then
-        platform="x86"
-    elif [[ ${arch} == "aarch64" ]]; then
-        platform="arm"
-    else
-        echo "incorrect arch mode"
-        exit 1
-    fi
-}
-
-function mk_xpu_pkg_dir() {
-    [ -e "${pkg_dir}" ] && rm -rf "${pkg_dir}"
-    mkdir -p "${pkg_dir}"/images
-    mkdir -p "${pkg_dir}"/templates
-    chmod -R 750 "${pkg_dir}
-}
-
-function build_xpu_component() {
-    echo "build xpu component begin"
-    cd ${top_dir}/ci && sh build.sh
-    echo "build xpu component end"
-}
-
-function get_helm_package() {
-    cd ${top_dir}/install/helm && helm package gpupool 
-    cp -P --remove-destination -rf gpupool-0.1.0.tgz "${pkg_dir}/templates"
-    cp -P --remove-destination -rf ../install.sh "${pkg_dir}/templates"
-    cp -P --remove-destination -rf ../uninstall.sh "${pkg_dir}"
-}
-
-function mknod_func() {
-    loopfile_firstname="/dev/loop0"
-    loopfile_num=0
-    loopfile_name=/dev/loop"${loopfile_num}"
-    while true; do
-        if [ -b "${loopfile_name}" ]; then
-            loopfile_num=$(expr ${loopfile_num} + 1)
-            loopfile_name=/dev/loop${loopfile_num}
-        else
-            sudo mknod ${loopfile_name} b 7 "${loopfile_num}"
-            sudo chmod 660 "${loopfile_name}"
-            sudo chown root:disk "${loopfile_name}"
-            echo "${loopfile_name}"
-            break
-        fi
-    done
-}
-
-function make_docker_base_image() {
-    # CurrentDir: code_branch/XPUPoolService/ci/xpu_pool/
-    mkdir -p "${current_dir}/xpu_docker_build/exporter/euler"
-    mkdir -p "${top_dir}/plugin-market/euler"
-    mknod_func
-    sudo mount "${top_dir}"/../EulerOS_Server/"${platform}"/EulerOS-*-dvd.iso "${current_dir}/xpu_docker_build/exporter/euler"
-    sudo mount "${top_dir}"/../EulerOS_Server/"${platform}"/EulerOS-*-dvd.iso "${top_dir}/plugin-market/euler"
-    cd "${current_dir}"
-    docker import "${top_dir}"/../EulerOS_Server/"${platform}"/EulerOS_Server_*.tar.xz euleros:econtainer
-}
-
-function build_image() {
-    echo "build $2 image begin"
-    cd ${current_dir}/xpu_docker_build/$1
-    local tag="$2:${image_tag}"
-    docker build --squash --no-cache -t $tag .
-    echo "build $2 image end"
-    shift 2
-    for package in "$@"; do
-        image_export_list[$package]+="$tag"
-    done
-}
-
-function export_images() {
-    echo "save images begin"
-    docker save -o "${pkg_dir}/images/gpupool_${platform}.tar" ${image_export_list[gpu]}
-    docker save -o "${pkg_dir}/images/npupool_${platform}.tar" ${image_export_list[npu]}
-    echo "save images end"
-}
-
-function build_output_packages() {
-    cd "${pkg_dir}"
-    mkdir -p ${WORKSPACE}/output/software
-    upload_arch=$(echo ${arch} | sed 's/_/-/g')
-    zip -1 -y ${WORKSPACE}/output/software/${xpupool_plugin}_${upload_arch}.zip *
-    mkdir -p ${WORKSPACE}/output/inner
-    cp -P --remove-destination -rf ${top_dir}/XPU_symbols.tar.gz \
-        ${WORKSPACE}/output/inner/${xpupool_plugin}_${upload_arch}_sym.tar.gz
-    cd -
-}
-
-function main() {
-    local -A image_export_list
-    arch_config
-    mk_xpu_pkg_dir
-    build_xpu_component
-    get_helm_package
-    make_docker_base_image
-    cd ${top_dir}/plugin-market &&sh build_daemonset.sh
-    build_image "cuda_client" "cuda_client_update" gpu
-    build_image "acl_client" "acl_client_update" npu
-    build_image "gpu-device-plugin" "gpu_device_plugin" gpu
-    build_image "npu-device-plugin" "npu_device_plugin" npu
-    build_image "exporter" "xpu_exporter" gpu npu
-    export_images
-    sh ${current_dir}/../cms_signature.sh ${pkg_dir}
-    build_output_packages
-    sh ${current_dir}/../hwp7s_signature.sh ${WORKSPACE}/output/software
-}
-
-main "$@"
\ No newline at end of file
diff --git a/GPU-Virtual-Service/xpu-pool-service/ci/xpu_pool/xpu_docker_build/acl_client/Dockerfile b/GPU-Virtual-Service/xpu-pool-service/ci/xpu_pool/xpu_docker_build/acl_client/Dockerfile
deleted file mode 100644
index 92d86a7..0000000
--- a/GPU-Virtual-Service/xpu-pool-service/ci/xpu_pool/xpu_docker_build/acl_client/Dockerfile
+++ /dev/null
@@ -1,10 +0,0 @@
-FROM euleros:econtainer
-
-# 复制编译好的 npu_client 文件
-COPY ./NPU_client /root
-
-# /root 目录下除了项目输出文件外没有非隐藏文件，以小数点(.)开头的隐藏文件不会被通配符(*)匹配
-RUN chmod 500 /root/*
-
-USER root
-ENTRYPOINT ["/root/acl-client-update.sh"]
\ No newline at end of file
diff --git a/GPU-Virtual-Service/xpu-pool-service/ci/xpu_pool/xpu_docker_build/cuda_client/Dockerfile b/GPU-Virtual-Service/xpu-pool-service/ci/xpu_pool/xpu_docker_build/cuda_client/Dockerfile
deleted file mode 100644
index 4fd8bf9..0000000
--- a/GPU-Virtual-Service/xpu-pool-service/ci/xpu_pool/xpu_docker_build/cuda_client/Dockerfile
+++ /dev/null
@@ -1,10 +0,0 @@
-FROM euleros:econtainer
-
-# 复制编译好的 xpu_client 文件
-COPY ./GPU_client /root
-
-# /root 目录下除了项目输出文件外没有非隐藏文件，以小数点(.)开头的隐藏文件不会被通配符(*)匹配
-RUN chmod 500 /root/*
-
-USER root
-ENTRYPOINT ["/root/cuda-client-update.sh"]
\ No newline at end of file
diff --git a/GPU-Virtual-Service/xpu-pool-service/ci/xpu_pool/xpu_docker_build/exporter/Dockerfile b/GPU-Virtual-Service/xpu-pool-service/ci/xpu_pool/xpu_docker_build/exporter/Dockerfile
deleted file mode 100644
index fa72b5e..0000000
--- a/GPU-Virtual-Service/xpu-pool-service/ci/xpu_pool/xpu_docker_build/exporter/Dockerfile
+++ /dev/null
@@ -1,30 +0,0 @@
-FROM euleros:econtainer AS xpu_exporter
-
-COPY ./euler/ /opt/euler/
-# 复制编译好的 xpu_exporter 文件
-COPY xpu-exporter /opt/xpu/bin/
-
-RUN echo "[dvd]" >> /etc/yum.repos.d/dvd.repo \
-    && echo "name=install dvd" >> /etc/yum.repos.d/dvd.repo \
-    && echo "baseurl=file:///opt/euler" >> /etc/yum.repos.d/dvd.repo \
-    && echo "enabled=1" >> /etc/yum.repos.d/dvd.repo \
-    && echo "gpgcheck=0" >> /etc/yum.repos.d/dvd.repo \
-    && yum clean all && yum makecache \
-    && yum -y install openssl \
-    && rm -rf /opt/euler \
-    && rm -f /etc/yum.repos.d/dvd.repo
-
-RUN chmod 500 /opt/xpu/bin/xpu-exporter
-
-# 当前容器创建的文件可能暴露到宿主机，从而与宿主机甚至其他容器的用户id碰撞
-# 选择 10001 作为用户id/组id是为了避免与 useradd 自动生成的id碰撞
-# 在本容器内新增用户时应当注意避免id碰撞
-RUN echo "xpu:x:10001:10001:eXPUPoolService:/:/sbin/nologin" >> /etc/passwd \
-    && echo "xpu:x:10001:" >> /etc/group \
-    && echo "xpu:!:::::::" >> /etc/shadow \
-    && chown xpu:xpu /opt/xpu/bin/xpu-exporter \
-    && setcap CAP_DAC_OVERRIDE=ep /opt/xpu/bin/xpu-exporter
-
-USER xpu:xpu
-
-ENTRYPOINT ["/opt/xpu/bin/xpu-exporter"]
\ No newline at end of file
diff --git a/GPU-Virtual-Service/xpu-pool-service/ci/xpu_pool/xpu_docker_build/gpu-device-plugin/Dockerfile b/GPU-Virtual-Service/xpu-pool-service/ci/xpu_pool/xpu_docker_build/gpu-device-plugin/Dockerfile
deleted file mode 100644
index 59828ec..0000000
--- a/GPU-Virtual-Service/xpu-pool-service/ci/xpu_pool/xpu_docker_build/gpu-device-plugin/Dockerfile
+++ /dev/null
@@ -1,19 +0,0 @@
-FROM euleros:econtainer AS gpu_device_plugin
-
-# 复制编译好的 gpu-device-plugin 文件
-COPY gpu-device-plugin /opt/xpu/bin/
-
-RUN chmod 500 /opt/xpu/bin/gpu-device-plugin
-
-# 当前容器创建的文件可能暴露到宿主机，从而与宿主机甚至其他容器的用户id碰撞
-# 选择 10001 作为用户id/组id是为了避免与 useradd 自动生成的id碰撞
-# 在本容器内新增用户时应当注意避免id碰撞
-RUN echo "xpu:x:10001:10001:eXPUPoolService:/:/sbin/nologin" >> /etc/passwd \
-    && echo "xpu:x:10001:" >> /etc/group \
-    && echo "xpu:!::::::::" >> /etc/shadow \
-    && chown xpu:xpu /opt/xpu/bin/gpu-device-plugin \
-    && setcap CAP_DAC_OVERRIDE=ep /opt/xpu/bin/gpu-device-plugin
-
-USER xpu:xpu
-
-ENTRYPOINT ["/opt/xpu/bin/gpu-device-plugin"]
\ No newline at end of file
diff --git a/GPU-Virtual-Service/xpu-pool-service/ci/xpu_pool/xpu_docker_build/npu-device-plugin/Dockerfile b/GPU-Virtual-Service/xpu-pool-service/ci/xpu_pool/xpu_docker_build/npu-device-plugin/Dockerfile
deleted file mode 100644
index 02d0558..0000000
--- a/GPU-Virtual-Service/xpu-pool-service/ci/xpu_pool/xpu_docker_build/npu-device-plugin/Dockerfile
+++ /dev/null
@@ -1,19 +0,0 @@
-FROM euleros:econtainer AS npu_device_plugin
-
-# 复制编译好的 npu-device-plugin 文件
-COPY npu-device-plugin /opt/xpu/bin/
-
-RUN chmod 500 /opt/xpu/bin/npu-device-plugin
-
-# 运行时必须使用root用户，否则会报错。
-# 使用非root权限用户运行会导致npu-device-plugin运行时加载libdcml.so文件失败。原因如下：
-# 1. ascended-npu-exporter在加载libdcml.so动态库时会分别尝试在 LD_LIBRARY_PATH 环境变量指定的目录和 .ldconfig 缓存项目中查找动态文件。
-# 2. 构建容器镜像时，如果指定的运行用户是非root权限用户，我们需要对npu-device-plugin二进制文件进行CAP_DAC_OVERRIDE=ep的授权操作。
-#    由于对npu-device-plugin的授权，导致程序无法获取到包含libdcml.so文件路径的"LD_LIBRARY_PATH"环境变量。
-#    程序无法通过环境变量设置的路径找到libdcml.so，这将导致在初始化的时候无法找到so文件。
-# 3. 当环境变量中找不到libdcml.so文件时，程序会执行"ldconfig"命令并获取缓存内容，但是libdcml.so文件只在运行环境中存在。
-#    构建环境下的ldconfig无法识别该文件，因此无法将其写入缓存（更新缓存内容需要使用root权限）。
-#    因此构建容器镜像时，如果指定的运行用户是非root权限用户，容器内部无法使用ldconfig更新缓存。
-USER root
-
-ENTRYPOINT ["/opt/xpu/bin/npu-device-plugin"]
\ No newline at end of file