In [None]:
import requests

file_url = "https://huggingface.co/datasets/nhotin/segment-text/resolve/main/model.onnx"
file_path = "model.onnx"
# Stream the ONNX model to disk. The response is used as a context manager so
# the underlying connection is always released, and the (connect, read) timeout
# keeps the cell from hanging forever on a stalled connection.
with requests.get(file_url, stream=True, timeout=(10, 60)) as response:
    if response.status_code == 200:
        with open(file_path, "wb") as f:
            # 1 MiB chunks: far fewer Python-level writes than 1 KiB chunks.
            for chunk in response.iter_content(chunk_size=1 << 20):
                f.write(chunk)
        print(f"Đã tải file ONNX thành công: {file_path}")
    else:
        print(f"Lỗi khi tải file: {response.status_code}")


In [None]:
!pip install numpy onnx
!pip install tensorrt==10.7.0
!pip install pycuda

In [None]:
import os

# CUDA_VISIBLE_DEVICES is only honoured if it is set before CUDA is
# initialised. Importing pycuda.autoinit initialises CUDA, so the variable
# must be exported before that import runs.
os.environ["CUDA_VISIBLE_DEVICES"] = "0,1"

import time

import numpy as np
import onnx
import tensorrt as trt
import pycuda.driver as cuda
import pycuda.autoinit

In [None]:
def check_tensorrt():
    """Print the installed TensorRT version and the output of ``nvcc --version``.

    TensorRT is imported inside the function so that a missing installation
    is actually caught here and reported with a message, instead of failing
    with an uncaught error when the module-level name is unavailable.
    """
    try:
        import tensorrt as trt
    except ImportError:
        # ModuleNotFoundError is a subclass of ImportError.
        print("TensorRT chưa được cài")
        return

    print(f"TensorRT version: {trt.__version__}")
    # Use the pipe as a context manager so the child process handle is closed.
    with os.popen('nvcc --version') as pipe:
        nvcc_output = pipe.read().strip()
    print(f"CUDA version: {nvcc_output}")


check_tensorrt()

In [None]:
def check_onnx_model(onnx_path):
    """Load an ONNX model and print its graph name and I/O element types.

    Prints the graph name, then one line per graph input and one line per
    graph output giving the tensor name and the name of its element type.

    Args:
        onnx_path: Path of the ``.onnx`` file to inspect.
    """
    model = onnx.load(onnx_path)
    graph = model.graph
    print(f"Model name: {graph.name}")

    # Inputs and outputs are reported the same way; only the label differs.
    for label, tensors in (("Input", graph.input), ("Output", graph.output)):
        for tensor in tensors:
            dtype = onnx.TensorProto.DataType.Name(tensor.type.tensor_type.elem_type)
            print(f"{label}: {tensor.name}, Type: {dtype}")


# Relative path, matching where the download step writes the model and where
# the conversion step reads it from.
onnx_path = "model.onnx"
check_onnx_model(onnx_path)

In [None]:
def print_model_info(onnx_path):
    """Print a human-readable summary of an ONNX model.

    The summary contains the graph name followed by, for every graph input
    and output, its name, its shape (dimensions whose ``dim_value`` is falsy
    are shown as ``'?'``) and the name of its element type.

    Args:
        onnx_path: Path of the ``.onnx`` file to describe.
    """
    graph = onnx.load(onnx_path).graph

    def _describe(value_infos):
        # Emit the name / shape / type triple for each tensor description.
        for value_info in value_infos:
            print(f"- Name: {value_info.name}")
            dims = [d.dim_value if d.dim_value else '?' for d in value_info.type.tensor_type.shape.dim]
            print(f"  Shape: {dims}")
            print(f"  Type: {onnx.TensorProto.DataType.Name(value_info.type.tensor_type.elem_type)}")

    print("\nModel Information:")
    print("=================")
    print(f"Model Name: {graph.name}")

    print("\nInputs:")
    _describe(graph.input)

    print("\nOutputs:")
    _describe(graph.output)

def build_engine(onnx_file_path, engine_file_path, shape=(1, 128), workspace_bytes=1 << 30):
    """Convert an ONNX model into a serialized TensorRT engine file.

    Steps: print the ONNX model summary, parse the ONNX file into a TensorRT
    network, configure the builder (workspace limit, FP16 when the platform
    reports fast FP16 support, one fixed-shape optimization profile for the
    ``input_ids`` and ``attention_mask`` inputs), build the serialized engine
    and write it to ``engine_file_path``.

    Args:
        onnx_file_path: Path of the ONNX model to convert.
        engine_file_path: Destination path for the serialized engine.
        shape: Fixed shape used as min/opt/max for both profiled inputs.
            Defaults to ``(1, 128)`` (batch_size=1, sequence_length=128).
        workspace_bytes: Workspace memory pool limit in bytes (default 1 GiB).

    Returns:
        The serialized engine returned by ``builder.build_serialized_network``,
        or ``None`` if parsing the ONNX file or building the engine failed.
    """
    # First, print model information
    print_model_info(onnx_file_path)

    trt_logger = trt.Logger(trt.Logger.INFO)
    print("\nCreating builder...")
    builder = trt.Builder(trt_logger)

    print("Creating network...")
    network = builder.create_network(1 << int(trt.NetworkDefinitionCreationFlag.EXPLICIT_BATCH))

    print("Creating parser...")
    parser = trt.OnnxParser(network, trt_logger)

    # Parse ONNX model
    print(f"Parsing ONNX file: {onnx_file_path}")
    with open(onnx_file_path, 'rb') as model:
        if not parser.parse(model.read()):
            print('Error: Failed to parse ONNX file')
            for error in range(parser.num_errors):
                print(parser.get_error(error))
            return None

    print("Creating builder config...")
    config = builder.create_builder_config()
    config.set_memory_pool_limit(trt.MemoryPoolType.WORKSPACE, workspace_bytes)

    # Enable FP16 only when the platform reports fast FP16 support, and only
    # announce it when the flag is really set so the log is not misleading.
    if builder.platform_has_fast_fp16:
        print("Enabling FP16 mode...")
        config.set_flag(trt.BuilderFlag.FP16)
    else:
        print("FP16 is not reported as fast on this platform; building without the FP16 flag.")

    # Add optimization profile with fixed shape (min == opt == max)
    print("Adding optimization profile...")
    profile = builder.create_optimization_profile()
    fixed_shape = tuple(shape)
    profile.set_shape("input_ids", fixed_shape, fixed_shape, fixed_shape)
    profile.set_shape("attention_mask", fixed_shape, fixed_shape, fixed_shape)
    config.add_optimization_profile(profile)

    # Print network information before building
    print("\nNetwork Information:")
    print("===================")
    print(f"Number of layers: {network.num_layers}")
    print(f"Number of inputs: {network.num_inputs}")
    print(f"Number of outputs: {network.num_outputs}")
    for i in range(network.num_outputs):
        output = network.get_output(i)
        print(f"Output {i}: {output.name}, shape={output.shape}, dtype={output.dtype}")

    # Build and save engine
    print("\nBuilding engine...")
    engine = builder.build_serialized_network(network, config)
    if engine is None:
        print("Error: Failed to build TensorRT engine")
        return None

    print(f"Saving engine to: {engine_file_path}")
    with open(engine_file_path, 'wb') as f:
        f.write(engine)

    print(f"Model converted successfully! Saved as {engine_file_path}")
    return engine

if __name__ == "__main__":
    # Entry point: convert the downloaded ONNX model into a TensorRT engine.
    onnx_path, engine_path = "model.onnx", "model.engine"
    print(f"Starting conversion from {onnx_path} to {engine_path}")
    build_engine(onnx_path, engine_path)

In [None]:
def load_engine(engine_path):
    """Deserialize a TensorRT engine from a serialized engine file.

    Args:
        engine_path: Path of the serialized engine file to read.

    Returns:
        Whatever ``runtime.deserialize_cuda_engine`` returns for the file's
        bytes; callers should check for ``None``, which signals failure.
    """
    TRT_LOGGER = trt.Logger(trt.Logger.WARNING)
    with open(engine_path, "rb") as f, trt.Runtime(TRT_LOGGER) as runtime:
        engine = runtime.deserialize_cuda_engine(f.read())
    return engine


engine = load_engine("model.engine")
# Deserialization signals failure by returning None rather than raising, so
# check before claiming success (and before any later step relies on it).
if engine is None:
    raise RuntimeError("Failed to deserialize TensorRT engine from model.engine")
print("TensorRT Engine đã load thành công!")

In [None]:
from huggingface_hub import HfApi
from kaggle_secrets import UserSecretsClient

# Target dataset repository and file locations
repo_id = "nhotin/segment-text"  # replace with your own repo
file_path = "/kaggle/working/model.engine"  # the engine file that was created
upload_path = "model.engine"  # file name once uploaded to HF

# Fetch the Hugging Face token from Kaggle Secrets
user_secrets = UserSecretsClient()
HF_TOKEN = user_secrets.get_secret("HF_UPLOAD_TOKEN")  # token needs `Write` permission

# Create the API client and upload the file to the Hugging Face dataset repo
api = HfApi()
api.upload_file(
    repo_id=repo_id,
    repo_type="dataset",  # required for dataset repos
    path_or_fileobj=file_path,
    path_in_repo=upload_path,
    token=HF_TOKEN,
)

print(f"Đã upload {file_path} lên Hugging Face Dataset: {repo_id}/{upload_path}")