## Quantization + Onnx format ready for production from HF repository

<img src= "https://frenzy86.s3.eu-west-2.amazonaws.com/python/nlp/albert02.png" width=600>

In [1]:
!pip install onnx onnxruntime -q

In [2]:
# Load model directly
from transformers import AutoTokenizer, AutoModelForSequenceClassification

tokenizer = AutoTokenizer.from_pretrained("Frenz/modelsent_test")
model = AutoModelForSequenceClassification.from_pretrained("Frenz/modelsent_test")

The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


In [3]:
from pathlib import Path
import transformers
from transformers.onnx import FeaturesManager

model_kind, model_onnx_config = FeaturesManager.check_supported_model_or_raise(model)
onnx_config = model_onnx_config(model.config)

# export
transformers.onnx.export(
                        preprocessor=tokenizer,
                        model=model,
                        config=onnx_config,
                        opset=13,
                        output=Path("sentiment.onnx")
                        )

(['input_ids', 'attention_mask', 'token_type_ids'], ['last_hidden_state'])

## metterli nella function

In [4]:
#save the tokenizer
import joblib
joblib.dump(tokenizer, "tokenizer_sentiment.pkl")

['tokenizer_sentiment.pkl']

In [5]:
import onnxruntime
import joblib

tokenizer = joblib.load("tokenizer_sentiment.pkl") # load tokenizer
onnx_model_path = "sentiment.onnx"            # load model onnx
ort_session = onnxruntime.InferenceSession(onnx_model_path)

import numpy as np
def softmax(x):
    exp_x = np.exp(x - np.max(x))
    return exp_x / exp_x.sum(axis=-1, keepdims=True)

def analyze_sentimentinference(row, ort_session, tokenizer):
    inputs = tokenizer(row['text'], return_tensors="pt")
    # Prepare input data for ONNX model
    input_data = {
                "input_ids": inputs["input_ids"].numpy(),
                "attention_mask": inputs["attention_mask"].numpy(),
                "token_type_ids": inputs["token_type_ids"].numpy(),
                }
    output = ort_session.run(None, input_data)
    # Convert logits to probabilities using softmax
    logits = np.array(output[0])
    probabilities = softmax(logits)
    row['sentiment'] = np.argmax(probabilities).item()
    row['prob'] = np.max(probabilities).item()
    return row

In [6]:
import pandas as pd

test = pd.DataFrame({'text': ['i love this product',
                              'i hate this product'],
                     })

# Apply sentiment analysis to each row in the DataFrame
df_result = test.apply(lambda row: analyze_sentimentinference(row, ort_session, tokenizer), axis=1)
df_result

Unnamed: 0,text,sentiment,prob
0,i love this product,1,0.998622
1,i hate this product,0,0.986131


## Quantization 8-Int

In [7]:
from onnxruntime.quantization import quantize_dynamic, QuantType

quantize_dynamic("sentiment.onnx",      #44MB
                 "sentiment-int8.onnx", #38MB
                 weight_type=QuantType.QUInt8
                 )



In [8]:
import onnxruntime
import joblib

tokenizer = joblib.load("tokenizer_sentiment.pkl") # load tokenizer
onnx_model_path = "sentiment-int8.onnx"            # load model quantized int8
ort_session = onnxruntime.InferenceSession(onnx_model_path)

import numpy as np
def softmax(x):
    exp_x = np.exp(x - np.max(x))
    return exp_x / exp_x.sum(axis=-1, keepdims=True)

def analyze_sentimentinference(row, ort_session, tokenizer):
    inputs = tokenizer(row['text'], return_tensors="pt")
    # Prepare input data for ONNX model
    input_data = {
                "input_ids": inputs["input_ids"].numpy(),
                "attention_mask": inputs["attention_mask"].numpy(),
                "token_type_ids": inputs["token_type_ids"].numpy(),
                }
    output = ort_session.run(None, input_data)
    # Convert logits to probabilities using softmax
    logits = np.array(output[0])
    probabilities = softmax(logits)
    row['sentiment'] = np.argmax(probabilities).item()
    row['prob'] = np.max(probabilities).item()
    return row

In [9]:
test = pd.DataFrame({'text': ['i love this product',
                              'i hate this product',
                              "i love this product, it's awesome!!"]}
                    )

# Apply sentiment analysis to each row in the DataFrame
df_result = test.apply(lambda row: analyze_sentimentinference(row, ort_session,tokenizer), axis=1)
df_result

Unnamed: 0,text,sentiment,prob
0,i love this product,1,0.998751
1,i hate this product,0,0.987771
2,"i love this product, it's awesome!!",1,0.998371


 ## Put single file inside HF repository

In [10]:
## Upload tokenizer and onnx model to HF

In [11]:
!huggingface-cli login


    _|    _|  _|    _|    _|_|_|    _|_|_|  _|_|_|  _|      _|    _|_|_|      _|_|_|_|    _|_|      _|_|_|  _|_|_|_|
    _|    _|  _|    _|  _|        _|          _|    _|_|    _|  _|            _|        _|    _|  _|        _|
    _|_|_|_|  _|    _|  _|  _|_|  _|  _|_|    _|    _|  _|  _|  _|  _|_|      _|_|_|    _|_|_|_|  _|        _|_|_|
    _|    _|  _|    _|  _|    _|  _|    _|    _|    _|    _|_|  _|    _|      _|        _|    _|  _|        _|
    _|    _|    _|_|      _|_|_|    _|_|_|  _|_|_|  _|      _|    _|_|_|      _|        _|    _|    _|_|_|  _|_|_|_|

    To login, `huggingface_hub` requires a token generated from https://huggingface.co/settings/tokens .
Enter your token (input will not be visible): 
Add token as git credential? (Y/n) n
Token is valid (permission: write).
Your token has been saved to /root/.cache/huggingface/token
Login successful


In [None]:
# !huggingface-cli upload Frenz/modelsent_test  sentiment-int8.onnx --commit-message "upload model *.onnx"  --repo-type model
# !huggingface-cli upload Frenz/modelsent_test  tokenizer_sentiment.pkl --commit-message "upload tokenizer *.pkl"  --repo-type model


In [12]:
import os
os.environ["HF_HUB_ENABLE_HF_TRANSFER"] = "1"

from huggingface_hub import HfApi, logging

logging.set_verbosity_debug()
hf = HfApi()
hf.upload_file(path_or_fileobj="tokenizer_sentiment.pkl", path_in_repo="tokenizer_sentiment.pkl", repo_id="Frenz/modelsent_test", repo_type="model")
hf.upload_file(path_or_fileobj="sentiment-int8.onnx", path_in_repo="sentiment-int8.onnx", repo_id="Frenz/modelsent_test", repo_type="model")



About to commit to the hub: 1 addition(s), 0 copie(s) and 0 deletion(s).
DEBUG:huggingface_hub.hf_api:About to commit to the hub: 1 addition(s), 0 copie(s) and 0 deletion(s).
Request 062becc2-fe91-4e5e-a03e-27d45bf661e6: POST https://huggingface.co/api/models/Frenz/modelsent_test/preupload/main (authenticated: True)
DEBUG:huggingface_hub.utils._http:Request 062becc2-fe91-4e5e-a03e-27d45bf661e6: POST https://huggingface.co/api/models/Frenz/modelsent_test/preupload/main (authenticated: True)
Request bbf0f6c7-b67b-48d6-b3d3-5b8ce6808758: POST https://huggingface.co/Frenz/modelsent_test.git/info/lfs/objects/batch (authenticated: True)
DEBUG:huggingface_hub.utils._http:Request bbf0f6c7-b67b-48d6-b3d3-5b8ce6808758: POST https://huggingface.co/Frenz/modelsent_test.git/info/lfs/objects/batch (authenticated: True)
Uploading 1 LFS file to the Hub
DEBUG:huggingface_hub._commit_api:Uploading 1 LFS file to the Hub


tokenizer_sentiment.pkl:   0%|          | 0.00/1.31M [00:00<?, ?B/s]

Request 6f0eaaeb-6ae4-48d7-98dd-ae0d71ea1ff5: PUT https://hf-hub-lfs-us-east-1.s3-accelerate.amazonaws.com/repos/b0/83/b083c11095994a6abc68b9126cec9b7709f2b3e4903521a234bb36c8a2ab2132/791bb18be7172ef3ae0024a45566ed1c9077273234bb840b4e69e17d31785ccf?X-Amz-Algorithm=AWS4-HMAC-SHA256&X-Amz-Content-Sha256=UNSIGNED-PAYLOAD&X-Amz-Credential=AKIA2JU7TKAQLC2QXPN7%2F20240616%2Fus-east-1%2Fs3%2Faws4_request&X-Amz-Date=20240616T111528Z&X-Amz-Expires=900&X-Amz-Signature=221dfa7dfc6e7d2c859f0b46f659434fa6792e68df62ca708a2afadecf22e8ca&X-Amz-SignedHeaders=host&x-amz-storage-class=INTELLIGENT_TIERING&x-id=PutObject (authenticated: False)
DEBUG:huggingface_hub.utils._http:Request 6f0eaaeb-6ae4-48d7-98dd-ae0d71ea1ff5: PUT https://hf-hub-lfs-us-east-1.s3-accelerate.amazonaws.com/repos/b0/83/b083c11095994a6abc68b9126cec9b7709f2b3e4903521a234bb36c8a2ab2132/791bb18be7172ef3ae0024a45566ed1c9077273234bb840b4e69e17d31785ccf?X-Amz-Algorithm=AWS4-HMAC-SHA256&X-Amz-Content-Sha256=UNSIGNED-PAYLOAD&X-Amz-Credentia

sentiment-int8.onnx:   0%|          | 0.00/40.6M [00:00<?, ?B/s]

Request ca2fed94-ca5d-497a-8fbd-7cd445bd51ed: PUT https://hf-hub-lfs-us-east-1.s3-accelerate.amazonaws.com/repos/b0/83/b083c11095994a6abc68b9126cec9b7709f2b3e4903521a234bb36c8a2ab2132/05c4d8eff73e78584b4a615fc4902af8d7272f79499fc72138c35081610b2317?X-Amz-Algorithm=AWS4-HMAC-SHA256&X-Amz-Content-Sha256=UNSIGNED-PAYLOAD&X-Amz-Credential=AKIA2JU7TKAQLC2QXPN7%2F20240616%2Fus-east-1%2Fs3%2Faws4_request&X-Amz-Date=20240616T111530Z&X-Amz-Expires=86400&X-Amz-Signature=837a375a040ac203d49070b60913d7aa725ed073b4b162c3d0bd6bf3cc5f39f3&X-Amz-SignedHeaders=host&partNumber=1&uploadId=yY7nC62m4SypUeLDG34DLjoMZoN8Hc43i6kzStYgCtdFm_e5.oN_dMbJFtmt5wk.gRKTM4uCpe885FO7T44h3ieFhZadlVJiNtIaV80ka6F_3CvmlpMgua.Lm0hVuYFy&x-id=UploadPart (authenticated: False)
DEBUG:huggingface_hub.utils._http:Request ca2fed94-ca5d-497a-8fbd-7cd445bd51ed: PUT https://hf-hub-lfs-us-east-1.s3-accelerate.amazonaws.com/repos/b0/83/b083c11095994a6abc68b9126cec9b7709f2b3e4903521a234bb36c8a2ab2132/05c4d8eff73e78584b4a615fc4902af8d7272

CommitInfo(commit_url='https://huggingface.co/Frenz/modelsent_test/commit/f80d2d85ffdea1eec46421ad671e27a40024bd3d', commit_message='Upload sentiment-int8.onnx with huggingface_hub', commit_description='', oid='f80d2d85ffdea1eec46421ad671e27a40024bd3d', pr_url=None, pr_revision=None, pr_num=None)

## From HF repository

### All you need are these libraries in your requrements.txt

- huggingface_hub
- onnxruntime
- joblib
- pandas

In [13]:
from huggingface_hub import hf_hub_download

model_path = hf_hub_download(repo_id="Frenz/modelsent_test", filename="sentiment-int8.onnx")
tokenizer_path = hf_hub_download(repo_id="Frenz/modelsent_test", filename="tokenizer_sentiment.pkl")

Request b7b8fd24-3963-47fe-91d2-92b0378f0668: HEAD https://huggingface.co/Frenz/modelsent_test/resolve/main/sentiment-int8.onnx (authenticated: True)
DEBUG:huggingface_hub.utils._http:Request b7b8fd24-3963-47fe-91d2-92b0378f0668: HEAD https://huggingface.co/Frenz/modelsent_test/resolve/main/sentiment-int8.onnx (authenticated: True)
Downloading 'sentiment-int8.onnx' to '/root/.cache/huggingface/hub/models--Frenz--modelsent_test/blobs/05c4d8eff73e78584b4a615fc4902af8d7272f79499fc72138c35081610b2317.incomplete'
INFO:huggingface_hub.file_download:Downloading 'sentiment-int8.onnx' to '/root/.cache/huggingface/hub/models--Frenz--modelsent_test/blobs/05c4d8eff73e78584b4a615fc4902af8d7272f79499fc72138c35081610b2317.incomplete'
Request ff1e9f10-703e-407e-a6c8-8457e381cbe7: GET https://cdn-lfs-us-1.huggingface.co/repos/b0/83/b083c11095994a6abc68b9126cec9b7709f2b3e4903521a234bb36c8a2ab2132/05c4d8eff73e78584b4a615fc4902af8d7272f79499fc72138c35081610b2317?response-content-disposition=inline%3B+file

sentiment-int8.onnx:   0%|          | 0.00/40.6M [00:00<?, ?B/s]

Download complete. Moving file to /root/.cache/huggingface/hub/models--Frenz--modelsent_test/blobs/05c4d8eff73e78584b4a615fc4902af8d7272f79499fc72138c35081610b2317
INFO:huggingface_hub.file_download:Download complete. Moving file to /root/.cache/huggingface/hub/models--Frenz--modelsent_test/blobs/05c4d8eff73e78584b4a615fc4902af8d7272f79499fc72138c35081610b2317
Creating pointer from ../../blobs/05c4d8eff73e78584b4a615fc4902af8d7272f79499fc72138c35081610b2317 to /root/.cache/huggingface/hub/models--Frenz--modelsent_test/snapshots/f80d2d85ffdea1eec46421ad671e27a40024bd3d/sentiment-int8.onnx
DEBUG:huggingface_hub.file_download:Creating pointer from ../../blobs/05c4d8eff73e78584b4a615fc4902af8d7272f79499fc72138c35081610b2317 to /root/.cache/huggingface/hub/models--Frenz--modelsent_test/snapshots/f80d2d85ffdea1eec46421ad671e27a40024bd3d/sentiment-int8.onnx
Request 6746d643-e20d-4ffd-a140-a94b44da6281: HEAD https://huggingface.co/Frenz/modelsent_test/resolve/main/tokenizer_sentiment.pkl (auth

tokenizer_sentiment.pkl:   0%|          | 0.00/1.31M [00:00<?, ?B/s]

Download complete. Moving file to /root/.cache/huggingface/hub/models--Frenz--modelsent_test/blobs/791bb18be7172ef3ae0024a45566ed1c9077273234bb840b4e69e17d31785ccf
INFO:huggingface_hub.file_download:Download complete. Moving file to /root/.cache/huggingface/hub/models--Frenz--modelsent_test/blobs/791bb18be7172ef3ae0024a45566ed1c9077273234bb840b4e69e17d31785ccf
Creating pointer from ../../blobs/791bb18be7172ef3ae0024a45566ed1c9077273234bb840b4e69e17d31785ccf to /root/.cache/huggingface/hub/models--Frenz--modelsent_test/snapshots/f80d2d85ffdea1eec46421ad671e27a40024bd3d/tokenizer_sentiment.pkl
DEBUG:huggingface_hub.file_download:Creating pointer from ../../blobs/791bb18be7172ef3ae0024a45566ed1c9077273234bb840b4e69e17d31785ccf to /root/.cache/huggingface/hub/models--Frenz--modelsent_test/snapshots/f80d2d85ffdea1eec46421ad671e27a40024bd3d/tokenizer_sentiment.pkl


In [14]:
import onnxruntime
import joblib
import numpy as np

tokenizer = joblib.load(tokenizer_path) # load tokenizer
onnx_model_path = model_path           # load model quantized int8
ort_session = onnxruntime.InferenceSession(onnx_model_path)

In [None]:
# import numpy as np
# def softmax(x):
#     exp_x = np.exp(x - np.max(x))
#     return exp_x / exp_x.sum(axis=-1, keepdims=True)

# def analyze_sentimentinference(row, ort_session, tokenizer):
#     inputs = tokenizer(row['text'], return_tensors="pt")
#     # Prepare input data for ONNX model
#     input_data = {
#                 "input_ids": inputs["input_ids"].numpy(),
#                 "attention_mask": inputs["attention_mask"].numpy(),
#                 "token_type_ids": inputs["token_type_ids"].numpy(),
#                 }
#     output = ort_session.run(None, input_data)
#     # Convert logits to probabilities using softmax
#     logits = np.array(output[0])
#     probabilities = softmax(logits)
#     row['sentiment'] = np.argmax(probabilities).item()
#     row['prob'] = np.max(probabilities).item()
#     return row

In [15]:
def analyze_sentimentinference(text, ort_session, tokenizer):
    inputs = tokenizer(text, return_tensors="pt")
    ort_inputs = {k: v.cpu().numpy() for k, v in inputs.items()}
    ort_outs = ort_session.run(None, ort_inputs)
    probabilities = np.exp(ort_outs[0][0]) / np.exp(ort_outs[0][0]).sum(-1, keepdims=True)
    sentiment = "Positive" if probabilities[1] > probabilities[0] else "Negative"

    return sentiment, probabilities[1], probabilities[0]

In [16]:
import pandas as pd
test = pd.DataFrame({'text': ['i love this product',
                              'i hate this product',
                              "i love this product, it's awesome!!"]}
                    )

# Apply sentiment analysis to each row in the DataFrame
test[['sentiment','prob Pos','prob Neg']] = test['text'].apply(lambda row: pd.Series(analyze_sentimentinference(row, ort_session,tokenizer))).round(2)
test


Unnamed: 0,text,sentiment,prob Pos,prob Neg
0,i love this product,Positive,1.0,0.0
1,i hate this product,Negative,0.01,0.99
2,"i love this product, it's awesome!!",Positive,1.0,0.0
