## Quantization + Onnx format ready for production from HF repository

<img src= "https://frenzy86.s3.eu-west-2.amazonaws.com/python/nlp/albert02.png" width=600>

In [1]:
!pip install onnx onnxruntime -q

[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m15.9/15.9 MB[0m [31m13.7 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m13.2/13.2 MB[0m [31m34.3 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m46.0/46.0 kB[0m [31m1.4 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m86.8/86.8 kB[0m [31m979.3 kB/s[0m eta [36m0:00:00[0m
[?25h

## Download_model from Savetensor

In [2]:
# Load model directly
from transformers import AutoTokenizer, AutoModelForSequenceClassification

tokenizer = AutoTokenizer.from_pretrained("Frenz/modelsent_test")
model = AutoModelForSequenceClassification.from_pretrained("Frenz/modelsent_test")

tokenizer_config.json:   0%|          | 0.00/1.25k [00:00<?, ?B/s]

spiece.model:   0%|          | 0.00/760k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/2.27M [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/286 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/1.02k [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/46.7M [00:00<?, ?B/s]

## Convert modell to Onnx

In [3]:
from pathlib import Path
import transformers
from transformers.onnx import FeaturesManager

model_kind, model_onnx_config = FeaturesManager.check_supported_model_or_raise(model)
onnx_config = model_onnx_config(model.config)

# export
transformers.onnx.export(
                        preprocessor=tokenizer,
                        model=model,
                        config=onnx_config,
                        opset=13,
                        output=Path("sentiment.onnx")
                        )

(['input_ids', 'attention_mask', 'token_type_ids'], ['last_hidden_state'])

## metterli nella function

In [4]:
#save the tokenizer
import joblib
joblib.dump(tokenizer, "tokenizer_sentiment.pkl")

['tokenizer_sentiment.pkl']

In [5]:
import onnxruntime
import joblib

tokenizer = joblib.load("tokenizer_sentiment.pkl") # load tokenizer
onnx_model_path = "sentiment.onnx"            # load model onnx
ort_session = onnxruntime.InferenceSession(onnx_model_path)

import numpy as np
def softmax(x):
    exp_x = np.exp(x - np.max(x))
    return exp_x / exp_x.sum(axis=-1, keepdims=True)

def analyze_sentimentinference(row, ort_session, tokenizer):
    inputs = tokenizer(row['text'], return_tensors="pt")
    # Prepare input data for ONNX model
    input_data = {
                "input_ids": inputs["input_ids"].numpy(),
                "attention_mask": inputs["attention_mask"].numpy(),
                "token_type_ids": inputs["token_type_ids"].numpy(),
                }
    output = ort_session.run(None, input_data)
    # Convert logits to probabilities using softmax
    logits = np.array(output[0])
    probabilities = softmax(logits)
    row['sentiment'] = np.argmax(probabilities).item()
    row['prob'] = np.max(probabilities).item()
    return row

In [6]:
import pandas as pd

test = pd.DataFrame({'text': ['i love this product',
                              'i hate this product'],
                     })

# Apply sentiment analysis to each row in the DataFrame
df_result = test.apply(lambda row: analyze_sentimentinference(row, ort_session, tokenizer), axis=1)
df_result

Unnamed: 0,text,sentiment,prob
0,i love this product,1,0.998622
1,i hate this product,0,0.986131


## Quantization 8-Int - quantization directly

In [7]:
from onnxruntime.quantization import quantize_dynamic, QuantType

quantize_dynamic("sentiment.onnx",      #44MB
                 "sentiment-int8.onnx", #38MB
                 weight_type=QuantType.QUInt8
                 )



In [None]:
import onnxruntime
import joblib

tokenizer = joblib.load("tokenizer_sentiment.pkl") # load tokenizer
onnx_model_path = "sentiment-int8.onnx"            # load model quantized int8

ort_session = onnxruntime.InferenceSession(onnx_model_path)

import numpy as np
def softmax(x):
    exp_x = np.exp(x - np.max(x))
    return exp_x / exp_x.sum(axis=-1, keepdims=True)

def analyze_sentimentinference(row, ort_session, tokenizer):
    inputs = tokenizer(row['text'], return_tensors="pt")
    # Prepare input data for ONNX model
    input_data = {
                "input_ids": inputs["input_ids"].numpy(),
                "attention_mask": inputs["attention_mask"].numpy(),
                "token_type_ids": inputs["token_type_ids"].numpy(),
                }
    output = ort_session.run(None, input_data)
    # Convert logits to probabilities using softmax
    logits = np.array(output[0])
    probabilities = softmax(logits)
    row['sentiment'] = np.argmax(probabilities).item()
    row['prob'] = np.max(probabilities).item()
    return row

In [None]:
test = pd.DataFrame({'text': ['i love this product',
                              'i hate this product',
                              "i love this product, it's awesome!!"]}
                    )

# Apply sentiment analysis to each row in the DataFrame
df_result = test.apply(lambda row: analyze_sentimentinference(row, ort_session,tokenizer), axis=1)
df_result

Unnamed: 0,text,sentiment,prob
0,i love this product,1,0.998751
1,i hate this product,0,0.987771
2,"i love this product, it's awesome!!",1,0.998371


 ## Put single file inside HF repository

In [None]:
## Upload tokenizer and onnx model to HF

In [None]:
!huggingface-cli login


    _|    _|  _|    _|    _|_|_|    _|_|_|  _|_|_|  _|      _|    _|_|_|      _|_|_|_|    _|_|      _|_|_|  _|_|_|_|
    _|    _|  _|    _|  _|        _|          _|    _|_|    _|  _|            _|        _|    _|  _|        _|
    _|_|_|_|  _|    _|  _|  _|_|  _|  _|_|    _|    _|  _|  _|  _|  _|_|      _|_|_|    _|_|_|_|  _|        _|_|_|
    _|    _|  _|    _|  _|    _|  _|    _|    _|    _|    _|_|  _|    _|      _|        _|    _|  _|        _|
    _|    _|    _|_|      _|_|_|    _|_|_|  _|_|_|  _|      _|    _|_|_|      _|        _|    _|    _|_|_|  _|_|_|_|

    To login, `huggingface_hub` requires a token generated from https://huggingface.co/settings/tokens .
Enter your token (input will not be visible): 
Add token as git credential? (Y/n) y
Token is valid (permission: write).
[1m[31mCannot authenticate through git-credential as no helper is defined on your machine.
You might have to re-authenticate when pushing to the Hugging Face Hub.
Run the following command in your ter

In [None]:
# !huggingface-cli upload Frenz/modelsent_test  sentiment-int8.onnx --commit-message "upload model *.onnx"  --repo-type model
# !huggingface-cli upload Frenz/modelsent_test  tokenizer_sentiment.pkl --commit-message "upload tokenizer *.pkl"  --repo-type model


In [None]:
import os
os.environ["HF_HUB_ENABLE_HF_TRANSFER"] = "1"

from huggingface_hub import HfApi, logging

logging.set_verbosity_debug()
hf = HfApi()
hf.upload_file(path_or_fileobj="tokenizer_sentiment.pkl", path_in_repo="tokenizer_sentiment.pkl", repo_id="Frenz/modelsent_test", repo_type="model")
hf.upload_file(path_or_fileobj="sentiment-int8.onnx", path_in_repo="sentiment-int8.onnx", repo_id="Frenz/modelsent_test", repo_type="model")

About to commit to the hub: 1 addition(s), 0 copie(s) and 0 deletion(s).
DEBUG:huggingface_hub.hf_api:About to commit to the hub: 1 addition(s), 0 copie(s) and 0 deletion(s).
Request 59bbce4d-0376-4601-8c32-3b2c207a46ba: POST https://huggingface.co/api/models/Frenz/modelsent_test/preupload/main (authenticated: True)
DEBUG:huggingface_hub.utils._http:Request 59bbce4d-0376-4601-8c32-3b2c207a46ba: POST https://huggingface.co/api/models/Frenz/modelsent_test/preupload/main (authenticated: True)
Request c643b73b-fcd6-411f-b508-13b18c1ff220: POST https://huggingface.co/Frenz/modelsent_test.git/info/lfs/objects/batch (authenticated: True)
DEBUG:huggingface_hub.utils._http:Request c643b73b-fcd6-411f-b508-13b18c1ff220: POST https://huggingface.co/Frenz/modelsent_test.git/info/lfs/objects/batch (authenticated: True)
Uploading 1 LFS file to the Hub
DEBUG:huggingface_hub._commit_api:Uploading 1 LFS file to the Hub


tokenizer_sentiment.pkl:   0%|          | 0.00/1.31M [00:00<?, ?B/s]

Request dbd78baf-61c4-4946-987a-0ee9f14c388a: PUT https://hf-hub-lfs-us-east-1.s3-accelerate.amazonaws.com/repos/b0/83/b083c11095994a6abc68b9126cec9b7709f2b3e4903521a234bb36c8a2ab2132/833a6a070977fb1a8de3c9e0ab566188f95f8f28acb504cbc78c45e83584a573?X-Amz-Algorithm=AWS4-HMAC-SHA256&X-Amz-Content-Sha256=UNSIGNED-PAYLOAD&X-Amz-Credential=AKIA2JU7TKAQLC2QXPN7%2F20240929%2Fus-east-1%2Fs3%2Faws4_request&X-Amz-Date=20240929T154723Z&X-Amz-Expires=900&X-Amz-Signature=1908b17e04948c758750fd2ea5bbb726664be790d0dec0e6995a4474c7b92ff0&X-Amz-SignedHeaders=host&x-amz-storage-class=INTELLIGENT_TIERING&x-id=PutObject (authenticated: False)
DEBUG:huggingface_hub.utils._http:Request dbd78baf-61c4-4946-987a-0ee9f14c388a: PUT https://hf-hub-lfs-us-east-1.s3-accelerate.amazonaws.com/repos/b0/83/b083c11095994a6abc68b9126cec9b7709f2b3e4903521a234bb36c8a2ab2132/833a6a070977fb1a8de3c9e0ab566188f95f8f28acb504cbc78c45e83584a573?X-Amz-Algorithm=AWS4-HMAC-SHA256&X-Amz-Content-Sha256=UNSIGNED-PAYLOAD&X-Amz-Credentia

CommitInfo(commit_url='https://huggingface.co/Frenz/modelsent_test/commit/20b14b3e87afcebd3416e974539e3933ce758688', commit_message='Upload sentiment-int8.onnx with huggingface_hub', commit_description='', oid='20b14b3e87afcebd3416e974539e3933ce758688', pr_url=None, pr_revision=None, pr_num=None)

## From HF repository

### All you need are these libraries in your requrements.txt

- huggingface_hub
- onnxruntime
- joblib
- pandas

In [None]:
from huggingface_hub import hf_hub_download

model_path = hf_hub_download(repo_id="Frenz/modelsent_test", filename="sentiment-int8.onnx")
tokenizer_path = hf_hub_download(repo_id="Frenz/modelsent_test", filename="tokenizer_sentiment.pkl")

Request b717e802-deb2-43f4-b965-21abf2dde770: HEAD https://huggingface.co/Frenz/modelsent_test/resolve/main/sentiment-int8.onnx (authenticated: True)
DEBUG:huggingface_hub.utils._http:Request b717e802-deb2-43f4-b965-21abf2dde770: HEAD https://huggingface.co/Frenz/modelsent_test/resolve/main/sentiment-int8.onnx (authenticated: True)
Downloading 'sentiment-int8.onnx' to '/root/.cache/huggingface/hub/models--Frenz--modelsent_test/blobs/05c4d8eff73e78584b4a615fc4902af8d7272f79499fc72138c35081610b2317.incomplete'
INFO:huggingface_hub.file_download:Downloading 'sentiment-int8.onnx' to '/root/.cache/huggingface/hub/models--Frenz--modelsent_test/blobs/05c4d8eff73e78584b4a615fc4902af8d7272f79499fc72138c35081610b2317.incomplete'
Request 28f68df9-bd85-4d60-859d-3c22e34cc3c0: GET https://cdn-lfs-us-1.hf.co/repos/b0/83/b083c11095994a6abc68b9126cec9b7709f2b3e4903521a234bb36c8a2ab2132/05c4d8eff73e78584b4a615fc4902af8d7272f79499fc72138c35081610b2317?response-content-disposition=inline%3B+filename*%3DU

sentiment-int8.onnx:   0%|          | 0.00/40.6M [00:00<?, ?B/s]

Download complete. Moving file to /root/.cache/huggingface/hub/models--Frenz--modelsent_test/blobs/05c4d8eff73e78584b4a615fc4902af8d7272f79499fc72138c35081610b2317
INFO:huggingface_hub.file_download:Download complete. Moving file to /root/.cache/huggingface/hub/models--Frenz--modelsent_test/blobs/05c4d8eff73e78584b4a615fc4902af8d7272f79499fc72138c35081610b2317
Creating pointer from ../../blobs/05c4d8eff73e78584b4a615fc4902af8d7272f79499fc72138c35081610b2317 to /root/.cache/huggingface/hub/models--Frenz--modelsent_test/snapshots/20b14b3e87afcebd3416e974539e3933ce758688/sentiment-int8.onnx
DEBUG:huggingface_hub.file_download:Creating pointer from ../../blobs/05c4d8eff73e78584b4a615fc4902af8d7272f79499fc72138c35081610b2317 to /root/.cache/huggingface/hub/models--Frenz--modelsent_test/snapshots/20b14b3e87afcebd3416e974539e3933ce758688/sentiment-int8.onnx
Request a0904b5e-1c02-41b1-be7c-9bfed3cbec35: HEAD https://huggingface.co/Frenz/modelsent_test/resolve/main/tokenizer_sentiment.pkl (auth

tokenizer_sentiment.pkl:   0%|          | 0.00/1.31M [00:00<?, ?B/s]

Download complete. Moving file to /root/.cache/huggingface/hub/models--Frenz--modelsent_test/blobs/833a6a070977fb1a8de3c9e0ab566188f95f8f28acb504cbc78c45e83584a573
INFO:huggingface_hub.file_download:Download complete. Moving file to /root/.cache/huggingface/hub/models--Frenz--modelsent_test/blobs/833a6a070977fb1a8de3c9e0ab566188f95f8f28acb504cbc78c45e83584a573
Creating pointer from ../../blobs/833a6a070977fb1a8de3c9e0ab566188f95f8f28acb504cbc78c45e83584a573 to /root/.cache/huggingface/hub/models--Frenz--modelsent_test/snapshots/20b14b3e87afcebd3416e974539e3933ce758688/tokenizer_sentiment.pkl
DEBUG:huggingface_hub.file_download:Creating pointer from ../../blobs/833a6a070977fb1a8de3c9e0ab566188f95f8f28acb504cbc78c45e83584a573 to /root/.cache/huggingface/hub/models--Frenz--modelsent_test/snapshots/20b14b3e87afcebd3416e974539e3933ce758688/tokenizer_sentiment.pkl


In [None]:
import onnxruntime
import joblib
import numpy as np

tokenizer = joblib.load(tokenizer_path) # load tokenizer
onnx_model_path = model_path           # load model quantized int8
ort_session = onnxruntime.InferenceSession(onnx_model_path)

In [None]:
# import numpy as np
# def softmax(x):
#     exp_x = np.exp(x - np.max(x))
#     return exp_x / exp_x.sum(axis=-1, keepdims=True)

# def analyze_sentimentinference(row, ort_session, tokenizer):
#     inputs = tokenizer(row['text'], return_tensors="pt")
#     # Prepare input data for ONNX model
#     input_data = {
#                 "input_ids": inputs["input_ids"].numpy(),
#                 "attention_mask": inputs["attention_mask"].numpy(),
#                 "token_type_ids": inputs["token_type_ids"].numpy(),
#                 }
#     output = ort_session.run(None, input_data)
#     # Convert logits to probabilities using softmax
#     logits = np.array(output[0])
#     probabilities = softmax(logits)
#     row['sentiment'] = np.argmax(probabilities).item()
#     row['prob'] = np.max(probabilities).item()
#     return row

In [None]:
def analyze_sentimentinference(text, ort_session, tokenizer):
    inputs = tokenizer(text, return_tensors="pt")
    ort_inputs = {k: v.cpu().numpy() for k, v in inputs.items()}
    ort_outs = ort_session.run(None, ort_inputs)
    probabilities = np.exp(ort_outs[0][0]) / np.exp(ort_outs[0][0]).sum(-1, keepdims=True)
    sentiment = "Positive" if probabilities[1] > probabilities[0] else "Negative"

    return sentiment, probabilities[1], probabilities[0]

In [None]:
import pandas as pd
test = pd.DataFrame({'text': ['i love this product',
                              'i hate this product',
                              "i love this product, it's awesome!!"]}
                    )

# Apply sentiment analysis to each row in the DataFrame
test[['sentiment','prob Pos','prob Neg']] = test['text'].apply(lambda row: pd.Series(analyze_sentimentinference(row, ort_session,tokenizer))).round(2)
test


Unnamed: 0,text,sentiment,prob Pos,prob Neg
0,i love this product,Positive,1.0,0.0
1,i hate this product,Negative,0.01,0.99
2,"i love this product, it's awesome!!",Positive,1.0,0.0
