In [1]:
# Scratch cell: prints a fixed string (presumably a quick check that the kernel runs cells).
print("hey")

hey


In [2]:
import torch
# The cell's displayed value reports whether PyTorch can use CUDA in this environment.
torch.cuda.is_available()

True

In [1]:
import torch
from transformers import BertTokenizer, BertForSequenceClassification

# Directory holding the saved model and tokenizer files.
model_path = "./model"
model = BertForSequenceClassification.from_pretrained(model_path, num_labels=2)
tokenizer = BertTokenizer.from_pretrained(model_path)

# Put the model in evaluation mode before it is exported.
model.eval()

# Tensor names exposed by the exported graph.
input_names = ["input_ids", "attention_mask"]
output_names = ["output"]

# A short sample sentence supplies the example tensors handed to the exporter,
# in the same order as `input_names`.
dummy_input = tokenizer("This is a dummy input", return_tensors="pt")
example_args = tuple(dummy_input[name] for name in input_names)

# Inputs may vary in batch size and sequence length; the output only in batch size.
dynamic_axes = {name: {0: "batch_size", 1: "sequence_length"} for name in input_names}
dynamic_axes[output_names[0]] = {0: "batch_size"}

# Write the graph to model.onnx using opset version 14.
torch.onnx.export(
    model,
    example_args,
    "model.onnx",
    input_names=input_names,
    output_names=output_names,
    opset_version=14,
    dynamic_axes=dynamic_axes,
)

print("Model has been successfully converted to ONNX format.")


  from .autonotebook import tqdm as notebook_tqdm


Model has been successfully converted to ONNX format.


In [12]:
from fastapi import FastAPI
from pydantic import BaseModel
import onnxruntime as ort
import numpy as np
from transformers import BertTokenizer

# Locations of the exported ONNX graph and of the tokenizer files.
tokenizer_path = "./model"
model_path = "./model.onnx"

# FastAPI application that the endpoint decorators below register on.
app = FastAPI()

# Tokenizer used to turn request text into model inputs.
tokenizer = BertTokenizer.from_pretrained(tokenizer_path)

# ONNX Runtime session that executes the exported graph.
ort_session = ort.InferenceSession(model_path)

# Define the input schema: request body accepted by the /predict endpoint.
class TextInput(BaseModel):
    # Text to classify; the endpoint handler passes it to predict_onnx.
    text: str

def preprocess(text: str):
    """Tokenize ``text`` into the two arrays fed to the ONNX session.

    Args:
        text: Raw input string to classify.

    Returns:
        A tuple ``(input_ids, attention_mask)`` of int64 NumPy arrays.
    """
    # Tokenize the input text
    inputs = tokenizer(text, return_tensors="np", padding=True, truncation=True)
    # The graph was exported from torch integer (int64) tensors. With
    # return_tensors="np" the arrays take NumPy's platform default integer,
    # which is 32-bit on some platforms (e.g. Windows with NumPy < 2), and
    # ONNX Runtime rejects a dtype mismatch. copy=False avoids a copy when the
    # arrays are already int64.
    return (
        inputs["input_ids"].astype(np.int64, copy=False),
        inputs["attention_mask"].astype(np.int64, copy=False),
    )

def predict_onnx(text: str):
    """Run the ONNX session on ``text`` and return its first output.

    Args:
        text: Raw input string; tokenized via ``preprocess``.

    Returns:
        The first element of the list returned by ``ort_session.run``.
    """
    ids, mask = preprocess(text)
    feeds = {"input_ids": ids, "attention_mask": mask}
    # Passing None as the output list asks the session for all of its outputs.
    return ort_session.run(None, feeds)[0]

# POST /predict: classify the text carried in the request body.
@app.post("/predict")
def predict(input: TextInput):
    """Return the index of the highest-scoring class for ``input.text``.

    Args:
        input: Request body holding the text to classify.

    Returns:
        A dict ``{"prediction": <int>}``.
    """
    scores = predict_onnx(input.text)
    # Arg-max along axis 1; .item() unwraps the single resulting index.
    best = np.argmax(scores, axis=1)
    return {"prediction": best.item()}


In [14]:
import onnx
from onnxruntime.transformers import optimizer

# Source and destination files for the optimization pass.
onnx_model_path = "model.onnx"
optimized_model_path = "model2.onnx"

# Read the exported graph, run the transformer optimizer with the BERT model
# type, and write the result to the destination file.
onnx_model = onnx.load(onnx_model_path)
optimized_model = optimizer.optimize_model(onnx_model, model_type="bert")
optimized_model.save_model_to_file(optimized_model_path)

print("Model has been further optimized and saved.")


Model has been further optimized and saved.


In [16]:
from transformers import BertTokenizer, BertForSequenceClassification
import torch

# Define the path to your local model directory
model_dir = "model/"

# Load the tokenizer and model from the local directory
tokenizer = BertTokenizer.from_pretrained(model_dir)
model = BertForSequenceClassification.from_pretrained(model_dir)
# Make evaluation mode explicit so this check does not depend on the loader's default.
model.eval()

# Test the model on a single example sentence
text = "This is a great movie!"
inputs = tokenizer(text, return_tensors="pt")
# Inference only: no_grad() skips autograd bookkeeping for the forward pass.
with torch.no_grad():
    outputs = model(**inputs)
logits = outputs.logits
# Reduce over the class dimension explicitly instead of over the flattened
# tensor; with one input sentence the result is a single class index.
predicted_class_id = torch.argmax(logits, dim=-1).item()

print(f"Predicted class: {predicted_class_id}")


Predicted class: 1


In [2]:
# Convert the PyTorch model to ONNX
import torch
from transformers import BertForSequenceClassification, BertTokenizer

# Directory holding the saved model and tokenizer files.
model_path = "./model"
tokenizer = BertTokenizer.from_pretrained(model_path)
model = BertForSequenceClassification.from_pretrained(model_path, num_labels=2)

# Export in evaluation mode, as the other export cell in this notebook does.
model.eval()

# Dummy input for tracing the model
dummy_input = tokenizer("This is a dummy input", return_tensors="pt")

# Convert the model to ONNX.
# Axis 1 (sequence length) must be declared dynamic as well as axis 0:
# otherwise the token count of the dummy sentence is baked into the graph and
# inputs of any other length fail at inference time.
# opset_version=14 matches the other export cell in this notebook.
torch.onnx.export(model,
                  (dummy_input['input_ids'], dummy_input['attention_mask']),
                  "model.onnx",
                  input_names=['input_ids', 'attention_mask'],
                  output_names=['output'],
                  opset_version=14,
                  dynamic_axes={'input_ids': {0: 'batch_size', 1: 'sequence_length'},
                                'attention_mask': {0: 'batch_size', 1: 'sequence_length'},
                                'output': {0: 'batch_size'}})


In [None]:
#backup onnx code 
from fastapi import FastAPI
from pydantic import BaseModel
from transformers import BertForSequenceClassification, BertTokenizer, TextClassificationPipeline, BertTokenizerFast
import onnxruntime as ort
import numpy as np


# Locations of the tokenizer files and of the exported ONNX graph.
local_model_path = "./model"
onnx_model_path = "model.onnx"

# FastAPI application that the endpoint decorator below registers on.
app = FastAPI()

# Tokenizer and ONNX Runtime session used by the endpoint.
tokenizer = BertTokenizerFast.from_pretrained(local_model_path)
ort_session = ort.InferenceSession(onnx_model_path)

# PyTorch alternative, kept commented out for reference:
# tokenizer = BertTokenizerFast.from_pretrained(model_path)
# model = BertForSequenceClassification.from_pretrained(model_path, num_labels=2)
# pipeline = TextClassificationPipeline(model=model, tokenizer=tokenizer)

# Request body accepted by the /classify/ endpoint.
class TextInput(BaseModel):
    # Text to classify; tokenized by classify_text.
    text: str

def to_numpy(tensor):
    """Return ``tensor`` as a NumPy array on the CPU.

    A tensor that requires gradients is detached first; any other tensor is
    converted directly.
    """
    if tensor.requires_grad:
        tensor = tensor.detach()
    return tensor.cpu().numpy()

#for running with pytorch
# @app.post("/classify/")
# def classify_text(input: TextInput):
#     result = pipeline(input.text)
#     return {"label": result[0]['label'], "score": result[0]['score']}


#for onnx
@app.post("/classify/")
def classify_text(input: TextInput):
    """Classify ``input.text`` with the ONNX session.

    Args:
        input: Request body holding the text to classify.

    Returns:
        A dict ``{"label": <int>, "score": <float>}`` where ``label`` is the
        arg-max index of the session's first output along axis 1 and ``score``
        is the corresponding maximum value.
    """
    # truncation=True mirrors the other serving cell's preprocess() and keeps
    # over-long texts within the tokenizer's configured maximum length.
    inputs = tokenizer(input.text, return_tensors="pt", truncation=True)
    # The tokenizer may return tensors (e.g. token_type_ids) that the exported
    # graph does not declare as inputs, and ONNX Runtime rejects unknown feed
    # names. Keep only what the session expects.
    expected_names = {node.name for node in ort_session.get_inputs()}
    ort_inputs = {k: to_numpy(v) for k, v in inputs.items() if k in expected_names}
    ort_outputs = ort_session.run(None, ort_inputs)
    output = ort_outputs[0]
    label = np.argmax(output, axis=1)[0]
    # NOTE(review): this is the raw maximum of the model output, not a softmax
    # probability like the score in the commented-out pipeline variant.
    # Apply softmax here if callers expect a value in [0, 1].
    score = np.max(output, axis=1)[0]
    return {"label": int(label), "score": float(score)}

# When executed as the main module, serve the app with uvicorn.
if __name__ == "__main__":
    import uvicorn

    # Listen on every network interface, port 8000.
    server_options = {"host": "0.0.0.0", "port": 8000}
    uvicorn.run(app, **server_options)
