In [22]:
%%writefile ./predictor/index_to_name.json

{
    "0": "NEGATIVE",
    "1": "NEUTRAL",
    "2": "POSITIVE"
}


Overwriting ./predictor/index_to_name.json


In [23]:
%%writefile predictor/custom_handler.py
import sys
import os
sys.path.append(os.path.dirname(os.path.abspath(__file__)))

import os
import json
import logging
import torch
from transformers import BertTokenizer
from ts.torch_handler.base_handler import BaseHandler
from bert_model import initialize_bert_model

logger = logging.getLogger(__name__)

class TransformersClassifierHandler(BaseHandler):
    """
    A custom handler for PyTorch Serve to handle BERT-based models
    with additional layers and `.pth`-saved state_dict.
    """
    def __init__(self):
        super(TransformersClassifierHandler, self).__init__()
        self.initialized = False

    def initialize(self, ctx):
        """
        Initialize the model, tokenizer, and label mapping for inference.
        """
        self.manifest = ctx.manifest

        properties = ctx.system_properties
        model_dir = properties.get("model_dir")
        self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

        # Load the model architecture
        num_labels = 3  # Adjust based on your dataset (e.g., Negative, Neutral, Positive)
        self.model = initialize_bert_model(num_labels=num_labels).to(self.device)

        # Load the model state_dict
        serialized_file = self.manifest["model"]["serializedFile"]
        model_path = os.path.join(model_dir, serialized_file)
        if not os.path.isfile(model_path):
            raise RuntimeError(f"Missing the model file: {model_path}")
        self.model.load_state_dict(torch.load(model_path, map_location=self.device))
        self.model.eval()
        logger.info(f"Model loaded from {model_path}")

        # Load the tokenizer
        self.tokenizer = BertTokenizer.from_pretrained("bert-base-uncased")

        # Load the label mapping
        mapping_file_path = os.path.join(model_dir, "index_to_name.json")
        if os.path.isfile(mapping_file_path):
            with open(mapping_file_path, "r") as f:
                self.mapping = json.load(f)
        else:
            logger.warning("index_to_name.json not found. Using default label mapping.")
            self.mapping = {"0": "Negative", "1": "Neutral", "2": "Positive"}

        self.initialized = True

        

    # def preprocess(self, data):
    #     """
    #     Preprocess input data by tokenizing the text and creating input tensors.
    #     """
    #     input_text = data[0].get("data") or data[0].get("body")
    #     if isinstance(input_text, bytes):
    #         input_text = input_text.decode("utf-8")
    #     logger.info(f"Received input: {input_text}")

    #     # Tokenize the input text
    #     inputs = self.tokenizer(
    #         input_text,
    #         padding="max_length",
    #         max_length=128,
    #         truncation=True,
    #         return_tensors="pt"
    #     )
    #     return {
    #         "input_ids": inputs["input_ids"].to(self.device),
    #         "attention_mask": inputs["attention_mask"].to(self.device)
    #     }

    def preprocess(self, data):
        # Ensure 'instances' key exists
        if "instances" not in data:
            raise ValueError("Invalid input format. Expected a JSON object with 'instances' key.")
        
        # Extract instances
        instances = data["instances"]

        # Process each instance
        texts = [instance["text"] for instance in instances]
        additional_features = [
            [instance["price"], 
            float(instance["price_missing"]), 
            instance["helpful_vote"], 
            float(instance["verified_purchase"])]
            for instance in instances
        ]

        # Tokenize text data
        tokenized = self.tokenizer(
            texts, 
            padding="max_length", 
            truncation=True, 
            max_length=128, 
            return_tensors="pt"
        )

        return tokenized, torch.tensor(additional_features, dtype=torch.float32)

    # def inference(self, inputs):
    #     """
    #     Perform inference using the loaded model and processed inputs.
    #     """
    #     with torch.no_grad():
    #         outputs = self.model(
    #             input_ids=inputs["input_ids"],
    #             attention_mask=inputs["attention_mask"]
    #         )
    #         logits = outputs["logits"]
    #         prediction = torch.argmax(logits, dim=1).item()

    #     # Map prediction to label
    #     predicted_label = self.mapping.get(str(prediction), "Unknown")
    #     logger.info(f"Prediction: {predicted_label}")
    #     return [predicted_label]

    def inference(self, inputs):
        """
        Perform inference using the loaded model and processed inputs.
        """
        with torch.no_grad():
            outputs = self.model(
                input_ids=inputs["input_ids"],
                attention_mask=inputs["attention_mask"],
                additional_features=inputs["additional_features"]
            )
            logits = outputs["logits"]
            prediction = torch.argmax(logits, dim=1).item()

        # Map prediction to label
        predicted_label = self.mapping.get(str(prediction), "Unknown")
        return [predicted_label]


    def postprocess(self, inference_output):
        """
        Post-process the output to be returned as a JSON response.
        """
        return inference_output


Overwriting predictor/custom_handler.py


In [24]:
%%writefile predictor/custom_handler.py

import sys
import os
sys.path.append(os.path.dirname(os.path.abspath(__file__)))

import json
import logging
import torch
from transformers import BertTokenizer
from ts.torch_handler.base_handler import BaseHandler
from bert_model import initialize_bert_model

logger = logging.getLogger(__name__)

class TransformersClassifierHandler(BaseHandler):
    """
    A custom handler for PyTorch Serve to handle BERT-based models
    with additional layers and `.pth`-saved state_dict.
    """
    def __init__(self):
        super(TransformersClassifierHandler, self).__init__()
        self.initialized = False

    def initialize(self, ctx):
        """
        Initialize the model, tokenizer, and label mapping for inference.
        """
        self.manifest = ctx.manifest
        properties = ctx.system_properties
        model_dir = properties.get("model_dir")
        self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

        # Load the model architecture
        num_labels = 3  # Adjust based on your dataset
        self.model = initialize_bert_model(num_labels=num_labels).to(self.device)

        # Load the model state_dict
        serialized_file = self.manifest["model"]["serializedFile"]
        model_path = os.path.join(model_dir, serialized_file)
        if not os.path.isfile(model_path):
            raise RuntimeError(f"Missing the model file: {model_path}")
        self.model.load_state_dict(torch.load(model_path, map_location=self.device))
        self.model.eval()
        logger.info(f"Model loaded from {model_path}")

        # Load the tokenizer
        self.tokenizer = BertTokenizer.from_pretrained("bert-base-uncased")

        # Load the label mapping
        mapping_file_path = os.path.join(model_dir, "index_to_name.json")
        if os.path.isfile(mapping_file_path):
            with open(mapping_file_path, "r") as f:
                self.mapping = json.load(f)
        else:
            logger.warning("index_to_name.json not found. Using default label mapping.")
            self.mapping = {"0": "Negative", "1": "Neutral", "2": "Positive"}

        self.initialized = True

    def preprocess(self, data):
        """
        Preprocessing input request by tokenizing and extracting additional features.
        Extend with your own preprocessing steps as needed.
        """
        try:
            # Log the received data
            logger.info(f"Received input data: {data}")
            logger.info(f"Data type: {type(data)}")

            # # Handle byte inputs or TorchServe-wrapped inputs
            # if isinstance(data, (bytes, bytearray)):
            #     data = json.loads(data.decode("utf-8"))
            # elif isinstance(data, list):
            #     if isinstance(data[0], (bytes, bytearray)):
            #         data = json.loads(data[0].decode("utf-8"))
            #     else:
            # data = data[0]

            # # Ensure data contains "instances" key
            # if not isinstance(data, dict):
            #     raise ValueError("Invalid input format. Expected a JSON object.")
            # if "instances" not in data:
            #     raise ValueError("Invalid input format. Expected a JSON object with 'instances' key.")

            # Extract instances
            instances = data#.keys()
            logger.info(f"Parsed instances: {instances}")
            logger.info(f"Parsed instances: {instances}")

            texts = []
            additional_features = []
            
            # Extract and validate data from each instance
            for instance in instances:
                # if not all(key in instance for key in ["text", "price", "price_missing", "helpful_vote", "verified_purchase"]):
                #     raise ValueError(f"Invalid instance format: {instance}")
                
                texts.append(instance["text"])
                additional_features.append([
                    float(instance["price"]),
                    float(instance["price_missing"]),
                    float(instance["helpful_vote"]),
                    float(instance["verified_purchase"])
                ])

            # Log extracted inputs
            logger.info(f"Extracted texts: {texts}")
            logger.info(f"Extracted additional features: {additional_features}")

            # Tokenize text inputs
            tokenized = self.tokenizer(
                texts,
                padding="max_length",
                truncation=True,
                max_length=128,
                return_tensors="pt"
            )
            logger.info(f"Tokenized input_ids: {tokenized['input_ids']}")
            logger.info(f"Tokenized attention_mask: {tokenized['attention_mask']}")

            # Return the processed inputs
            return {
                "input_ids": tokenized["input_ids"].to(self.device),
                "attention_mask": tokenized["attention_mask"].to(self.device),
                "additional_features": torch.tensor(additional_features, dtype=torch.float32).to(self.device)
            }
        except Exception as e:
            logger.error(f"Preprocessing failed: {e}")
            raise ValueError(f"Error during preprocessing: {e}")

    def inference(self, inputs):
        """
        Perform inference using the loaded model and processed inputs.
        """
        try:
            logger.info(f"Inference inputs: {inputs}")
            with torch.no_grad():
                outputs = self.model(
                    input_ids=inputs["input_ids"],
                    attention_mask=inputs["attention_mask"],
                    additional_features=inputs["additional_features"]
                )
                logits = outputs["logits"]
                predictions = torch.argmax(logits, dim=1).tolist()

            logger.info(f"Inference outputs (logits): {logits}")
            logger.info(f"Predicted labels: {predictions}")
            logger.info(self.mapping,[(self.mapping.get(str(pred), "Unknown"),pred) for pred in predictions])
            return [self.mapping.get(str(pred), "Unknown") for pred in predictions]
        except Exception as e:
            logger.error(f"Inference failed: {e}")
            raise ValueError(f"Error during inference: {e}")

    def postprocess(self, inference_output):
        """
        Post-processes the model output for returning to the client.
        """
        logger.info(f"Postprocessing output: {inference_output}")
        # return {"predictions": inference_output}
        return inference_output



Overwriting predictor/custom_handler.py


In [25]:
# %%bash -s $APP_NAME

# APP_NAME=$1

# cat << EOF > ./predictor/Dockerfile

# FROM pytorch/torchserve:latest-cpu

# # Install dependencies
# RUN python3 -m pip install --upgrade pip
# RUN pip3 install transformers

# # Switch to the model-server user
# USER model-server

# # Set the model directory path
# WORKDIR /home/model-server/

# # Copy model artifacts, custom handler, and utilities
# COPY ./bert-sent-model /home/model-server/bert-sent-model/
# COPY ./custom_handler.py /home/model-server/custom_handler.py
# COPY ./data_loader.py /home/model-server/data_loader.py
# COPY ./bert_model.py /home/model-server/bert_model.py

# COPY ./utils /home/model-server/utils/
# COPY ./index_to_name.json /home/model-server/

# # Verify the contents of the server directory
# RUN ls -l /home/model-server

# # Switch to root user for configuration
# USER root

# # Create TorchServe configuration file
# RUN printf "\nservice_envelope=json" >> /home/model-server/config.properties
# RUN printf "\ninference_address=http://0.0.0.0:7080" >> /home/model-server/config.properties
# RUN printf "\nmanagement_address=http://0.0.0.0:7081" >> /home/model-server/config.properties
# RUN printf "\ndisable_token_authorization=true" >> /home/model-server/config.properties

# # Change permissions for the model-server user
# RUN chown -R model-server:model-server /home/model-server

# # Switch back to the model-server user
# USER model-server

# # Expose health and prediction listener ports from the image
# EXPOSE 7080
# EXPOSE 7081

# # Create the TorchServe model archive
# RUN torch-model-archiver -f \
#     --model-name bert_classifier \
#     --version 1.0 \
#     --serialized-file /home/model-server/bert-sent-model/final_model.pth \
#     --handler /home/model-server/custom_handler.py \
#     --extra-files "/home/model-server/bert_model.py,/home/model-server/data_loader.py,/home/model-server/utils/roberta_model.py,/home/model-server/index_to_name.json" \
#     --export-path /home/model-server/model-store

# # Start TorchServe
# CMD ["torchserve", \
#      "--start", \
#      "--ts-config=/home/model-server/config.properties", \
#      "--models", \
#      "bert_classifier=bert_classifier.mar", \
#      "--model-store", \
#      "/home/model-server/model-store"]
# EOF

# echo "Writing ./predictor/Dockerfile"

In [26]:
PROJECT_ID = "amazonreviewssentimentanalysis"  # <---CHANGE THIS TO YOUR PROJECT
APP_NAME = "review_sentiment_bert_model"
BUCKET_NAME = "gs://arsa_model_deployment_uscentral"
REGION = 'us-central1'

In [27]:
%%bash -s $APP_NAME

APP_NAME=$1

cat << EOF > ./predictor/Dockerfile

FROM pytorch/torchserve:latest-cpu

# Install dependencies
RUN python3 -m pip install --upgrade pip
RUN pip3 install transformers

# Switch to the model-server user
USER model-server

# Set the model directory path
WORKDIR /home/model-server/

# Copy model artifacts, custom handler, and utilities
COPY ./bert-sent-model /home/model-server/bert-sent-model/
COPY ./custom_handler.py /home/model-server/custom_handler.py
COPY ./data_loader.py /home/model-server/data_loader.py
COPY ./bert_model.py /home/model-server/bert_model.py
COPY ./utils /home/model-server/utils/
COPY ./index_to_name.json /home/model-server/

# Verify the contents of the server directory
RUN ls -l /home/model-server

# Switch to root user for configuration
USER root

# Create TorchServe configuration file
RUN printf "\nservice_envelope=json" >> /home/model-server/config.properties
RUN printf "\ninference_address=http://0.0.0.0:7080" >> /home/model-server/config.properties
RUN printf "\nmanagement_address=http://0.0.0.0:7081" >> /home/model-server/config.properties
RUN printf "\ndisable_token_authorization=true" >> /home/model-server/config.properties

# Change permissions for the model-server user
RUN chown -R model-server:model-server /home/model-server

# Switch back to the model-server user
USER model-server

# Expose health and prediction listener ports from the image
EXPOSE 7080
EXPOSE 7081

# Create the TorchServe model archive
RUN torch-model-archiver -f \
    --model-name=$APP_NAME \
    --version 1.0 \
    --serialized-file /home/model-server/bert-sent-model/final_model.pth \
    --handler /home/model-server/custom_handler.py \
    --extra-files "/home/model-server/bert_model.py,/home/model-server/data_loader.py,/home/model-server/utils/roberta_model.py,/home/model-server/index_to_name.json" \
    --export-path /home/model-server/model-store

# Start TorchServe
CMD ["torchserve", \
     "--start", \
     "--ts-config=/home/model-server/config.properties", \
     "--models", \
     "$APP_NAME=$APP_NAME.mar", \
     "--model-store", \
     "/home/model-server/model-store"]
EOF

echo "Writing ./predictor/Dockerfile"

Writing ./predictor/Dockerfile


In [28]:


CUSTOM_PREDICTOR_IMAGE_URI = f"gcr.io/{PROJECT_ID}/pytorch_predict_{APP_NAME}"
print(f"CUSTOM_PREDICTOR_IMAGE_URI = {CUSTOM_PREDICTOR_IMAGE_URI}")

CUSTOM_PREDICTOR_IMAGE_URI = gcr.io/amazonreviewssentimentanalysis/pytorch_predict_review_sentiment_bert_model


In [29]:
!docker build \
  --tag=$CUSTOM_PREDICTOR_IMAGE_URI \
  ./predictor

[1A[1B[0G[?25l[+] Building 0.0s (0/1)                                    docker:desktop-linux
[?25h[1A[0G[?25l[+] Building 0.2s (1/2)                                    docker:desktop-linux
[34m => [internal] load build definition from Dockerfile                       0.0s
[0m[34m => => transferring dockerfile: 2.20kB                                     0.0s
[0m => [internal] load metadata for docker.io/pytorch/torchserve:latest-cpu   0.2s
[?25h[1A[1A[1A[1A[0G[?25l[+] Building 0.3s (1/2)                                    docker:desktop-linux
[34m => [internal] load build definition from Dockerfile                       0.0s
[0m[34m => => transferring dockerfile: 2.20kB                                     0.0s
[0m => [internal] load metadata for docker.io/pytorch/torchserve:latest-cpu   0.3s
[?25h[1A[1A[1A[1A[0G[?25l[+] Building 0.5s (1/2)                                    docker:desktop-linux
[34m => [internal] load build definition from Dockerfile     

In [30]:
!docker stop local_bert_classifier
!docker run -t -d --rm -p 7080:7080 --name=local_bert_classifier $CUSTOM_PREDICTOR_IMAGE_URI
!sleep 20

local_bert_classifier
887b83bfcb8000ae61bf0ebb618928b513524ec51051044242b4de8925008e18


In [31]:
!curl http://localhost:7080/ping


{
  "status": "Healthy"
}


In [32]:
# !curl -X POST \
#      -H "Content-Type: application/json" \
#      -d '{
#          "instances": [
#              {
#                  "text": "The product was excellent and exceeded my expectations.",
#                  "title": "Excellent Quality",
#                  "price": 49.99,
#                  "price_missing": false,
#                  "helpful_vote": 15,
#                  "verified_purchase": true
#              }
#          ]
#         }' \
#      http://localhost:7080/predictions/bert_classifier


In [33]:
%%writefile ./predictor/instances.json

{
    "instances": [
        {
            "text": "The product was excellent and exceeded my expectations.",
            "title": "Excellent Quality",
            "price": 49.99,
            "price_missing": false,
            "helpful_vote": 15,
            "verified_purchase": true
        },
        {
            "text": "The product was excellent and exceeded my expectations.",
            "title": "Excellent Quality",
            "price": 49.99,
            "price_missing": false,
            "helpful_vote": 15,
            "verified_purchase": true
        }

    ]
}


Overwriting ./predictor/instances.json


In [34]:
instances = [{'text': 'The product was XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXmy expectations.', 'title': 'Excellent Quality', 'price': 49.99, 'price_missing': False, 'helpful_vote': 15, 'verified_purchase': True}]

In [35]:
texts = []
additional_features = []

# Extract and validate data from each instance
for instance in instances:
    # if not all(key in instance for key in ["text", "price", "price_missing", "helpful_vote", "verified_purchase"]):
    #     raise ValueError(f"Invalid instance format: {instance}")
    print(instance["text"])
    texts.append(instance["text"])
    additional_features.append([
        float(instance["price"]),
        float(instance["price_missing"]),
        float(instance["helpful_vote"]),
        float(instance["verified_purchase"])
    ])


The product was XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXmy expectations.


In [36]:
additional_features, texts

([[49.99, 0.0, 15.0, 1.0]],
 ['The product was XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXmy expectations.'])

### Local Test with docker service running:

saves a instances.json

In [None]:
%%writefile ./predictor/instances.json

{
    "instances": [
        {
            "text": "The product was excellent and exceeded my expectations.",
            "title": "Excellent Quality",
            "price": 49.99,
            "price_missing": false,
            "helpful_vote": 15,
            "verified_purchase": true
        },
        {
            "text": "The product was excellent and exceeded my expectations.",
            "title": "Excellent Quality",
            "price": 49.99,
            "price_missing": false,
            "helpful_vote": 15,
            "verified_purchase": true
        }

    ]
}


In [None]:
# docker pull that image
# docker build it with a tag once service up: test health

In [None]:
#helth check
!curl http://localhost:7080/ping


In [None]:
#submit 2 instances
!curl -X POST \
    -H "Content-Type: application/json" \
    -d @./predictor/instances.json \
    http://localhost:7080/predictions/{MODEL_NAME}


{"predictions": ["POSITIVE", "POSITIVE"]}

In [42]:
import json

data = '''{
    "instances": [
        {
            "text": "The product was excellent and exceeded my expectations.",
            "title": "Excellent Quality",
            "price": 49.99,
            "price_missing": false,
            "helpful_vote": 15,
            "verified_purchase": true
        }
    ]
}'''

try:
    json.loads(data)
    print("JSON is valid")
except json.JSONDecodeError as e:
    print(f"JSON is invalid: {e}")


JSON is valid


### Deploy

In [43]:
CUSTOM_PREDICTOR_IMAGE_URI

'gcr.io/amazonreviewssentimentanalysis/pytorch_predict_review_sentiment_bert_model'

In [44]:
!docker push $CUSTOM_PREDICTOR_IMAGE_URI

Using default tag: latest
The push refers to repository [gcr.io/amazonreviewssentimentanalysis/pytorch_predict_review_sentiment_bert_model]

[1B1f14237c: Preparing 
[1B53a2e40c: Preparing 
[1B5ebbaa33: Preparing 
[1B7fd5917d: Preparing 
[1B5889c774: Preparing 
[1Bb34d3786: Preparing 
[1Bbf18a086: Preparing 
[1B2781bbe2: Preparing 
[1B72a20011: Preparing 
[1Be36f0de6: Preparing 
[1Bce061fd9: Preparing 
[1Bbf25fa3f: Preparing 
[7Bbf18a086: Preparing 
[1Bc73cb705: Preparing 
[9Bbf18a086: Preparing 
[1B1bba1191: Preparing 
[1Bad708752: Preparing 
[1B8883823f: Preparing 
[1Bb40df9dc: Preparing 
[1Ba2533c0c: Preparing 
[1B69d81cd9: Preparing 
[1Ba9d1cad1: Preparing 
[23Bf14237c: Pushed   405.6MB/405.6MB1A[2K[23A[2K[18A[2K[15A[2K[17A[2K[16A[2K[23A[2K[23A[2K[11A[2K[12A[2K[13A[2K[23A[2K[23A[2K[10A[2K[9A[2K[7A[2K[23A[2K[6A[2K[3A[2K[2A[2K[1A[2K[23A[2K[23A[2K[23A[2K[23A[2K[23A[2K[23A[2K[23A[2K[23A[2K[23A[2K[23A[2K

In [45]:
APP_NAME = "review_sentiment_bert_model"
BUCKET_NAME = "gs://arsa_model_deployment_uscentral"
REGION = 'us-central1'


In [53]:
predict_custom_trained_model_sample(
    project="661148801406",
    endpoint_id="5854830148674650112",
    location="us-central1",
    instances={ "instance_key_1": "value",}
)

NameError: name 'predict_custom_trained_model_sample' is not defined

In [46]:
import base64
import json
import os
import random
import sys

import google.auth
from google.cloud import aiplatform
from google.cloud.aiplatform import gapic as aip
from google.cloud.aiplatform import hyperparameter_tuning as hpt
from google.protobuf.json_format import MessageToDict

In [47]:
aiplatform.init(project=PROJECT_ID, staging_bucket=BUCKET_NAME)#, credentials="/home/hrs/Documents/GCP_keys/key.json")

In [48]:
VERSION = 1
model_display_name = f"{APP_NAME}-v{VERSION}"
model_description = "PyTorch serve deploymend model for amazon reviews classification"

MODEL_NAME = APP_NAME
health_route = "/ping"
predict_route = f"/predictions/{MODEL_NAME}"
serving_container_ports = [7080]

In [49]:
model = aiplatform.Model.upload(
    display_name=model_display_name,
    description=model_description,
    serving_container_image_uri=CUSTOM_PREDICTOR_IMAGE_URI,
    serving_container_predict_route=predict_route,
    serving_container_health_route=health_route,
    serving_container_ports=serving_container_ports,
)

model.wait()

print(model.display_name)
print(model.resource_name)

Creating Model
Create Model backing LRO: projects/661148801406/locations/us-central1/models/67858559131451392/operations/7923936656476864512
Model created. Resource name: projects/661148801406/locations/us-central1/models/67858559131451392@1
To use this Model in another session:
model = aiplatform.Model('projects/661148801406/locations/us-central1/models/67858559131451392@1')
review_sentiment_bert_model-v1
projects/661148801406/locations/us-central1/models/67858559131451392


##### **Create an Endpoint for Model with Custom Container**

In [50]:
endpoint_display_name = f"{APP_NAME}-endpoint"
endpoint = aiplatform.Endpoint.create(display_name=endpoint_display_name)

Creating Endpoint
Create Endpoint backing LRO: projects/661148801406/locations/us-central1/endpoints/3348154743577903104/operations/3906725788862382080
Endpoint created. Resource name: projects/661148801406/locations/us-central1/endpoints/3348154743577903104
To use this Endpoint in another session:
endpoint = aiplatform.Endpoint('projects/661148801406/locations/us-central1/endpoints/3348154743577903104')


##### **Deploy the Model to Endpoint**

Deploying a model associates physical resources with the model so it can serve online predictions with low latency. 

**NOTE:** This step takes few minutes to deploy the resources.

In [51]:
traffic_percentage = 100
machine_type = "n1-standard-4"
deployed_model_display_name = model_display_name
min_replica_count = 1
max_replica_count = 3
sync = True

model.deploy(
    endpoint=endpoint,
    deployed_model_display_name=deployed_model_display_name,
    machine_type=machine_type,
    traffic_percentage=traffic_percentage,
    sync=sync,  
)

Deploying model to Endpoint : projects/661148801406/locations/us-central1/endpoints/3348154743577903104
Deploy Endpoint model backing LRO: projects/661148801406/locations/us-central1/endpoints/3348154743577903104/operations/6087593908416544768
Endpoint model deployed. Resource name: projects/661148801406/locations/us-central1/endpoints/3348154743577903104


<google.cloud.aiplatform.models.Endpoint object at 0x72250de0f5b0> 
resource name: projects/661148801406/locations/us-central1/endpoints/3348154743577903104

In [55]:
endpoint = aiplatform.Endpoint(
    endpoint_name="projects/661148801406/locations/us-central1/endpoints/5854830148674650112"
)
print(endpoint.gca_resource)
model = aiplatform.Model(
    model_name="projects/661148801406/locations/us-central1/models/9036777217039794176"
)
print(model.gca_resource)


name: "projects/661148801406/locations/us-central1/endpoints/5854830148674650112"
display_name: "review_sentiment_bert_model-endpoint"
deployed_models {
  id: "8619930368717357056"
  model: "projects/661148801406/locations/us-central1/models/9036777217039794176"
  display_name: "review_sentiment_bert_model-v2"
  create_time {
    seconds: 1732728972
    nanos: 625510000
  }
  dedicated_resources {
    machine_spec {
      machine_type: "n1-standard-4"
    }
    min_replica_count: 1
    max_replica_count: 1
  }
  model_version_id: "1"
}
traffic_split {
  key: "8619930368717357056"
  value: 100
}
etag: "AMEw9yPi2QVzKqcViZcjjjWWoUIddHHjXR_x8rugG9ZHG86HEjdZ-TB5uK79sG9jJh7c"
create_time {
  seconds: 1732728966
  nanos: 559808000
}
update_time {
  seconds: 1732729943
  nanos: 579557000
}

name: "projects/661148801406/locations/us-central1/models/9036777217039794176"
display_name: "review_sentiment_bert_model-v2"
description: "PyTorch serve deploymend model for amazon reviews classification"


#### **Invoking the Endpoint with deployed Model using Vertex AI SDK to make predictions**
##### **Get the Endpoint id**

In [56]:
endpoint_display_name = f"{APP_NAME}-endpoint"
filter = f'display_name="{endpoint_display_name}"'

for endpoint_info in aiplatform.Endpoint.list(filter=filter):
    print(
        f"Endpoint display name = {endpoint_info.display_name} resource id ={endpoint_info.resource_name} "
    )

endpoint = aiplatform.Endpoint(endpoint_info.resource_name)

Endpoint display name = review_sentiment_bert_model-endpoint resource id =projects/661148801406/locations/us-central1/endpoints/3348154743577903104 


In [57]:
endpoint_info.resource_name

'projects/661148801406/locations/us-central1/endpoints/3348154743577903104'

In [58]:
endpoint.list_models()

[id: "2997186233945292800"
 model: "projects/661148801406/locations/us-central1/models/67858559131451392"
 display_name: "review_sentiment_bert_model-v1"
 create_time {
   seconds: 1732735180
   nanos: 325253000
 }
 dedicated_resources {
   machine_spec {
     machine_type: "n1-standard-4"
   }
   min_replica_count: 1
   max_replica_count: 1
 }
 model_version_id: "1"]

In [44]:
# Check endpoints for this model
model_id = '6757955805590323200'
endpoints = aiplatform.Endpoint.list()
for endpoint in endpoints:
    deployed_models = endpoint.gca_resource.deployed_models
    for deployed_model in deployed_models:
        if model_id in deployed_model.model:
            print(f"Model {model_id} is deployed to endpoint {endpoint.display_name} with ID {endpoint.resource_name}.")


In [46]:
# Define the endpoint resource name
endpoint_name = "projects/661148801406/locations/us-central1/endpoints/4555119443713196032"

# Create the endpoint object
endpoint = aiplatform.Endpoint(endpoint_name=endpoint_name)

# Prepare the input data
instances = [
    {
        "text": "The product was excellent and exceeded my expectations.",
        "title": "Excellent Quality",
        "price": 49.99,
        "price_missing": False,
        "helpful_vote": 15,
        "verified_purchase": True,
    },
    {
        "text": "The product was bad and sucked to live my expectations.",
        "title": "Bad Quality",
        "price": 19.99,
        "price_missing": False,
        "helpful_vote": 5,
        "verified_purchase": False,
    },
]

formatted_input = {"instances": instances}

# Perform prediction
response = endpoint.predict(instances=formatted_input["instances"])

# Print the prediction results
print("Prediction Response:")
print(json.dumps(response.predictions, indent=4))


NotFound: 404 Endpoint `projects/661148801406/locations/us-central1/endpoints/4555119443713196032` not found.

In [61]:
import json
import base64

test_instances = [
    {
        "text": "The product was excellent and exceeded my expectations.",
        "title": "Excellent Quality",
        "price": 49.99,
        "price_missing": False,
        "helpful_vote": 15,
        "verified_purchase": True,
    },
    {
        "text": "The product was bad and sucked to live my expectations.",
        "title": "Excellent Quality",
        "price": 49.99,
        "price_missing": False,
        "helpful_vote": 15,
        "verified_purchase": True,
    },
{
            "text": "The product was very good.",
            "title": "Excellent Quality",
            "price": 49.99,
            "price_missing": False,
            "helpful_vote": 15,
            "verified_purchase": True
        }

]

print("=" * 100)
for instance in test_instances:
    print(f"Input instance: \n\t{instance}\n")
    
    formatted_input = {"instances": test_instances}
    print(f"Formatted input JSON: \n{json.dumps(formatted_input, indent=4)}\n")

    # Assuming `endpoint.predict()` is the method for inference
    prediction = endpoint.predict(instances=test_instances)
    print(f"Prediction response: \n\t{prediction}")
    print("=" * 100)


Input instance: 
	{'text': 'The product was excellent and exceeded my expectations.', 'title': 'Excellent Quality', 'price': 49.99, 'price_missing': False, 'helpful_vote': 15, 'verified_purchase': True}

Formatted input JSON: 
{
    "instances": [
        {
            "text": "The product was excellent and exceeded my expectations.",
            "title": "Excellent Quality",
            "price": 49.99,
            "price_missing": false,
            "helpful_vote": 15,
            "verified_purchase": true
        },
        {
            "text": "The product was bad and sucked to live my expectations.",
            "title": "Excellent Quality",
            "price": 49.99,
            "price_missing": false,
            "helpful_vote": 15,
            "verified_purchase": true
        },
        {
            "text": "The product was very good.",
            "title": "Excellent Quality",
            "price": 49.99,
            "price_missing": false,
            "helpful_vote": 15,
  

In [16]:
%%bash -s $REGION $endpoint_display_name

REGION=$1
endpoint_display_name=$2

# get endpoint id

echo "REGION = ${REGION}"
echo "ENDPOINT DISPLAY NAME = ${endpoint_display_name}"
endpoint_id=$(gcloud beta ai endpoints list --region ${REGION} --filter "display_name=${endpoint_display_name}" --format "value(ENDPOINT_ID)")
echo "ENDPOINT_ID = ${endpoint_id}"

# # call prediction endpoint
# input_text="Take away the CGI and the A-list cast and you end up with film with less punch."
# echo "INPUT TEXT = ${input_text}"

prediction=$(
echo """
{ 
    "instances": [
        {
            "text": "The product was excellent and exceeded my expectations.",
            "title": "Excellent Quality",
            "price": 49.99,
            "price_missing": false,
            "helpful_vote": 15,
            "verified_purchase": true
        },
        {
            "text": "The product was bad and sucked to live my expectations.",
            "title": "Excellent Quality",
            "price": 49.99,
            "price_missing": false,
            "helpful_vote": 15,
            "verified_purchase": true
        }

    ]
}
""" | gcloud beta ai endpoints predict ${endpoint_id} --region=$REGION --json-request -)

echo "PREDICTION RESPONSE = ${prediction}"

REGION = us-central1
ENDPOINT DISPLAY NAME = review_sentiment_bert_model-endpoint


Using endpoint [https://us-central1-aiplatform.googleapis.com/]


ENDPOINT_ID = 4555119443713196032


Using endpoint [https://us-central1-prediction-aiplatform.googleapis.com/]
ERROR: (gcloud.beta.ai.endpoints.predict) HTTPError 404: {
  "code": 404,
  "type": "ModelNotFoundException",
  "message": "Model not found: review_sentiment_bert_model"
}



PREDICTION RESPONSE = 


In [22]:
%%bash -s $REGION $endpoint_display_name

REGION=$1
endpoint_display_name=$2

# Get endpoint ID
echo "REGION = ${REGION}"
echo "ENDPOINT DISPLAY NAME = ${endpoint_display_name}"
endpoint_id=$(gcloud beta ai endpoints list --region ${REGION} --filter "display_name=${endpoint_display_name}" --format "value(ENDPOINT_ID)")
echo "ENDPOINT_ID = ${endpoint_id}"

# Input JSON file path
instances_file="./instances.json"
if [ ! -f "$instances_file" ]; then
  echo "Input file ${instances_file} not found!"
  exit 1
fi

# Perform prediction using the endpoint
echo "Performing prediction with ${instances_file}..."
prediction=$(gcloud beta ai endpoints predict ${endpoint_id} --region=$REGION --json-request="${instances_file}")

# Output the prediction result
echo "PREDICTION RESPONSE = ${prediction}"


REGION = us-central1
ENDPOINT DISPLAY NAME = review_sentiment_bert_model-endpoint


Using endpoint [https://us-central1-aiplatform.googleapis.com/]


ENDPOINT_ID = 4555119443713196032
Performing prediction with ./instances.json...


Using endpoint [https://us-central1-prediction-aiplatform.googleapis.com/]
ERROR: (gcloud.beta.ai.endpoints.predict) HTTPError 404: {
  "code": 404,
  "type": "ModelNotFoundException",
  "message": "Model not found: review_sentiment_bert_model"
}



PREDICTION RESPONSE = 


In [37]:
%%bash
gcloud beta ai models list --region=us-central1


Using endpoint [https://us-central1-aiplatform.googleapis.com/]


MODEL_ID             DISPLAY_NAME
9036777217039794176  review_sentiment_bert_model-v2
3584325443177676800  finetuned-bert-classifier-7080-v1
1653407102942576640  finetuned-bert-classifier-v1
