In [228]:
REGION = "us-west1"
PROJECT_ID = "rag-nick"
REPOSITORY = "highlight-app"
IMAGE = "highlight-pipeline-gpu-finetuned2"
TAG = "py310-cu12.3-torch-2.2.0-transformers-4.38.1"
BUCKET_NAME = "highlight-app-finetuned-storage"
BUCKET_URI = f"gs://{BUCKET_NAME}/paraphrase-MiniLM-L6-v2/model.tar.gz"

In [None]:
!pip install "google-cloud-aiplatform[prediction]>=1.16.0"
!pip install -r huggingface_predictor_gpu/requirements.txt

In [230]:
import os
from google.cloud.aiplatform.prediction import LocalModel

from huggingface_predictor.predictor import HuggingFacePredictor

local_model = LocalModel.build_cpr_model(
    "huggingface_predictor",
    f"{REGION}-docker.pkg.dev/{PROJECT_ID}/{REPOSITORY}/{IMAGE}:{TAG}",
    predictor=HuggingFacePredictor,
    requirements_path="huggingface_predictor/requirements.txt",
    base_image="--platform=linux/amd64 python:3.10-slim AS build",
)

  self.stdin = io.open(p2cwrite, 'wb', bufsize)
  self.stdout = io.open(c2pread, 'rb', bufsize)


In [208]:
!gcloud auth configure-docker {REGION}-docker.pkg.dev --quiet


{
  "credHelpers": {
    "asia.gcr.io": "gcloud",
    "eu.gcr.io": "gcloud",
    "gcr.io": "gcloud",
    "marketplace.gcr.io": "gcloud",
    "staging-k8s.gcr.io": "gcloud",
    "us-central1-docker.pkg.dev": "gcloud",
    "us-east4-docker.pkg.dev": "gcloud",
    "us-west1-docker.pkg.dev": "gcloud",
    "us.gcr.io": "gcloud"
  }
}
Adding credentials for: us-west1-docker.pkg.dev
gcloud credential helpers already registered correctly.


In [209]:
!gcloud artifacts repositories create highlight-app --repository-format=docker --location={REGION}

Create request issued for: [highlight-app]
Waiting for operation [projects/rag-nick/locations/us-west1/operations/52955bb8
-0d31-4223-a195-ca04f23676d5] to complete...done.                              
Created repository [highlight-app].


In [211]:
local_model.push_image()

  self.stdin = io.open(p2cwrite, 'wb', bufsize)
  self.stdout = io.open(c2pread, 'rb', bufsize)


In [20]:
!git lfs install
!git clone https://huggingface.co/tonychenxyz/paraphrase-MiniLM-L6-v2-finetune-summary

Updated Git hooks.
Git LFS initialized.
Cloning into 'paraphrase-MiniLM-L6-v2-finetune-summary'...
remote: Enumerating objects: 19, done.[K
remote: Counting objects: 100% (16/16), done.[K
remote: Compressing objects: 100% (15/15), done.[K
remote: Total 19 (delta 0), reused 0 (delta 0), pack-reused 3[K
Unpacking objects: 100% (19/19), done.


In [32]:
!cd paraphrase-MiniLM-L6-v2-finetune-summary/ && tar zcvf model.tar.gz --exclude flax_model.msgpack --exclude rust_model.ot * && mv model.tar.gz ../

a 1_Pooling
a 1_Pooling/config.json
a README.md
a added_tokens.json
a config.json
a config_sentence_transformers.json
a modules.json
a pytorch_model.bin
a sentence_bert_config.json
a special_tokens_map.json
a tokenizer.json
a tokenizer_config.json
a vocab.txt


In [16]:
!gsutil mb -l $REGION -b on -p $PROJECT_ID gs://$BUCKET_NAME

Creating gs://highlight-app-finetuned-storage/...


In [107]:
!gcloud config set storage/parallel_composite_upload_enabled True
!gcloud storage cp model.tar.gz $BUCKET_URI

Updated property [storage/parallel_composite_upload_enabled].
Copying file://model.tar.gz to gs://highlight-app-finetuned-storage/paraphrase-MiniLM-L6-v2/model.tar.gz
  Completed files 1/1 | 79.8MiB/79.8MiB | 16.9MiB/s                            

Average throughput: 12.7MiB/s


In [212]:
from google.cloud import aiplatform

aiplatform.init(project=PROJECT_ID, location=REGION)

In [213]:
model = aiplatform.Model.upload(
    display_name="paraphrase-MiniLM-L6-v2",
    artifact_uri=f"gs://{BUCKET_NAME}/paraphrase-MiniLM-L6-v2",
    serving_container_image_uri=local_model.get_serving_container_spec().image_uri,
    serving_container_environment_variables={
        "HF_TASK": "summarization",
        # Optional env var so that `uvicorn` only runs the model in 1 worker
        # "VERTEX_CPR_WEB_CONCURRENCY": 1,
    },
)

Creating Model
Create Model backing LRO: projects/522870214401/locations/us-west1/models/2995834934154756096/operations/7954490538424532992
Model created. Resource name: projects/522870214401/locations/us-west1/models/2995834934154756096@1
To use this Model in another session:
model = aiplatform.Model('projects/522870214401/locations/us-west1/models/2995834934154756096@1')


In [214]:
# endpoint = model.deploy(
#     machine_type="g2-standard-4",
#     accelerator_type="NVIDIA_L4",
#     accelerator_count=1,
# )
endpoint = model.deploy(machine_type="e2-standard-4")
print(endpoint.resource_name)

Creating Endpoint
Create Endpoint backing LRO: projects/522870214401/locations/us-west1/endpoints/2549463999603277824/operations/6762162537078194176
Endpoint created. Resource name: projects/522870214401/locations/us-west1/endpoints/2549463999603277824
To use this Endpoint in another session:
endpoint = aiplatform.Endpoint('projects/522870214401/locations/us-west1/endpoints/2549463999603277824')
Deploying model to Endpoint : projects/522870214401/locations/us-west1/endpoints/2549463999603277824
Deploy Endpoint model backing LRO: projects/522870214401/locations/us-west1/endpoints/2549463999603277824/operations/1136040702585602048
Endpoint model deployed. Resource name: projects/522870214401/locations/us-west1/endpoints/2549463999603277824
projects/522870214401/locations/us-west1/endpoints/2549463999603277824


In [224]:
import json
from google.api import httpbody_pb2
from google.cloud import aiplatform_v1

# Initialize AI Platform Prediction client
client_options = {"api_endpoint": "us-west1-aiplatform.googleapis.com"}
prediction_client = aiplatform.gapic.PredictionServiceClient(client_options=client_options)

# Endpoint resource name
endpoint_name = "projects/522870214401/locations/us-west1/endpoints/2549463999603277824"

# instances = aiplatform_v1.Value()
# instances.sequences = ["This is an example sentence", "Each sentence is converted"]
# # Prepare your input data
instance = {"sequences": ["example sentence", "Each sentence is converted"]}
print(instance)

# payload = [{"instances": instances}]
request = aiplatform_v1.PredictRequest(endpoint=endpoint_name)
request.instances.append(instance)
print(request)
print(type(request.instances))

response = prediction_client.predict(request=request)
print(response.metadata)
print(response.predictions)

{'sequences': ['example sentence', 'Each sentence is converted']}
endpoint: "projects/522870214401/locations/us-west1/endpoints/2549463999603277824"
instances {
  struct_value {
    fields {
      key: "sequences"
      value {
        list_value {
          values {
            string_value: "example sentence"
          }
          values {
            string_value: "Each sentence is converted"
          }
        }
      }
    }
  }
}

<class 'proto.marshal.collections.repeated.RepeatedComposite'>
None
[]


In [227]:
import json
from google.api import httpbody_pb2
from google.cloud import aiplatform_v1

# Initialize AI Platform Prediction client
client_options = {"api_endpoint": "us-west1-aiplatform.googleapis.com"}
prediction_client = aiplatform.gapic.PredictionServiceClient(client_options=client_options)

# Endpoint resource name
endpoint = "projects/522870214401/locations/us-west1/endpoints/2549463999603277824"

# instances = aiplatform_v1.Value()
# instances.sequences = ["This is an example sentence", "Each sentence is converted"]
# # Prepare your input data
data = {"sequences": ["example sentence", "Each sentence is converted"]}
json_data = json.dumps(data)
http_body = httpbody_pb2.HttpBody(
    data=json_data.encode("utf-8"),
    content_type="application/json",
)
print(http_body)

request = aiplatform_v1.RawPredictRequest(
    endpoint=endpoint,
    http_body=http_body,
)

response = prediction_client.raw_predict(request)
json.loads(response.data)


content_type: "application/json"
data: "{\"sequences\": [\"example sentence\", \"Each sentence is converted\"]}"



InternalServerError: 500 {"detail":"The following exception has occurred: KeyError. Arguments: ('instances',)."}

In [219]:
print(response)
print(type(response))
print(response.predictions)
print(response.metadata)

deployed_model_id: "4962373053083287552"
model: "projects/522870214401/locations/us-west1/models/2995834934154756096"
model_version_id: "1"
model_display_name: "paraphrase-MiniLM-L6-v2"

<class 'google.cloud.aiplatform_v1.types.prediction_service.PredictResponse'>
[]
None


In [89]:
# delete model
model.delete()

Deleting Model : projects/522870214401/locations/us-west1/models/6110637026434875392


FailedPrecondition: 400 The model "projects/522870214401/locations/us-west1/models/6110637026434875392" can't be deleted because it's deployed or being deployed at the following endpoint(s): projects/522870214401/locations/us-west1/endpoints/5130308061063282688, projects/522870214401/locations/us-west1/endpoints/8381906992024780800, projects/522870214401/locations/us-west1/endpoints/9183547725696729088. Undeploy the model from all endpoints first and then delete it.

## Deleting Models following the order: undeploy -> delete endpoint -> delete model

In [145]:
endpoints = aiplatform.Endpoint.list()
print(endpoints)

[<google.cloud.aiplatform.models.Endpoint object at 0x2a614ff10> 
resource name: projects/522870214401/locations/us-west1/endpoints/1634388850316935168]


In [146]:
!gcloud ai endpoints describe 1634388850316935168 --project=rag-nick --region=us-west1 --format="value(deployedModels)"

Using endpoint [https://us-west1-aiplatform.googleapis.com/]
{'createTime': '2024-04-24T13:34:23.961099Z', 'dedicatedResources': {'machineSpec': {'machineType': 'e2-standard-4'}, 'maxReplicaCount': 1, 'minReplicaCount': 1}, 'displayName': 'paraphrase-MiniLM-L6-v2', 'id': '918140587704582144', 'model': 'projects/522870214401/locations/us-west1/models/4594612801871282176', 'modelVersionId': '1'}


In [None]:
gcloud ai endpoints undeploy-model 1634388850316935168 --project=rag-nick --region=us-west1 --deployed-model-id=918140587704582144