In [9]:
# NOTE(review): looks like pip's "--user" install flag; nothing in the visible
# cells reads this constant — TODO confirm where it is used.
USER_FLAG = "--user"

# Copyright 2020 Google LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     https://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

# [START aiplatform_predict_text_classification_single_label_sample]
from google.cloud import aiplatform
from google.cloud.aiplatform.gapic.schema import predict
from google.protobuf import json_format
from google.protobuf.struct_pb2 import Value


def predict_text_classification_single_label_sample(
    project: str,
    endpoint_id: str,
    content: str,
    location: str = "us-central1",
    api_endpoint: str = "us-central1-aiplatform.googleapis.com",
    client=None,
):
    """Classify one piece of text with a deployed endpoint and return the result.

    The request is sent through ``PredictionServiceClient.predict``. The
    deployed model id and every returned prediction are printed as a side
    effect.

    Args:
        project: Project component of the endpoint resource path.
        endpoint_id: Endpoint component of the endpoint resource path.
        content: Text to classify.
        location: Location component of the endpoint resource path.
        api_endpoint: Regional API host used when a new client is created.
        client: Optional ``aiplatform.gapic.PredictionServiceClient`` to reuse.
            When given, ``api_endpoint`` is ignored. Pass one when calling this
            function in a loop so that a new client is not built per request.

    Returns:
        ``dict`` built from the first prediction of the response (the sample
        outputs in this notebook show the keys ``ids``, ``confidences`` and
        ``displayNames``).

    Raises:
        IndexError: If the response contains no predictions.
    """
    if client is None:
        # The AI Platform services require regional API endpoints.
        client_options = {"api_endpoint": api_endpoint}
        # Only build a client when the caller did not supply one to reuse.
        client = aiplatform.gapic.PredictionServiceClient(
            client_options=client_options
        )

    instance = predict.instance.TextClassificationPredictionInstance(
        content=content,
    ).to_value()
    instances = [instance]
    # No prediction parameters are sent: an empty dict is parsed into a Value.
    parameters = json_format.ParseDict({}, Value())
    endpoint = client.endpoint_path(
        project=project, location=location, endpoint=endpoint_id
    )
    response = client.predict(
        endpoint=endpoint, instances=instances, parameters=parameters
    )
    print("response")
    print(" deployed_model_id:", response.deployed_model_id)

    predictions = response.predictions
    for prediction in predictions:
        print(" prediction:", dict(prediction))

    # Same exception type as indexing an empty sequence, but with a message
    # that says which endpoint produced the empty response.
    if len(predictions) == 0:
        raise IndexError(f"endpoint {endpoint} returned no predictions")

    return dict(predictions[0])


# [END aiplatform_predict_text_classification_single_label_sample]

In [10]:
# Project identifier for this notebook; the trailing "@param" marker is
# presumably a Colab form annotation — keep it on the same line.
# NOTE(review): the prediction calls in the visible cells pass
# project="12807912884" instead of this value — confirm which is intended.
PROJECT_ID = "training1emakia"  # @param {type:"string"}
print(PROJECT_ID)

training1emakia


In [11]:
# Cloud Storage bucket used by the later cells, plus its gs:// URI form.
# NOTE(review): "{MY_NAME}" is a literal placeholder here (this is not an
# f-string), as the printed output of this cell shows.
BUCKET_NAME = "{MY_NAME}_batch_prediction2"  # @param {type:"string"}
BUCKET_URI = "gs://" + BUCKET_NAME
# Echo both values, one per line.
print(BUCKET_NAME, BUCKET_URI, sep="\n")

{MY_NAME}_batch_prediction2
gs://{MY_NAME}_batch_prediction2


In [12]:
import pandas as pd
#import jsonlines
import json
from google.cloud import bigquery
from google.cloud import storage

# BigQuery client created with default constructor arguments.
# NOTE(review): the name `client` is rebound to a storage.Client in the
# In [14] cell, and no visible cell uses this BigQuery client — confirm it is
# still needed.
client = bigquery.Client()

In [13]:
# One online prediction against the deployed endpoint; the returned dict is
# the last expression, so it is shown as the cell output.
sample_request = {
    "project": "12807912884",
    "endpoint_id": "8493069816616189952",
    "location": "us-central1",
    "content": "@noavggirlhere Oh really?  So you condemn all the right wingers who said the Paul Pelosi attack was just a “lover’s quarrel” right?  You see I know that you know how to use kind words, but I also know that right wingers will justify evil when it suits them.",
}
predict_text_classification_single_label_sample(**sample_request)

response
 deployed_model_id: 3025018171779186688
 prediction: {'ids': ['5612351825744232448', '1000665807316844544'], 'confidences': [0.4910487234592438, 0.5089512467384338], 'displayNames': ['1', '0']}


{'ids': ['5612351825744232448', '1000665807316844544'],
 'confidences': [0.4910487234592438, 0.5089512467384338],
 'displayNames': ['1', '0']}

In [14]:
# Run an online prediction for every row of the input CSV, upload each tweet
# as its own text object, and write all results to one JSON Lines file in the
# bucket.
FOLDER_NAME = "output"

import json

from google.cloud import storage

# NOTE: this rebinds `client`, which an earlier cell set to a bigquery.Client.
client = storage.Client()
# Show what is already stored under the output prefix.
for blob in client.list_blobs(BUCKET_NAME, prefix=FOLDER_NAME):
    print(str(blob))

input_file_name = "{MY_NAME}_all_prediction.jsonl"

# Endpoint that serves every prediction in the loop below.
ENDPOINT_PROJECT = "12807912884"
ENDPOINT_ID = "8493069816616189952"
ENDPOINT_LOCATION = "us-central1"

#gcs_source = 'gs://prediction-data-english/batchpredictioncsv.csv'
gcs_source = 'gs://{MY_NAME}_batch_prediction2/dataforbatchprediction_csv.csv'
data = pd.read_csv(
    gcs_source,              # gs:// URI of the input CSV
    sep=',',                 # comma-separated file
    usecols=['id', 'text'],  # only the two columns used below
)


def _json_default(value):
    """Fallback for json.dumps: turn non-JSON containers/scalars into plain types."""
    if hasattr(value, "item"):
        # numpy scalar (e.g. an int64 id read by pandas)
        return value.item()
    if hasattr(value, "items"):
        # mapping-like object
        return dict(value.items())
    # sequence-like object such as the repeated fields inside a prediction;
    # list() raises TypeError for anything else, which json.dumps expects.
    return list(value)


bucket = client.bucket(BUCKET_NAME)
# Iterate over the prediction instances, creating a new TXT file
# for each.
input_file_data = []
for count, row in data.iterrows():
    response = predict_text_classification_single_label_sample(
        project=ENDPOINT_PROJECT,
        endpoint_id=ENDPOINT_ID,
        location=ENDPOINT_LOCATION,
        content=row["text"],
    )
    instance_name = f"input_{count}.txt"
    instance_file_uri = f"{BUCKET_URI}/{instance_name}"
    # Add the data to store in the JSONL input file.
    tmp_data = {
        "instance": {"content": instance_file_uri, "mimeType": "text/plain"},
        "prediction": response,
        "id": row["id"],
        "tweet content": row["text"],
    }
    input_file_data.append(tmp_data)

    # Create the new instance file
    blob = bucket.blob(instance_name)
    blob.upload_from_string(row["text"])

# One JSON document per line. str(dict) would write Python reprs (single
# quotes), which JSON parsers reject even though the file is named .jsonl.
input_str = "\n".join(
    json.dumps(record, default=_json_default) for record in input_file_data
)
file_blob = bucket.blob(input_file_name)
file_blob.upload_from_string(input_str)
print("done")



<Blob: {MY_NAME}_batch_prediction2, output/, 1702080916264130>
response
 deployed_model_id: 3025018171779186688
 prediction: {'displayNames': ['1', '0'], 'ids': ['5612351825744232448', '1000665807316844544'], 'confidences': [0.9794110655784607, 0.02058890089392662]}
response
 deployed_model_id: 3025018171779186688
 prediction: {'confidences': [0.8804326057434082, 0.1195673942565918], 'ids': ['5612351825744232448', '1000665807316844544'], 'displayNames': ['1', '0']}
response
 deployed_model_id: 3025018171779186688
 prediction: {'confidences': [0.9676119089126587, 0.032388050109148026], 'ids': ['5612351825744232448', '1000665807316844544'], 'displayNames': ['1', '0']}
response
 deployed_model_id: 3025018171779186688
 prediction: {'displayNames': ['1', '0'], 'confidences': [0.29037636518478394, 0.7096236348152161], 'ids': ['5612351825744232448', '1000665807316844544']}
response
 deployed_model_id: 3025018171779186688
 prediction: {'ids': ['5612351825744232448', '1000665807316844544'], 'co