# Saving Predictions in Blob Storage
Note: Run this notebook on an Azure Machine Learning compute instance.

## Save json payload+prediction from endpoint as table in blob storage
Simulates POST requests to the online endpoint for prediction, then saves the results to blob storage.

**Note:** This example uses the blob storage access key to authenticate (not preferred); see the better approach below. It is provided as an alternative example.

In [7]:
import json
import pandas as pd
from azure.storage.blob import BlobServiceClient
from azure.ai.ml import MLClient

# Invoke the online endpoint with a sample request, join the returned
# predictions to the request rows, and upload the result as a CSV blob.
#
# NOTE: `ml_client` is not created in this cell; it must already exist in the
# kernel (e.g. built by an earlier cell) or the invoke call raises NameError.
import ast  # cell-local: safe parsing of non-JSON (Python-literal) responses

# AML configs
online_endpoint_name = "credit-endpoint-78a2c255"  # hard-coded endpoint name
deploy_dir = "./deploy"
request_file = f"{deploy_dir}/sample-request.json"  # hard-coded JSON payload from a previous cell

# Blob configs used for storing the prediction
# SECURITY: this connection string embeds the storage account key. Never commit
# a real key; prefer credential-based access where possible.
BLOB_CONNECTION_STRING = "DefaultEndpointsProtocol=https;AccountName=<BLOB STORAGE ACCOUNT NAME>;AccountKey=<ACCOUNT KEY>;EndpointSuffix=core.windows.net"
CONTAINER_NAME = "azureml-batchpredictions"  # blob container
csv_file = "prediction_results.csv"  # predictions are saved under this file/blob name

# Step 1: Invoke AML endpoint and capture result
result = ml_client.online_endpoints.invoke(
    endpoint_name=online_endpoint_name,
    request_file=request_file,
    deployment_name="blue",
)

# Parse result: try JSON first, then fall back to a Python literal.
try:
    prediction_result = json.loads(result)
except json.JSONDecodeError:
    # The response is not valid JSON (e.g. a Python repr using single quotes).
    # ast.literal_eval only accepts literals, so a malicious or malformed
    # response cannot execute code the way eval() would; it raises
    # ValueError/SyntaxError instead.
    prediction_result = ast.literal_eval(result)

print("Prediction Result:", prediction_result)

# Step 2: Load input data from request file
with open(request_file, "r") as f:
    request_data = json.load(f)

input_data = request_data["input_data"]["data"]
columns = request_data["input_data"]["columns"]

# Step 3: Combine input + prediction into DataFrame
# Request columns are prefixed with "col_" to form the CSV header names.
df = pd.DataFrame(input_data, columns=[f"col_{c}" for c in columns])
# pandas raises ValueError here if the prediction count differs from the row count.
df["prediction"] = prediction_result

# Step 4: Save as CSV locally
df.to_csv(csv_file, index=False)

# Step 5: Upload CSV to Azure Blob Storage
blob_service_client = BlobServiceClient.from_connection_string(BLOB_CONNECTION_STRING)
blob_client = blob_service_client.get_blob_client(container=CONTAINER_NAME, blob=csv_file)

# overwrite=True replaces any existing blob with the same name.
with open(csv_file, "rb") as data:
    blob_client.upload_blob(data, overwrite=True)

print(f"‚úÖ File uploaded to Blob Storage: {CONTAINER_NAME}/{csv_file}")

Prediction Result: [1, 0]
‚úÖ File uploaded to Blob Storage: azureml-batchpredictions/prediction_results.csv


## Save batch score prediction as table
Predict on a .csv file, then save the results to blob storage.

**Note:** This example doesn't use the blob storage access key to authenticate; instead it authenticates with an Azure credential and takes the storage account name from the datastore (preferred).

In [15]:
import json
import pandas as pd
from azure.ai.ml import MLClient
from azure.identity import DefaultAzureCredential
from azure.storage.blob import BlobServiceClient
import datetime
import io

# Score every row of a CSV file against the online endpoint, append the
# predictions as a new column, and upload the scored table as a CSV blob.
import ast  # cell-local: safe parsing of non-JSON (Python-literal) responses

# Initialize ML Client
credential = DefaultAzureCredential()
ml_client = MLClient(
    credential=credential,
    subscription_id="<subscription-id>",
    resource_group_name="rg-we-atpws-aml",
    workspace_name="aml-ws-atp001",
)

# Endpoint configuration
online_endpoint_name = "credit-endpoint-78a2c255"

# Blob container that receives the scored CSV.
CONTAINER_NAME = "azureml-batchpredictions"

# Step 1: Load your CSV file from datastore
input_uri = "azureml://subscriptions/<subscription-id>/resourcegroups/rg-we-atpws-aml/workspaces/aml-ws-atp001/datastores/onelake_example_id/paths/raw-files/for-batch-scoring/data_with_headers.csv"
df = pd.read_csv(input_uri)

# Step 2: Convert DataFrame to the endpoint's expected JSON format
# Columns and index are sent as positional integers, not as the CSV's header names.
request_data = {
    "input_data": {
        "columns": list(range(len(df.columns))),
        "index": list(range(len(df))),
        "data": df.values.tolist()
    }
}

# Step 3: Save as temporary JSON file
# invoke() takes a request file path, so the payload is written to disk first.
temp_request_file = "./batch_request.json"
with open(temp_request_file, "w") as f:
    json.dump(request_data, f)

# Step 4: Invoke endpoint for batch scoring
result = ml_client.online_endpoints.invoke(
    endpoint_name=online_endpoint_name,
    request_file=temp_request_file,
    deployment_name="blue",
)

# Step 5: Parse predictions: try JSON first, then fall back to a Python literal.
try:
    predictions = json.loads(result)
except json.JSONDecodeError:
    # ast.literal_eval only accepts literals, so a malicious or malformed
    # response cannot execute code the way eval() would; it raises
    # ValueError/SyntaxError instead.
    predictions = ast.literal_eval(result)

# Step 6: Add predictions to original DataFrame
# pandas raises ValueError here if the prediction count differs from the row count.
df['prediction'] = predictions

# Step 7: Save results to workspaceblobstore using Azure Blob Storage SDK
# `timestamp` is only used by the commented-out, per-run output path below.
timestamp = datetime.datetime.now().strftime("%Y%m%d_%H%M%S")
datastore_name = "workspaceblobstore"
# output_path = f"batch-scoring-results/scored_results_{timestamp}.csv"
output_path = "prediction_results.csv"

# Get the datastore details (only its storage account name is used below)
datastore = ml_client.datastores.get(datastore_name)

# Create blob service client using credential
blob_service_client = BlobServiceClient(
    account_url=f"https://{datastore.account_name}.blob.core.windows.net",
    credential=credential
)

# Get blob client
# NOTE(review): the blob is written to CONTAINER_NAME, not to
# datastore.container_name. If the two differ, the datastore URI printed at the
# end does not point at the uploaded file — confirm which is intended.
blob_client = blob_service_client.get_blob_client(
    container=CONTAINER_NAME,
    blob=output_path
)

# Convert DataFrame to CSV in memory
csv_buffer = io.StringIO()
df.to_csv(csv_buffer, index=False)
csv_data = csv_buffer.getvalue()

# Upload to blob storage; overwrite=True replaces any existing blob with the same name.
blob_client.upload_blob(csv_data, overwrite=True)

# Build the full URI for reference
output_uri = f"azureml://subscriptions/{ml_client.subscription_id}/resourcegroups/{ml_client.resource_group_name}/workspaces/{ml_client.workspace_name}/datastores/{datastore_name}/paths/{output_path}"

print("‚úÖ Batch scoring complete!")
print(f"Total records scored: {len(df)}")
print(f"üìÅ Results saved to datastore: {datastore_name}")
print(f"üìÇ Path: {output_path}")
print(f"\nFull URI: {output_uri}")
print(f"\nSample predictions:\n{df[['prediction']].head()}")

‚úÖ Batch scoring complete!
Total records scored: 10
üìÅ Results saved to datastore: workspaceblobstore
üìÇ Path: prediction_results.csv

Full URI: azureml://subscriptions/<subscription-id>/resourcegroups/rg-we-atpws-aml/workspaces/aml-ws-atp001/datastores/workspaceblobstore/paths/prediction_results.csv

Sample predictions:
   prediction
0           1
1           0
2           0
3           1
4           0


# Experiments

In [None]:
# Install azure-ai-ml, azure-identity, pyspark and delta-spark into the kernel's environment.
# NOTE(review): versions are unpinned, and pyspark/delta-spark are not used by the cells visible here — confirm they are needed.
%pip install azure-ai-ml azure-identity pyspark delta-spark

## Clean up resources

In [None]:
# Request deletion of the online endpoint named by `online_endpoint_name`.
# NOTE(review): the value returned by begin_delete is discarded, so this cell presumably does not wait for the deletion to finish — confirm that is intended.
ml_client.online_endpoints.begin_delete(name=online_endpoint_name)