# Saving Predictions in OneLake
**Note**: Run this notebook in Azure Machine Learning Serverless Spark Compute

This sample code must be **executed on serverless Spark compute** in Azure Machine Learning, because the standard compute instance does not have Java installed (Java is required for saving Delta tables).

In [None]:
# Execute through Serverless Spark Compute

# Install required packages:
# %pip install azure-ai-ml azure-identity pyspark delta-spark
import ast
import json

import pandas as pd
from azure.ai.ml import MLClient
from azure.identity import ClientSecretCredential
from delta import configure_spark_with_delta_pip
from pyspark.sql import SparkSession
from pyspark.sql.types import StructType, StructField, StringType, MapType

# ---------------------------
# 2. Service principal credentials
# ---------------------------
# Replace the three placeholders with the values of the service principal
# that is used to authenticate against the workspace.
tenant_id = "<tenant-id>"
client_id = "<client-id>"
client_secret = "<client-secret>"

credential = ClientSecretCredential(
    tenant_id=tenant_id,
    client_id=client_id,
    client_secret=client_secret,
)

# ---------------------------
# 3. Azure Machine Learning workspace
# ---------------------------
# Subscription, resource group and workspace name identify the workspace
# the client below is bound to.
SUBSCRIPTION = "<subscription-id>"
RESOURCE_GROUP = "rg-we-atpws-aml"
WS_NAME = "aml-ws-atp001"

# Client used further down to call the online endpoint.
ml_client = MLClient(
    credential,
    subscription_id=SUBSCRIPTION,
    resource_group_name=RESOURCE_GROUP,
    workspace_name=WS_NAME,
)


# Online endpoint used for scoring; the name is hardcoded for this sample.
online_endpoint_name = "credit-endpoint-78a2c255"

# Step 1: Read the input CSV from the lakehouse into a pandas DataFrame.
# The path is written as adjacent literals for readability only; Python
# joins them into one string at compile time.
input_path = (
    "abfss://UnifiedData@onelake.dfs.fabric.microsoft.com"
    "/maag_bronze.Lakehouse/Files/raw-files/for-batch-scoring"
    "/data_with_headers.csv"
)

df = pd.read_csv(input_path)

# Step 2: Build the request payload from the DataFrame:
#   columns -> positional column labels (0..n_columns-1), header names are not sent
#   index   -> positional row labels (0..n_rows-1)
#   data    -> the cell values, one inner list per row
request_data = {
    "input_data": dict(
        columns=list(range(df.shape[1])),
        index=list(range(df.shape[0])),
        data=df.values.tolist(),
    )
}

# Step 3: Write the payload to a local JSON file, because the endpoint is
# invoked with a request file rather than an in-memory object.
temp_request_file = "./batch_request.json"
with open(temp_request_file, "w") as request_handle:
    request_handle.write(json.dumps(request_data))


# Step 4: Invoke endpoint for batch scoring
# Sends the JSON request file to the "blue" deployment of the online endpoint
# and keeps the raw response body in `result`.
result = ml_client.online_endpoints.invoke(
    endpoint_name=online_endpoint_name,
    request_file=temp_request_file,
    deployment_name="blue",
)

# Step 5: Parse predictions
# The response is parsed as JSON first. If it is not valid JSON it is
# presumably a Python-literal repr (e.g. single-quoted strings), so fall back
# to ast.literal_eval. Unlike eval(), literal_eval only accepts literals and
# cannot execute code contained in the response; it raises ValueError or
# SyntaxError when the body is not a plain literal.
try:
    predictions = json.loads(result)
except json.JSONDecodeError:
    predictions = ast.literal_eval(result)

# Step 6: Add predictions to original DataFrame
# NOTE(review): assumes the endpoint returns one prediction per input row,
# in input order - confirm against the scoring script.
df["prediction"] = predictions

# ---------------------------
# 8. Save as Delta Table in OneLake
# ---------------------------

# `spark` is not defined anywhere in this cell; it is normally injected by the
# Spark notebook session. getOrCreate() returns that active session when one
# exists and otherwise creates one, so the cell no longer depends on the
# implicit global.
spark = SparkSession.builder.getOrCreate()

# Convert the pandas DataFrame (input rows plus the "prediction" column)
# to a Spark DataFrame so it can be written in Delta format.
spark_df = spark.createDataFrame(df)

# Destination table in the gold lakehouse where the predictions are stored.
delta_path = "abfss://UnifiedData@onelake.dfs.fabric.microsoft.com/maag_gold.Lakehouse/Tables/dbo/azureml_card_cardit_scoring"

# mode("overwrite") replaces any data already stored at delta_path.
spark_df.write.format("delta").mode("overwrite").save(delta_path)

# The check-mark emoji was previously mis-decoded (UTF-8 bytes read as cp1252).
print(f"✅ Model card saved as Delta table at: {delta_path}")

StatementMeta(, , -1, SessionStarting, , SessionStarting)