In [1]:
import os
import sys
from pathlib import Path

import datarobot as dr # type: ignore
from dotenv import load_dotenv # type: ignore

# The notebook should be executed from the project root directory.
# `_correct_path` is a run-once guard: re-running this cell in the same kernel
# must not climb another directory level or append "." to sys.path again.
if "_correct_path" not in locals():
    os.chdir("..")
    # Make modules under the new working directory importable.
    sys.path.append(".")
    # Fixed: the message previously carried a stray ")" inside the f-string literal.
    print(f"changed dir to {Path('.').resolve()}")
    _correct_path = True
# Load variables from a .env file into the process environment before the
# DataRobot client is created (presumably the client is configured from them
# -- TODO confirm).
load_dotenv()
client = dr.Client()

changed dir to /Users/farooq.azam/SAP_Related_Code/predictive-ai-starter)


In [2]:
import yaml # type: ignore
# Read the identifiers recorded in scoring_related_info.yaml; the path is
# resolved against the current working directory.
with open("scoring_related_info.yaml") as config_file:
    config = yaml.safe_load(config_file)

# Pull out the four IDs used by the rest of the notebook (KeyError if one is missing).
deployment_id, use_case_id, project_id, model_id = (
    config[key] for key in ("deployment_id", "use_case_id", "project_id", "model_id")
)
print(f"deployment_id: {deployment_id}")


deployment_id: 6771ad6b20c6f3f2bdb4a217


In [None]:
import datetime
# Stamp this run with the current local time, formatted as YYYYMMDD_HHMMSS.
current_run_timestamp = f"{datetime.datetime.now():%Y%m%d_%H%M%S}"
print(f"current_run_timestamp: {current_run_timestamp}")

In [None]:
# Resolve the project and its "known" feature list from the configured use case.
use_case = dr.UseCase.get(use_case_id)

# NOTE(review): the first project of the use case is taken; the `project_id`
# loaded from the config file is not consulted here -- confirm this is intended
# when a use case holds more than one project.
use_case_projects = use_case.list_projects()
if not use_case_projects:
    # IndexError is kept so the failure type matches the previous bare `[0]` lookup.
    raise IndexError(f"Use case {use_case_id} contains no projects")
project = use_case_projects[0]

project_feature_lists = project.get_featurelists()
# Pick the first feature list whose name contains the substring "known".
primary_feature_list = next(
    (flist for flist in project_feature_lists if "known" in flist.name),
    None,
)
if primary_feature_list is None:
    available_names = [flist.name for flist in project_feature_lists]
    raise IndexError(
        f"No feature list with 'known' in its name in project {project.id}; "
        f"available feature lists: {available_names}"
    )
primary_feature_list_id = primary_feature_list.id
print(f"Primary feature list ID: {primary_feature_list_id}")

In [None]:
import pandas as pd # type: ignore
from infra.settings_datasets import training_dataset
from datarobotx.idp.datasets import get_or_create_dataset_from_datasource # type: ignore
from datarobotx.idp.datasource import get_or_create_datasource # type: ignore
from datarobotx.idp.datastore import get_or_create_datastore # type: ignore
from datarobotx.idp.credentials import get_replace_or_create_credential # type: ignore


# SAP DSP settings read from the process environment.
# Every value is a string, or None when the variable is unset.

# -- connection --
sap_dsp_data_store_canonical_name = os.environ.get("SAP_DSP_DATA_STORE_CANONICAL_NAME")
sap_dsp_host_name = os.environ.get("SAP_DSP_HOST_NAME")
sap_dsp_port = os.environ.get("SAP_DSP_PORT")
sap_dsp_data_source = os.environ.get("SAP_DSP_DATA_SOURCE")

# -- primary scoring data: query text and target dataset name --
sap_dsp_scoring_primary_data_query = os.environ.get("SAP_DSP_LATE_PAYMENTS_PRIMARY_SCORING_DATA_QUERY")
sap_dsp_scoring_primary_data_set = os.environ.get("SAP_DSP_LATE_PAYMENTS_HOLDOUT_PRIMARY_DATA_SET")

# -- secondary scoring data: query text and target dataset name --
sap_dsp_scoring_secondary_data_query = os.environ.get("SAP_DSP_LATE_PAYMENTS_SECONDARY_SCORING_DATA_QUERY")
sap_dsp_scoring_secondary_data_set = os.environ.get("SAP_DSP_LATE_PAYMENTS_HOLDOUT_SECONDARY_DATA_SET")

# -- name of the DataRobot credential entry (rebound to a Credential object further down) --
sap_dsp_credentials = os.environ.get("SAP_DSP_CREDENTIALS")

# Resolve the "basic" credential entry by name; at this point
# `sap_dsp_credentials` still holds the name string.
sap_dsp_credentials_id = get_replace_or_create_credential(
    name=sap_dsp_credentials,
    credential_type="basic",
    endpoint=client.endpoint,
    token=client.token,
)
print(f"SAP Credentials ID - {sap_dsp_credentials_id}")

# From here on `sap_dsp_credentials` is the Credential object, not the name.
sap_dsp_credentials = dr.Credential.get(sap_dsp_credentials_id)
print(f"SAP Credentials - {sap_dsp_credentials}")
# Connection fields sent with the data store definition: host and port.
sap_dsp_connection_fields = [
    {"id": "host", "name": "Host Name", "value": sap_dsp_host_name},
    {"id": "port", "name": "port", "value": sap_dsp_port},
]

# NOTE(review): the driver ID is hard-coded -- presumably the driver registered
# for SAP in this DataRobot installation; confirm before reusing elsewhere.
sap_dsp_data_store_id = get_or_create_datastore(
    endpoint=client.endpoint,
    token=client.token,
    canonical_name=sap_dsp_data_store_canonical_name,
    data_store_type='dr-database-v1',
    driver_id='66c8ecdd45d2b5465fd74b49',
    fields=sap_dsp_connection_fields,
)
sap_dsp_data_store = dr.DataStore.get(sap_dsp_data_store_id)

print(f"SAP Datastore ID - {sap_dsp_data_store_id}")
print(f"SAP Datastore - {sap_dsp_data_store}")
def _get_or_create_scoring_data_source(query):
    """Get or create the data source that runs ``query`` on the SAP data store.

    Args:
        query: Query text passed to ``dr.DataSourceParameters``.

    Returns:
        The value returned by ``get_or_create_datasource`` (used below as a
        data source ID).
    """
    return get_or_create_datasource(
        endpoint=client.endpoint,
        token=client.token,
        data_source_type='dr-database-v1',
        canonical_name=sap_dsp_data_source,
        params=dr.DataSourceParameters(
            data_store_id=sap_dsp_data_store.id,
            query=query,
        ),
    )


def _get_or_create_scoring_dataset(data_source_id, dataset_name):
    """Get or create the dataset backed by ``data_source_id`` and fetch it once.

    Args:
        data_source_id: ID of the data source the dataset is created from.
        dataset_name: Name given to the dataset.

    Returns:
        The ``dr.Dataset`` fetched with the ID returned by
        ``get_or_create_dataset_from_datasource``.
    """
    dataset_id = get_or_create_dataset_from_datasource(
        endpoint=client.endpoint,
        token=client.token,
        data_source_id=data_source_id,
        name=dataset_name,
        credential_id=sap_dsp_credentials.credential_id,
    )
    return dr.Dataset.get(dataset_id)


# --- Primary scoring data ---
sap_dsp_data_source_id = _get_or_create_scoring_data_source(sap_dsp_scoring_primary_data_query)
print(f"SAP Datasource ID - {sap_dsp_data_source_id}")
print(f"SAP Datasource - {dr.DataSource.get(sap_dsp_data_source_id)}")
primary_scoring_dataset = _get_or_create_scoring_dataset(
    sap_dsp_data_source_id, sap_dsp_scoring_primary_data_set
)
primary_scoring_dataset_id = primary_scoring_dataset.id
print(f"SAP primary scoring data set ID - {primary_scoring_dataset_id}")
print(f"SAP primary scoring data set - {primary_scoring_dataset}")

# --- Secondary scoring data ---
# `sap_dsp_data_source_id` is deliberately rebound, as before: after this cell
# it refers to the secondary data source.
sap_dsp_data_source_id = _get_or_create_scoring_data_source(sap_dsp_scoring_secondary_data_query)
secondary_scoring_dataset = _get_or_create_scoring_dataset(
    sap_dsp_data_source_id, sap_dsp_scoring_secondary_data_set
)
secondary_scoring_dataset_id = secondary_scoring_dataset.id
print(f"SAP secondary scoring data set ID - {secondary_scoring_dataset_id}")
print(f"SAP secondary scoring data set - {secondary_scoring_dataset}")

SAP Credentials ID - 6747707642a92d74cd796960
SAP Credentials - Credential('6747707642a92d74cd796960', 'DR_SAP_TEMPLATE_CRED', 'basic')
SAP Datastore ID - 676c46ce57b0ab717e45149a
SAP Datastore - DataStore('DR_SAP_TEMPLATE [72d5954]')
SAP Datasource ID - 676c46d257b0ab717e45149b
SAP Datasource - DataSource('LATE_PAYMENTS_TRAINING_DATA_DSP [4f747c4]')
SAP Training data set ID - 676cad9bfc42109cd1340d6c
SAP Training data set - Dataset(name='DRS_LATE_PAYMENTS_TRAINING_DATA_VIEW_DSP [3092c80]', id='676cad9bfc42109cd1340d6c')


In [None]:
# Model from the configured project; its feature list is attached to the
# secondary-dataset configuration created below.
model = dr.Model.get(project=project, model_id=model_id)

# Describe the secondary scoring dataset under the identifier "CUSTOMER_DATASET".
self_join_secondary_predict_dataset = dr.models.secondary_dataset.SecondaryDataset(
    identifier="CUSTOMER_DATASET",
    catalog_id=secondary_scoring_dataset.id,
    # Pin the exact dataset version (original note: complete version lineage for predictions).
    catalog_version_id=secondary_scoring_dataset.version_id,
    # Original note: fetch the latest database records.
    snapshot_policy="latest",
)
secondary_dataset_spec = self_join_secondary_predict_dataset.to_dict()

# Register a secondary-dataset configuration holding that single dataset.
# NOTE(review): each run calls `create` again with the same name, and
# `predict_config` is not referenced by the later cells visible in this
# notebook -- confirm how it is meant to be attached to scoring.
predict_config = dr.SecondaryDatasetConfigurations.create(
    project_id=project.id,
    name="Batch Prediction Configuration",
    featurelist_id=model.featurelist_id,
    secondary_datasets=[secondary_dataset_spec],
)

In [12]:
deployment = dr.Deployment.get(deployment_id)

# Find the data store that receives the prediction output. It is matched on
# `canonical_name` instead of comparing `str(data_store)` with a hard-coded
# repr string, so the lookup does not depend on how the SDK formats its repr.
data_stores = dr.DataStore.list(typ='all')
dsp_data_alternate_store = next(
    (store for store in data_stores if store.canonical_name == "DR_SAP_TEMPLATES_ALTERNATE"),
    None,
)
if dsp_data_alternate_store is None:
    # ValueError matches what the previous `list.index` lookup raised.
    raise ValueError(
        "No data store with canonical name 'DR_SAP_TEMPLATES_ALTERNATE' found; "
        f"available: {[store.canonical_name for store in data_stores]}"
    )

# Find the credential used to write to that data store, matched by name.
creds = dr.Credential.list()
dsp_cred = next((cred for cred in creds if cred.name == "DR_SAP_TEMPLATE_CRED"), None)
if dsp_cred is None:
    raise ValueError("No credential named 'DR_SAP_TEMPLATE_CRED' found")

# Re-fetch the primary scoring dataset by ID so this cell sees its current state.
primary_scoring_dataset = dr.Dataset.get(primary_scoring_dataset_id)
# Columns listed under "passthrough_columns" in the job configuration.
scoring_passthrough_columns = [
    "CUSTOMER_NUMBER",
    "NET_PAYMENT_TERMS_PERIOD",
    "Days_Late",
    "INVOICE_DUE_DATE",
    "ORDER_DATE",
    "CASH_DISCOUNT_DAYS_2",
    "Invoice_Due_Date_Year_Month",
    "STATUS_MANUAL_PRICE_CHANGE",
    "INVOICE_NUMBER",
    "CASH_DISCOUNT_DAYS_1",
    "NET_VALUE_OF_ITEM",
    "CASH_DISCOUNT_AMOUNT",
    "ITEM_CREDIT_PRICE",
    "PAYMENT_TERM",
    "MATERIAL_NAME",
    "AMT_ELIGIBLE_FOR_CASH_DISCOUNT",
    "ITEM_CREDIT_PRICE_ROUNDED",
    "BASELINE_DT_FOR_DUE_DATE_CALC",
    "MATERIAL_NUMBER",
    "CASH_DISCOUNT_PERCENTAGE_2",
    "ACTUAL_INVOICED_QUANTITY_CASES",
    "Payment_Status",
    "INVOICE_TO_PAID_DAYS",
    "EXPECTED_AMOUNT",
    "CUSTOMER_NAME",
    "CASH_DISCOUNT_PERCENTAGE_1",
    "SHIP_DATE",
    "Days_to_Ship",
    "ORDER_TO_PAID_DAYS",
    "ACTUAL_PAID_DATE",
]

# Intake: score the primary scoring dataset, referenced by its ID.
scoring_intake_settings = {
    "type": "dataset",
    "datasetId": primary_scoring_dataset_id,
}

# Output: create a table through JDBC in the alternate SAP data store.
# The table name is a plain string, not an f-string: the
# "{{ current_run_timestamp }}" text is sent exactly as written (presumably a
# placeholder resolved on the DataRobot side at run time -- TODO confirm).
scoring_output_settings = {
    "type": "jdbc",
    "table": "PREDICTION_RESULTS_SAP_TEMPLATE_{{ current_run_timestamp }}",
    "schema": "DR_SAP_TEMPLATES#TEMPLATE_DB_USER",
    "statement_type": "create_table",
    "data_store_id": dsp_data_alternate_store.id,
    "credential_id": dsp_cred.credential_id,
}

# Full job specification handed to the batch prediction job definition.
batch_prediction_job_config = {
    "deploymentId": deployment_id,
    "num_concurrent": 4,
    "intake_settings": scoring_intake_settings,
    "passthrough_columns": scoring_passthrough_columns,
    "output_settings": scoring_output_settings,
}
# Schedule for the batch prediction job definition: minute 0 of hour 16, any month.
# NOTE(review): both day_of_week and day_of_month are set -- confirm how the
# scheduler combines the two fields before relying on the cadence.
batch_prediction_job_schedule = dict(
    day_of_week=[1],
    month=["*"],
    hour=[16],
    minute=[0],
    day_of_month=[1],
)


In [None]:
from datarobotx.idp.batch_predictions import get_update_or_create_batch_prediction_job # type: ignore
from infra.settings_main import project_name
# Get, update or create the enabled, scheduled batch prediction job for the
# deployment. The returned value is what a later cell passes to
# `dr.BatchPredictionJobDefinition.get`.
batch_prediction_job_name = f"Recipe Template Batch Prediction [{project_name}]"
batch_prediction_job = get_update_or_create_batch_prediction_job(
    endpoint=client.endpoint,
    token=client.token,
    deployment_id=deployment_id,
    name=batch_prediction_job_name,
    enabled=True,
    batch_prediction_job=batch_prediction_job_config,
    schedule=batch_prediction_job_schedule,
)
print(f"batch_prediction_job: {batch_prediction_job}")

In [None]:
# Trigger the job definition once, outside its schedule, and block until it finishes.
print(f"batch_prediction_job: {batch_prediction_job}")
prediction_job_definition = dr.BatchPredictionJobDefinition.get(batch_prediction_job)
job = prediction_job_definition.run_once()
job.wait_for_completion()
print(f"Batch prediction job completed with status: {job.status}")

# NOTE(review): this prints the table name exactly as configured, including
# any "{{ ... }}" placeholder text, not the name resolved for this run.
output_table_name = batch_prediction_job_config['output_settings']['table']
print(f"Batch predictions written to table: {output_table_name}")
