# Templatized notebook for running a CB-Geo MPM Tapis job

## Install DesignSafe API (dapi)

In [5]:
!pip uninstall dapi --yes
!pip install -e ../..

Found existing installation: dapi 1.0.0
Uninstalling dapi-1.0.0:
  Successfully uninstalled dapi-1.0.0
Obtaining file:///Users/krishna/dev/DesignSafe/dapi
  Installing build dependencies ... [?25ldone
[?25h  Checking if build backend supports build_editable ... [?25ldone
[?25h  Getting requirements to build editable ... [?25ldone
[?25h  Preparing editable metadata (pyproject.toml) ... [?25ldone
Building wheels for collected packages: dapi
  Building editable for dapi (pyproject.toml) ... [?25ldone
[?25h  Created wheel for dapi: filename=dapi-1.0.0-py3-none-any.whl size=3825 sha256=f9fd4761dd2e940e7cd0f96795280478e27aacfbc14275ffb57c0ee44f9b129c
  Stored in directory: /private/var/folders/w8/xz590jyd7r36zmxcspgzj3z40000gn/T/pip-ephem-wheel-cache-q5wx61zs/wheels/98/df/91/ed70fe2dca11c3c6e5b6e8e6eef18c373a119d095037f892a3
Successfully built dapi
Installing collected packages: dapi
Successfully installed dapi-1.0.0

[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m A new release 

In [2]:
import os
from dapi import (
    DSClient,
    AppDiscoveryError,
    FileOperationError,
    JobSubmissionError,
    JobMonitorError,
)
import json
from datetime import datetime
from dataclasses import asdict

In [3]:
# Create the DesignSafe client; the cells below reach the service through `ds`.
try:
    ds = DSClient()
except Exception as auth_error:
    # Nothing further can run without a client, so halt the notebook here.
    print(f"Authentication failed: {auth_error}")
    raise SystemExit("Stopping notebook due to authentication failure.")

Authentication successful.


In [4]:
# --- Job configuration: edit these values to retarget the run ---

# Application to launch.
app_id_to_use = "mpm"  # Or "mpm-s3" - use the correct ID verified on DesignSafe

# DesignSafe directory holding the inputs, and the input script inside it.
ds_path: str = "/MyData/mpm-benchmarks/2d/uniaxial_stress/"
input_filename: str = "mpm.json"

# Resource request passed as overrides when the job request is generated.
max_job_minutes: int = 10
tacc_allocation: str = "BCS20003"
# queue: str = "skx" # Example override - only if needed and valid

In [5]:
# Turn the DesignSafe path into the URI that is later handed to the job
# request generator as its input directory.
try:
    input_uri = ds.files.translate_path_to_uri(ds_path)
    print(f"Input Directory Tapis URI: {input_uri}")
except Exception as translate_error:
    # A bad path makes every later step meaningless; stop the notebook.
    print(f"Error translating path '{ds_path}': {translate_error}")
    raise SystemExit("Stopping notebook due to path translation error.")

Translated '/MyData/mpm-benchmarks/2d/uniaxial_stress/' to 'tapis://designsafe.storage.default/kks32/mpm-benchmarks/2d/uniaxial_stress/' using t.username
Input Directory Tapis URI: tapis://designsafe.storage.default/kks32/mpm-benchmarks/2d/uniaxial_stress/


In [6]:
# Build the job request from the app's defaults plus the overrides configured
# above, then echo it so it can be reviewed before submission.
try:
    print("\nGenerating job request dictionary using app defaults and overrides...")
    job_req_dict = ds.jobs.generate_request(
        app_id=app_id_to_use,
        input_dir_uri=input_uri,
        script_filename=input_filename,
        # Overrides applied on top of the app defaults:
        max_minutes=max_job_minutes,
        allocation=tacc_allocation,
        # Further optional overrides (uncomment only when needed):
        # queue=queue,
        # job_name="my_custom_mpm_job",
        # node_count=1,
        # cores_per_node=1,
    )
    print("\n--- Generated Job Request Dictionary ---")
    # default=str stringifies any value json cannot serialise natively.
    print(json.dumps(job_req_dict, indent=2, default=str))
    print("---------------------------------------")

except (AppDiscoveryError, ValueError, JobSubmissionError) as generation_error:
    # Failures explicitly anticipated while generating the request.
    print(f"Error generating job request: {generation_error}")
    raise SystemExit("Stopping notebook due to job request generation error.")
except Exception as unexpected_error:
    # Anything else still halts the notebook, with a distinct message.
    print(
        f"An unexpected error occurred during job request generation: {unexpected_error}"
    )
    raise SystemExit("Stopping notebook due to unexpected generation error.")


Generating job request dictionary using app defaults and overrides...
Generating job request for app 'mpm'...
Using App Details: mpm v1.1.0
Placing script 'mpm.json' in appArgs: 'Input Script'
Adding allocation: BCS20003
Job request dictionary generated successfully.

--- Generated Job Request Dictionary ---
{
  "name": "mpm-20250427_095544",
  "appId": "mpm",
  "appVersion": "1.1.0",
  "description": "Material Point Method (MPM) is a particle based method that represents the material as a collection of material points, and their deformations are determined by Newton\u2019s laws of motion.",
  "execSystemId": "frontera",
  "archiveSystemId": "frontera",
  "archiveOnAppError": true,
  "execSystemLogicalQueue": "normal",
  "nodeCount": 3,
  "coresPerNode": 56,
  "maxMinutes": 10,
  "memoryMB": 192000,
  "isMpi": false,
  "tags": [],
  "fileInputs": [
    {
      "name": "Input Directory",
      "sourceUrl": "tapis://designsafe.storage.default/kks32/mpm-benchmarks/2d/uniaxial_stress/",
 

In [None]:
# At this point, the user can inspect and modify job_req_dict if needed.
# For example:
# print("Modifying job request dictionary...")
# job_req_dict['description'] = "My modified description"
# job_req_dict['parameterSet']['envVariables'].append({'key': 'MY_EXTRA_VAR', 'value': 'extra_value'})
# print("\n--- Modified Job Request Dictionary ---")
# print(json.dumps(job_req_dict, indent=2, default=str))
# print("--------------------------------------")

In [7]:
# Guard: the request dictionary must have been produced by an earlier cell.
if "job_req_dict" not in locals():
    print("Error: job_req_dict not found. Please run the previous cell.")
    raise SystemExit("Stopping notebook.")

# Submit the (possibly hand-edited) request and keep the returned job handle
# in `submitted_job` for the monitoring cells that follow.
try:
    print("\nSubmitting the job request dictionary...")
    submitted_job = ds.jobs.submit_request(job_req_dict)
    print("Job Submitted Successfully!")
    print(f"Job UUID: {submitted_job.uuid}")

except JobSubmissionError as submit_error:
    print(f"Job submission failed: {submit_error}")
    # Show exactly what was sent so the rejected request can be diagnosed.
    print("\n--- Failed Job Request ---")
    print(json.dumps(job_req_dict, indent=2, default=str))
    print("--------------------------")
    raise SystemExit("Stopping notebook due to job submission error.")
except Exception as unexpected_error:
    print(f"An unexpected error occurred during job submission: {unexpected_error}")
    raise SystemExit("Stopping notebook due to unexpected submission error.")


Submitting the job request dictionary...

--- Submitting Tapis Job Request ---
{
  "name": "mpm-20250427_095544",
  "appId": "mpm",
  "appVersion": "1.1.0",
  "description": "Material Point Method (MPM) is a particle based method that represents the material as a collection of material points, and their deformations are determined by Newton\u2019s laws of motion.",
  "execSystemId": "frontera",
  "archiveSystemId": "frontera",
  "archiveOnAppError": true,
  "execSystemLogicalQueue": "normal",
  "nodeCount": 3,
  "coresPerNode": 56,
  "maxMinutes": 10,
  "memoryMB": 192000,
  "isMpi": false,
  "tags": [],
  "fileInputs": [
    {
      "name": "Input Directory",
      "sourceUrl": "tapis://designsafe.storage.default/kks32/mpm-benchmarks/2d/uniaxial_stress/",
      "autoMountLocal": true,
      "targetPath": "inputDirectory"
    }
  ],
  "parameterSet": {
    "appArgs": [
      {
        "name": "Input Script",
        "arg": "mpm.json"
      }
    ],
    "schedulerOptions": [
      {
  

In [9]:
def _last_known_status(job):
    """Best-effort read of ``job.status`` after monitoring ended early.

    Prints the status and returns it. If reading the status itself raises an
    ordinary exception, prints a notice and returns ``"UNKNOWN"`` so that
    ``final_status`` is always defined for the cells that follow.

    Only ``Exception`` is caught (not a bare ``except``), so a
    ``KeyboardInterrupt`` or ``SystemExit`` raised during the lookup still
    propagates instead of being silently swallowed.
    """
    try:
        status = job.status
        print(f"Last known status: {status}")
    except Exception:
        status = "UNKNOWN"
        print("Could not retrieve last known status.")
    return status


# Guard: a job must have been submitted by an earlier cell.
if "submitted_job" not in locals():
    print("Error: submitted_job not found.")
    raise SystemExit("Stopping notebook.")

# Poll the job until monitor() returns, recording the outcome in `final_status`.
# Every handler falls back to the last status the job handle can report.
try:
    print(f"\nMonitoring job {submitted_job.uuid}...")
    final_status = submitted_job.monitor(interval=30)
    print(f"\nJob {submitted_job.uuid} finished monitoring.")
    print(f"Final Status: {final_status}")
except JobMonitorError as e:
    print(f"\nJob monitoring failed or timed out: {e}")
    final_status = _last_known_status(submitted_job)
except KeyboardInterrupt:
    # Interrupting the kernel stops the polling only; the handler does not
    # cancel the job itself.
    print("\nMonitoring stopped by user.")
    final_status = _last_known_status(submitted_job)
except Exception as e:
    print(f"\nAn unexpected error occurred during monitoring: {e}")
    final_status = _last_known_status(submitted_job)


Monitoring job b196bf63-795e-4533-9544-b9ef16b1a04c-007...
Monitoring job b196bf63-795e-4533-9544-b9ef16b1a04c-007 (Initial Status: STAGING_JOB, Timeout: 10 mins, Interval: 30s)
	Job b196bf63-795e-4533-9544-b9ef16b1a04c-007 Status: QUEUED (2025-04-27T09:59:31.487189)
	Job b196bf63-795e-4533-9544-b9ef16b1a04c-007 Status: RUNNING (2025-04-27T10:00:01.917496)
	Job b196bf63-795e-4533-9544-b9ef16b1a04c-007 Status: ARCHIVING (2025-04-27T10:03:05.393456)

Error during monitoring for job b196bf63-795e-4533-9544-b9ef16b1a04c-007: Monitoring timeout after 10 minutes for job b196bf63-795e-4533-9544-b9ef16b1a04c-007. Last status: ARCHIVING

Job monitoring failed or timed out: Error monitoring job b196bf63-795e-4533-9544-b9ef16b1a04c-007: Monitoring timeout after 10 minutes for job b196bf63-795e-4533-9544-b9ef16b1a04c-007. Last status: ARCHIVING
Last known status: ARCHIVING


In [10]:
# Display the runtime summary only for jobs that reached FINISHED or FAILED.
# Both names are checked for existence before either is read, so running this
# cell without having run the submission or monitoring cell prints a skip
# message instead of raising NameError.
if "submitted_job" not in locals() or "final_status" not in locals():
    print(
        "\nSkipping runtime summary as job submission or monitoring did not complete."
    )
elif final_status in ["FINISHED", "FAILED"]:
    print(f"\nAttempting to display runtime summary for job {submitted_job.uuid}...")
    try:
        submitted_job.print_runtime_summary(verbose=False)
    except Exception as e:
        # The summary is informational; report the problem and carry on.
        print(f"Could not display runtime summary: {e}")
else:
    print(
        f"\nSkipping runtime summary because job {submitted_job.uuid} did not finish normally (Status: {final_status})."
    )


Skipping runtime summary because job b196bf63-795e-4533-9544-b9ef16b1a04c-007 did not finish normally (Status: ARCHIVING).


In [11]:
# List the root of the job archive, but only when a job handle exists and the
# recorded status is one of FINISHED / FAILED / CANCELLED / STOPPED.
if (
    "submitted_job" not in locals()
    or "final_status" not in locals()
    or final_status not in ["FINISHED", "FAILED", "CANCELLED", "STOPPED"]
):
    print(
        "\nSkipping archive listing as job did not complete or submission/monitoring failed."
    )
else:
    print(f"\nAttempting to access archive information for job {submitted_job.uuid}...")
    try:
        archive_uri = submitted_job.archive_uri
        if not archive_uri:
            print("Archive URI not available for this job.")
        else:
            print(f"Job Archive Tapis URI: {archive_uri}")
            print("\nListing archive contents (root):")
            outputs = ds.files.list(archive_uri)
            if not outputs:
                print("No files found in the archive root directory.")
            else:
                # One line per entry: name, type, size and modification time.
                for item in outputs:
                    print(
                        f"- {item.name} (Type: {item.type}, Size: {item.size} bytes, Modified: {item.lastModified})"
                    )
    except FileOperationError as listing_error:
        print(f"Could not list archive files: {listing_error}")
    except Exception as unexpected_error:
        print(
            f"An unexpected error occurred while accessing archive information: {unexpected_error}"
        )


Skipping archive listing as job did not complete or submission/monitoring failed.


In [None]:
# Find all apps (less verbose): search with an empty string and verbose=False,
# then report only how many apps came back.
# NOTE(review): presumably an empty search term matches every app - confirm
# against the dapi documentation.
all_apps = ds.apps.find("", verbose=False)
print(f"Found {len(all_apps)} total apps.")

Found 90 total apps.


In [None]:
# Find MPM apps specifically: search for "mpm" and keep the result in `mpm_apps`.
# NOTE(review): presumably verbose=True makes find() print the matching apps
# itself, since this cell has no print call of its own - confirm.
mpm_apps = ds.apps.find("mpm", verbose=True)


Found 2 matching apps:
- mpm (Version: 1.1.0, Owner: wma_prtl)
- mpm-s3 (Version: 1.0, Owner: wma_prtl)



In [None]:
# Get details for one specific app (here OpenSees-EXPRESS, not the MPM app).
# A dedicated variable is used instead of rebinding `app_id_to_use`, so the
# app ID chosen in the job configuration cell is not silently overwritten if
# the job request cell is re-run after this one.
# NOTE(review): if any later cell relied on `app_id_to_use` having been
# changed to "opensees-express" here, point it at `app_id_to_inspect` instead.
app_id_to_inspect = "opensees-express"
app_details = ds.apps.get_details(app_id_to_inspect, verbose=True)

# A falsy result is treated as "app not found" and stops the notebook.
if not app_details:
    raise SystemExit(
        f"Could not find details for app '{app_id_to_inspect}'. Please check the app ID."
    )

# Print the whole object returned by get_details.
print(f"App Description: {app_details}")


App Details:
  ID: opensees-express
  Version: latest
  Owner: wma_prtl
  Execution System: wma-exec-01
  Description: OpenSees-EXPRESS provides users with a sequential OpenSees interpreter. It is ideal to run small sequential scripts on DesignSafe resources freeing up your own machine.
App Description: 
containerImage: tapis://cloud.data/corral/tacc/aci/CEP/applications/v3/opensees/latest/OpenSees-EXPRESS/opensees_express.zip
created: 2025-02-20T18:41:03.661272Z
deleted: False
description: OpenSees-EXPRESS provides users with a sequential OpenSees interpreter. It is ideal to run small sequential scripts on DesignSafe resources freeing up your own machine.
enabled: True
id: opensees-express
isPublic: True
jobAttributes: 
archiveOnAppError: True
archiveSystemDir: /tmp/${JobOwner}/tapis-jobs-archive/${JobCreateDate}/${JobName}-${JobUUID}
archiveSystemId: cloud.data
cmdPrefix: None
coresPerNode: 1
description: None
dtnSystemInputDir: !tapis_not_set
dtnSystemOutputDir: !tapis_not_set
dyna