In [1]:
import urllib
import urllib.request

# Download the sample image that is later passed to infer_image.
url, filename = ("https://github.com/pytorch/hub/raw/master/images/dog.jpg", "dog.jpg")
# `import urllib` by itself does not load the `urllib.request` submodule, so it
# is imported explicitly above. The old `urllib.URLopener` attempt is dropped:
# that name does not exist in Python 3, and the bare `except:` wrapped around
# it also hid genuine download failures.
urllib.request.urlretrieve(url, filename)

In [6]:
def infer_image(image_path, categories_path="imagenet_classes.txt", top_k=5):
    """Classify one image with a pretrained ResNet-18 and return the top classes.

    The imports live inside the function body so that the function carries its
    own dependencies when it is handed to ``Executor.submit``. All file paths
    are opened by whichever process actually runs this function.

    Args:
        image_path: Path of the image file to classify.
        categories_path: Text file with one class label per line, ordered by
            class index. Defaults to ``"imagenet_classes.txt"``.
        top_k: How many of the most probable classes to return. Defaults to 5.

    Returns:
        A list of ``(label, probability)`` tuples (``str``, ``float``), most
        probable class first.
    """
    import torch
    from PIL import Image
    from torchvision import transforms

    # Load the model (weights are fetched through torch.hub) in inference mode
    model = torch.hub.load('pytorch/vision:v0.10.0', 'resnet18', pretrained=True)
    model.eval()

    # Preprocess the image
    preprocess = transforms.Compose([
        transforms.Resize(256),
        transforms.CenterCrop(224),
        transforms.ToTensor(),
        transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
    ])
    # Normalize above is configured for exactly three channels, so grayscale,
    # RGBA and palette images are converted to RGB first. The context manager
    # closes the underlying file once the tensor has been produced.
    with Image.open(image_path) as input_image:
        input_tensor = preprocess(input_image.convert("RGB"))
    input_batch = input_tensor.unsqueeze(0)  # Create a mini-batch as expected by the model

    # Run on the GPU when one is available, otherwise stay on the CPU
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    model.to(device)
    input_batch = input_batch.to(device)

    # Perform inference without tracking gradients
    with torch.no_grad():
        output = model(input_batch)

    # Convert the scores of the single batch element to probabilities
    probabilities = torch.nn.functional.softmax(output[0], dim=0)

    # Read the categories (line i holds the label of class index i)
    with open(categories_path, "r") as f:
        categories = [line.strip() for line in f]

    # Get the top-k categories; tolist() yields plain Python ints/floats so the
    # label list is indexed with real integers and the result holds no tensors.
    top_prob, top_catid = torch.topk(probabilities, top_k)
    results = [
        (categories[class_index], probability)
        for class_index, probability in zip(top_catid.tolist(), top_prob.tolist())
    ]

    return results

In [3]:
from globus_compute_sdk.serialize import CombinedCode
from globus_compute_sdk import Client
from globus_compute_sdk import Executor

# Client that uses the CombinedCode strategy to serialize submitted functions.
c = Client(
    code_serialization_strategy=CombinedCode(),
)


In [1]:
import os
from dotenv import load_dotenv

# The endpoint ID is kept out of the notebook: it is read from the ENDPOINT_ID
# variable after loading the dotenv file below. The value is None when the
# variable is not set.
ENV_PATH = "./globus_torch.env"
load_dotenv(dotenv_path=ENV_PATH)
perlmutter_endpoint = os.environ.get("ENDPOINT_ID")

In [14]:
def check_environment():
    """Report details about the Python environment that executes this function.

    ``sys`` is imported inside the body so the function carries its own
    dependency when it is handed to ``Executor.submit``.

    Returns:
        dict: A single entry, ``"system_path"``, holding a copy of ``sys.path``
        (the interpreter's module search path, not the shell ``PATH`` variable).
    """
    import sys

    # Copy the list so the caller receives a snapshot instead of an alias of
    # the interpreter's live, mutable sys.path.
    environment_details = {"system_path": list(sys.path)}

    return environment_details

In [17]:
# `client=` is the current keyword for passing a Client to the Executor;
# `funcx_client=` is its deprecated predecessor.
with Executor(endpoint_id=perlmutter_endpoint, client=c) as gce:
    # Submit the environment probe for execution ...
    future = gce.submit(check_environment)

    # ... and block until its result is available
    print(future.result())


TaskExecutionFailed: 
 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
 Traceback from attempt: 1
 Traceback (most recent call last):
   File "/global/common/software/nersc/pe/conda/24.1.0/Miniconda3-py311_23.11.0-2/lib/python3.11/concurrent/futures/_base.py", line 401, in __get_result
     raise self._exception
   File "/global/homes/d/duccio/.local/lib/python3.11/site-packages/parsl/executors/high_throughput/executor.py", line 520, in _queue_management_worker
     s.reraise()
   File "/global/homes/d/duccio/.local/lib/python3.11/site-packages/parsl/app/errors.py", line 118, in reraise
     raise v
   File "/global/homes/d/duccio/.local/bin/process_worker_pool.py", line 446, in worker_watchdog
     raise WorkerLost(worker_id, platform.node())
     ^^^^^^^^^^^^^^^^^
 parsl.executors.high_throughput.errors.WorkerLost: Task failure due to loss of worker 3 on host nid002965

 --------------------------------------------------------------------++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
 Traceback from attempt: 2
 Traceback (most recent call last):
   File "/global/common/software/nersc/pe/conda/24.1.0/Miniconda3-py311_23.11.0-2/lib/python3.11/concurrent/futures/_base.py", line 401, in __get_result
     raise self._exception
   File "/global/homes/d/duccio/.local/lib/python3.11/site-packages/parsl/executors/high_throughput/executor.py", line 520, in _queue_management_worker
     s.reraise()
   File "/global/homes/d/duccio/.local/lib/python3.11/site-packages/parsl/app/errors.py", line 118, in reraise
     raise v
   File "/global/homes/d/duccio/.local/bin/process_worker_pool.py", line 446, in worker_watchdog
     raise WorkerLost(worker_id, platform.node())
     ^^^^^^^^^^^^^^^^^
 parsl.executors.high_throughput.errors.WorkerLost: Task failure due to loss of worker 2 on host nid002965

 --------------------------------------------------------------------++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
 Traceback from attempt: final attempt
 Traceback (most recent call last):
   File "/global/common/software/nersc/pe/conda/24.1.0/Miniconda3-py311_23.11.0-2/lib/python3.11/concurrent/futures/_base.py", line 401, in __get_result
     raise self._exception
   File "/global/homes/d/duccio/.local/lib/python3.11/site-packages/parsl/executors/high_throughput/executor.py", line 520, in _queue_management_worker
     s.reraise()
   File "/global/homes/d/duccio/.local/lib/python3.11/site-packages/parsl/app/errors.py", line 118, in reraise
     raise v
   File "/global/homes/d/duccio/.local/bin/process_worker_pool.py", line 446, in worker_watchdog
     raise WorkerLost(worker_id, platform.node())
     ^^^^^^^^^^^^^^^^^
 parsl.executors.high_throughput.errors.WorkerLost: Task failure due to loss of worker 0 on host nid002965

 --------------------------------------------------------------------

In [15]:
# `client=` is the current keyword for passing a Client to the Executor;
# `funcx_client=` is its deprecated predecessor.
with Executor(endpoint_id=perlmutter_endpoint, client=c) as gce:
    # NOTE(review): infer_image opens this path (and "imagenet_classes.txt")
    # in the process that runs it - presumably the endpoint worker, not this
    # notebook. Confirm both files exist there.
    image_path = 'dog.jpg'  # Change to your image path

    # Submit the function for execution
    future = gce.submit(infer_image, image_path)

    # Wait for the result
    result = future.result()
    print(result)

TaskExecutionFailed: 
 Traceback (most recent call last):
   File "/global/homes/d/duccio/.local/lib/python3.11/site-packages/globus_compute_endpoint/engines/helper.py", line 140, in _call_user_function
     result_data = f(*args, **kwargs)
                   ^^^^^^^^^^^^^^^^^^
 TypeError: 'int' object is not subscriptable
