In [None]:
from codeflare_sdk import Cluster, ClusterConfiguration, TokenAuthentication

In [None]:
# Authenticate with the permissions of the current user.
# This step is optional: when it is skipped, the SDK automatically checks for
# the default kubeconfig first and then for the in-cluster config.
# KubeConfigFileAuthentication can be used instead to point at a kubeconfig
# path manually.
# Replace the "XXXXX" placeholders with your own token and server values.
auth = TokenAuthentication(token="XXXXX", server="XXXXX", skip_tls=False)
auth.login()

In [None]:
# Build the cluster object from its configuration.
# Unless local_queue is set below, the SDK tries to find the name of your
# default local queue through the annotation
# "kueue.x-k8s.io/default-queue": "true".
cluster = Cluster(
    ClusterConfiguration(
        name="raytest",
        num_workers=3,
        # Head node resources
        head_cpus=6,
        head_memory=30,
        # Worker resources (requests and limits)
        worker_cpu_requests=6,
        worker_cpu_limits=6,
        worker_memory_requests=30,
        worker_memory_limits=30,
        # For GPU enabled workloads set both extended resource requests
        head_extended_resource_requests={"nvidia.com/gpu": 1},
        worker_extended_resource_requests={"nvidia.com/gpu": 1},
        # image="",  # Optional field
        # When enabled, Ray Cluster yaml files are written to /HOME/.codeflare/resources
        write_to_file=True,
        # local_queue="local-queue-name",  # Specify the local queue manually
    )
)

In [None]:
# Bring up the cluster described by the configuration above.
# NOTE(review): presumably this submits the cluster resources for creation and
# returns without waiting for them to become ready — confirm against the SDK docs.
cluster.up()

In [None]:
# Wait for the cluster to become ready before using it in the cells below.
# NOTE(review): no timeout is passed here; presumably the call blocks until the
# cluster is ready — confirm the default against the SDK's wait_ready signature.
cluster.wait_ready()

In [None]:
# Show the details of the cluster as reported by the SDK.
cluster.details()

In [None]:
# Initialize the Job Submission Client.
# The SDK will automatically gather the dashboard address and authenticate
# using the Ray Job Submission Client.
client = cluster.job_client

In [None]:
# Submit the gpt-j-6b.py script as a job through the Job Submission Client.
# The "XXXXXX" values are placeholders: fill in your own AWS settings.
submission_id = client.submit_job(
    entrypoint="python gpt-j-6b.py",
    runtime_env={
        # Working directory and pip requirements for the job's runtime environment
        "working_dir": "./",
        "pip": "requirements.txt",
        # Environment variables made available to the job
        "env_vars": {
            "AWS_BUCKET_URI": "XXXXXX",
            "AWS_ACCESS_KEY_ID": "XXXXXX",
            "AWS_SECRET_ACCESS_KEY": "XXXXXX",
            "AWS_DEFAULT_REGION": "XXXXXX",
        },
    },
)
# Print the id returned by submit_job
print(submission_id)

In [None]:
# Take the cluster down once the work is finished.
# NOTE(review): presumably this deletes the cluster resources created by
# cluster.up() — confirm against the SDK docs.
cluster.down()