In [None]:
!pip install google-cloud-aiplatform

In [None]:
!pip install --upgrade google-api-python-client

In [10]:
from google.auth.transport.requests import Request
from google.oauth2.service_account import Credentials
from googleapiclient import discovery
from google.oauth2 import service_account
import google.auth
import random
from googleapiclient.errors import HttpError
import json

In [30]:
# Path to the service-account key file. Change it to your own key path,
# or authenticate to GCP with OAuth instead.
key_path = "keys/rag-nick-2661627e4595.json"

In [38]:
# Make "rag-nick" the active project for the gcloud CLI.
!gcloud config set project rag-nick

Updated property [core/project].


In [40]:
# List the Compute Engine regions of the project together with their quota usage.
!gcloud compute regions list --project=rag-nick

NAME                     CPUS  DISKS_GB  ADDRESSES  RESERVED_ADDRESSES  STATUS  TURNDOWN_DATE
africa-south1            0/24  0/4096    0/8        0/8                 UP
asia-east1               0/24  0/4096    0/8        0/8                 UP
asia-east2               0/24  0/4096    0/8        0/8                 UP
asia-northeast1          0/24  0/4096    0/8        0/8                 UP
asia-northeast2          0/24  0/4096    0/8        0/8                 UP
asia-northeast3          0/24  0/4096    0/8        0/8                 UP
asia-south1              0/24  0/4096    0/8        0/8                 UP
asia-south2              0/24  0/4096    0/8        0/8                 UP
asia-southeast1          0/24  0/4096    0/8        0/8                 UP
asia-southeast2          0/24  0/4096    0/8        0/8                 UP
australia-southeast1     0/24  0/4096    0/8        0/8                 UP
australia-southeast2     0/24  0/4096    0/8        0/8                 UP
europe

In [41]:
# Load the service-account key and request the cloud-platform scope.
credentials = Credentials.from_service_account_file(
    key_path,
    scopes=['https://www.googleapis.com/auth/cloud-platform']
)

# Freshly loaded credentials hold no access token and have no expiry set, so
# `expired` is False and a check on it would never trigger a refresh.
# `valid` stays False until a token has been fetched, so test that instead.
if not credentials.valid:
  credentials.refresh(Request())

In [42]:
# ID of the GCP project this notebook works with.
PROJECT_ID = "rag-nick"

In [48]:
# Number of collected GPU records after which find_gpu stops searching.
zones_to_check = 50

# Compute Engine v1 API client, authenticated with the service-account credentials.
compute = discovery.build(
    serviceName='compute',
    version='v1',
    credentials=credentials,
)

def list_gpus(project_id, zone):
    """Return the accelerator types listed for one zone of a project.

    Sends an ``acceleratorTypes().list`` request through the notebook-level
    ``compute`` client and returns the ``items`` entry of the response, or an
    empty list when the response has no ``items`` key.
    """
    accelerator_request = compute.acceleratorTypes().list(
        project=project_id,
        zone=zone,
    )
    payload = accelerator_request.execute()
    return payload.get('items', [])


def find_gpu(project_id):
    """Collect the accelerator (GPU) types offered in the project's zones.

    Regions are visited in random order. Every zone belonging to a region is
    queried through ``list_gpus`` and one record is appended per accelerator
    type found. The search stops early once at least ``zones_to_check``
    records have been collected. That limit counts records, not zones, and is
    checked only after a whole zone has been processed, so the result can be
    somewhat longer than the limit.

    Args:
        project_id: ID of the GCP project to query.

    Returns:
        A list of dicts with the keys ``region``, ``zone``, ``gpu_type`` and
        ``description``; empty when no accelerator was found.
    """
    # Query the project that was passed in, not the notebook-level PROJECT_ID.
    regions_response = compute.regions().list(project=project_id).execute()
    regions = [region['name'] for region in regions_response.get('items', [])]

    random.shuffle(regions)

    # The zone listing is the same for every region, so fetch it once and
    # group the zone names by region instead of re-querying inside the loop.
    zones_response = compute.zones().list(project=project_id).execute()
    zones_by_region = {}
    for zone_info in zones_response.get('items', []):
        # The region is expected to be a URL ending in the region name; compare
        # the last path segment exactly rather than by string suffix.
        region_name = zone_info['region'].rsplit('/', 1)[-1]
        zones_by_region.setdefault(region_name, []).append(zone_info['name'])

    gpu_availability = []

    for region in regions:
        for zone in zones_by_region.get(region, []):
            for gpu in list_gpus(project_id, zone):
                gpu_availability.append({
                    "region": region,
                    "zone": zone,
                    "gpu_type": gpu['name'],
                    # Tolerate entries that come without a description.
                    "description": gpu.get('description', '')
                })
            if len(gpu_availability) >= zones_to_check:
                return gpu_availability

    return gpu_availability




In [49]:
# Run the GPU search for the configured project and keep the raw records.
results = find_gpu(PROJECT_ID)

Note that I have encountered three types of failure:

1.   No GPU available
2.   A GPU VM has already been created in the same region - you cannot have two VMs with the same configuration in one region.
3.   Machine type does not match the GPU: this did not show up in this iteration, but the error means that some GPUs can only be attached to a matching accelerator-optimized machine type (for example, A100 requires A2, H100 requires A3, and L4 requires G2). I did not choose g2 here because I just wanted a cheaper GPU for the demo, so if you encounter this error, you can simply rerun.






In [None]:
!pip install prettytable
!pip install pandas

In [53]:
from prettytable import PrettyTable
import pandas as pd

# Render the GPU search results as a plain-text table: load the records into a
# DataFrame, then copy its column names and rows into a PrettyTable and print it.
df = pd.DataFrame(results)
table = PrettyTable()
table.field_names = df.columns
table.add_rows(df.values)
print(table)

+-----------------+-------------------+-----------------------+---------------------------------------+
|      region     |        zone       |        gpu_type       |              description              |
+-----------------+-------------------+-----------------------+---------------------------------------+
|     us-east4    |     us-east4-c    |    nvidia-a100-80gb   |            NVIDIA A100 80GB           |
|     us-east4    |     us-east4-c    |    nvidia-h100-80gb   |            NVIDIA H100 80GB           |
|     us-east4    |     us-east4-c    |       nvidia-l4       |               NVIDIA L4               |
|     us-east4    |     us-east4-c    |     nvidia-l4-vws     |     NVIDIA L4 Virtual Workstation     |
|     us-east4    |     us-east4-c    |    nvidia-tesla-p4    |            NVIDIA Tesla P4            |
|     us-east4    |     us-east4-c    |  nvidia-tesla-p4-vws  |  NVIDIA Tesla P4 Virtual Workstation  |
|     us-east4    |     us-east4-c    |    nvidia-tesla-t4    | 

The steps below verify from the command line that the GPU exists. They are not needed for the final project; we only use them here to find GPUs.

In [None]:
# Log the gcloud CLI in with your Google account.
!gcloud auth login

In [None]:
!gcloud config set project <YOUR PROJECT NAME>

In [None]:
# List the project's VM instances with their zone, machine type and status.
!gcloud compute instances list

NAME         ZONE                  MACHINE_TYPE   PREEMPTIBLE  INTERNAL_IP  EXTERNAL_IP   STATUS
vm-gpu-test  southamerica-east1-c  n1-standard-1               10.158.0.2   34.95.129.29  RUNNING


Once it succeeds, you should be able to see:

*   acceleratorCount: 1
*   acceleratorType

These indicate that the GPU was found!

In [None]:
!gcloud compute instances describe vm-gpu-test <THE ZONE THAT SUCCESS>

cpuPlatform: Intel Broadwell
creationTimestamp: '2024-02-22T18:32:13.416-08:00'
deletionProtection: false
disks:
- architecture: X86_64
  autoDelete: true
  boot: true
  deviceName: persistent-disk-0
  diskSizeGb: '10'
  guestOsFeatures:
  - type: UEFI_COMPATIBLE
  - type: VIRTIO_SCSI_MULTIQUEUE
  - type: GVNIC
  - type: SEV_CAPABLE
  index: 0
  interface: SCSI
  kind: compute#attachedDisk
  licenses:
  - https://www.googleapis.com/compute/v1/projects/debian-cloud/global/licenses/debian-12-bookworm
  mode: READ_WRITE
  source: https://www.googleapis.com/compute/v1/projects/rag-nick/zones/southamerica-east1-c/disks/vm-gpu-test
  type: PERSISTENT
fingerprint: WT-YEP98GA8=
guestAccelerators:
- acceleratorCount: 1
  acceleratorType: https://www.googleapis.com/compute/v1/projects/rag-nick/zones/southamerica-east1-c/acceleratorTypes/nvidia-tesla-t4
id: '1371832550560218445'
kind: compute#instance
labelFingerprint: 42WmSpB8rSM=
lastStartTimestamp: '2024-02-22T18:32:42.040-08:00'
machineType: 