In [None]:
# Set up the FABlib configuration first.
# Import the FABlib library's manager class.
from fabrictestbed_extensions.fablib.fablib import FablibManager as fablib_manager

# Create the manager object; later cells refer to it as `fablib`.
fablib = fablib_manager()

# Display the active configuration (the trailing `;` suppresses the echo of the call's return value).
fablib.show_config();

In [None]:
# Validate the FABlib configuration before creating any resources.
# NOTE(review): what exactly gets checked is defined by FABlib - confirm in its docs.
fablib.validate_config()

In [None]:
# Save the current FABlib configuration.
# NOTE(review): presumably persists it to the user's config file - confirm the destination in the FABlib docs.
fablib.save_config()

In [None]:
# Create Node with GPU
# Pick which GPU type to use (edit GPU_CHOICE, then execute this cell).

# choices include
# GPU_RTX6000
# GPU_TeslaT4
# GPU_A30
# GPU_A40
GPU_CHOICE = 'GPU_RTX6000'

# don't edit - convert from GPU type to a resource column name
# to use in the site filter lambda function in a later cell
choice_to_column = {
    "GPU_RTX6000": "rtx6000_available",
    "GPU_TeslaT4": "tesla_t4_available",
    "GPU_A30": "a30_available",
    "GPU_A40": "a40_available"
}

# Fail fast on a typo in GPU_CHOICE. Falling back to a placeholder column name
# would only surface later, as a confusing lookup error inside the site filter.
if GPU_CHOICE not in choice_to_column:
    raise ValueError(
        f'Unsupported GPU_CHOICE {GPU_CHOICE!r}; '
        f'expected one of {sorted(choice_to_column)}'
    )

column_name = choice_to_column[GPU_CHOICE]
print(f'{column_name=}')

In [None]:
# Labels used by the following cells: the node name, and a slice name that
# embeds the selected GPU model.
node_name = 'gpu-node'
slice_name = f'My Simple GPU Slice with {GPU_CHOICE}'

print(f'Will create slice "{slice_name}" with node "{node_name}"')

In [None]:
# Find a site with at least one available GPU of the selected type.
# Set site_override to a site name to skip the random pick.
site_override = None

# Use the override when given; otherwise ask FABlib for a random site whose
# availability column for the chosen GPU is positive.
site = site_override or fablib.get_random_site(
    filter_function=lambda x: x[column_name] > 0
)

# Stop here if nothing matched, rather than passing site=None on to add_node().
# NOTE(review): assumes get_random_site() yields None when no site passes the
# filter - confirm against the FABlib docs.
if site is None:
    raise RuntimeError(
        f'No site currently reports {column_name} > 0; '
        'choose another GPU_CHOICE or set site_override'
    )

print(f'Preparing to create slice "{slice_name}" with node {node_name} in site {site}')

In [None]:
# Build and submit the slice. By default the submit() call polls every
# 10-20 seconds for up to 360 seconds while the slice comes up; the normal
# expected time is around 2 minutes.
# NOTE(review): `slice` shadows the Python builtin; kept because other cells use this name.
slice = fablib.new_slice(name=slice_name)

# One node with a 100G drive, the default CPU core count and an Ubuntu 22 image.
node = slice.add_node(
    name=node_name,
    site=site,
    disk=100,
    image='default_ubuntu_22',
)

# Attach the selected GPU model to the node as component 'gpu1'.
node.add_component(name='gpu1', model=GPU_CHOICE)

# Submit the slice request.
slice.submit();

# Once the output shows the state StableOK, setup is done and the SSH command
# shown for the node can be used to connect to it.

In [None]:
# Re-fetch the slice by name and display its details.
slice = fablib.get_slice(name=slice_name)
slice.show();

In [None]:
# Look up the node by name in the slice and display it.
node = slice.get_node(node_name) 
node.show()

# Look up the component named 'gpu1' on the node and display it.
gpu = node.get_component('gpu1')
gpu.show();

In [None]:
# Install pciutils, then list the PCI devices whose description mentions
# NVIDIA or a 3D controller.
# Raw string: `\|` is grep's basic-regex alternation, but inside a normal
# Python string literal it is an invalid escape sequence that triggers a warning.
command = r"sudo apt-get install -y pciutils && lspci | grep 'NVIDIA\|3D controller'"

stdout, stderr = node.execute(command)

In [None]:
distro='ubuntu2204'
version='12.2'
architecture='x86_64'

# File name of the repository keyring package downloaded below.
keyring_deb = 'cuda-keyring_1.1-1_all.deb'

# Install the release named by `version` rather than whatever the unversioned
# `cuda` meta-package currently points to, so the message below stays true.
# NOTE(review): assumes NVIDIA's apt repo names versioned meta-packages with
# dashes (e.g. cuda-12-2) - confirm against the CUDA installation guide.
cuda_package = f"cuda-{version.replace('.', '-')}"

# install prerequisites
commands = [
    'sudo apt-get -q update',
    'sudo apt-get -q install -y linux-headers-$(uname -r) gcc',
]

print("Installing Prerequisites...")
for command in commands:
    print(f"++++ {command}")
    stdout, stderr = node.execute(command)

# Register NVIDIA's apt repository via its keyring package, then install CUDA.
commands = [
    f'wget https://developer.download.nvidia.com/compute/cuda/repos/{distro}/{architecture}/{keyring_deb}',
    f'sudo dpkg -i {keyring_deb}',
    'sudo apt-get -q update',
    f'sudo apt-get -q install -y {cuda_package}'
]

print(f"Installing CUDA {version}...")
for command in commands:
    print(f"++++ {command}")
    stdout, stderr = node.execute(command)

print("Done installing CUDA")

In [None]:
# Reboot the node, then wait for it to come back.
reboot = 'sudo reboot'

print(reboot)
node.execute(reboot)

# Wait for SSH with a 360 timeout and a 10 polling interval, showing progress.
slice.wait_ssh(timeout=360,interval=10,progress=True)

print("Now testing SSH abilites to reconnect...",end="")
slice.update()

# Only announce success when the SSH test actually passed; previously the
# result was discarded and "Reconnected!" was printed unconditionally.
# NOTE(review): relies on Slice.test_ssh() returning a bool - confirm against the FABlib docs.
if not slice.test_ssh():
    raise RuntimeError("SSH test failed after reboot; the node is not reachable yet")
print("Reconnected!")

In [None]:
# Check that the NVIDIA driver was set up successfully by running
# nvidia-smi on the node and printing its captured stdout.
stdout, stderr = node.execute("nvidia-smi")

print(f"stdout: {stdout}")

In [None]:
# Check which Python 3 version is installed on the node and print the captured stdout.
stdout, stderr = node.execute("python3 -V")

print(f"stdout: {stdout}")

In [None]:
# Install pip, the MPI packages and mpi4py on the node.
# NOTE(review): both mpich and libopenmpi-dev are installed; presumably mpi4py
# builds against whichever MPI compiler wrapper is found first - confirm that
# both are really needed.
commands = [
    'sudo apt update -y',
    'sudo apt-get -y install python3-pip',
    'sudo apt install -y mpich',
    'sudo apt install -y libopenmpi-dev',
    # pip has no -y option (that is an apt flag); passing it makes pip abort
    # with "no such option", so mpi4py was never installed.
    'sudo pip3 install mpi4py'
]

print("Installing mpi4py")
for command in commands:
    print(f"++++ {command}")
    stdout, stderr = node.execute(command)

In [None]:
# Upgrade pip, then install APPFL with its analytics and examples extras.
# (The distro/version/architecture assignments copied from the CUDA cell were
# unused here and have been dropped.)
commands = [
    'pip install pip --upgrade',
    # pip has no -y option (that is an apt flag); passing it makes pip abort
    # with "no such option", so APPFL was never installed.
    'pip install "appfl[analytics,examples]"',
]

print("Installing APPFL for User")
for command in commands:
    print(f"++++ {command}")
    stdout, stderr = node.execute(command)


In [None]:
# Be careful: this deletes the slice named by slice_name.
# Run it only after the experiment is finished.
fablib.delete_slice(slice_name)