# Poseidon - TACC (CPU) - UC (CPU & GPU) connected

## Set the project

In [None]:
import chi
from chi.lease import Lease
from chi.keypair import Keypair
from chi.server import Server
from chi.server import get_server
import time

# Set the project name in the chi configuration before any site, lease or
# server call is made below.
chi.set('project_name', 'CH-822181')

In [None]:
import os
# Paths of the SSH key pair, resolved under $HOME/work/.ssh.
# The public key is handed to Keypair(); the private key is loaded by paramiko.
ssh_key_file_priv = f"{os.environ['HOME']}/work/.ssh/id_rsa"
ssh_key_file_pub = f"{os.environ['HOME']}/work/.ssh/id_rsa.pub"

In [None]:
# Shell command run on a node over SSH to obtain its local IPv4 address
# from the metadata endpoint.
local_ip_cmd = (
    "curl -s "
    "http://169.254.169.254/latest/meta-data/local-ipv4"
)

# Define the variables

In [None]:
# Settings shared by the nodes launched in this notebook.
hostname_prefix = "pos"      # assigned to each lease's `prefix` before launching a server
image = "CC-Ubuntu18.04"     # image name passed to create_server
network = "sharedwan1"       # network name passed to create_server

### Create Master at TACC

In [None]:
# Make CHI@TACC the active site; the master's key pair, lease and server
# are created while this site is selected.
chi.use_site('CHI@TACC')

In [None]:
# Build a Keypair from the public key file while CHI@TACC is the selected
# site; its key_name is later passed to create_server for the master node.
tacc_key_pair = Keypair(keypair_public_key=ssh_key_file_pub)

print(f"TACC Key pair is created: {tacc_key_pair.key_name}")

In [None]:
# Reservation parameters for the master node.
master_name = "master"
master_node_type = "compute_cascadelake"

# Reserve a single node of the requested type, then block until the
# lease has started.
master_lease = Lease(
    name=master_name,
    node_type=master_node_type,
)
master_lease.wait()

print(f"Master Node Lease {master_name} is ACTIVE")

In [None]:
# Launch the master server on the node reserved by master_lease.
master_name = "master"

# The prefix is set on the lease before the server is created; the next cell
# looks the server up as "<hostname_prefix>-<master_name>", so presumably the
# lease prepends it to the server name — confirm against python-chi.
master_lease.prefix = hostname_prefix

# Launch 1 server
master = master_lease.create_server(
    name=master_name,
    image=image,
    net_name=network,
    key_name=tacc_key_pair.key_name,
)

# This step creates the server, not the lease, so report it as a node
# (matches the "Worker Node ... created" messages used for the workers).
print(f"Master Node {master_name} created")

In [None]:
# Wait for the master server to become ACTIVE, then attach a floating IP.
# The server is looked up under its prefixed name.
master_name = f"{hostname_prefix}-master"

while True:
    status = get_server(master_name).status
    if status == "ACTIVE":
        break
    if status == "ERROR":
        # A failed build never reaches ACTIVE; stop instead of polling forever.
        raise RuntimeError(f"Master Node {master_name} entered ERROR state while building")
    # Only sleep while the server is still building.
    time.sleep(5)

master.associate_floating_ip()

print(f"Master Node {master_name} is ACTIVE {master.ip}")

In [None]:
# Setup script for the master node: one shell command per line.
# It clones the Mobius-scripts repository into /root, copies two scripts out
# of it, and runs them with sudo.
master_script = "\n".join([
    "sudo git clone https://github.com/RENCI-NRIG/Mobius-scripts.git /root/Mobius-scripts",
    "sudo cp /root/Mobius-scripts/scripts/posiedon/pos_mobius_start.sh /root/mobius_start.sh",
    "sudo cp /root/Mobius-scripts/scripts/posiedon/panorama-data.sh /root/master.sh",
    "sudo sh /root/mobius_start.sh WORKFLOW all",
    "sudo sh /root/master.sh",
])

In [None]:
import paramiko 

# Load the private key and prepare one SSH client that is reused for the
# master and for every worker.
key = paramiko.RSAKey.from_private_key_file(ssh_key_file_priv)
client = paramiko.SSHClient()
client.load_system_host_keys()
# The nodes are freshly launched, so their host keys are not known yet:
# accept and remember them automatically. (The earlier call installing the
# base MissingHostKeyPolicy was overwritten immediately and has been removed.)
client.set_missing_host_key_policy(paramiko.AutoAddPolicy())

In [None]:
# Connect to the master as user 'cc' and submit the setup script.
# NOTE(review): stdout/stderr are not read here, so the message below
# presumably only confirms that the script was started, not that it
# finished successfully — confirm.
client.connect(master.ip, username="cc", pkey=key)
stdin, stdout, stderr = client.exec_command(master_script)

print(f"Setup script executed on {master_name}")

In [None]:
# Ask the master for its local IPv4 address.
client.connect(master.ip, username='cc', pkey=key)
stdin, stdout, stderr = client.exec_command(local_ip_cmd)

# Strip surrounding whitespace: this value is interpolated into a single
# /etc/hosts line by the worker script, so a stray newline would break it.
master_lip = stdout.read().decode('utf-8').strip()
if not master_lip:
    # An empty address would silently produce a bogus hosts entry on every worker.
    raise RuntimeError("Could not determine the master node's local IPv4 address")

print(f"Master node local IPv4: {master_lip}")

In [None]:
# Close the SSH connection to the master. The same client object is
# connected again for each worker further down.
client.close()

### Create Workers at UC

In [None]:
# Switch the active site to CHI@UC; the worker key pair, leases and servers
# below are created while this site is selected.
chi.use_site('CHI@UC')

In [None]:
# Build a Keypair from the same public key file while CHI@UC is the selected
# site; its key_name is passed to create_server for every worker.
uc_key_pair = Keypair(keypair_public_key=ssh_key_file_pub)

# Report the UC key pair that was just created (the original printed the
# TACC key pair's name here).
print(f"UC Key pair is created: {uc_key_pair.key_name}")

# Create CPU Workers

In [None]:
# Settings for the CPU workers.
num_workers = 1                        # number of leases/servers to create
worker_node_type = "compute_skylake"   # node type requested in each lease
image = "CC-Ubuntu18.04"
network = "sharedwan1"

In [None]:
# Reserve one node per CPU worker. Leases are created one at a time and each
# is waited on before the next is requested.
cpu_worker_leases = []
for i in range(num_workers):
    name = f"cpu-worker{i}"

    lease = Lease(name=name, node_type=worker_node_type)
    lease.wait()  # block until the lease has started

    cpu_worker_leases.append(lease)
    print(f"Worker Node Lease {name} created")

In [None]:
# Launch one server on each CPU worker lease. The index from enumerate gives
# each server the same "cpu-worker<i>" name that was used for its lease.
cpu_workers = []
for i, lease in enumerate(cpu_worker_leases):
    name = f"cpu-worker{i}"

    lease.prefix = hostname_prefix
    # Launch 1 server
    w = lease.create_server(
        name=name,
        image=image,
        net_name=network,
        key_name=uc_key_pair.key_name,
    )
    cpu_workers.append(w)
    print(f"Worker Node {w.name} created")

In [None]:
# Wait for each CPU worker to become ACTIVE, then attach a floating IP.
for w in cpu_workers:
    while True:
        status = get_server(w.name).status
        if status == "ACTIVE":
            break
        if status == "ERROR":
            # A failed build never reaches ACTIVE; stop instead of polling forever.
            raise RuntimeError(f"Worker Node {w.name} entered ERROR state while building")
        # Only sleep while the server is still building.
        time.sleep(5)

    w.associate_floating_ip()
    print(f"Worker Node {w.name} is ACTIVE IP: {w.ip}")

In [None]:
# Setup script run on every worker (CPU and GPU).
# The first line consists of three ';'-separated commands that append a
# "<master local IP> <master name>" entry to /etc/hosts: the file is made
# world-writable, the entry is appended, and the mode is set back to 644.
# The remaining lines clone Mobius-scripts and run the start and worker scripts.
c_worker_script = "\n".join([
    (
        "sudo chmod 666 /etc/hosts;"
        f"sudo echo '{master_lip} {master_name}' >> /etc/hosts;"
        "sudo chmod 644 /etc/hosts;"
    ),
    "sudo git clone https://github.com/RENCI-NRIG/Mobius-scripts.git /root/Mobius-scripts",
    "sudo cp /root/Mobius-scripts/scripts/posiedon/pos_mobius_start.sh /root/mobius_start.sh",
    "sudo sh /root/mobius_start.sh WORKFLOW all",
    "sudo cp /root/Mobius-scripts/scripts/posiedon/worker.sh /root/worker.sh",
    "sudo sh /root/worker.sh",
])

In [None]:
# Show the rendered worker script (with the master's IP and name filled in)
# so it can be inspected before it is sent to the workers.
print(c_worker_script)

In [None]:
# Submit the worker setup script to every CPU worker over SSH (user 'cc').
# NOTE(review): the command's output is not read, so the message presumably
# only confirms submission, not successful completion — confirm.
for w in cpu_workers:
    client.connect(w.ip, username="cc", pkey=key)
    stdin, stdout, stderr = client.exec_command(c_worker_script)

    print(f"Setup script executed on {w.name}")

# Create GPU Workers

In [None]:
# Settings for the GPU workers (these overwrite the CPU worker values).
num_workers = 1                     # number of leases/servers to create
worker_node_type = "gpu_rtx_6000"   # node type requested in each lease
image = "CC-Ubuntu18.04"
network = "sharedwan1"

In [None]:
# Reserve one node per GPU worker. Leases are created one at a time and each
# is waited on before the next is requested.
gpu_worker_leases = []
for i in range(num_workers):
    name = f"gpu-worker{i}"

    lease = Lease(name=name, node_type=worker_node_type)
    lease.wait()  # block until the lease has started

    gpu_worker_leases.append(lease)
    print(f"Worker Node Lease {name} created")

In [None]:
# Launch one server on each GPU worker lease. The index from enumerate gives
# each server the same "gpu-worker<i>" name that was used for its lease.
gpu_workers = []
for i, lease in enumerate(gpu_worker_leases):
    name = f"gpu-worker{i}"

    lease.prefix = hostname_prefix
    # Launch 1 server
    w = lease.create_server(
        name=name,
        image=image,
        net_name=network,
        key_name=uc_key_pair.key_name,
    )
    gpu_workers.append(w)
    print(f"Worker Node {w.name} created")

In [None]:
# Wait for each GPU worker to become ACTIVE, then attach a floating IP.
for w in gpu_workers:
    while True:
        status = get_server(w.name).status
        if status == "ACTIVE":
            break
        if status == "ERROR":
            # A failed build never reaches ACTIVE; stop instead of polling forever.
            raise RuntimeError(f"Worker Node {w.name} entered ERROR state while building")
        # Only sleep while the server is still building.
        time.sleep(5)

    w.associate_floating_ip()
    print(f"Worker Node {w.name} is ACTIVE IP: {w.ip}")

In [None]:
# Submit the same worker setup script to every GPU worker over SSH (user 'cc').
# NOTE(review): the command's output is not read, so the message presumably
# only confirms submission, not successful completion — confirm.
for w in gpu_workers:
    client.connect(w.ip, username="cc", pkey=key)
    stdin, stdout, stderr = client.exec_command(c_worker_script)

    print(f"Setup script executed on {w.name}")

# Cleanup

In [None]:
# Delete the master's lease.
# NOTE(review): the last use_site call in this notebook selected CHI@UC,
# while this lease was created under CHI@TACC — confirm the delete still
# targets the TACC lease.
master_lease.delete()

In [None]:
# Delete every CPU worker lease collected in cpu_worker_leases.
for lease in cpu_worker_leases:
    lease.delete()

In [None]:
# Delete every GPU worker lease collected in gpu_worker_leases.
for lease in gpu_worker_leases:
    lease.delete()