# JIRIAF on FABRIC 

This notebook helps set up a JIRIAF-managed set of compute nodes across one or more sites on FABRIC and provides them with access to the LBNL production load balancer via the FABNetv4Ext network service. Nodes also communicate with the JIRIAF control plane and measurement plane servers located at JLAB, also via the FABNetv4Ext connection. 

See the following diagram:

<div>
    <img src="figs/JIRIAF-slice.png" width=500>
</div>


## Preamble

This code should *always* be executed regardless of whether you are starting a new slice or returning to an existing slice.

In [None]:
#
# EDIT THIS
#

# Site selection notes:
# - For the best management-network IPv4 connectivity pick from
#   'UCSD', 'SRI', 'FIU' or 'TOKY' - these sites have IPv4.
#   Other sites use IPv6 management and have trouble
#   retrieving git-lfs artifacts.
# - The ESnet-FABRIC gateway is at STAR, so the closer we are to it,
#   the lower the latency and loss.

# None means "pick random sites"; set to a list of site names to force
# the sites (uncomment one of the examples below)
site_list_override = None

# example: force a specific site list instead of using random sites
#site_list_override = ['SRI', 'UCSD', 'CLEM']

# example: (super)core sites - should be low loss
#site_list_override = ['STAR', 'SALT', 'KANS', 'NEWY', 'WASH', 'LOSA', 'DALL', 'ATLA']

# example: sites grouped around STAR with optical connections to the
# backbone - should be low loss
#site_list_override = ['STAR', 'INDI', 'NCSA', 'MICH']

# example: high capacity sites (may have losses at high bandwidth)
# site_list_override = ['STAR', 'INDI', 'NCSA', 'TACC', 'UCSD', 'PSC']

# sites that are always excluded from random selection
site_exclude_list = ['EDUKY', 'EDC']

# total number of worker nodes wanted in the slice
number_of_workers = 6

# base distro: 'ubuntu2[012]' or 'rocky[89]'; must be a key of `images`
distro_name = 'ubuntu22'

# map from distro name (see `distro_name`) to the FABRIC image name
images = {
    'ubuntu20': 'default_ubuntu_20',
    'ubuntu21': 'default_ubuntu_21',
    'ubuntu22': 'default_ubuntu_22',
    'rocky8': 'default_rocky_8',
    'rocky9': 'default_rocky_9',
}

# home directory of the image's login account; it is distribution specific
# ('ubuntu' for ubuntu and so on). The lookup key is the first five
# characters of distro_name, hence the truncated 'ubunt' key.
home_location = {
    'ubunt': '/home/ubuntu',
    'rocky' : '/home/rocky'
}[distro_name[:5]]

# worker dimensions, passed as keyword arguments to slice.add_node()
# NOTE(review): 'disk' and 'ram' are presumably in GB - confirm against FABlib
node_attribs = {
    'cores': 64,
    'disk': 100,
    'ram': 128,
    'image': images[distro_name]
}

# slice name; it encodes the worker count and distro, and every later cell
# looks the slice up by this name
slice_name = f'{number_of_workers}-node JIRIAF Slice using {distro_name}'

# these are subnets we want to be able to route to/from
# The list has the form ['192.168.100.0/24', '10.100.1.0/24']
external_subnets = []

# these are the lists of destination ports we allow to be open on the FABNet interface
# for incoming traffic from different subnets. The dictionary has the form
# { '192.168.100.0/24': [22, 443] } - the key is the source subnet and the value
# is a list of destination (TCP) ports allowed from that subnet
open_ports = {
}

# additional accounts and their public keys - they get sudo rights and docker,
# their public keys are expected to reside under ssh-keys/ in a file
# named after the account.
# The list has the form of ['user1', 'user2'] where user1 and user2 accounts
# will be created on the system. Under ssh-keys/ there should be two files
# named 'user1' and 'user2' each containing the SSH public key for that user. 
accounts = []

#
# SHOULDN'T NEED TO EDIT BELOW
#
# Preamble
import json
from datetime import datetime
from datetime import timezone
from datetime import timedelta

from fabrictestbed_extensions.fablib.fablib import FablibManager as fablib_manager

from ipaddress import ip_address, IPv4Address, IPv6Address, IPv4Network, IPv6Network
import ipaddress

# FABlib manager used by every cell below; display its configuration
fablib = fablib_manager()
fablib.show_config()

# NIC model attached to every worker node
nic_model = 'NIC_Basic'

# per-site network name prefix - the site name gets appended to it
net_name_prefix = 'fabnetv4ext'

def execute_single_node(node, commands):
    """Run shell commands one after another on a single node.

    Every command is announced before it runs. Anything a command writes
    to stderr is reported right after that command, and the remaining
    commands are still executed.

    :param node: object providing ``get_name()`` and ``execute(command)``,
        where ``execute`` returns a ``(stdout, stderr)`` pair
    :param commands: iterable of shell command strings; may be empty
    """
    for command in commands:
        print(f'\tExecuting "{command}" on node {node.get_name()}')
        stdout, stderr = node.execute(command)
        # check inside the loop so that every command is inspected, not
        # just the last one; a plain truthiness test covers both None and ''
        if stderr:
            print(f'Error encountered with "{command}": {stderr}')
        
def execute_commands(node, commands):
    """Run `commands` on one node, or on each node when given a list of nodes.

    :param node: a single node object or a ``list`` of node objects
    :param commands: shell command strings handed to execute_single_node()
    """
    targets = node if isinstance(node, list) else [node]
    for target in targets:
        execute_single_node(target, commands)

def make_node_name(site_name, node_idx):
    """Return the node name 'Node<node_idx>_<site_name>'."""
    prefix = f"Node{node_idx}"
    return prefix + '_' + site_name

def make_net_name(site_name):
    """Return the per-site network name '<net_name_prefix>_<site_name>'."""
    return net_name_prefix + '_' + site_name

# return slice with one node on one site
def starter_slice(site_name):
    """Define (but do not submit) a new slice with a single worker at `site_name`.

    The worker is 'Node1' of that site, gets the post-boot upload/execute
    steps, and its NIC is attached to a new per-site IPv4Ext L3 network.

    :param site_name: name of the site to place the first node on
    :return: the new, not yet submitted slice object
    """
    first_node_name = make_node_name(site_name, 1)
    site_net_name = make_net_name(site_name)

    new_slice = fablib.new_slice(name=slice_name)
    worker = new_slice.add_node(name=first_node_name, site=site_name, **node_attribs)

    # post-boot: upload the 'post-boot' directory, then run its script
    worker.add_post_boot_upload_directory('post-boot', '.')
    worker.add_post_boot_execute('chmod +x post-boot/jiriaf-post-boot.sh && ./post-boot/jiriaf-post-boot.sh')

    # one NIC per worker; its first interface goes onto the site network
    nic_name = '_'.join([first_node_name, nic_model, 'nic'])
    nic = worker.add_component(model=nic_model, name=nic_name)
    new_slice.add_l3network(name=site_net_name, interfaces=[nic.get_interfaces()[0]], type='IPv4Ext')

    return new_slice

def add_node_to_slice(site_name, node_idx, inc, slice):
    """Add `inc` workers at `site_name` to an existing slice object.

    Nodes are numbered consecutively starting at `node_idx`. Each one gets
    the post-boot steps and a NIC whose first interface joins the site's
    IPv4Ext network; that network is created if the slice lacks it.
    The slice is only changed locally - the caller submits the modification.

    :param site_name: site to place the new nodes on
    :param node_idx: index used for the first new node's name
    :param inc: number of nodes to add
    :param slice: slice object to add the nodes to
    :return: None
    """
    site_net_name = make_net_name(site_name)

    for current_idx in range(node_idx, node_idx + inc):
        new_node_name = make_node_name(site_name, current_idx)
        worker = slice.add_node(name=new_node_name, site=site_name, **node_attribs)

        # post-boot: upload the 'post-boot' directory, then run its script
        worker.add_post_boot_upload_directory('post-boot', '.')
        worker.add_post_boot_execute('chmod +x post-boot/jiriaf-post-boot.sh && ./post-boot/jiriaf-post-boot.sh')

        nic_name = '_'.join([new_node_name, nic_model, 'nic'])
        nic_interface = worker.add_component(model=nic_model, name=nic_name).get_interfaces()[0]

        # reuse the site network when it already exists, otherwise create it
        site_net = slice.get_network(name=site_net_name)
        if site_net is None:
            site_net = slice.add_l3network(name=site_net_name, type='IPv4Ext')
        site_net.add_interface(nic_interface)

    return None

def check_modify(slice, selected_site_list, nodes_in_slice, expected_to_add):
    """Check that the nodes just added by a slice modify became Active.

    For each of the `expected_to_add` new nodes the slivers are listed with
    a filter for an Active node sliver with the expected name on the first
    site of `selected_site_list`. The first listing that comes back as None
    makes the check fail.

    NOTE(review): this relies on list_slivers() returning None when nothing
    matches the filter - confirm against the FABlib version in use.

    :param slice: slice object to inspect
    :param selected_site_list: candidate sites; only element 0 is used
    :param nodes_in_slice: number of nodes the slice had before the modify
    :param expected_to_add: number of nodes the modify was meant to add
    :return: True when no listing returned None, False otherwise
    """
    for offset in range(1, expected_to_add + 1):

        def is_new_active_node(sliver, offset=offset):
            if sliver['type'] != 'node':
                return False
            wanted_name = make_node_name(selected_site_list[0], nodes_in_slice + offset)
            return sliver['name'] == wanted_name and sliver['state'] == 'Active'

        # find sliver reservation for the new node
        matching = slice.list_slivers(fields=['name', 'state'],
                                      filter_function=is_new_active_node)
        if matching is None:
            return False

    return True

# until fablib fixes this
def get_management_os_interface(node) -> "str | None":
    """
    Gets the name of the management interface used by the node's
    operating system.

    The interface is taken from the default route: the IPv4 routing table
    is checked first, then the IPv6 one. Empty command output is treated
    as "no routes" instead of failing JSON parsing.

    :param node: node object whose ``execute(command, quiet=True)`` returns
        a ``(stdout, stderr)`` pair
    :return: interface name, or None if no default route was found
    :rtype: String
    """
    for route_command in ("sudo ip -j route list", "sudo ip -6 -j route list"):
        stdout, stderr = node.execute(route_command, quiet=True)
        routes = json.loads(stdout) if stdout and stdout.strip() else []

        for route in routes:
            if route.get("dst") == "default" and "dev" in route:
                return route["dev"]

    return None

## Helpers

If you ever forget which images are available, run this cell:

In [None]:
# List available images (this step is optional): ask FABlib for the image
# names and print them; values of the `images` map should come from this list
available_images = fablib.get_image_names()

print(f'Available images are: {available_images}')

## Prepare to create a new slice (skip if exists)

In [None]:
# show the slices I currently have, or say so when the listing is empty
output_dataframe = fablib.list_slices(output='pandas')
print(output_dataframe if output_dataframe else 'No active slices under this project')

In [None]:
# Pick candidate sites in the continental US (no-op when an override is set)
lon_west=-124.3993243
lon_east=-69.9721573
candidate_sites = 7
free_nodes_worth = 3 # how many nodes worth of free resources we want per site


def _site_qualifies(site):
    """True when the site lies between the two longitudes and has more than
    `free_nodes_worth` workers' worth of free cores, RAM and disk."""
    longitude = site['location'][1]
    return (longitude < lon_east
            and longitude > lon_west
            and site['cores_available'] > free_nodes_worth * node_attribs['cores']
            and site['ram_available'] > free_nodes_worth * node_attribs['ram']
            and site['disk_available'] > free_nodes_worth * node_attribs['disk'])


# an explicit override wins; otherwise take random sites, avoiding those
# on the exclude list
if site_list_override is not None:
    selected_site_list = site_list_override
else:
    selected_site_list = fablib.get_random_sites(count=candidate_sites,
                                                 avoid=site_exclude_list,
                                                 filter_function=_site_qualifies)

if not selected_site_list:
    print('Unable to find a sites matching the requirements')
else:
    print(f'Selected sites are {selected_site_list}')


## Create slice iteratively (skip if exists)

We may or may not get all the nodes we want immediately - we use iteration with slice modify to get to the max/desired number of nodes across the selected sites.

### Create Starter Slice

In [None]:
# We start by establishing a slice with one node at some site. Sites on which
# the starter slice ended up 'Dead' are recorded in failed_sites and removed
# from selected_site_list afterwards, so they are not tried again.

keep_trying = True
succeeded = False

site_list_iter = iter(selected_site_list)
# a list (not a dict) so that failed sites can actually be recorded
failed_sites = []
site_name = None

while keep_trying:

    try:
        site_name = next(site_list_iter)
        print(f'Trying site {site_name} from {selected_site_list}')
        
        # define a starter slice
        slice = starter_slice(site_name)

        print(f'Submitting starter slice "{slice_name}" with first node on site {site_name}')
        slice_id = slice.submit()

        # check the state of this slice
        slices = fablib.get_slices(excludes=[], slice_id=slice_id)
        if slices[0].get_state() == 'Dead':
            print(f'Failed on site {site_name}, proceeding')
            # remember the site so it gets filtered out below
            failed_sites.append(site_name)
        else:
            print(f'Succeeded on site {site_name} with state {slices[0].get_state()}')
            keep_trying = False
            succeeded = True
    except StopIteration: 
        print('No more sites to look at, exiting')
        keep_trying = False
    except Exception as e:
        # anything else (e.g. a failed submit) stops the search
        print(f'Unexpected exception {e}, exiting')
        keep_trying = False

if succeeded:
    print(f'Succeeded in creating a slice on {site_name}, will avoid sites {failed_sites}')
    selected_site_list = list(filter(lambda x: x not in failed_sites, selected_site_list))
    print(f'Proceeding with sites {selected_site_list}')

### Modify the Slice to add More Workers

Now that the base slice is created we will iteratively add workers on sites one at a time using first-fit policy until we get to the desired number of workers or run out of sites.

In [None]:
# bookkeeping: the starter slice already holds worker number 1
nodes_in_slice = 1
node_idx = 2
node_increment = 2   # how many workers to request per modify
remaining_workers = number_of_workers - 1

# first-fit: keep adding to the first site on the list, drop the site once a
# modify on it fails, stop when done or when no sites are left
while remaining_workers > 0 and selected_site_list:
    slice = fablib.get_slice(name=slice_name)

    try:
        site_name = selected_site_list[0]
        print(f'There are {remaining_workers} remaining workers to create. Trying site {site_name} from {selected_site_list}')
        expected_to_add = min(node_increment, remaining_workers)
        add_node_to_slice(site_name, node_idx, expected_to_add, slice)

        print(f'Submitting slice modification to "{slice_name}" for site {site_name}')
        slice_id = slice.modify()

        # re-read the slice and check the state of the new nodes
        slice = fablib.get_slice(name=slice_name)

        if not check_modify(slice, selected_site_list, nodes_in_slice, expected_to_add):
            print(f'Failed to provision on site {site_name}.')
            # this site is full, moving on
            selected_site_list.remove(site_name)
        else:
            print(f'Succeeded on site {site_name}.')
            # successfully provisioned - advance all counters
            nodes_in_slice += expected_to_add
            remaining_workers -= expected_to_add
            node_idx += expected_to_add
    except Exception as e:
        # -1 marks the abnormal exit in the summary below
        remaining_workers = -1
        print(f'Unexpected exception {e}, exiting')
        break

if remaining_workers != 0:
    print(f'Unable to create {remaining_workers}')
else:
    print('Succeeded in creating all workers')


## Get Slice Details (always execute)

The following code sets up data structures so that all the follow-up cells work properly. Execute it regardless of whether you just created the slice or are coming back to an existing slice.

In [None]:
def find_net(net_list, name):
    """Return the first network in `net_list` whose get_name() equals `name`,
    or None when there is no such network."""
    return next((candidate for candidate in net_list if candidate.get_name() == name), None)

# look the slice up by name so this also works for a pre-existing slice
slice = fablib.get_slice(name=slice_name)

a = slice.show()
nets = slice.list_networks()
nodes = slice.list_nodes()

# object handles used by the follow-up cells
net_objects = slice.get_networks()
node_objects = slice.get_nodes()
available_ip_cnt = 10

# site name -> {'nodes': set of node objects, 'net': that site's network}
slivers_by_site = {}

print('Arranging nodes and networks by site and getting available IP addresses')
for node in node_objects:
    node_site = node.get_site()
    if node_site not in slivers_by_site:
        # first node seen at this site: create the entry and find its network
        slivers_by_site[node_site] = {
            'nodes': set(),
            'net': find_net(net_objects, make_net_name(node_site)),
        }
    slivers_by_site[node_site]['nodes'].add(node)

print('Listing public IP addresses per service')
for net in net_objects:
    print(f'{net.get_name()} has {net.get_public_ips()}')


## Perform Hardening and Network Configuration Opening to Outside World

### Set up routing

In [None]:
# For every site network, ask for one available IP per node at that site and
# request that those IPs be made publicly routable.
# It is NORMAL to see 'IP addresses were updated due to conflicts'.
for site_name, site_slivers in slivers_by_site.items():
    print(f'Processing {site_name}')
    site_net = site_slivers['net']
    site_nodes = site_slivers['nodes']
    wanted_ips = site_net.get_available_ips(count=len(site_nodes))
    site_slivers['ips'] = wanted_ips
    print(f'Requesting available IPs to be publicly routable: {site_slivers["ips"]}')
    site_net.make_ip_publicly_routable(ipv4=list(map(str, wanted_ips)))

# submit the updated request
slice.submit()

In [None]:
# re-read the slice by name
slice = fablib.get_slice(name=slice_name)

# print the public IPs each site network reports
for site_name, site_slivers in slivers_by_site.items():
    print(f'Processing {site_name}')
    site_nodes = site_slivers['nodes']
    site_net = site_slivers['net']
    print(f'Public IPs are: {site_net.get_public_ips()}')

In [None]:
# pair each node of a site with one of that site's public IPs and put the
# address on the node's interface attached to the site network
for site_name, site_slivers in slivers_by_site.items():
    print(f'Processing {site_name}')
    site_net = site_slivers['net']
    for node, addr in zip(site_slivers['nodes'], site_net.get_public_ips()):
        print(f'  Adding address {addr} to node {node.get_name()} in subnet {site_net.get_subnet()}')
        node_iface = node.get_interface(network_name=site_net.get_name())
        # make sure the interface is UP (in rare cases comes up in DOWN state)
        bring_up = [f'sudo ip link set {node_iface.get_os_interface()} up']
        execute_single_node(node, bring_up)
        node_iface.ip_addr_add(addr=addr, subnet=site_net.get_subnet())


In [None]:
# inter-site routing (only does something when there are multiple sites):
# every node gets a route to each other site's subnet via its own site gateway
for site_name_from, site_slivers_from in slivers_by_site.items():
    for site_name_to, site_slivers_to in slivers_by_site.items():
        if site_name_from != site_name_to:
            # nodes in site_name_from need a route to site_name_to's subnet
            subnet = site_slivers_to['net'].get_subnet()
            gateway = site_slivers_from['net'].get_gateway()
            for node in site_slivers_from['nodes']:
                print(f'Setting up route to {subnet} via {gateway} on node {node.get_name()}')
                node.ip_route_add(subnet=subnet, gateway=gateway)

In [None]:
# add a route to every subnet in external_subnets on every node, going
# through the gateway of the node's own site network
for site_slivers in slivers_by_site.values():
    gateway = site_slivers['net'].get_gateway()
    for node in site_slivers['nodes']:
        print(f'Setting up routes on {node.get_name()}')
        for subnet in external_subnets:
            print(f'Setting up route to {subnet} via {gateway} on node {node.get_name()}')
            route_command = f'sudo ip route add {subnet} via {gateway}'
            execute_single_node(node, [route_command])

### Setup Firewall (assuming firewalld is used regardless of distro)

In [None]:
# On every node: lo and the management interface go into the 'trusted' zone,
# the dataplane interface goes into the 'public' zone. All services are
# removed from 'public', then one rich rule per (source subnet, port) pair
# from open_ports is added to it.

for site_slivers in slivers_by_site.values():
    site_net = site_slivers['net']
    for node in site_slivers['nodes']:
        print(f'Setting up firewalld on node {node.get_name()}')
        # we call our own helper - as of 1.7.0 fablib's
        # node.get_management_os_interface() has a bug where it doesn't find
        # the management interface on IPv6 sites
        mgmt_iface_name = get_management_os_interface(node)
        if mgmt_iface_name is None:
            print('Unable to determine management interface, skipping')
            continue
        data_iface_name = node.get_interface(network_name=site_net.get_name()).get_os_interface()
        print(f'  Adding {mgmt_iface_name} and lo to trusted zone and {data_iface_name} to public zone')

        zone_commands = [
            f'sudo firewall-cmd --permanent --zone=public --add-interface={data_iface_name}',
            'sudo firewall-cmd --permanent --zone=trusted --add-interface=lo',
            f'sudo firewall-cmd --permanent --zone=trusted --add-interface={mgmt_iface_name}',
            'for i in $(sudo firewall-cmd --zone=public --list-services); do sudo firewall-cmd --zone=public --permanent --remove-service=$i; done',
        ]
        rule_commands = [
            f'sudo firewall-cmd --permanent --zone=public --add-rich-rule=\'rule family=\"ipv4\" source address=\"{subnet}\" port protocol=\"tcp\" port=\"{port}\" accept\''
            for subnet, portlist in open_ports.items()
            for port in portlist
        ]
        # reload to apply the permanent settings, then show the result
        final_commands = [
            'sudo firewall-cmd --reload',
            'sudo firewall-cmd --list-all --zone=public',
        ]
        execute_single_node(node, zone_commands + rule_commands + final_commands)
        
        

## Tune Buffers and MTUs

In order to have good performance we need to
- Make the UDP send/receive socket buffer size limit larger (applications are assumed to know how to make their buffers larger up to this limit)
- Set the MTU to 9000 bytes and test it with a ping that has the DF (don't fragment) bit set

In [None]:
# raise the socket buffer size limits to 512M (536870912 bytes), then print
# both values back
sysctl_commands = [
    "sudo sysctl net.core.rmem_max=536870912",
    "sudo sysctl net.core.wmem_max=536870912",
    "sysctl net.core.wmem_max net.core.rmem_max",
]
# apply on every node of every site
for site_slivers in slivers_by_site.values():
    for node in site_slivers['nodes']:
        execute_single_node(node, sysctl_commands)

In [None]:
# MTU to set on each node's dataplane interface (9k)
mtu=9000

for site_slivers in slivers_by_site.values():
    site_net = site_slivers['net']
    for node in site_slivers['nodes']:
        # the dataplane interface is the one attached to the site network
        data_iface_name = node.get_interface(network_name=site_net.get_name()).get_os_interface()
        mtu_command = f"sudo ip link set dev {data_iface_name} mtu {mtu}"
        execute_single_node(node, [mtu_command])

In [None]:
# From every node, flood-ping the first public address of the first site on
# the list with 8972-byte payloads and fragmentation prohibited (-M do).
first_site_slivers = list(slivers_by_site.values())[0]
first_ip = first_site_slivers['net'].get_public_ips()[0]
# you can replace first_ip with the IP of a load balancer, but be careful not to interfere
# with a running experiment as this uses ping flood 
#first_ip = <use production LB or other address>

for site_slivers in slivers_by_site.values():
    for node in site_slivers['nodes']:
        print(f'Node {node.get_name()} pinging {first_ip}')
        ping_command = f"sudo ping -q -f -s 8972 -c 100 -M do {first_ip}"
        execute_single_node(node, [ping_command])

## Customize Nodes

Customize node setup

### Create additional accounts

Use the `accounts` list and public SSH keys stored in ssh-keys/ folder to create accounts and give them sudo rights.

In [None]:
# make sure for every account in `accounts` there is a matching file with public SSH key
# under ssh-keys
for site_name, site_slivers in slivers_by_site.items():
    for node in site_slivers['nodes']:
        if not accounts:
            # nothing to create on this node, so no key upload either
            continue
        # upload the keys once per node instead of once per account;
        # note this uploads into /home/[ubuntu/rocky]/ssh-keys
        node.upload_directory('ssh-keys', '.')
        for account in accounts:
            print(f'Creating account {account} on node {node.get_name()}')
            # create an account with home directory
            # give sudo rights
            # copy key file into /home/{account}/.ssh/authorized_keys
            commands = [
                f'sudo useradd -m -G sudo,docker {account} -c "{account} account"',
                f'sudo mkdir /home/{account}/.ssh',
                f'sudo cp ssh-keys/{account} /home/{account}/.ssh/authorized_keys',
                f'sudo chown -R {account}:{account} /home/{account}/.ssh/',
            ]
            execute_single_node(node, commands)

### Add additional software

## Manage the slice

### Extend by two weeks

In [None]:
# new end date: now (UTC) plus 14 days, formatted as a string for renew()
new_expiry = datetime.now(timezone.utc) + timedelta(days=14)
end_date = new_expiry.strftime("%Y-%m-%d %H:%M:%S %z")

try:
    slice = fablib.get_slice(name=slice_name)
    slice.renew(end_date)
except Exception as e:
    # report the problem instead of raising
    print(f"Exception: {e}")

### Delete

In [None]:
# look the slice up by name and delete it
slice = fablib.get_slice(name=slice_name)
slice.delete()