# This notebook has some utilities to manage the already deployed cluster. 
- uninstall DYNAMOS
- trigger VFL
- ... 

## FABlib API References Examples

- [fablib.show_config](https://fabric-fablib.readthedocs.io/en/latest/fablib.html#fabrictestbed_extensions.fablib.fablib.FablibManager.show_config)
- [fablib.list_sites](https://fabric-fablib.readthedocs.io/en/latest/fablib.html#fabrictestbed_extensions.fablib.fablib.FablibManager.list_sites)
- [fablib.list_hosts](https://fabric-fablib.readthedocs.io/en/latest/fablib.html#fabrictestbed_extensions.fablib.fablib.FablibManager.list_hosts)
- [fablib.new_slice](https://fabric-fablib.readthedocs.io/en/latest/fablib.html#fabrictestbed_extensions.fablib.fablib.FablibManager.new_slice)
- [slice.add_node](https://fabric-fablib.readthedocs.io/en/latest/slice.html#fabrictestbed_extensions.fablib.slice.Slice.add_node)
- [slice.submit](https://fabric-fablib.readthedocs.io/en/latest/slice.html#fabrictestbed_extensions.fablib.slice.Slice.submit)
- [slice.get_nodes](https://fabric-fablib.readthedocs.io/en/latest/slice.html#fabrictestbed_extensions.fablib.slice.Slice.get_nodes)
- [slice.list_nodes](https://fabric-fablib.readthedocs.io/en/latest/slice.html#fabrictestbed_extensions.fablib.slice.Slice.list_nodes)
- [slice.show](https://fabric-fablib.readthedocs.io/en/latest/slice.html#fabrictestbed_extensions.fablib.slice.Slice.show)
- [node.execute](https://fabric-fablib.readthedocs.io/en/latest/node.html#fabrictestbed_extensions.fablib.node.Node.execute)
- [slice.delete](https://fabric-fablib.readthedocs.io/en/latest/slice.html#fabrictestbed_extensions.fablib.slice.Slice.delete) 

In [1]:
%%time
import asyncio
import datetime
import json
import traceback

from fabrictestbed_extensions.fablib.fablib import FablibManager as fablib_manager

# Instantiate the FABlib manager (FablibManager, imported under the alias
# `fablib_manager`); the cells below reuse this `fablib` handle.
fablib = fablib_manager()

# Print the active FABlib configuration. The trailing ';' stops the notebook
# from additionally echoing the call's return value as cell output.
fablib.show_config();

User: koufalex@gmail.com bastion key is valid!
Configuration is valid


0,1
Orchestrator,orchestrator.fabric-testbed.net
Credential Manager,cm.fabric-testbed.net
Core API,uis.fabric-testbed.net
Artifact Manager,artifacts.fabric-testbed.net
Token File,/home/fabric/.tokens.json
Project ID,49f65ad7-d8a2-4ab9-8ca0-ba777a2e0ea2
Bastion Host,bastion.fabric-testbed.net
Bastion Username,koufalex_0000215529
Bastion Private Key File,/home/fabric/work/fabric_config/fabric_bastion_key
Slice Public Key File,/home/fabric/work/fabric_config/slice_key.pub


CPU times: user 2.29 s, sys: 314 ms, total: 2.6 s
Wall time: 4.94 s


In [3]:
# Look up the already-deployed slice by name and keep handles to its nodes
# for the cells below.
# NOTE(review): `slice` shadows the Python builtin, but other cells reference
# this name, so it is kept as-is.
slice_name = "DYNAMOS-on-FABRIC"
slice = fablib.get_slice(name=slice_name)
nodes = slice.get_nodes()


In [5]:
def to_local_ssh_command(fabric_ssh_command):
    """Rewrite a FABRIC-generated SSH command to use locally stored key/config paths.

    Args:
        fabric_ssh_command: SSH command string as returned by
            ``node.get_ssh_command()``.

    Returns:
        The same command with ``/home/fabric/work/fabric_config/slice_key``
        replaced by ``~/.ssh_fabric/slice_key`` and
        ``/home/fabric/work/fabric_config/ssh_config`` replaced by ``ssh_config``.
        Commands that contain neither path are returned unchanged.
    """
    return fabric_ssh_command.replace(
        "/home/fabric/work/fabric_config/slice_key", "~/.ssh_fabric/slice_key"
    ).replace(
        "/home/fabric/work/fabric_config/ssh_config", "ssh_config"
    )


# For every node in the slice, print the SSH command as issued by FABRIC and
# its equivalent for a local machine.
try:
    for node in slice.get_nodes():
        print(f"Node: {node.get_name()}")
        original_ssh_command = node.get_ssh_command()
        print(f"  SSH Command from FABRIC: {original_ssh_command}")
        updated_ssh_command = to_local_ssh_command(original_ssh_command)
        print(f"  SSH Command locally (ensuring it is saved according to below steps): {updated_ssh_command}")
except Exception as e:
    # Report the failure together with the full stack trace instead of
    # aborting the cell. This requires the `traceback` module to be imported;
    # without it the handler itself raises NameError and hides the real error.
    print(f"Fail: {e}")
    traceback.print_exc()

Node: control
  SSH Command from FABRIC: ssh -i /home/fabric/work/fabric_config/slice_key -F /home/fabric/work/fabric_config/ssh_config ubuntu@2001:400:a100:3070:f816:3eff:fe84:397d
  SSH Command locally (ensuring it is saved according to below steps): ssh -i ~/.ssh_fabric/slice_key -F ssh_config ubuntu@2001:400:a100:3070:f816:3eff:fe84:397d
Node: dynamos
  SSH Command from FABRIC: ssh -i /home/fabric/work/fabric_config/slice_key -F /home/fabric/work/fabric_config/ssh_config ubuntu@2001:400:a100:3070:f816:3eff:fe1b:63f7
  SSH Command locally (ensuring it is saved according to below steps): ssh -i ~/.ssh_fabric/slice_key -F ssh_config ubuntu@2001:400:a100:3070:f816:3eff:fe1b:63f7
Node: server
  SSH Command from FABRIC: ssh -i /home/fabric/work/fabric_config/slice_key -F /home/fabric/work/fabric_config/ssh_config ubuntu@2001:400:a100:3070:f816:3eff:fe08:6a6e
  SSH Command locally (ensuring it is saved according to below steps): ssh -i ~/.ssh_fabric/slice_key -F ssh_config ubuntu@2001:400

In [4]:
def get_ip(node):
    """Return the IP address of the node's interface on its site network.

    The interface is looked up by network name, which is built as
    ``Network-<site>`` from ``node.get_site()``.
    """
    site_network = f"Network-{node.get_site()}"
    return node.get_interface(network_name=site_network).get_ip_addr()

# Index the slice's nodes by name, storing each node's IP address next to its
# handle, and echo every entry as it is added.
nodes_dict = {}

for node in nodes:
    ip, name = get_ip(node), node.get_name()
    nodes_dict[name] = {"ip": ip, "node": node}
    print(f"{name}: {ip}")

print(nodes_dict)

control: 10.137.3.2
dynamos: 10.137.3.3
server: 10.137.3.4
clientone: 10.137.3.5
clienttwo: 10.137.3.6
clientthree: 10.137.3.7
{'control': {'ip': '10.137.3.2', 'node': <fabrictestbed_extensions.fablib.node.Node object at 0x7ab589e09050>}, 'dynamos': {'ip': '10.137.3.3', 'node': <fabrictestbed_extensions.fablib.node.Node object at 0x7ab5a4f1dad0>}, 'server': {'ip': '10.137.3.4', 'node': <fabrictestbed_extensions.fablib.node.Node object at 0x7ab5544f6d50>}, 'clientone': {'ip': '10.137.3.5', 'node': <fabrictestbed_extensions.fablib.node.Node object at 0x7ab56c063350>}, 'clienttwo': {'ip': '10.137.3.6', 'node': <fabrictestbed_extensions.fablib.node.Node object at 0x7ab554379cd0>}, 'clientthree': {'ip': '10.137.3.7', 'node': <fabrictestbed_extensions.fablib.node.Node object at 0x7ab5661d2d50>}}


In [None]:
# This is for resetting the kubespray cluster. 
# Use this if you are troubleshooting your Kubernetes cluster
# and you want to redeploy fresh.

# nodes_dict['control']['node'].upload_file(local_file_path="node_scripts/reset_kubespray.sh", remote_file_path="reset.sh");
# nodes_dict['control']['node'].execute(f"chmod +x reset.sh && ./reset.sh");

In [None]:
# Add the relevant etcd data to the dynamos node (update etcd agreements etc)

# help(nodes_dict['dynamos']['node'])
# upload etcd files from filesystem instead of reading them from github
# nodes_dict['dynamos']['node'].upload_directory(local_directory_path="../configuration/etcd_launch_files", remote_directory_path="./")
# nodes_dict['dynamos']['node'].execute("ls etcd_launch_files")

# nodes_dict['dynamos']['node'].upload_file(local_file_path="node_scripts/define_etcd_data_local.sh", remote_file_path="define_etcd_data_local.sh");
# nodes_dict['dynamos']['node'].execute(f"chmod +x define_etcd_data_local.sh && ./define_etcd_data_local.sh");

In [16]:
%%time
# Upload the VFL trigger script to the control node and run it there.
# Note: the script's output could be discarded (e.g. redirected to /dev/null)
# if it turns out to be too noisy.
control_node = nodes_dict['control']['node']
control_node.upload_file(local_file_path="node_scripts/trigger_VFL.sh", remote_file_path="trigger_VFL.sh")

control_node.execute("bash trigger_VFL.sh")

[31m  % Total    % Received % Xferd  Average Speed   Time    Time     Time  Current
                                 Dload  Upload   Total   Spent    Left  Speed
  0     0    0     0    0     0      0      0 --:--:-- --:[0m[31m--:-- --:--:--     0*   Trying 10.137.3.3:31203...
* Connected to 10.137.3.3 (10.137.3.3) port 31203
> POST /api/v1/requestApproval HTTP/1.1
> Host: api-gateway.api-gateway.svc.cluster.local
> User-Agent: curl/8.5.0
> Accept: */*
[0m[31m> Content-Type: application/json
> Content-Length: 408
> 
} [408 bytes data]
100   408    0     0  100   408      0      6  0:01:08  0:00:59  0:00:09     0[0m  407[0m[31m[31m[31m[31m[31m[31m[31m[31m[31m[31m[31m[31m[31m[31m[31m[31m[31m[31m[31m[31m[31m[31m[31m[31m[31m[31m[31m[31m[31m[31m[31m[31m[31m[31m[31m[31m[31m[31m[31m[31m[31m[31m[31m[31m[31m[31m[31m[31m[31m[31m[31m[31m[31m[31m[31m[31m[31m[31m<html>
<head><title>504 Gateway Time-out</title></head>
<body>
<center><h

('<html>\r\n<head><title>504 Gateway Time-out</title></head>\r\n<body>\r\n<center><h1>504 Gateway Time-out</h1></center>\r\n<hr><center>nginx/1.25.1</center>\r\n</body>\r\n</html>\r\n',
 '  % Total    % Received % Xferd  Average Speed   Time    Time     Time  Current\n                                 Dload  Upload   Total   Spent    Left  Speed\n\r  0     0    0     0    0     0      0      0 --:--:-- --:--:-- --:--:--     0*   Trying 10.137.3.3:31203...\n* Connected to 10.137.3.3 (10.137.3.3) port 31203\n> POST /api/v1/requestApproval HTTP/1.1\r\n> Host: api-gateway.api-gateway.svc.cluster.local\r\n> User-Agent: curl/8.5.0\r\n> Accept: */*\r\n> Content-Type: application/json\r\n> Content-Length: 408\r\n> \r\n} [408 bytes data]\n\r100   408    0     0  100   408      0    407  0:00:01  0:00:01 --:--:--   407\r100   408    0     0  100   408      0    203  0:00:02  0:00:02 --:--:--   203\r100   408    0     0  100   408      0    135  0:00:03  0:00:03 --:--:--   135\r100   408    0     

In [19]:
%%time
# TODO: read the metrics based on the other script

# Upload the data-retrieval script to the control node and run it with one argument.
control_node = nodes_dict['control']['node']
script_arg = 2  # NOTE(review): the meaning of this argument is defined by retrieve_data.sh; confirm
control_node.upload_file(local_file_path="../scripts/retrieve_data.sh", remote_file_path="retrieve_data.sh")
control_node.execute(f"bash retrieve_data.sh {script_arg}")


70.86614173228347
71.65354330708661
71.9910011248594
72.55343082114736
72.77840269966255
72.77840269966255
73.3408323959505
73.67829021372329
73.56580427446569
73.56580427446569
74.01574803149606
74.12823397075366
74.46569178852643
74.80314960629921
74.69066366704162
74.69066366704162
75.14060742407199
75.14060742407199
75.0281214848144
CPU times: user 56.2 ms, sys: 13.6 ms, total: 69.8 ms
Wall time: 5.49 s


('70.86614173228347\n71.65354330708661\n71.9910011248594\n72.55343082114736\n72.77840269966255\n72.77840269966255\n73.3408323959505\n73.67829021372329\n73.56580427446569\n73.56580427446569\n74.01574803149606\n74.12823397075366\n74.46569178852643\n74.80314960629921\n74.69066366704162\n74.69066366704162\n75.14060742407199\n75.14060742407199\n75.0281214848144\n',
 '')

In [7]:
# Is re-fetching the slice needed every time? Probably not, but this should be tested.
slice = fablib.get_slice(name="DYNAMOS-on-FABRIC")
nodes = slice.get_nodes()

# Print each node's SSH command adapted for a local machine: point -i at the
# local key file and drop the -F ssh_config option.
for node in nodes:
    fabric_command = node.get_ssh_command()
    ssh_command = fabric_command.replace(
        "-i /home/fabric/work/fabric_config/slice_key", "-i ~/.ssh/keys/FABRIC-slice_key"
    )
    ssh_command = ssh_command.replace("-F /home/fabric/work/fabric_config/ssh_config ", "")

    print(ssh_command)

Uploading the node setup...


[<SFTPAttributes: [ size=1222 uid=1000 gid=1000 mode=0o100664 atime=1758572167 mtime=1758572168 ]>,
 <SFTPAttributes: [ size=1222 uid=1000 gid=1000 mode=0o100664 atime=1758572165 mtime=1758572165 ]>,
 <SFTPAttributes: [ size=1222 uid=1000 gid=1000 mode=0o100664 atime=1758572165 mtime=1758572165 ]>,
 <SFTPAttributes: [ size=1222 uid=1000 gid=1000 mode=0o100664 atime=1758572165 mtime=1758572165 ]>,
 <SFTPAttributes: [ size=1222 uid=1000 gid=1000 mode=0o100664 atime=1758572165 mtime=1758572165 ]>,
 <SFTPAttributes: [ size=1222 uid=1000 gid=1000 mode=0o100664 atime=1758572167 mtime=1758572168 ]>]

In [23]:

# Configure DYNAMOS for the FABRIC nodes
# agents_string = ",".join(agents)
# thirdparties_string = ",".join(thirdparties)


# nodes_dict['control']['node'].upload_file(local_file_path="node_scripts/configure_dynamos.sh", remote_file_path="configure_dynamos.sh");
# nodes_dict['control']['node'].execute(f"chmod +x configure_dynamos.sh && ./configure_dynamos.sh {agents_string} {thirdparties_string}");

Adding agents...
- agent 'server'
- agent 'clientone'
- agent 'clienttwo'
- agent 'clientthree'

Adding third parties...


In [24]:
# Optionally override the installation scripts 
# nodes_dict['control']['node'].upload_file(local_file_path="../configuration/dynamos-configuration.sh", remote_file_path="/home/ubuntu/DYNAMOS/configuration/dynamos-configuration.sh")
# nodes_dict['control']['node'].upload_file(local_file_path="../configuration/fill-rabbit-pvc.sh", remote_file_path="/home/ubuntu/DYNAMOS/configuration/fill-rabbit-pvc.sh");


In [15]:
# Install DYNAMOS by running its configuration script on the control node.
install_script = "~/DYNAMOS/configuration/dynamos-configuration.sh"
nodes_dict['control']['node'].execute(install_script)

DYNAMOS configuration v0.1.2
Setting up paths...
definitions_example.json copied over definitions.json to ensure a clean file
Generating RabbitMQ password...
Replacing tokens...
Installing namespaces...
Release "namespaces" does not exist. Installing it now.
NAME: namespaces
LAST DEPLOYED: Tue Sep 23 11:33:50 2025
NAMESPACE: default
STATUS: deployed
REVISION: 1
TEST SUITE: None
Preparing PVC
fill-pvc v0.1.2
pod/temp-pod created
pod/temp-pod-orch created
Waiting for temp-pod to be Running...
pod/temp-pod condition met
pod/temp-pod-orch condition met
pod "temp-pod" deleted
pod "temp-pod-orch" deleted
Installing Prometheus...
"prometheus-community" already exists with the same configuration, skipping
Hang tight while we grab the latest from your chart repositories...
...Successfully got an update from the "prometheus-community" chart repository
Update Complete. ⎈Happy Helming!⎈
Release "prometheus" does not exist. Installing it now.
NAME: prometheus
LAST DEPLOYED: Tue Sep 23 11:34:46 2025

('DYNAMOS configuration v0.1.2\nSetting up paths...\ndefinitions_example.json copied over definitions.json to ensure a clean file\nGenerating RabbitMQ password...\nReplacing tokens...\nInstalling namespaces...\nRelease "namespaces" does not exist. Installing it now.\nNAME: namespaces\nLAST DEPLOYED: Tue Sep 23 11:33:50 2025\nNAMESPACE: default\nSTATUS: deployed\nREVISION: 1\nTEST SUITE: None\nPreparing PVC\nfill-pvc v0.1.2\npod/temp-pod created\npod/temp-pod-orch created\nWaiting for temp-pod to be Running...\npod/temp-pod condition met\npod/temp-pod-orch condition met\npod "temp-pod" deleted\npod "temp-pod-orch" deleted\nInstalling Prometheus...\n"prometheus-community" already exists with the same configuration, skipping\nHang tight while we grab the latest from your chart repositories...\n...Successfully got an update from the "prometheus-community" chart repository\nUpdate Complete. ⎈Happy Helming!⎈\nRelease "prometheus" does not exist. Installing it now.\nNAME: prometheus\nLAST DEP

In [13]:
# Optional clean-up: uninstall DYNAMOS.
# Builds the helm command that removes every listed release; the execution
# line is left commented out so the cell is safe to run by accident.

helm_releases = ("agents", "api-gateway", "core", "orchestrator", "namespaces", "prometheus", "thirdparties")
command = "helm uninstall " + " ".join(helm_releases)
# nodes_dict['control']['node'].execute(command)

release "agents" uninstalled
release "api-gateway" uninstalled
release "core" uninstalled
release "orchestrator" uninstalled
These resources were kept due to the resource policy:
[Namespace] core
[Namespace] orchestrator
[Namespace] clienttwo
[Namespace] clientthree
[Namespace] uva
[Namespace] vu
[Namespace] surf
[Namespace] ingress
[Namespace] api-gateway
[Namespace] alpha
[Namespace] server
[Namespace] clientone

release "namespaces" uninstalled
release "prometheus" uninstalled
release "thirdparties" uninstalled


('release "agents" uninstalled\nrelease "api-gateway" uninstalled\nrelease "core" uninstalled\nrelease "orchestrator" uninstalled\nThese resources were kept due to the resource policy:\n[Namespace] core\n[Namespace] orchestrator\n[Namespace] clienttwo\n[Namespace] clientthree\n[Namespace] uva\n[Namespace] vu\n[Namespace] surf\n[Namespace] ingress\n[Namespace] api-gateway\n[Namespace] alpha\n[Namespace] server\n[Namespace] clientone\n\nrelease "namespaces" uninstalled\nrelease "prometheus" uninstalled\nrelease "thirdparties" uninstalled\n',
 '')

In [14]:
# Delete the client and server namespaces (sometimes needed after an uninstall).
# Only the command string is built here; the execution line stays commented out.
namespaces_to_delete = ("clientone", "clienttwo", "clientthree", "server")
command = "kubectl delete namespace " + " ".join(namespaces_to_delete)
# nodes_dict['control']['node'].execute(command)

namespace "clientone" deleted
namespace "clienttwo" deleted
namespace "clientthree" deleted
namespace "server" deleted


('namespace "clientone" deleted\nnamespace "clienttwo" deleted\nnamespace "clientthree" deleted\nnamespace "server" deleted\n',
 '')

In [None]:
# Optional delete etcd PVCs 
# nodes_dict['control']['node'].execute("kubectl get pvc --all-namespaces")

# nodes_dict['control']['node'].execute("kubectl delete pvc etcd-data-etcd-0 -n core")
# nodes_dict['control']['node'].execute("kubectl delete pvc etcd-data-etcd-1 -n core")
# nodes_dict['control']['node'].execute("kubectl delete pvc etcd-data-etcd-2 -n core")


# nodes_dict['control']['node'].execute("kubectl get pvc --all-namespaces")