Skip to content

Commit

Permalink
Merge 5eff72f into ddaeb14
Browse files Browse the repository at this point in the history
  • Loading branch information
pritchardn committed Aug 8, 2022
2 parents ddaeb14 + 5eff72f commit 3ce2264
Show file tree
Hide file tree
Showing 6 changed files with 97 additions and 49 deletions.
92 changes: 43 additions & 49 deletions daliuge-translator/dlg/dropmake/web/main.js
Original file line number Diff line number Diff line change
Expand Up @@ -88,8 +88,10 @@ async function initiateDeploy(method, selected, clickedName){
$("#gen_helm_button").val("Generate & Deploy Physical Graph")
$("#dlg_helm_deploy").prop("checked", true)
$("#pg_helm_form").submit()
}else if(method === "rest"){
}else if(method === "rest-ood"){
restDeploy()
} else if(method === "rest-direct"){
directRestDeploy()
}
}

Expand Down Expand Up @@ -334,14 +336,17 @@ function fillOutSettings() {

var directOption = '<option value="direct">Direct</option>'
var helmOption = '<option value="helm">Helm</option>'
var restOption = '<option value="rest">Rest</option>'
var restOODOption = '<option value="rest-ood">Rest-OOD</option>'
var restDirectOption = '<option value="rest-direct">Rest-Direct</option>'

if(element.deployMethod === "direct"){
directOption = '<option value="direct" selected="true">Direct</option>'
}else if(element.deployMethod === "helm"){
helmOption = '<option value="helm" selected="true">Helm</option>'
}else if(element.deployMethod === "rest"){
restOption = '<option value="rest" selected="true">Rest</option>'
}else if(element.deployMethod === "rest-ood"){
restOODOption = '<option value="rest-ood" selected="true">Rest-OOD</option>'
} else if(element.deployMethod === "rest-direct"){
restDirectOption = '<option value="rest-direct" selected="true">Rest-Direct</option>'
}

var deplpoyMethodRow = '<div class="input-group">'+
Expand All @@ -351,7 +356,8 @@ function fillOutSettings() {
'<div class="settingsInputTooltip tooltip tooltipBottom form-control" data-text="Deploy Method"><select class="deployMethodMethod">'+
directOption+
helmOption+
restOption+
restOODOption+
restDirectOption+
'</select></div>'+
'<input type="text" class="form-control deployMethodActive" value="'+element.active+'">'+
'<button class="btn btn-secondary btn-sm tooltip tooltipBottom" data-text="Delete Deploy Option" type="button" onclick="removeDeployMethod(event)"><i class="material-icons md-24">delete</i></button>'+
Expand All @@ -365,7 +371,8 @@ function addDeployMethod(){

var directOption = '<option value="direct" selected="true">Direct</option>'
var helmOption = '<option value="helm">Helm</option>'
var restOption = '<option value="rest">Rest</option>'
var restOODOption = '<option value="rest-ood">Rest-OOD</option>'
var restDirectOption = '<option value="rest-direct">Rest-Direct</option>'

var deplpoyMethodRow = '<div class="input-group">'+
'<div class="settingsInputTooltip tooltip tooltipBottom form-control" data-text="Deploy Option Name, This must be unique"><input type="text" placeholder="Deployment Name" class=" deployMethodName" value=""></div>'+
Expand All @@ -374,7 +381,8 @@ function addDeployMethod(){
'<div class="settingsInputTooltip tooltip tooltipBottom form-control" data-text="Deploy Method"><select class="deployMethodMethod" name="Deploy Method">'+
directOption+
helmOption+
restOption+
restOODOption+
restDirectOption+
'</select></div>'+
'<input type="text" class="form-control deployMethodActive" value="false">'+
'<button class="btn btn-secondary btn-sm tooltip tooltipBottom" data-text="Delete Deploy Option" type="button" onclick="removeDeployMethod(event)"><i class="material-icons md-24">delete</i></button>'+
Expand Down Expand Up @@ -466,9 +474,8 @@ function handleFetchErrors(response) {
return response;
}

async function helmDeploy() {
// Here as a placeholder until a single rest-deployment is worked out
// This code will largely be a copy form restDeploy, but slightly different
async function directRestDeploy(){
// fetch manager host and port from local storage
murl = window.localStorage.getItem("manager_url");
if (!murl) {
saveSettings();
Expand All @@ -477,9 +484,9 @@ async function helmDeploy() {
fillOutSettings()
murl = window.localStorage.getItem("manager_url");
})
}
};
var manager_url = new URL(murl);
console.log("In Helm Deploy")
console.log("In Direct REST Deploy");

const manager_host = manager_url.hostname;
const manager_port = manager_url.port;
Expand All @@ -499,58 +506,47 @@ async function helmDeploy() {
console.log("Manager prefix:'" + manager_prefix + "'");
console.log("Request mode:'" + request_mode + "'");


// sessionId must be unique or the request will fail
const lgName = pgtName.substring(0, pgtName.lastIndexOf("_pgt.graph"));
const sessionId = lgName + "-" + Date.now();
console.log("sessionId:'" + sessionId + "'");

// build urls
// the manager_url in this case has to point to daliuge_ood
const create_helm_url = manager_url + "/api/helm/start";
const pgt_url = "/gen_pg?tpl_nodes_len=1&pgt_id=" + pgtName; // TODO: tpl_nodes_len >= nodes in LG
const node_list_url = manager_url + "/api/nodes";
const pg_spec_url = "/gen_pg_spec";
const create_session_url = manager_url + "/api/sessions";
const append_graph_url = manager_url + "/api/sessions/" + sessionId + "/graph/append";
const deploy_graph_url = manager_url + "/api/sessions/" + sessionId + "/deploy";
const mgr_url = manager_url + "/session?sessionId=" + sessionId;
// fetch the PGT from this server
console.log("sending request to ", pgt_url);
console.log("graph name:", pgtName);
const pgt = await fetch(pgt_url, {
const nodes_url = manager_url + "/api/nodes";

const nodes = await fetch(nodes_url, {
method: 'GET',
mode: request_mode
})
.then(handleFetchErrors)
.then(response => response.json())
.catch(function (error) {
showMessageModal("Error", error + "\nGetting PGT unsuccessful: Unable to continue!");
});
// fetch the nodelist from engine
console.log("sending request to ", node_list_url);
const node_list = await fetch(node_list_url, {
.catch(function (error){
showMessageModal('Error', error + "\nGetting Nodes unsuccessful");
})
console.log(nodes)

const pgt_url = "/gen_pg?tpl_nodes_len=" + nodes.length.toString() + "&pgt_id=" + pgtName;
console.log("sending request to ", pgt_url);
console.log("graph name:", pgtName);
const pgt = await fetch(pgt_url, {
method: 'GET',
// mode: request_mode,
// credentials: 'include',
headers: {
'Content-Type': 'application/x-www-form-urlencoded',
'Origin': 'http://localhost:8084'
},
})
.then(handleFetchErrors)
.then(response => response.json())
.catch(function (error) {
showMessageModal('Error', error + "\nGetting node_list unsuccessful: Unable to continue!");
showMessageModal('Error', error + "\nGetting PGT unsuccessful: Unable to continue!");
});
console.log("node_list", node_list);
// build object containing manager data

console.log("node_list", nodes);
const pg_spec_request_data = {
manager_host: manager_host,
node_list: node_list,
node_list: nodes,
pgt_id: pgt_id
}

console.log(pg_spec_request_data);
// request pg_spec from translator
const pg_spec_url = "/gen_pg_spec";
const pg_spec_response = await fetch(pg_spec_url, {
method: 'POST',
mode: request_mode,
Expand All @@ -564,10 +560,8 @@ async function helmDeploy() {
.catch(function (error) {
showMessageModal('Error', error + "\nGetting pg_spec unsuccessful: Unable to continue!");
});

console.log("pg_spec response", pg_spec_response);
// create session on engine
const session_data = {"sessionId": sessionId};
const create_session_url = manager_url + "/api/sessions";
const create_session = await fetch(create_session_url, {
credentials: 'include',
cache: 'no-cache',
Expand All @@ -592,6 +586,7 @@ async function helmDeploy() {
console.log("compressed_pg_spec", compressed_pg_spec);

// append graph to session on engine
const append_graph_url = manager_url + "/api/sessions/" + sessionId + "/graph/append";
const append_graph = await fetch(append_graph_url, {
credentials: 'include',
method: 'POST',
Expand All @@ -613,6 +608,7 @@ async function helmDeploy() {
console.log("append graph response", append_graph);
// deploy graph
// NOTE: URLSearchParams here turns the object into a x-www-form-urlencoded form
const deploy_graph_url = manager_url + "/api/sessions/" + sessionId + "/deploy";
const deploy_graph = await fetch(deploy_graph_url, {
credentials: 'include',
method: 'POST',
Expand All @@ -627,13 +623,11 @@ async function helmDeploy() {
showMessageModal('Error', error + "\nUnable to continue!");
});
//showMessageModal("Chart deployed" , "Check the dashboard of your k8s cluster for status updates.");
const mgr_url = manager_url + "/session?sessionId=" + sessionId;
console.log("deploy graph response", deploy_graph);
// Open DIM session page in new tab
// Until we have somewhere else to re-direct helm deployments. This is probably for the best.
//window.open(mgr_url, '_blank').focus();
window.open(mgr_url, '_blank').focus();
}


async function restDeploy() {
// fetch manager host and port from local storage
murl = window.localStorage.getItem("manager_url");
Expand Down
54 changes: 54 additions & 0 deletions docs/deployment.rst
Original file line number Diff line number Diff line change
Expand Up @@ -66,6 +66,51 @@ Deployment with OpenOnDemand

`OpenOnDemand <https://openondemand.org>`_ (OOD) is a system providing an interactive interface to remote compute resources. It is becoming increasingly popular with a number of HPC centers around the world. The two Australian research HPC centers Pawsey and NCI are planning to roll it out for their users. Independently we had realized that |daliuge| is missing an authentication, authorization and session management system and started looking into OOD as a solution for this. After a short evaluation we have started integrating OOD into the deployment for our small in-house compute cluster. In order to make this work we needed to implement an additional interface between the translator running on an external server (e.g. AWS) and OOD and then further on into the (SLURM) batch job system. This interface code is currently in a separate private git repository, but will be released as soon as we have finished testing it. The code mimics the |daliuge| data island manager's REST interface, but instead of launching the workflow directly it prepares a SLURM job submission script and places it into the queue. Users can then use the standard OOD web-pages to monitor the jobs and get access to the logs and results of the workflow execution. OOD allows the integration of multiple compute resources, including Kubernetes and also (to a certain degree) GCP, AWS and Azure. Once configured, users can choose to submit their jobs to any of those. Our OOD interface code has been implemented as an OOD embedded `Phusion Passenger <https://www.phusionpassenger.com/>`_ `Flask <https://flask.palletsprojects.com/en/2.0.x/>`_ application, which is `WSGI <https://wsgi.readthedocs.io>`_ compliant. Very little inside that application is OOD specific, so it can easily be ported to other deployment scenarios.

:numref:`deployment.fig.ood` describes the actions taken by DALiuGE elements when submitting a graph through OpenOnDemand.
Importantly, the physical graph deployment is triggered by the user's browser directly, not the machine hosting the translator.

.. _deployment.fig.ood:

.. figure:: images/deploy_ood.jpeg

Sequence diagram of graph deployment in OOD environment.

Direct Deployment
~~~~~~~~~~~~~~~~~

It is of course possible to submit graphs to |daliuge| managers without additional runtime environments.
The manager and translator components can be docker images or raw processes.
We currently support two methods for submitting graphs in this scenario.

Direct
------

Direct deployment assumes the machine hosting the translator can communicate with the manager machines freely.
:numref:`deployment.fig.direct` presents a sequence diagram outlining the communication between the different components in this case.

.. _deployment.fig.direct:

.. figure:: images/deploy_direct.jpeg

Sequence diagram of direct graph deployment.

Restful
-------

Restful deployment is useful in the case where only a user's machine can communicate with engine instances but the translator cannot (as is often the case with an externally hosted translator process).
The browser in this case drives execution and submits the graph directly to the manager nodes.
:numref:`deployment.fig.rest` presents a sequence diagram outlining the communication between the different components in this case.
Conceptually this is similar to how the OpenOnDemand deployment works, but targeting direct graph deployment rather than SLURM job submission.

N.B. Cross-Origin Resource Sharing (CORS) restrictions may cause the browser's requests to fail with confusing errors. If running all machines
locally, make sure that your host descriptions in EAGLE and the translator are 'localhost'.

.. _deployment.fig.rest:

.. figure:: images/deploy_directRest.jpeg

Sequence diagram of restful graph deployment.

Deployment with Kubernetes (Experimental)
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

Expand All @@ -75,6 +120,15 @@ Multi-node kubernetes clusters are now supported to get started see `start_helm_
Your environment will need to have `kubectl` properly configured to point to your desired cluster.
See `daliuge-k8s/README.md <https://github.com/ICRAR/daliuge/tree/master/daliuge-k8s>`_ for a more detailed setup guide.

:numref:`deployment.fig.helm` describes the actions taken by DALiuGE elements when submitting a graph through helm.
Importantly, there is (currently) no return to the browser indicating success or failure of the submission or job.
The user will need to monitor the k8s environment directly.

.. _deployment.fig.helm:

.. figure:: images/deploy_helm.jpeg

Sequence diagram of graph deployment in helm environment.

Component Deployment
====================
Expand Down
Binary file added docs/images/deploy_direct.jpeg
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Binary file added docs/images/deploy_directRest.jpeg
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Binary file added docs/images/deploy_helm.jpeg
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Binary file added docs/images/deploy_ood.jpeg
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.

0 comments on commit 3ce2264

Please sign in to comment.