# EP

### But : Réserver plusieurs noeuds d'un cluster et lancer smpirun ep à l'aide de "execo.action.Remote"


#### Informations générales

In [1]:
# Name of the experiment; later cells use it to build the source and result
# paths and pass it to the final commit cell.
experiment_name = "EP"

In [39]:
%%bash -s
# Print the most recent commit and the version of the `python` executable.
git log -n 1
python --version

Python 2.7.13


#### Préparation de l'environnement

In [40]:
%%bash -s
# Install the execo package for the current user.
# NOTE(review): this uses pip3 while the previous cell's output reports
# Python 2.7.13 -- confirm the notebook kernel's interpreter matches.
pip3 install --user execo

Collecting execo
Installing collected packages: execo
Successfully installed execo-2.6.3


In [42]:
import execo
from execo import *
from execo_g5k import *
from execo_engine import *
import os
import sys
import json
import time
import datetime
import re
from shutil import copy
from subprocess import check_output

##### Réservation des nœuds

In [None]:
# OAR job name
jobname = ' '
# Number of nodes to reserve
nodecount = 5
# Reservation duration (walltime)
walltime = "6:0:0"

# Extra OAR option filtering out Nancy's graphene cluster (buggy)
resources_selection = '-p "cluster != \'graphene\'"'

# Site on which the job is submitted: Nancy
site = "nancy"

In [None]:
# Reuse a running OAR job named `jobname` if there is one; otherwise submit a
# new job, unless jobs with that name already exist but are not running yet.
jobs = get_current_oar_jobs()
jobid = None
waiting_jobs = []
# Use a dedicated loop variable for each job's site: unpacking into `site`
# would overwrite the configured submission site with the site of whichever
# job happened to be examined last.
# Iterate from the end of the list, i.e. in the order successive pop() calls
# would yield the jobs.
for j, job_site in reversed(jobs):
    info = get_oar_job_info(j, job_site)
    if info['name'] != jobname:
        continue
    if info['state'] == 'Running':
        jobid = j
        # From here on `site` must designate the site hosting the reused job.
        site = job_site
        print("A {} job is already running, using it. jobid is {}".format(jobname, jobid))
        break
    waiting_jobs.append(j)

if jobid is None and not waiting_jobs:
    # No job with this name at all: submit a new deploy-type job on `site`.
    jobspec = OarSubmission(resources="/cluster=1/nodes={}".format(nodecount), walltime=walltime,
                            additional_options=resources_selection, job_type="deploy", name=jobname)
    jobid, _ = oarsub([(jobspec, site)]).pop()
    print("New job submitted, jobid is {}".format(jobid))
elif jobid is None:
    # Matching jobs exist but none is running: leave the decision to the user.
    print("One or more {} jobs exist ({}) but are not running.\n"
          " Connect to the frontend to see what is happening, and/or run the cell again.".format(
          jobname, ", ".join([str(j) for j in waiting_jobs])))

On récupère les nœuds du job et on les trie par adresse.

In [None]:
# Fetch the nodes of the job, ordered by address, and display them.
nodes = sorted(get_oar_job_nodes(jobid), key=lambda host: host.address)
nodes

##### Déploiement de l'environnement

In [None]:
# Set to True to force redeploying the OS on the nodes in the deployment section.
force_redeploy = False
# Filename of the kadeploy environment file (YAML).
environment_dsc_file = '../../../../../EP/debian9-x64-bigdata-tutorial.yaml'

In [None]:
# Describe the deployment of the environment file (given as an absolute path)
# on the reserved nodes.
deployment = Deployment(
    hosts=nodes,
    env_file=os.path.abspath(environment_dsc_file),
    other_options="-r ext4 --no-debug-mode",
)

# Run the deployment, forwarding its output to the notebook. The
# already-deployed check is disabled when a redeploy is forced.
deploy_ok, deploy_failed = deploy(
    deployment,
    check_deployed_command=not force_redeploy,
    stdout_handlers=[sys.stdout],
    stderr_handlers=[sys.stderr],
)

#### Test : commande simple 'ls'

In [None]:
# Signature reminder: execo.action.Remote(cmd, hosts, connection_params, process_args)
# Smoke test: run `ls` on every node, then display whether the action succeeded.
Remote_test = execo.action.Remote(
    cmd='ls',
    hosts=nodes,
    connection_params=None,
    process_args=None,
)
Remote_test.run().ok

#### Installation de l'environnement sur les nœuds

In [None]:
# Shell steps run as root on every node to build and install SimGrid 3.18.
# They are chained with `&&` so the first failing step stops the sequence.
install_steps = [
    # -y: the command runs over non-interactive ssh, where apt-get's
    # confirmation prompt cannot be answered and would abort the install.
    'apt-get install -y g++ libboost-all-dev',
    'wget http://gforge.inria.fr/frs/download.php/latestfile/8/SimGrid-3.18.tar.gz',
    'tar -xvf SimGrid-3.18.tar.gz',
    'cd SimGrid-3.18',
    # Explicit source directory (".") instead of relying on cmake's implicit
    # use of the current directory.
    'cmake -DCMAKE_INSTALL_PREFIX=/usr/local -Denable_smpi=on -Denable_documentation=off .',
    'make -j',
    'make check',
    'make install -j',
]
Remote_install = execo.action.Remote(cmd=' && '.join(install_steps), hosts=nodes, connection_params={'user': 'root'})

In [None]:
# Run the installation action on the nodes and display its `ok` status.
Remote_install.run().ok

In [50]:
# Local directory holding the sources of the current experiment.
path = "~/Stage-POLARIS/SimGrid/examples/SMPI/{}/src/".format(experiment_name)

#### Envoi des fichiers sources sur les nœuds

In [45]:
# Create the directory that will receive the experiment sources on every node (as root).
execo.action.Remote(
    cmd='mkdir -p experiments_src',
    hosts=nodes,
    connection_params={'user': 'root'},
).run().ok

True

In [51]:
# Copy the platform description, hostfile, source file and Makefile of the
# experiment to ~/experiments_src on every node (as root).
Remote_smpi_env = execo.action.Put(
    hosts=nodes,
    local_files=[
        path + "griffon.xml",
        path + "griffon_hostfile.txt",
        path + "broadcast.c",
        path + "Makefile",
    ],
    remote_location='~/experiments_src',
    connection_params={'user': 'root'},
)
Remote_smpi_env.run().ok

True

#### Exécution de l'expérience

In [52]:
# Build the experiment on every node and run it with NPROCS=2 (as root).
Remote_smpi = execo.action.Remote(
    cmd='cd ~/experiments_src && make && make run NPROCS=2',
    hosts=nodes,
    connection_params={'user': 'root'},
)
Remote_smpi.run().ok

True

#### Récupération des résultats

In [53]:
# Create today's local results directory for this experiment (named after the
# current date followed by an underscore).
execo.Process(
    cmd='mkdir -p /home/nezzine/Stage-POLARIS/SimGrid/examples/SMPI/{}/results/{}'.format(
        experiment_name, datetime.datetime.now().strftime("%Y-%m-%d_"))
).run().ok

True

In [55]:
# Download smpi_results.txt from every node into today's results directory,
# one local file per node, named <node shortname>_<date>_<hour:minute>.txt.

# Take the timestamp once so the directory name and every file name agree,
# even if the loop runs across a minute or day boundary.
fetch_time = datetime.datetime.now()
results_dir = ('~/Stage-POLARIS/SimGrid/examples/SMPI/' + experiment_name + '/results/'
               + fetch_time.strftime("%Y-%m-%d_") + '/')
file_stamp = fetch_time.strftime("%Y-%m-%d_%H:%M")

# Iterate over the nodes actually obtained rather than range(nodecount): a
# reused job may hold a different number of nodes than `nodecount`.
for node in nodes:
    fetch = execo.action.Get(hosts=[node],  # `hosts` is an iterable of hosts
                             remote_files=["~/experiments_src/smpi_results.txt"],
                             local_location=results_dir + get_host_shortname(node) + '_' + file_stamp + '.txt',
                             connection_params={'user': 'root'}).run()
    if not fetch.ok:
        # Report the failure instead of silently discarding the status.
        print("Could not fetch results from {}".format(get_host_shortname(node)))

#### Fin d'expérience : suppression du job

In [None]:
# Delete the reservation. oardel expects an iterable of (job_id, frontend)
# tuples, not a bare job id; `site` is the site on which the job lives.
if jobid is not None:
    oardel([(jobid, site)])

#### Publication des résultats

In [3]:
%%bash -s $experiment_name
# The experiment name reaches this script as its first positional argument
# ($1) through `-s`; the Python variable is not defined as a shell variable
# here, so "$experiment_name" expanded to an empty string in the commit message.
git add .
git commit -m "Automatic results update : $1"

[master bd05a25] Automatic results update :
 Committer: Najwa Ez Zine <nezzine@fnancy>
Your name and email address were configured automatically based
on your username and hostname. Please check that they are accurate.
You can suppress this message by setting them explicitly. Run the
following command and follow the instructions in your editor to edit
your configuration file:

    git config --global --edit

After doing this, you may fix the identity used for this commit with:

    git commit --amend --reset-author

 1 file changed, 5 insertions(+), 15 deletions(-)
