-
Notifications
You must be signed in to change notification settings - Fork 10
/
main.slurm
36 lines (29 loc) · 1.41 KB
/
main.slurm
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
#!/bin/bash -l
# Slurm batch script: launches hydra_main.py with srun on several GPU nodes.
# Submit with: sbatch main.slurm
#
# The -l flag makes bash a login shell so that profile scripts are sourced
# (presumably what makes `module` and `conda` available -- confirm on the
# target cluster).
#
# Requirements:
#   - $WORK must be set; ${WORK}/pacnet and ${WORK}/4dvarnet-core are put on
#     PYTHONPATH.
#   - a conda environment named "4dvarnet" must exist.
#
# Project account charged for the job.
#SBATCH --account=yrf@gpu
# Number of nodes.
#SBATCH --nodes=4
# GPUs reserved on each node (4 per node, i.e. 16 over the 4 nodes).
#SBATCH --gres=gpu:4
# Tasks started on each node (same count as the GPUs reserved per node).
#SBATCH --ntasks-per-node=4
# CPU cores per task.
#SBATCH --cpus-per-task=10
# Wall-clock limit (HH:MM:SS).
#SBATCH --time=04:00:00
# Do not use hardware multithreading.
#SBATCH --hint=nomultithread
# Node feature constraint (presumably V100 GPUs with 32 GB -- confirm).
#SBATCH -C v100-32g
#SBATCH --qos=qos_gpu-t3
# stdout / stderr files; %j is replaced by the job id.
#SBATCH --output=log%j.out
#SBATCH --error=log%j.err

# Legacy activation of a conda env passed as first argument (disabled).
#source activate $1

# Start from a clean module environment.
module purge

# Initialise conda for this non-interactive shell, then activate the env.
# Abort on failure: otherwise srun would start on every node with the wrong
# Python interpreter and waste the whole allocation.
eval "$(conda shell.bash hook)"
if ! conda activate 4dvarnet; then
  printf '%s\n' "main.slurm: cannot activate conda env '4dvarnet'" >&2
  exit 1
fi

# Fail early when WORK is missing instead of exporting '/pacnet:/4dvarnet-core'.
: "${WORK:?WORK must be set (directory containing pacnet and 4dvarnet-core)}"
# Prepend the project checkouts; the previous PYTHONPATH is appended only when
# it is non-empty, so no empty entry (trailing ':') is produced.
export PYTHONPATH="${WORK}/pacnet:${WORK}/4dvarnet-core${PYTHONPATH:+:${PYTHONPATH}}"

# Run section. Exactly one srun line should be active; the others are kept as
# ready-to-use alternatives. The script's exit status is that of srun.
# Legacy (main.py) configs are stored in config_q --> example: --config=q.nad_swot

# train the model
#srun python main.py --config=$1 --max_epochs=30 --progress_bar_refresh_rate=5 run
#srun python hydra_main.py xp=ose_gf_wsst file_paths=jz_ose entrypoint=run entrypoint.max_epochs=1
srun python hydra_main.py xp=stoch_En4DVar_osse_gf file_paths=jz_osse entrypoint=run entrypoint.max_epochs=3
#srun python hydra_main.py xp=sla_natl file_paths=jz_osse entrypoint=run entrypoint.max_epochs=50

# test the model
#srun python main.py --config=$1 --ckpt_path=checkpoints/GF_OSE_OSSE.ckpt --progress_bar_refresh_rate=5 test
#srun python hydra_main.py xp=sla_natl file_paths=jz_osse entrypoint=test entrypoint.ckpt_path="lightning_logs/version_1011512/checkpoints/modelSLAInterpGF-Exp3-epoch\=05-val_loss\=0.16.ckpt"