-
Notifications
You must be signed in to change notification settings - Fork 1
/
multiBenchmark.sh
executable file
·80 lines (62 loc) · 2.06 KB
/
multiBenchmark.sh
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
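# Script to run multiple benchmarks at once.
# E.g. ./multiBenchmark.sh <Model-Name> <Cached-Location> <Number-of-runs>
# Note: only <Model-Name> ($1) and <Cached-Location> ($2) are read from the
# command line in this script.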
# Capture the positional arguments; both fall back to the empty string
# when the argument is not supplied.
cached_location="${2:-}"   # second argument
model_name="${1:-}"        # first argument, matched against *gpt-* by the benchmarks
# Interactively ask which benchmarks to run, launch every selected benchmark
# as a background job, then wait for all of them to finish.
# Globals:
#   benchmarks (written) - names of the selected benchmark functions
#   machiavelliParams, ethicsParams, theoryofmindParams (written) - reset to
#     empty arrays; nothing in this function reads them
# Inputs:  one answer per prompt on stdin; "y" or "yes" (any case) selects
# Outputs: prompts and progress messages on stdout, failure notes on stderr
# Returns: 0 if every launched benchmark exited 0, 1 otherwise
function main(){
  benchmarks=()
  machiavelliParams=()
  ethicsParams=()
  theoryofmindParams=()

  # Prompt label and the function it selects, index-aligned.
  local -a labels=('1. Machiavelli' '2. Ethics' '3. Theory of Mind')
  local -a names=('machiavelli' 'ethics' 'theory_of_mind')
  local -a pids=()
  local ans benchmark i
  local failed=0

  # printf instead of echo: bash's echo prints "\n" literally unless given -e.
  printf 'Choose benchmarks: \n\n'
  for i in "${!names[@]}"; do
    printf "%s" "${labels[i]} [y/n]:"
    # -r keeps backslashes literal; a failed read (EOF) counts as "no".
    if read -r ans && [[ $ans == [yY] || $ans == [yY][eE][sS] ]]; then
      benchmarks+=("${names[i]}")
    fi
  done
  printf "%s \n" "${benchmarks[@]}"

  for benchmark in "${benchmarks[@]}"; do
    printf "Running %s benchmark \n" "${benchmark}"
    "$benchmark" &
    pids+=("$!")
  done

  # Barrier: without it the script would reach its end while the jobs are
  # still running, and their exit statuses would never be looked at.
  for i in "${!pids[@]}"; do
    if ! wait "${pids[i]}"; then
      printf '%s benchmark failed\n' "${benchmarks[i]}" >&2
      failed=1
    fi
  done
  return "$failed"
}
# Run the Machiavelli benchmark: generate trajectories, then evaluate them.
# Globals:
#   model_name (read) - values containing "gpt-" use the LMAgent agent,
#     everything else uses Mistral_Agent
# Returns: 1 if the benchmark directory cannot be entered or left; otherwise
#   the status of the last python step that failed, or 0 if both succeeded
function machiavelli(){
  local status=0

  # Stop before running anything if the directory is missing: otherwise the
  # python steps would run in the wrong place and the closing "cd ../../"
  # would leave the caller two levels above where it started.
  cd benchmarks/machiavelli/ || return 1

  case "${model_name}" in
    *gpt-*)
      python -m generate_trajectories -a LMAgent --traj_dir demo.py || status=$?
      python -m evaluate_trajectories --traj_dir demo.py --num_episodes 1 --results_file ./demo_results.csv || status=$?
      ;;
    *)
      python -m generate_trajectories -a Mistral_Agent --traj_dir ./demo.py --num_episodes 1 || status=$?
      python -m evaluate_trajectories --traj_dir demo.py --num_episodes 1 --results_file ./demo_results.csv || status=$?
      ;;
  esac

  cd ../../ || return 1
  # Report the python outcome rather than the status of the final cd.
  return "$status"
}
# Run the Ethics benchmark by invoking evaluate.py from benchmarks/ethics/.
# Globals:
#   model_name (read)     - passed to evaluate.py via --model
#   OPENAI_API_KEY (read) - placed in the command's environment when
#     model_name contains "gpt-"
# Returns: 1 if the benchmark directory cannot be entered or left; otherwise
#   the exit status of the python command
function ethics(){
  local status=0

  # Stop early if the directory is missing so nothing runs in the wrong
  # place and the closing "cd ../../" cannot move the caller elsewhere.
  cd benchmarks/ethics/ || return 1

  case "${model_name}" in
    *gpt-*)
      # evaluate.py is run through python; a bare "evaluate.py" is looked up
      # on PATH and is not found from the current directory.
      OPENAI_API_KEY="${OPENAI_API_KEY}" python evaluate.py --model "${model_name}" || status=$?
      ;;
    *)
      # Path is relative to benchmarks/ethics/, which is already the cwd.
      python evaluate.py --model "${model_name}" || status=$?
      ;;
  esac

  cd ../../ || return 1
  # Report the python outcome rather than the status of the final cd.
  return "$status"
}
# Run the Theory of Mind benchmark by invoking main.py with 10 questions.
# Globals:
#   model_name (read)     - passed to main.py via --model; values without
#     "gpt-" additionally get "--huggingface True"
#   OPENAI_API_KEY (read) - placed in the command's environment when
#     model_name contains "gpt-"
# Returns: 1 if the scripts directory cannot be entered or left; otherwise
#   the exit status of the python command
function theory_of_mind(){
  local status=0

  # NOTE(review): "thoery" looks like a misspelling of "theory"; confirm
  # against the actual directory name before changing it.
  # Stop early if the directory is missing so nothing runs in the wrong
  # place and the closing "cd ../../../" cannot move the caller elsewhere.
  cd benchmarks/thoery_of_mind_gpt4/scripts/ || return 1

  case "${model_name}" in
    *gpt-*)
      OPENAI_API_KEY="${OPENAI_API_KEY}" python main.py --model "${model_name}" --n_questions 10 || status=$?
      ;;
    *)
      python main.py --model "${model_name}" --n_questions 10 --huggingface True || status=$?
      ;;
  esac

  cd ../../../ || return 1
  # Report the python outcome rather than the status of the final cd.
  return "$status"
}
# Entry point: start the interactive benchmark selection.
main "$@"