Skip to content

Commit

Permalink
Fix additional review issues with GPU config
Browse files Browse the repository at this point in the history
  • Loading branch information
jart committed Jan 8, 2024
1 parent 3828eb8 commit 5dff322
Show file tree
Hide file tree
Showing 6 changed files with 29 additions and 13 deletions.
3 changes: 3 additions & 0 deletions llama.cpp/common.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -152,6 +152,7 @@ bool gpt_params_parse_ex(int argc, char ** argv, gpt_params & params) {
const std::string arg_prefix = "--";
llama_sampling_params & sparams = params.sparams;

assert(FLAG_gpu == LLAMAFILE_GPU_ERROR);
FLAG_gpu = LLAMAFILE_GPU_AUTO;

for (int i = 1; i < argc; i++) {
Expand Down Expand Up @@ -822,6 +823,8 @@ bool gpt_params_parse_ex(int argc, char ** argv, gpt_params & params) {
params.kv_overrides.back().key[0] = 0;
}

params.n_gpu_layers = llamafile_gpu_layers(params.n_gpu_layers);

return true;
}

Expand Down
13 changes: 0 additions & 13 deletions llama.cpp/main/main.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -118,13 +118,6 @@ int main(int argc, char ** argv) {
ShowCrashReports();
LoadZipArgs(&argc, &argv);

if (!IsXnuSilicon() &&
(!llamafile_has(argv, "-ngl") &&
!llamafile_has(argv, "--gpu-layers") &&
!llamafile_has(argv, "--n-gpu-layers"))) {
FLAG_gpu = LLAMAFILE_GPU_DISABLE;
}

if (!llamafile_has(argv, "--cli") &&
(llamafile_has(argv, "--server") ||
(!llamafile_has(argv, "-p") &&
Expand All @@ -143,12 +136,6 @@ int main(int argc, char ** argv) {
if (!gpt_params_parse(argc, argv, params)) {
return 1;
}

if (params.n_gpu_layers > 0 && !llamafile_gpu_supported()) {
fprintf(stderr, "fatal error: --n-gpu-layers %d was passed but no gpus were found\n", params.n_gpu_layers);
exit(1);
}

llama_sampling_params & sparams = params.sparams;

#ifndef LOG_DISABLE_LOGS
Expand Down
6 changes: 6 additions & 0 deletions llama.cpp/server/server.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2044,6 +2044,9 @@ static void server_params_parse(int argc, char **argv, server_params &sparams,
std::string arg;
bool invalid_param = false;

assert(FLAG_gpu == LLAMAFILE_GPU_ERROR);
FLAG_gpu = LLAMAFILE_GPU_AUTO;

for (int i = 1; i < argc; i++)
{
arg = argv[i];
Expand Down Expand Up @@ -2462,6 +2465,9 @@ static void server_params_parse(int argc, char **argv, server_params &sparams,
exit(1);
}
}

params.n_gpu_layers = llamafile_gpu_layers(params.n_gpu_layers);

if (!params.kv_overrides.empty()) {
params.kv_overrides.emplace_back(llama_model_kv_override());
params.kv_overrides.back().key[0] = 0;
Expand Down
1 change: 1 addition & 0 deletions llamafile/cuda.c
Original file line number Diff line number Diff line change
Expand Up @@ -534,6 +534,7 @@ static bool CompileAmdUnix(const char *dso, const char *src, const char *tmpdso)
"-use_fast_math",
"-DGGML_BUILD=1",
"-DGGML_SHARED=1",
"-Wno-return-type",
"-Wno-unused-result",
"-DGGML_USE_HIPBLAS",
"-DGGML_CUDA_MMV_Y=1",
Expand Down
18 changes: 18 additions & 0 deletions llamafile/gpu.c
Original file line number Diff line number Diff line change
Expand Up @@ -57,6 +57,24 @@ bool llamafile_gpu_supported(void) {
return ggml_cublas_loaded();
}

/**
 * Finalizes the GPU layer count once all configuration has been parsed.
 *
 * Three cases are handled:
 *   1. Caller explicitly requested GPU layers (> 0): die with a fatal
 *      error unless a working GPU backend was actually found.
 *   2. Caller left the default (-1) on a Metal-capable system: offload
 *      a single layer (presumably enough to enable the Metal path —
 *      NOTE(review): confirm 1 is the intended sentinel here).
 *   3. Otherwise: no GPU use is wanted or possible, so force the global
 *      GPU mode to disabled.
 *
 * @param n_gpu_layers is the value parsed from the command line
 * @return the possibly-adjusted number of layers to offload
 */
int llamafile_gpu_layers(int n_gpu_layers) {
    if (n_gpu_layers > 0) {
        // explicit request: a missing GPU is a hard error, not a fallback
        if (!llamafile_gpu_supported()) {
            fprintf(stderr, "fatal error: --n-gpu-layers %d was passed but no gpus were found\n",
                    n_gpu_layers);
            exit(1);
        }
        return n_gpu_layers;
    }
    if (n_gpu_layers == -1 && ggml_metal_supported())
        return 1;
    // zero (or any other non-positive value) without Metal: turn GPUs off
    FLAG_gpu = LLAMAFILE_GPU_DISABLE;
    return n_gpu_layers;
}

/**
* Parses `--gpu` flag.
* @return GPU configuration, or -1 if `s` is a bad value
Expand Down
1 change: 1 addition & 0 deletions llamafile/llamafile.h
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,7 @@ extern int FLAG_gpu;
extern bool FLAG_tinyblas;
extern bool FLAG_nocompile;
extern bool FLAG_recompile;
int llamafile_gpu_layers(int);
bool llamafile_gpu_supported(void);
int llamafile_gpu_parse(const char *);
const char *llamafile_describe_gpu(void);
Expand Down

0 comments on commit 5dff322

Please sign in to comment.