Skip to content

Commit

Permalink
Fix additional review issues with GPU config
Browse files Browse the repository at this point in the history
  • Loading branch information
jart committed Jan 8, 2024
1 parent 3828eb8 commit 5dff322
Show file tree
Hide file tree
Showing 6 changed files with 29 additions and 13 deletions.
3 changes: 3 additions & 0 deletions llama.cpp/common.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -152,6 +152,7 @@ bool gpt_params_parse_ex(int argc, char ** argv, gpt_params & params) {
const std::string arg_prefix = "--";
llama_sampling_params & sparams = params.sparams;

assert(FLAG_gpu == LLAMAFILE_GPU_ERROR);
FLAG_gpu = LLAMAFILE_GPU_AUTO;

for (int i = 1; i < argc; i++) {
Expand Down Expand Up @@ -822,6 +823,8 @@ bool gpt_params_parse_ex(int argc, char ** argv, gpt_params & params) {
params.kv_overrides.back().key[0] = 0;
}

params.n_gpu_layers = llamafile_gpu_layers(params.n_gpu_layers);

return true;
}

Expand Down
13 changes: 0 additions & 13 deletions llama.cpp/main/main.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -118,13 +118,6 @@ int main(int argc, char ** argv) {
ShowCrashReports();
LoadZipArgs(&argc, &argv);

if (!IsXnuSilicon() &&
(!llamafile_has(argv, "-ngl") &&
!llamafile_has(argv, "--gpu-layers") &&
!llamafile_has(argv, "--n-gpu-layers"))) {
FLAG_gpu = LLAMAFILE_GPU_DISABLE;
}

if (!llamafile_has(argv, "--cli") &&
(llamafile_has(argv, "--server") ||
(!llamafile_has(argv, "-p") &&
Expand All @@ -143,12 +136,6 @@ int main(int argc, char ** argv) {
if (!gpt_params_parse(argc, argv, params)) {
return 1;
}

if (params.n_gpu_layers > 0 && !llamafile_gpu_supported()) {
fprintf(stderr, "fatal error: --n-gpu-layers %d was passed but no gpus were found\n", params.n_gpu_layers);
exit(1);
}

llama_sampling_params & sparams = params.sparams;

#ifndef LOG_DISABLE_LOGS
Expand Down
6 changes: 6 additions & 0 deletions llama.cpp/server/server.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2044,6 +2044,9 @@ static void server_params_parse(int argc, char **argv, server_params &sparams,
std::string arg;
bool invalid_param = false;

assert(FLAG_gpu == LLAMAFILE_GPU_ERROR);
FLAG_gpu = LLAMAFILE_GPU_AUTO;

for (int i = 1; i < argc; i++)
{
arg = argv[i];
Expand Down Expand Up @@ -2462,6 +2465,9 @@ static void server_params_parse(int argc, char **argv, server_params &sparams,
exit(1);
}
}

params.n_gpu_layers = llamafile_gpu_layers(params.n_gpu_layers);

if (!params.kv_overrides.empty()) {
params.kv_overrides.emplace_back(llama_model_kv_override());
params.kv_overrides.back().key[0] = 0;
Expand Down
1 change: 1 addition & 0 deletions llamafile/cuda.c
Original file line number Diff line number Diff line change
Expand Up @@ -534,6 +534,7 @@ static bool CompileAmdUnix(const char *dso, const char *src, const char *tmpdso)
"-use_fast_math",
"-DGGML_BUILD=1",
"-DGGML_SHARED=1",
"-Wno-return-type",
"-Wno-unused-result",
"-DGGML_USE_HIPBLAS",
"-DGGML_CUDA_MMV_Y=1",
Expand Down
18 changes: 18 additions & 0 deletions llamafile/gpu.c
Original file line number Diff line number Diff line change
Expand Up @@ -57,6 +57,24 @@ bool llamafile_gpu_supported(void) {
return ggml_cublas_loaded();
}

/**
 * Finalizes the GPU layer count once all configuration has been parsed.
 *
 * Three cases are handled:
 *   1. Caller explicitly requested GPU layers (> 0): die with a fatal
 *      error unless a working GPU backend was actually found.
 *   2. Caller left the default (-1) on a Metal-capable system: offload
 *      a single layer (presumably enough to enable the Metal path —
 *      NOTE(review): confirm 1 is the intended sentinel here).
 *   3. Otherwise: no GPU use is wanted or possible, so force the global
 *      GPU mode to disabled.
 *
 * @param n_gpu_layers is the value parsed from the command line
 * @return the possibly-adjusted number of layers to offload
 */
int llamafile_gpu_layers(int n_gpu_layers) {
    if (n_gpu_layers > 0) {
        // explicit request: a missing GPU is a hard error, not a fallback
        if (!llamafile_gpu_supported()) {
            fprintf(stderr, "fatal error: --n-gpu-layers %d was passed but no gpus were found\n",
                    n_gpu_layers);
            exit(1);
        }
        return n_gpu_layers;
    }
    if (n_gpu_layers == -1 && ggml_metal_supported())
        return 1;
    // zero (or any other non-positive value) without Metal: turn GPUs off
    FLAG_gpu = LLAMAFILE_GPU_DISABLE;
    return n_gpu_layers;
}

/**
* Parses `--gpu` flag.
* @return GPU configuration, or -1 if `s` is a bad value
Expand Down
1 change: 1 addition & 0 deletions llamafile/llamafile.h
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,7 @@ extern int FLAG_gpu;
extern bool FLAG_tinyblas;
extern bool FLAG_nocompile;
extern bool FLAG_recompile;
int llamafile_gpu_layers(int);
bool llamafile_gpu_supported(void);
int llamafile_gpu_parse(const char *);
const char *llamafile_describe_gpu(void);
Expand Down

0 comments on commit 5dff322

Please sign in to comment.