feat: Add rocm builds and documentation #1012

Merged (41 commits) on Dec 13, 2023
Changes from 35 commits

Commits (41)
5b0816c  Added rocm builds and documentation (cromefire, Dec 10, 2023)
1be9c1e  Pulled build improvements from #902 (cromefire, Dec 10, 2023)
8a21760  Fixed build container for rocm build (cromefire, Dec 10, 2023)
1e3fe32  Install git in rocm container (cromefire, Dec 10, 2023)
1a1d4da  Fixed github step (cromefire, Dec 10, 2023)
b701a63  Try to fix if statement (cromefire, Dec 10, 2023)
82bbf8d  Added more generic dependency installation (cromefire, Dec 10, 2023)
081a9f3  upgraded rustup action (cromefire, Dec 10, 2023)
5308d34  Update sccache (cromefire, Dec 10, 2023)
f50b5af  Try pytorch manylinux image (cromefire, Dec 10, 2023)
980f3ed  Switched location for toolchain parameter (cromefire, Dec 10, 2023)
42ad479  Downgraded to deprecated action again (cromefire, Dec 10, 2023)
a5e69ca  Readded set default step (cromefire, Dec 10, 2023)
d4d38d7  Merge branch 'main' into rocm-release (cromefire, Dec 12, 2023)
5be320a  Install minimal rocm on the fly (cromefire, Dec 12, 2023)
e89ca16  fixed typo in binary name (cromefire, Dec 12, 2023)
c96853a  Downgraded checkout action (cromefire, Dec 12, 2023)
97fef46  Use curl to download (cromefire, Dec 12, 2023)
022548c  Add -y flag to yum (cromefire, Dec 12, 2023)
f4e99e7  Also install rocblas (cromefire, Dec 12, 2023)
7ae9d1d  Update release.yml (wsxiaoys, Dec 13, 2023)
17cfd18  Update release.yml (wsxiaoys, Dec 13, 2023)
895a2a2  Update prepare_build_environment.sh (wsxiaoys, Dec 13, 2023)
5c1ea2f  Update prepare_build_environment.sh (wsxiaoys, Dec 13, 2023)
18df1ba  Update build.rs (wsxiaoys, Dec 13, 2023)
d17bee0  Update build.rs (wsxiaoys, Dec 13, 2023)
3113962  Update README.md (wsxiaoys, Dec 13, 2023)
81f138a  Update website/docs/faq.mdx (wsxiaoys, Dec 13, 2023)
ab80cda  Update index.md (wsxiaoys, Dec 13, 2023)
23f2054  Update and rename docker-cuda.yml to docker.yml (wsxiaoys, Dec 13, 2023)
5202dfe  Delete .github/workflows/docker-rocm.yml (wsxiaoys, Dec 13, 2023)
f3d793f  Delete rocm.Dockerfile (wsxiaoys, Dec 13, 2023)
15767a8  Rename cuda.Dockerfile to Dockerfile (wsxiaoys, Dec 13, 2023)
1ae3822  Update docker.yml (wsxiaoys, Dec 13, 2023)
e44f48d  Update website/docs/installation/docker.mdx (wsxiaoys, Dec 13, 2023)
6fc195e  Update website/docs/installation/docker-compose.mdx (wsxiaoys, Dec 13, 2023)
2c55ee4  Update docker-compose.mdx (wsxiaoys, Dec 13, 2023)
a1f9589  Update docker-compose.mdx (wsxiaoys, Dec 13, 2023)
1ed492b  Update docker.mdx (wsxiaoys, Dec 13, 2023)
f7fe002  Update docker.mdx (wsxiaoys, Dec 13, 2023)
39a640b  Update website/docs/faq.mdx (wsxiaoys, Dec 13, 2023)
6 changes: 6 additions & 0 deletions .dockerignore
@@ -1,2 +1,8 @@
.idea
ci
clients
.github
python
**/target
**/node_modules
website
11 changes: 8 additions & 3 deletions .github/workflows/release.yml
@@ -26,8 +26,7 @@ jobs:
container: ${{ matrix.container }}
strategy:
matrix:
binary: [aarch64-apple-darwin, x86_64-manylinux2014, x86_64-manylinux2014-cuda117,
x86_64-windows-msvc-cuda117, x86_64-windows-msvc-cuda122]
binary: [aarch64-apple-darwin, x86_64-manylinux2014, x86_64-manylinux2014-cuda117, x86_64-windows-msvc-cuda117, x86_64-windows-msvc-cuda122, x86_64-manylinux2014-rocm57]
include:
- os: macos-latest
target: aarch64-apple-darwin
@@ -53,6 +52,11 @@ jobs:
ext: .exe
build_args: --features cuda
windows_cuda: '12.2.0'
- os: ubuntu-latest
target: x86_64-unknown-linux-gnu
binary: x86_64-manylinux2014-rocm57
container: ghcr.io/cromefire/hipblas-manylinux/2014/5.7:latest
build_args: --features rocm

env:
SCCACHE_GHA_ENABLED: true
@@ -72,7 +76,8 @@ jobs:
target: ${{ matrix.target }}
components: clippy

- run: rustup default ${{ env.RUST_TOOLCHAIN }}
- name: Set default rust version
run: rustup default ${{ env.RUST_TOOLCHAIN }}

- name: Sccache cache
uses: mozilla-actions/sccache-action@v0.0.3
3 changes: 2 additions & 1 deletion Dockerfile
@@ -29,12 +29,13 @@ RUN curl https://sh.rustup.rs -sSf | bash -s -- --default-toolchain ${RUST_TOOLC
ENV PATH="/root/.cargo/bin:${PATH}"

WORKDIR /root/workspace
COPY . .

RUN mkdir -p /opt/tabby/bin
RUN mkdir -p /opt/tabby/lib
RUN mkdir -p target

COPY . .

RUN --mount=type=cache,target=/usr/local/cargo/registry \
--mount=type=cache,target=/root/workspace/target \
cargo build --features cuda --release --package tabby && \
6 changes: 3 additions & 3 deletions website/docs/extensions/troubleshooting.md
@@ -112,9 +112,9 @@ for the current code context.
If your completion requests are timing out, Tabby may display a warning message.
This could be due to network issues or poor server performance, especially when
running a large model on a CPU. To improve performance, consider running the model
on a GPU with CUDA support or on Apple M1/M2 with Metal support. When running
the server, make sure to specify the device in the arguments using `--device cuda`
or `--device metal`. You can also try using a smaller model from the available [models](https://tabby.tabbyml.com/docs/models/).
on a GPU with CUDA or ROCm support or on Apple M1/M2 with Metal support. When running
the server, make sure to specify the device in the arguments using `--device cuda`, `--device rocm` or
`--device metal`. You can also try using a smaller model from the available [models](https://tabby.tabbyml.com/docs/models/).

By default, the timeout for automatically triggered completion requests is set to 4 seconds.
You can adjust this timeout value in the `~/.tabby-client/agent/config.toml` configuration file.
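For reference, a minimal sketch of a native launch using the ROCm device flag documented above; the binary invocation, model name, and `--device rocm` value all appear elsewhere in this PR, and everything else (GPU model, install method) is assumed:

```bash
# Serve a small model on an AMD GPU via ROCm instead of CUDA or Metal.
tabby serve --model TabbyML/StarCoder-1B --device rocm
```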
19 changes: 15 additions & 4 deletions website/docs/faq.mdx
@@ -1,10 +1,11 @@
import CodeBlock from '@theme/CodeBlock';

# ⁉️ Frequently Asked Questions

<details>
<summary>How much VRAM a LLM model consumes?</summary>
<div>By default, Tabby operates in int8 mode with CUDA, requiring approximately 8GB of VRAM for CodeLlama-7B.</div>
<div>
<p>By default, Tabby operates in int8 mode with CUDA, requiring approximately 8GB of VRAM for CodeLlama-7B.</p>
<p>For ROCm the actual limits are currently largely untested, but the same CodeLlama-7B seems to use 8GB of VRAM as well on an AMD Radeon™ RX 7900 XTX according to the ROCm monitoring tools.</p>
</div>
</details>
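As a rough way to reproduce the VRAM observation above, a sketch using ROCm's monitoring tools; it assumes `rocm-smi` is installed with the ROCm stack and that its flags have not changed in your release:

```bash
# Report per-GPU VRAM usage; run while Tabby is serving a model.
rocm-smi --showmeminfo vram
```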

<details>
@@ -24,7 +25,17 @@ import CodeBlock from '@theme/CodeBlock';
<details>
<summary>How to utilize multiple NVIDIA GPUs?</summary>
<div>
<p>Tabby only supports the use of a single GPU. To utilize multiple GPUs, you can initiate multiple Tabby instances and set CUDA_VISIBLE_DEVICES accordingly.</p>
<p>Tabby only supports the use of a single GPU. To utilize multiple GPUs, you can initiate multiple Tabby instances and set CUDA_VISIBLE_DEVICES or HIP_VISIBLE_DEVICES accordingly.</p>
</div>
</details>
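A hedged sketch of the multi-instance workaround described above, for a two-GPU ROCm machine; the `--port` flag and the second port number are assumptions for illustration, not part of this PR:

```bash
# One Tabby instance per GPU, pinned with HIP_VISIBLE_DEVICES
# (use CUDA_VISIBLE_DEVICES instead for NVIDIA GPUs).
HIP_VISIBLE_DEVICES=0 tabby serve --model TabbyML/StarCoder-1B --device rocm --port 8080 &
HIP_VISIBLE_DEVICES=1 tabby serve --model TabbyML/StarCoder-1B --device rocm --port 8081 &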

<details>
<summary>My AMD GPU isn't supported by ROCm</summary>
<div>
<p>
If a similar GPU is supported by ROCm, you can set the HSA_OVERRIDE_GFX_VERSION environment variable to that GPU's version.
For example, set it to 10.3.0 for RDNA2 and to 11.0.0 for RDNA3.
</p>
</div>
</details>
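A minimal sketch of applying that override when launching Tabby natively; the 11.0.0 value is the RDNA3 example from the answer above, and whether your particular card needs (or tolerates) the override is an assumption to verify:

```bash
# Present the GPU to the ROCm runtime as a supported RDNA3 (gfx1100) part.
HSA_OVERRIDE_GFX_VERSION=11.0.0 tabby serve --model TabbyML/StarCoder-1B --device rocm
```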

2 changes: 1 addition & 1 deletion website/docs/installation/apple.md
@@ -14,4 +14,4 @@ brew install tabbyml/tabby/tabby
tabby serve --device metal --model TabbyML/StarCoder-1B
```

The compute power of M1/M2 is limited and is likely to be sufficient only for individual usage. If you require a shared instance for a team, we recommend considering Docker hosting with CUDA. You can find more information about Docker [here](./docker).
The compute power of M1/M2 is limited and is likely to be sufficient only for individual usage. If you require a shared instance for a team, we recommend considering Docker hosting with CUDA or ROCm. You can find more information about Docker [here](./docker).
27 changes: 24 additions & 3 deletions website/docs/installation/docker-compose.mdx
@@ -5,6 +5,8 @@ sidebar_position: 1
# Docker Compose
This guide explains how to launch Tabby using docker-compose.



import Tabs from '@theme/Tabs';
import TabItem from '@theme/TabItem';

@@ -16,8 +18,8 @@ version: '3.5'

services:
tabby:
restart: always
image: tabbyml/tabby
restart: unless-stopped
image: tabbyml/tabby-cuda
command: serve --model TabbyML/StarCoder-1B --device cuda
volumes:
- "$HOME/.tabby:/data"
@@ -33,14 +35,33 @@ services:
```

</TabItem>
<TabItem value="rocm" label="ROCm">

```yaml title="docker-compose.yml"
version: '3.5'
services:
tabby:
restart: unless-stopped
image: tabbyml/tabby-rocm
command: serve --model TabbyML/StarCoder-1B --device rocm
volumes:
- "$HOME/.tabby:/data"
ports:
- 8080:8080
devices:
- /dev/dri
- /dev/kfd
```

</TabItem>
<TabItem value="cpu" label="CPU">

```yaml title="docker-compose.yml"
version: '3.5'

services:
tabby:
restart: always
restart: unless-stopped
image: tabbyml/tabby
command: serve --model TabbyML/StarCoder-1B
volumes:
9 changes: 8 additions & 1 deletion website/docs/installation/docker.mdx
@@ -13,7 +13,14 @@ import TabItem from '@theme/TabItem';
<TabItem value="cuda" label="CUDA (requires NVIDIA Container Toolkit)" default>

```bash title="run.sh"
docker run -it --gpus all -p 8080:8080 -v $HOME/.tabby:/data tabbyml/tabby serve --model TabbyML/StarCoder-1B --device cuda
docker run -it --gpus all -p 8080:8080 -v $HOME/.tabby:/data tabbyml/tabby-cuda serve --model TabbyML/StarCoder-1B --device cuda
```

</TabItem>
<TabItem value="rocm" label="ROCm" default>

```bash title="run.sh"
docker run -it --device /dev/dri --device /dev/kfd -p 8080:8080 -v $HOME/.tabby:/data tabbyml/tabby-rocm serve --model TabbyML/StarCoder-1B --device rocm
```

</TabItem>
Expand Down