<a href="https://colab.research.google.com/github/Noxturnix/KoboldAI/blob/main/colab/GPU.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# KoboldAI 0cc4m's fork (4bit support) on Google Colab

This notebook allows you to download and use 4bit quantized models (GPTQ) on Google Colab.

---
# How to use

0. If you are playing on a mobile device, tap the "run" button in the "Tap this if you play on Mobile" cell to prevent the system from killing this colab tab.
1. Choose a GPTQ model in the "Run this cell to download model" cell. You can type a custom model name in the `Model` field, but make sure to [rename the model file](https://docs.alpindale.dev/static/koboldai-4bit-9.png) to the right name. Then click the "run" button.
2. Click the "run" button in the "Click this to start KoboldAI" cell.
3. After you get your KoboldAI URL, open it (assuming you are using the new UI), click "Load Model", click "Load a model from its directory", and choose the model you downloaded.
4. Enjoy! For prompting format, refer to the original model card of the model you selected.

In [None]:
#@title <-- Tap this if you play on Mobile { display-mode: "form" }
%%html
<!-- Audio player the user starts manually; per the message below, keeping it playing keeps the tab alive on mobile. -->
<b>Press play on the music player to keep the tab alive, then start KoboldAI below (Uses only 13MB of data)</b><br/>
<audio src="https://raw.githubusercontent.com/KoboldAI/KoboldAI-Client/main/colab/silence.m4a" controls></audio>

In [None]:
#@title <-- Run this cell to download model

Model = "Pygmalion 13B 4bit GPTQ" #@param ["Pygmalion 13B 4bit GPTQ", "Metharme 13B 4bit GPTQ", "Wizard Vicuna 13B 4bit GPTQ", "Wizard Vicuna 13B Uncensored 4bit GPTQ", "Pygmalion 7B 4bit GPTQ", "Metharme 7B 4bit GPTQ", "Wizard Vicuna 7B Uncensored 4bit GPTQ"] {allow-input: true}
rename_model_file_from = "" #@param {type:"string"}
rename_model_file_to = "" #@param {type:"string"}
download_to_google_drive = True #@param {type:"boolean"}

from google.colab import drive
if download_to_google_drive:
  drive.mount('/content/drive/')
else:
  import os
  if not os.path.exists("/content/drive"):
    os.mkdir("/content/drive")
  if not os.path.exists("/content/drive/MyDrive/"):
    os.mkdir("/content/drive/MyDrive/")

if Model == "Pygmalion 13B 4bit GPTQ":
  model_name = "pygmalion-13b-4bit-128g"
  hf_url = f"https://huggingface.co/notstoic/{model_name}"
elif Model == "Metharme 13B 4bit GPTQ":
  model_name = "Metharme-13b-4bit-GPTQ"
  hf_url = f"https://huggingface.co/TehVenom/{model_name}"
  rename_model_file_from = "model.safetensors"
  rename_model_file_to = "4bit-128g.safetensors"
elif Model == "Wizard Vicuna 13B 4bit GPTQ":
  model_name = "wizard-vicuna-13B-GPTQ"
  hf_url = f"https://huggingface.co/TheBloke/{model_name}"
  rename_model_file_from = "wizard-vicuna-13B-GPTQ-4bit.compat.no-act-order.safetensors"
  rename_model_file_to = "4bit-128g.safetensors"
elif Model == "Wizard Vicuna 13B Uncensored 4bit GPTQ":
  model_name = "Wizard-Vicuna-13B-Uncensored-GPTQ"
  hf_url = f"https://huggingface.co/TheBloke/{model_name}"
  rename_model_file_from = "Wizard-Vicuna-13B-Uncensored-GPTQ-4bit-128g.compat.no-act-order.safetensors"
  rename_model_file_to = "4bit-128g.safetensors"
elif Model == "Pygmalion 7B 4bit GPTQ":
  model_name = "Pygmalion-7b-4bit-GPTQ-Safetensors"
  hf_url = f"https://huggingface.co/TehVenom/{model_name}"
  rename_model_file_from = "Pygmalion-7B-GPTQ-4bit.act-order.safetensors"
  rename_model_file_to = "4bit-128g.safetensors"
elif Model == "Metharme 7B 4bit GPTQ":
  model_name = "Metharme-7b-4bit-GPTQ-Safetensors"
  hf_url = f"https://huggingface.co/TehVenom/{model_name}"
  rename_model_file_from = "Metharme-7B-GPTQ-4bit.act-order.safetensors"
  rename_model_file_to = "4bit-128g.safetensors"
elif Model == "Wizard Vicuna 7B Uncensored 4bit GPTQ":
  model_name = "Wizard-Vicuna-7B-Uncensored-GPTQ"
  hf_url = f"https://huggingface.co/TheBloke/{model_name}"
  rename_model_file_from = "Wizard-Vicuna-7B-Uncensored-GPTQ-4bit-128g.no-act-order.safetensors"
  rename_model_file_to = "4bit-128g.safetensors"

print(f"Downloading {Model}...")
!mkdir -p /content/download_model && cd /content/download_model && git clone --single-branch --depth=1 $hf_url
!rm -rf /content/download_model/$model_name/.git

if rename_model_file_from and rename_model_file_to:
  print(f"Renaming {rename_model_file_from} to {rename_model_file_to}...")
  !cd /content/download_model/$model_name && mv $rename_model_file_from $rename_model_file_to

if download_to_google_drive:
  print("Uploading to Google Drive...")
!mkdir -p /content/drive/MyDrive/KoboldAI/models && mv /content/download_model/$model_name /content/drive/MyDrive/KoboldAI/models

In [None]:
#@title <b><-- Click this to start KoboldAI</b>

Version = "GPTQ" #@param ["GPTQ"] {allow-input: false}
Provider = "Cloudflare" #@param ["Localtunnel", "Cloudflare"]
use_google_drive = True #@param {type:"boolean"}

!nvidia-smi
from google.colab import drive
if use_google_drive:
  drive.mount('/content/drive/')
else:
  import os
  if not os.path.exists("/content/drive"):
    os.mkdir("/content/drive")
  if not os.path.exists("/content/drive/MyDrive/"):
    os.mkdir("/content/drive/MyDrive/")

Revision = ""

if Provider == "Localtunnel":
  tunnel = "--localtunnel yes"
else:
  tunnel = ""

!wget https://raw.githubusercontent.com/Noxturnix/KoboldAI/main/colabkobold.sh -O - | bash /dev/stdin -g $Version $tunnel