<a href="https://colab.research.google.com/github/C-nocturnum/university/blob/main/LLM_to_GGUF.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

 # Model Quantization with GGUF in Google Colab
 ## [ruslanmv/convert-model-to-gguf](https://github.com/ruslanmv/convert-model-to-gguf/blob/master/LLM_to_GGUF.ipynb)

## Setup Google Colab

In [44]:
!pip install gradio --no-binary :all:
!pip install huggingface_hub==0.25.0
#!pip install --force-reinstall pandas==2.2.2
!pip install pandas numpy protobuf
!pip install --force-reinstall numpy==2.1.0
!pip install --force-reinstall protobuf==5.29.3

Collecting huggingface_hub==0.25.0
  Using cached huggingface_hub-0.25.0-py3-none-any.whl.metadata (13 kB)
Using cached huggingface_hub-0.25.0-py3-none-any.whl (436 kB)
Installing collected packages: huggingface_hub
  Attempting uninstall: huggingface_hub
    Found existing installation: huggingface-hub 0.23.5
    Uninstalling huggingface-hub-0.23.5:
      Successfully uninstalled huggingface-hub-0.23.5
Successfully installed huggingface_hub-0.25.0


Collecting numpy==2.1.0
  Using cached numpy-2.1.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (60 kB)
Using cached numpy-2.1.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (16.3 MB)
^C
^C


In [1]:
from IPython.display import clear_output
# Fetch the llama.cpp sources and switch the notebook's working directory into the checkout.
!git clone https://github.com/ggerganov/llama.cpp.git
%cd llama.cpp
# Refresh the apt index, then install the compiler toolchain, CMake, git and
# the OpenSSL/cURL development headers.
!apt update
!apt install -y build-essential cmake git libssl-dev libcurl4-openssl-dev
# Drop the verbose clone/apt logs from the cell output.
clear_output()

In [2]:
!pwd

/content/llama.cpp


## Installation Standard

### With Make

In [3]:
!make
clear_output()

### With cmake

In [20]:
#cmake -B build
#cmake --build build --config Release

## CUDA Enabled


In [21]:
### With make

In [22]:
#CUDA ENABLED
#make GGML_CUDA=1

### With cmake

In [24]:
#cmake -B build -DGGML_CUDA=ON
#cmake --build build --config Release

SyntaxError: invalid syntax (<ipython-input-24-4390243682ca>, line 1)

Now you can see that several applications have been installed:

In [4]:
!ls

AUTHORS			       docs	   mypy.ini
build-xcframework.sh	       examples    pocs
ci			       flake.lock  poetry.lock
cmake			       flake.nix   prompts
CMakeLists.txt		       ggml	   pyproject.toml
CMakePresets.json	       gguf-py	   pyrightconfig.json
CODEOWNERS		       grammars    README.md
common			       include	   requirements
CONTRIBUTING.md		       LICENSE	   requirements.txt
convert_hf_to_gguf.py	       licenses    scripts
convert_hf_to_gguf_update.py   Makefile    SECURITY.md
convert_llama_ggml_to_gguf.py  media	   src
convert_lora_to_gguf.py        models	   tests


In [5]:
 %cd ..

/content


In [7]:
#!python convert.py C-Nocturnum/Meta-Llama-3-8B-Instruct-cyber-abliterated -o CyberAbliterated-Llama.gguf --format gguf


python3: can't open file '/content/convert.py': [Errno 2] No such file or directory




# Hugging Face Model Quantization with GGUF in Google Colab

This notebook provides a streamlined way to quantize a Hugging Face model using the GGUF format.


In [27]:
from IPython.display import clear_output
# Clone the llama.cpp repository
# (left disabled here; the pip command below reads llama.cpp/requirements.txt,
# so a llama.cpp checkout must already exist in the current directory)
#!git clone https://github.com/ggerganov/llama.cpp
!pip install -r llama.cpp/requirements.txt
# Install additional dependencies
# NOTE(review): only gradio_huggingfacehub_search is version-pinned; the other
# packages resolve to whatever is current at install time.
!pip install gradio "huggingface-hub" "gradio_huggingfacehub_search==0.0.7" "APScheduler"
# Hide the long pip logs from the cell output.
clear_output()

In [28]:
import os
import shutil
import subprocess
import signal
import gradio as gr

from huggingface_hub import create_repo, HfApi
from huggingface_hub import snapshot_download
from huggingface_hub import whoami
from huggingface_hub import ModelCard

from gradio_huggingfacehub_search import HuggingfaceHubSearch

from apscheduler.schedulers.background import BackgroundScheduler

from textwrap import dedent

ValueError: numpy.dtype size changed, may indicate binary incompatibility. Expected 96 from C header, got 88 from PyObject

In [29]:
# Read the Hugging Face token from the Colab secret named 'HF_TOKEN' so it is
# never written into the notebook itself.
from google.colab import userdata
HF_TOKEN = userdata.get('HF_TOKEN')
# Authenticate huggingface_hub with that token.
from huggingface_hub import login
login(token=HF_TOKEN)

The token has not been saved to the git credentials helper. Pass `add_to_git_credential=True` in this function directly or `--add-to-git-credential` if using via `huggingface-cli` if you want to set the git credential as well.
Token is valid (permission: fineGrained).
Your token has been saved to /root/.cache/huggingface/token
Login successful


In [30]:
from huggingface_hub import HfApi

def get_username(hf_token):
    """Return the account name that the given Hugging Face token belongs to.

    Any failure while querying the Hub (or a response without a "name"
    entry) is re-raised as ``ValueError``.
    """
    client = HfApi()
    try:
        return client.whoami(token=hf_token)["name"]
    except Exception as err:
        raise ValueError(f"Error retrieving username: {err}")

In [31]:
username = get_username(HF_TOKEN)
print(f"Logged in as: {username}")  # Display for verification

Logged in as: C-Nocturnum


In [32]:
def generate_importance_matrix(model_path, train_data_path):
    """Run ``llama-imatrix`` from inside the ``llama.cpp`` directory.

    Args:
        model_path: Path of the GGUF model, relative to the directory that
            contains ``llama.cpp`` (it is passed to the tool as ``../<model_path>``).
        train_data_path: Calibration text file. If it exists relative to the
            caller's working directory it is converted to an absolute path so
            it is still valid after the ``chdir``; otherwise it is passed through
            unchanged.

    Raises:
        Exception: If the model file does not exist.

    The working directory is always restored, even when an error is raised.
    """
    start_dir = os.getcwd()

    # Resolve the training file before changing directory; callers validate it
    # relative to the directory they are in, not relative to llama.cpp/.
    if os.path.isfile(train_data_path):
        train_data_path = os.path.abspath(train_data_path)

    # Argument list (no shell): paths need no quoting and signals go straight
    # to llama-imatrix instead of to an intermediate shell.
    imatrix_command = [
        "./llama-imatrix",
        "-m", f"../{model_path}",
        "-f", train_data_path,
        "-ngl", "99",
        "--output-frequency", "10",
    ]

    os.chdir("llama.cpp")
    try:
        print(f"Current working directory: {os.getcwd()}")
        print(f"Files in the current directory: {os.listdir('.')}")

        if not os.path.isfile(f"../{model_path}"):
            raise Exception(f"Model file not found: {model_path}")

        print("Running imatrix command...")
        process = subprocess.Popen(imatrix_command)

        try:
            # The run is capped at 60 seconds; after that the tool is asked to stop.
            process.wait(timeout=60)
        except subprocess.TimeoutExpired:
            print("Imatrix computation timed out. Sending SIGINT to allow graceful termination...")
            process.send_signal(signal.SIGINT)
            try:
                process.wait(timeout=5)
            except subprocess.TimeoutExpired:
                print("Imatrix process still didn't terminate. Forcibly terminating process...")
                process.kill()
                process.wait()  # reap the killed child so it does not linger as a zombie
    finally:
        # Restore the caller's directory on success and on every error path.
        os.chdir(start_dir)

    print("Importance matrix generation completed.")

def split_upload_model(model_path, repo_id, HF_TOKEN, split_max_tensors=256, split_max_size=None):
    """Shard a GGUF file with ``llama-gguf-split`` and upload the shards.

    Args:
        model_path: Path of the GGUF file to split.
        repo_id: Target Hugging Face repository (``namespace/name``).
        HF_TOKEN: Hugging Face access token; ``None`` is rejected.
        split_max_tensors: Maximum tensors per shard. Converted to ``int``
            because UI number inputs may deliver a float such as ``256.0``.
        split_max_size: Optional value for ``--split-max-size``; skipped when empty.

    Raises:
        ValueError: If no token is supplied.
        Exception: If splitting fails, no shards are produced, or an upload fails.
    """
    if HF_TOKEN is None:
        raise ValueError("You have to be logged in.")

    import re  # local import: only needed to recognise shard file names

    # Strip only the final extension so dotted names ("llama-3.1-8b-q4_k_m.gguf")
    # keep their full stem as the shard prefix.
    shard_prefix = os.path.splitext(model_path)[0]

    # Argument list instead of a shell string: no quoting problems with unusual file names.
    split_cmd = ["llama.cpp/llama-gguf-split", "--split", "--split-max-tensors", str(int(split_max_tensors))]
    if split_max_size:
        split_cmd += ["--split-max-size", str(split_max_size)]
    split_cmd += [model_path, shard_prefix]

    print(f"Split command: {' '.join(split_cmd)}")

    try:
        result = subprocess.run(split_cmd, capture_output=True, text=True)
    except OSError as e:
        # e.g. the llama-gguf-split binary is missing or not executable
        raise Exception(f"Error splitting the model: {e}") from e
    print(f"Split command stdout: {result.stdout}")
    print(f"Split command stderr: {result.stderr}")

    if result.returncode != 0:
        raise Exception(f"Error splitting the model: {result.stderr}")
    print("Model split successfully!")

    # Match only real shards ("<prefix>-00001-of-00003.gguf"). A plain prefix
    # match would also pick up the unsplit source file.
    shard_dir = os.path.dirname(shard_prefix) or '.'
    shard_pattern = re.compile(re.escape(os.path.basename(shard_prefix)) + r"-\d+-of-\d+\.gguf")
    sharded_model_files = sorted(f for f in os.listdir(shard_dir) if shard_pattern.fullmatch(f))
    if sharded_model_files:
        print(f"Sharded model files: {sharded_model_files}")
        api = HfApi(token=HF_TOKEN)
        for file in sharded_model_files:
            file_path = os.path.join(shard_dir, file)
            print(f"Uploading file: {file_path}")
            try:
                api.upload_file(
                    path_or_fileobj=file_path,
                    path_in_repo=file,
                    repo_id=repo_id,
                )
            except Exception as e:
                raise Exception(f"Error uploading file {file_path}: {e}") from e
    else:
        raise Exception("No sharded files found.")

    print("Sharded model has been uploaded successfully!")


In [33]:
from huggingface_hub import HfApi, login, CommitOperationAdd
import io
import io
import tempfile
def update_model_card(model_id, username, model_name, q_method, hf_token, new_repo_id, quantized_gguf_name):
    """
    Creates or updates the model card (README.md) for the GGUF-converted model on the Hugging Face Hub.

    Args:
        model_id: Hub id of the original model; linked and tagged in the card.
        username: Unused here; kept so existing callers keep working.
        model_name: Unused here; kept so existing callers keep working.
        q_method: Unused here; kept so existing callers keep working.
        hf_token: Token used for the commit. When ``None`` the client falls
            back to whatever credentials huggingface_hub already has.
        new_repo_id: Repository that receives the README.md.
        quantized_gguf_name: GGUF file name shown in the usage examples.
    """
    # Use the token that was passed in rather than relying on a prior global login.
    api = HfApi(token=hf_token)

    # Model card content (Markdown format) with YAML metadata.
    # The text starts directly with "---" so the front matter is the very first line of the file.
    card_text = f"""---
tags:
- gguf
- llama.cpp
- quantized
- {model_id}
license: apache-2.0
---

# {new_repo_id}

This model was converted to GGUF format from [`{model_id}`](https://huggingface.co/{model_id}) using llama.cpp via
[Convert Model to GGUF](https://github.com/ruslanmv/convert-model-to-GGUF).

**Key Features:**

* Quantized for reduced file size (GGUF format)
* Optimized for use with llama.cpp
* Compatible with llama-server for efficient serving

Refer to the [original model card](https://huggingface.co/{model_id}) for more details on the base model.

## Usage with llama.cpp

**1. Install llama.cpp:**

```bash
brew install llama.cpp  # For macOS/Linux
```

**2. Run Inference:**

**CLI:**

```bash
llama-cli --hf-repo {new_repo_id} --hf-file {quantized_gguf_name} -p "Your prompt here"
```

**Server:**

```bash
llama-server --hf-repo {new_repo_id} --hf-file {quantized_gguf_name} -c 2048
```

For more advanced usage, refer to the [llama.cpp repository](https://github.com/ggerganov/llama.cpp).
"""

    # Commit the card straight from memory: CommitOperationAdd accepts raw
    # bytes, so no temporary file is created (the previous one was never deleted).
    operations = [
        CommitOperationAdd(path_in_repo="README.md", path_or_fileobj=card_text.encode("utf-8"))
    ]

    api.create_commit(
        repo_id=new_repo_id,
        operations=operations,
        commit_message="Create/update model card (README.md)"
    )

    print("Model card (README.md) updated/created successfully!")


In [34]:

# Example Usage (replace with actual values)
# Example values for trying update_model_card by hand (replace with your own).
model_id = "C-Nocturnum/Meta-Llama-3-8B-Instruct-cyber-abliterated"
username = "C-Nocturnum"
# Repository *name* only: the namespace is prepended when new_repo_id is built,
# so including it here would yield an invalid "user/user/name" id.
model_name = f"{model_id.split('/')[-1]}-GGUF"
q_method = "4bit"
hf_token = HF_TOKEN  # HF_TOKEN must already be defined (your Hugging Face token)
new_repo_id = f"{username}/{model_name}"
quantized_gguf_name = f"{model_name}-{q_method}.gguf"
# Uncomment to push the model card:
# update_model_card(model_id, username, model_name, q_method, hf_token, new_repo_id, quantized_gguf_name)

In [35]:
def process_model(model_id, q_method, use_imatrix, private_repo, train_data_file, split_model, split_max_tensors, split_max_size, hf_token):
    """Download a Hub model, convert it to GGUF, quantize it and upload the result.

    Args:
        model_id: Source model on the Hub (``namespace/name``).
        q_method: Quantization type passed to ``llama-quantize`` (e.g. ``Q4_K_M``).
        use_imatrix: Whether to compute and use an importance matrix.
        private_repo: Create the destination repository as private.
        train_data_file: Optional uploaded calibration file (object with a
            ``.name`` path); ``groups_merged.txt`` is used when absent.
        split_model: Shard the quantized file before uploading.
        split_max_tensors: Forwarded to ``split_upload_model``.
        split_max_size: Forwarded to ``split_upload_model``.
        hf_token: Hugging Face access token.

    Returns:
        A Markdown status string: a success message with a link, or
        ``"Error: ..."`` when any step fails (errors are reported, not raised,
        so a UI can display them).

    Raises:
        ValueError: If ``hf_token`` is ``None``.
    """
    if hf_token is None:
        raise ValueError("You must be logged in to use HF Convert")

    import sys  # local import: used to run the converter with this interpreter

    model_name = model_id.split('/')[-1]
    fp16 = f"{model_name}.fp16.gguf"

    try:
        api = HfApi(token=hf_token)

        # Prefer safetensors weights when the repository has any, otherwise fall back to *.bin.
        has_safetensors = any(
            file.path.endswith(".safetensors")
            for file in api.list_repo_tree(
                repo_id=model_id,
                recursive=True,
            )
        )
        weights_pattern = "*.safetensors" if has_safetensors else "*.bin"

        # Build the list with the weight pattern as ONE element. `list += str`
        # would append each character, including a bare "*" that matches everything.
        dl_pattern = ["*.md", "*.json", "*.model", weights_pattern]

        api.snapshot_download(repo_id=model_id, local_dir=model_name, local_dir_use_symlinks=False, allow_patterns=dl_pattern)
        print("Model downloaded successfully!")
        print(f"Current working directory: {os.getcwd()}")
        print(f"Model directory contents: {os.listdir(model_name)}")

        # The converter in the llama.cpp checkout is named with underscores.
        conversion_script = "convert_hf_to_gguf.py"
        fp16_conversion = [
            sys.executable or "python",
            f"llama.cpp/{conversion_script}",
            model_name,
            "--outtype", "f16",
            "--outfile", fp16,
        ]
        # text=True so error messages show readable stderr rather than a bytes repr.
        result = subprocess.run(fp16_conversion, capture_output=True, text=True)
        print(result.stdout)
        if result.returncode != 0:
            raise Exception(f"Error converting to fp16: {result.stderr}")
        print("Model converted to fp16 successfully!")
        print(f"Converted model path: {fp16}")

        imatrix_path = "llama.cpp/imatrix.dat"

        if use_imatrix:
            if train_data_file:
                train_data_path = train_data_file.name
            else:
                train_data_path = "groups_merged.txt"

            print(f"Training data file path: {train_data_path}")

            if not os.path.isfile(train_data_path):
                raise Exception(f"Training data file not found: {train_data_path}")

            # generate_importance_matrix changes into llama.cpp/ before running the
            # tool, so hand it an absolute path that stays valid there.
            generate_importance_matrix(fp16, os.path.abspath(train_data_path))
        else:
            print("Not using imatrix quantization.")

        username = get_username(hf_token)
        suffix = "-imat" if use_imatrix else ""
        quantized_gguf_name = f"{model_name.lower()}-{q_method.lower()}{suffix}.gguf"
        quantized_gguf_path = quantized_gguf_name

        quantise_ggml = ["./llama.cpp/llama-quantize"]
        if use_imatrix:
            quantise_ggml += ["--imatrix", imatrix_path]
        quantise_ggml += [fp16, quantized_gguf_path, q_method]
        result = subprocess.run(quantise_ggml, capture_output=True, text=True)
        if result.returncode != 0:
            raise Exception(f"Error quantizing: {result.stderr}")
        print(f"Quantized successfully with {q_method} option!")
        print(f"Quantized model path: {quantized_gguf_path}")

        new_repo_url = api.create_repo(repo_id=f"{username}/{model_name}-{q_method}-GGUF", exist_ok=True, private=private_repo)
        new_repo_id = new_repo_url.repo_id
        print("Repo created successfully!", new_repo_url)

        # Both upload paths target the repository id returned by the Hub.
        if split_model:
            split_upload_model(quantized_gguf_name, new_repo_id, hf_token, split_max_tensors, split_max_size)
        else:
            api.upload_file(path_or_fileobj=quantized_gguf_name, path_in_repo=quantized_gguf_name, repo_id=new_repo_id)

        print("Model uploaded successfully!")

        update_model_card(model_id, username, model_name, q_method, hf_token, new_repo_id, quantized_gguf_name)
        print("Model card created successfully!")

        return f"Model processed and uploaded successfully! Check it out [here](https://huggingface.co/{new_repo_id})"

    except Exception as e:
        # Deliberately reported as text so the caller (e.g. the Gradio UI) can show it.
        print(f"Error: {e}")
        return f"Error: {e}"

Collecting huggingface_hub==0.25.0
  Downloading huggingface_hub-0.25.0-py3-none-any.whl.metadata (13 kB)
Downloading huggingface_hub-0.25.0-py3-none-any.whl (436 kB)
[?25l   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/436.4 kB[0m [31m?[0m eta [36m-:--:--[0m[2K   [91m━━━━━━━━━━━━━━[0m[90m╺[0m[90m━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m153.6/436.4 kB[0m [31m4.3 MB/s[0m eta [36m0:00:01[0m[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m436.4/436.4 kB[0m [31m6.6 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: huggingface_hub
  Attempting uninstall: huggingface_hub
    Found existing installation: huggingface-hub 0.23.5
    Uninstalling huggingface-hub-0.23.5:
      Successfully uninstalled huggingface-hub-0.23.5
Successfully installed huggingface_hub-0.25.0


Collecting protobuf==5.29.3
  Downloading protobuf-5.29.3-cp38-abi3-manylinux2014_x86_64.whl.metadata (592 bytes)
Downloading protobuf-5.29.3-cp38-abi3-manylinux2014_x86_64.whl (319 kB)
[?25l   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/319.7 kB[0m [31m?[0m eta [36m-:--:--[0m[2K   [91m━━━━━━━━━━━━━━━[0m[90m╺[0m[90m━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m122.9/319.7 kB[0m [31m3.5 MB/s[0m eta [36m0:00:01[0m[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m319.7/319.7 kB[0m [31m5.2 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: protobuf
  Attempting uninstall: protobuf
    Found existing installation: protobuf 4.25.6
    Uninstalling protobuf-4.25.6:
      Successfully uninstalled protobuf-4.25.6
[31mERROR: pip's dependency resolver does not currently take into account all the packages that are installed. This behaviour is the source of the following dependency conflicts.
tensorflow 2.18.0 requires numpy<2.1.0,>=1.26.0, but y

Collecting pandas==2.2.2
  Downloading pandas-2.2.2-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (19 kB)
Collecting numpy>=1.23.2 (from pandas==2.2.2)
  Using cached numpy-2.2.5-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (62 kB)
Collecting python-dateutil>=2.8.2 (from pandas==2.2.2)
  Downloading python_dateutil-2.9.0.post0-py2.py3-none-any.whl.metadata (8.4 kB)
Collecting pytz>=2020.1 (from pandas==2.2.2)
  Downloading pytz-2025.2-py2.py3-none-any.whl.metadata (22 kB)
Collecting tzdata>=2022.7 (from pandas==2.2.2)
  Downloading tzdata-2025.2-py2.py3-none-any.whl.metadata (1.4 kB)
Collecting six>=1.5 (from python-dateutil>=2.8.2->pandas==2.2.2)
  Downloading six-1.17.0-py2.py3-none-any.whl.metadata (1.7 kB)
Downloading pandas-2.2.2-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (13.0 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m13.0/13.0 MB[0m [31m111.1 MB/s[0m eta [36m0:00:00[0m
[?25hUsing cached nu

Collecting numpy==2.1.0
  Downloading numpy-2.1.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (60 kB)
[?25l     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/60.9 kB[0m [31m?[0m eta [36m-:--:--[0m[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m60.9/60.9 kB[0m [31m1.9 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading numpy-2.1.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (16.3 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m16.3/16.3 MB[0m [31m106.1 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: numpy
  Attempting uninstall: numpy
    Found existing installation: numpy 2.2.5
    Uninstalling numpy-2.2.5:
      Successfully uninstalled numpy-2.2.5
[31mERROR: pip's dependency resolver does not currently take into account all the packages that are installed. This behaviour is the source of the following dependency conflicts.
tensorflow 2.18.0 requires numpy<2.1.0,>=1.26.0, b

In [37]:
!pip install gradio



In [40]:
!pip install --force-reinstall pandas==2.2.2
!pip install --force-reinstall numpy>=1.20.0 # Upgraded NumPy here

Collecting pandas==2.2.2
  Using cached pandas-2.2.2-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (19 kB)
Collecting numpy>=1.23.2 (from pandas==2.2.2)
  Using cached numpy-2.2.5-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (62 kB)
Collecting python-dateutil>=2.8.2 (from pandas==2.2.2)
  Using cached python_dateutil-2.9.0.post0-py2.py3-none-any.whl.metadata (8.4 kB)
Collecting pytz>=2020.1 (from pandas==2.2.2)
  Using cached pytz-2025.2-py2.py3-none-any.whl.metadata (22 kB)
Collecting tzdata>=2022.7 (from pandas==2.2.2)
  Using cached tzdata-2025.2-py2.py3-none-any.whl.metadata (1.4 kB)
Collecting six>=1.5 (from python-dateutil>=2.8.2->pandas==2.2.2)
  Using cached six-1.17.0-py2.py3-none-any.whl.metadata (1.7 kB)
Using cached pandas-2.2.2-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (13.0 MB)
Using cached numpy-2.2.5-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (16.4 MB)
Using cached python_dateutil-2.9.0.post0-

[31mERROR: pip's dependency resolver does not currently take into account all the packages that are installed. This behaviour is the source of the following dependency conflicts.
tensorflow 2.18.0 requires numpy<2.1.0,>=1.26.0, but you have numpy 2.2.5 which is incompatible.
numba 0.60.0 requires numpy<2.1,>=1.22, but you have numpy 2.2.5 which is incompatible.
ydf 0.11.0 requires protobuf<6.0.0,>=5.29.1, but you have protobuf 4.25.6 which is incompatible.
peft 0.14.0 requires huggingface-hub>=0.25.0, but you have huggingface-hub 0.23.5 which is incompatible.
torchvision 0.21.0+cu124 requires torch==2.6.0, but you have torch 2.2.2+cpu which is incompatible.[0m[31m
[0m



In [43]:
import gradio as gr
from huggingface_hub import HfApi, ModelCard
import os
import subprocess
from textwrap import dedent

def gradio_app():
    """Build the Gradio Blocks UI for converting and quantizing a Hub model.

    The "Home" tab collects the conversion options and runs ``process_model``.
    The "Settings" tab stores a Hugging Face token in per-session state.

    Returns:
        The constructed ``gr.Blocks`` app (not yet launched).
    """
    with gr.Blocks() as demo:
        # Per-session token storage; it is only updated through event outputs.
        hf_token = gr.State(None)

        # ----- Home Tab -----
        with gr.Tab("Home"):
            with gr.Column():
                model_id_input = gr.Textbox(label="Model ID", placeholder="Enter the model ID (e.g., username/model-name)")

                # Standard quantization method options
                standard_quant_methods = [
                    "Q2_K", "Q3_K_S", "Q3_K_M", "Q3_K_L",
                    "Q4_0", "Q4_K_S", "Q4_K_M",
                    "Q5_0", "Q5_K_S", "Q5_K_M",
                    "Q6_K", "Q8_0"
                ]

                # Imatrix quantization method options
                imatrix_quant_methods = ["IQ3_M", "IQ3_XXS", "IQ4_NL", "IQ4_XS"]

                quant_method_input = gr.Dropdown(
                    choices=standard_quant_methods,
                    label="Quantization Method",
                    info="Select the GGML quantization type",
                    value="Q4_K_M",
                    filterable=False,
                    visible=True
                )

                use_imatrix_checkbox = gr.Checkbox(
                    value=False,
                    label="Use Imatrix Quantization",
                    info="Use importance matrix for quantization."
                )

                private_repo_checkbox = gr.Checkbox(
                    value=False,
                    label="Private Repo",
                    info="Create a private repo under your username."
                )

                # Initially hidden (shown when use_imatrix_checkbox is checked).
                # File extensions are given with a leading dot.
                train_data_file_input = gr.File(
                    label="Training Data File",
                    file_types=[".txt"],
                    visible=False
                )

                split_model_checkbox = gr.Checkbox(
                    value=False,
                    label="Split Model",
                    info="Shard the model using gguf-split."
                )

                # Initially hidden (shown when split_model_checkbox is checked)
                split_max_tensors_input = gr.Number(
                    value=256,
                    label="Max Tensors per File",
                    info="Maximum number of tensors per file when splitting model.",
                    visible=False
                )
                split_max_size_input = gr.Textbox(
                    label="Max File Size",
                    info="Maximum file size when splitting model (--split-max-size). May leave empty to use the default.",
                    visible=False
                )

                process_button = gr.Button("Process Model")
                output = gr.Markdown(label="Output")

                # ----- Dynamic Visibility Updates -----
                def update_quant_methods(use_imatrix):
                    """Swap the dropdown choices and show or hide the training-file input."""
                    if use_imatrix:
                        return gr.update(choices=imatrix_quant_methods, value=imatrix_quant_methods[0]), gr.update(visible=True)
                    else:
                        return gr.update(choices=standard_quant_methods, value=standard_quant_methods[0]), gr.update(visible=False)

                def toggle_split_options(split_model):
                    """Return one visibility update per output component (two outputs, two values)."""
                    return gr.update(visible=split_model), gr.update(visible=split_model)

                use_imatrix_checkbox.change(
                    fn=update_quant_methods,
                    inputs=use_imatrix_checkbox,
                    outputs=[quant_method_input, train_data_file_input]
                )

                split_model_checkbox.change(
                    fn=toggle_split_options,
                    inputs=split_model_checkbox,
                    outputs=[split_max_tensors_input, split_max_size_input]
                )

        # ----- Settings Tab -----
        with gr.Tab("Settings"):
            with gr.Column():
                login_btn = gr.Button("Login with HuggingFace Token")
                logout_btn = gr.Button("Logout")

                hf_token_input = gr.Textbox(
                    label="HuggingFace Token",
                    type="password",
                    placeholder="Enter your HuggingFace token"
                )

            status_output = gr.Markdown()  # For displaying login/logout status

        # ----- Login/Logout Functions -----
        # Token validation uses the notebook-level get_username helper (the same
        # one process_model calls) instead of a second nested copy.

        def handle_login(token):
            """Validate the token and return (status message, new state value)."""
            if not token:
                return "Please enter a HuggingFace token.", None
            try:
                username = get_username(token)
            except ValueError as e:
                # Keep the session logged out and show the reason instead of crashing the event.
                return f"Login failed: {e}", None
            print(f"Logged in as: {username}")  # Display for verification
            # Return the token VALUE; Gradio writes it into the hf_token state output.
            return "Logged in successfully!", token

        def handle_logout():
            """Clear the stored token by returning None for the state output."""
            return "Logged out successfully!", None

        # ----- Click Events -----
        login_btn.click(handle_login, inputs=[hf_token_input], outputs=[status_output, hf_token])
        logout_btn.click(handle_logout, outputs=[status_output, hf_token])
        process_button.click(
            process_model,
            inputs=[
                model_id_input, quant_method_input, use_imatrix_checkbox,
                private_repo_checkbox, train_data_file_input,
                split_model_checkbox, split_max_tensors_input, split_max_size_input,
                hf_token
            ],
            outputs=output,
        )

    return demo

# Build the Blocks UI once and keep it in `demo`.
demo = gradio_app()

# Start the web server only when this code runs as the main program.
# NOTE(review): `debug=True` presumably keeps the cell attached so errors are
# printed in the cell output - confirm against the Gradio launch() docs.
if __name__ == "__main__":
    demo.launch(debug=True)


ValueError: numpy.dtype size changed, may indicate binary incompatibility. Expected 96 from C header, got 88 from PyObject

In [39]:
# Download the converted model
#from google.colab import files
#files.download('C-Nocturnum/Meta-Llama-3-8B-Instruct-cyber-abliterated.gguf')