In [None]:
#@markdown # Installation
#@markdown Run this cell to install `llama-zip` with the necessary dependencies. You will have two options for the installation:
#@markdown 1. **Regular Installation (CPU Only)**: This option will install `llama-cpp-python` with support for CPU inference only. This will only take a minute or two, but compression and decompression speeds will be limited (1-2 tokens/sec).
#@markdown 2. **Enable GPU Support (Takes ~15 Minutes)**: This option will install `llama-cpp-python` with CUDA support for faster inference using the GPU. This will take around 15 minutes, and compression and decompression speeds will be much faster (20+ tokens/sec).

import ipywidgets as widgets
from IPython.display import display, clear_output

# One fixed-width button per installation option described in the cell header.
button_cpu = widgets.Button(
    description="Regular Installation (CPU Only)",
    button_style='primary',
    layout=widgets.Layout(width='300px'),
)
button_gpu = widgets.Button(
    description="Enable GPU Support (Takes ~15 Minutes)",
    button_style='warning',
    layout=widgets.Layout(width='300px'),
)

def on_button_cpu_clicked(b):
    clear_output()

    !git clone https://github.com/alexbuz/llama-zip.git
    %cd llama-zip
    %pip install .

    clear_output()
    inf('CPU Only Installation Done!', 'success', '300px')

def on_button_gpu_clicked(b):
    clear_output()

    !git clone https://github.com/alexbuz/llama-zip.git
    %cd llama-zip
    %pip install .

    !CMAKE_ARGS="-DLLAMA_CUDA=on" FORCE_CMAKE=1 pip install llama-cpp-python --force-reinstall --no-cache-dir --verbose

    clear_output()
    inf('GPU Support Enabled!', 'success', '300px')

# Attach each handler to its button (the two registrations are independent),
# then render both buttons as this cell's output.
button_gpu.on_click(on_button_gpu_clicked)
button_cpu.on_click(on_button_cpu_clicked)

display(
    button_cpu,
    button_gpu,
)

def inf(msg, style, wdth):
    """Display a status message as a disabled button.

    Args:
        msg: Text shown on the button.
        style: Passed through as the button's ``button_style``.
        wdth: Passed through as the layout's ``min_width`` (e.g. '300px').
    """
    badge_layout = widgets.Layout(min_width=wdth)
    badge = widgets.Button(
        description=msg,
        disabled=True,
        button_style=style,
        layout=badge_layout,
    )
    display(badge)


In [None]:
#@markdown # LLM Download
#@markdown Run this cell to download an LLM. This is necessary for `llama-zip` to function, as the LLM guides the compression and decompression process.


import os

import ipywidgets as widgets
from IPython.display import display, clear_output

# Label shown in the dropdown -> URL the model file is downloaded from.
llm_options = {
    "Meta-Llama-3-8B (Q8_0)":
        "https://huggingface.co/QuantFactory/Meta-Llama-3-8B-GGUF/resolve/main/Meta-Llama-3-8B.Q8_0.gguf",
    "Meta-Llama-3-8B (Q4_K_M)":
        "https://huggingface.co/QuantFactory/Meta-Llama-3-8B-GGUF/resolve/main/Meta-Llama-3-8B.Q4_K_M.gguf",
}

# The first entry is a placeholder; the remaining entries are the model labels
# in the order they appear in `llm_options`.
dropdown_llm = widgets.Dropdown(
    description="LLM:",
    options=["Select an LLM...", *llm_options],
)

def on_llm_select(change):
    global llm_path
    if change["new"] != "Select an LLM...":
        clear_output()

        selected_llm = change["new"]
        llm_url = llm_options[selected_llm]
        llm_path = llm_url.split("/")[-1]

        if not os.path.isfile(llm_path):
            !wget {llm_url}

        clear_output()
        inf(f"{selected_llm} Downloaded!", "success", "300px")

# Call the handler whenever the dropdown's `value` trait changes, then render
# the dropdown as this cell's output.
dropdown_llm.observe(
    on_llm_select,
    names="value",
)

display(dropdown_llm)

def inf(msg, style, wdth):
    """Render a disabled button that serves as a status message.

    Args:
        msg: Text shown on the button.
        style: Passed through as the button's ``button_style``.
        wdth: Passed through as the layout's ``min_width`` (e.g. "300px").
    """
    status_button = widgets.Button(
        description=msg,
        disabled=True,
        button_style=style,
        layout=widgets.Layout(min_width=wdth),
    )
    display(status_button)


In [None]:
#@markdown # Start Interactive Mode (Recommended)
# Runs llama-zip with the model file and the `-i` flag. `llm_path` is the
# global assigned by the LLM Download cell's handler, so a model must have
# been selected there before this cell is run.
!llama-zip $llm_path -i

In [None]:
#@markdown # Compress Text
Text = "" #@param {type:"string"}

!llama-zip $llm_path -c "$Text"

In [None]:
#@markdown # Decompress Text
Text = "" #@param {type:"string"}

!llama-zip $llm_path -d $Text

# 📦 Repo: [GitHub](https://github.com/AlexBuz/llama-zip)
### Original Colab notebook by laVashik
### Improved by AlexBuz