# Preparation

In [1]:
"""
# Download model
huggingface-cli download tiiuae/Falcon3-1B-Instruct-1.58bit --local-dir ~/models/tiiuae/Falcon3-1B-Instruct-1.58bit
# Compile
python setup_env.py -md ~/models/tiiuae/Falcon3-1B-Instruct-1.58bit -q i2_s

#.build/bin/llama-server -m /home/pathfinder/models/tiiuae/Falcon3-1B-Instruct-1.58bit/ggml-model-i2_s.gguf --host 127.0.0.1 --port 8080

# Download Falcon3-1B-Instruct
huggingface-cli download tiiuae/Falcon3-1B-Instruct --local-dir ~/models/tiiuae/Falcon3-1B-Instruct
"""

'\n# Download model\nhuggingface-cli download tiiuae/Falcon3-1B-Instruct-1.58bit --local-dir ~/models/tiiuae/Falcon3-1B-Instruct-1.58bit\n# Compile\npython setup_env.py -md ~/models/tiiuae/Falcon3-1B-Instruct-1.58bit -q i2_s\n\n#.build/bin/llama-server -m /home/pathfinder/models/tiiuae/Falcon3-1B-Instruct-1.58bit/ggml-model-i2_s.gguf --host 127.0.0.1 --port 8080\n\n# Download Falcon3-1B-Instruct\nhuggingface-cli download tiiuae/Falcon3-1B-Instruct --local-dir ~/models/tiiuae/Falcon3-1B-Instruct\n'

# Importing Libraries

In [2]:
import os
from dataclasses import dataclass
from utils import set_seed, BitNet



# Configuration

In [3]:
@dataclass
class CONFIG:
    """Tunable settings for this notebook, gathered in one place.

    Every attribute is an annotated dataclass field, so each one appears in
    ``repr``/``==`` and can be overridden per instance, e.g. ``CONFIG(seed=0)``.
    """

    # Debug
    debug: bool = False
    verbose: bool = True

    # Model
    # Hugging Face model id; passed to `BitNet(model_id=...)`.
    model_id: str = "tiiuae/Falcon3-1B-Instruct"
    # GGUF file; passed to `BitNet(quantized_path=...)`.
    # NOTE(review): absolute, machine-specific path — adjust when running elsewhere.
    quantized_path: str = "/home/pathfinder/models/tiiuae/Falcon3-1B-Instruct-1.58bit/ggml-model-i2_s.gguf"
    # Passed to `BitNet(ctx_size=...)`.
    ctx_size: int = 1024

    # Generation
    max_new_tokens: int = 256
    ## Speculative Decoding
    num_assistant_tokens: int = 5
    assistant_confidence_threshold: float = 0.4

    # Device
    # Passed to `BitNet(n_threads=...)`.
    n_threads: int = 12

    # Seed
    # The annotation is required: without it `seed` is only a plain class
    # attribute that @dataclass ignores (not in __init__, repr or ==).
    seed: int = 42

config = CONFIG()

In [4]:
# Exported before any tokenizer is initialised further down in the notebook.
# Presumably read by the Hugging Face `tokenizers` library to switch off its
# internal parallelism (and the related fork warning) — confirm in its docs.
os.environ["TOKENIZERS_PARALLELISM"] = "false"

In [5]:
# Apply the configured seed; the helper prints the value it used.
# Which RNGs get seeded is decided inside `utils.set_seed` (not visible here).
set_seed(config.seed)

Random seed set to 42


In [6]:
# Chat turns reused by the generation cells below.
# NOTE(review): "an helpful" is a typo, kept verbatim on purpose — the recorded
# outputs in this notebook were produced with exactly this prompt text.
system_prompt = "You are an helpful assistant."
user_prompt = "Explain quantum mechanics in detail please."

# Pre-written opening of the assistant turn that the models are asked to continue.
assistant_response = (
    "Quantum mechanics is a fundamental theory in physics that describes "
    "the physical properties of nature at the scale of atoms and subatomic "
    "particles, such as electrons, protons, and photons."
)

# Model

In [7]:
# Wrapper object used by every generation cell below. `model_id` and
# `quantized_path` are the two models that the speculative-decoding banner
# later reports as "Target Model" and "Draft Model" respectively.
bitnet = BitNet(
    model_id=config.model_id,
    quantized_path=config.quantized_path,
    host="127.0.0.1",  # address/port the llama-server is started on (see cell output)
    port=8080,
    ctx_size=config.ctx_size,
    n_threads=config.n_threads,
    n_gpu_layers=0,  # presumably keeps all layers on the CPU — TODO confirm in utils.BitNet
    batch_size=1,
    slot_id=1  # NOTE(review): meaning of the slot id is defined in utils.BitNet — confirm
)
# Launches llama-server; the cell output shows it reporting readiness.
bitnet.start_server(verbose=False)
bitnet.init_tokenizer(verbose=False)
# verbose=True prints the loaded Hugging Face module tree (LlamaForCausalLM) below.
bitnet.init_model(verbose=True)

🚀 Starting llama-server on 127.0.0.1:8080
✅ Server is ready.
LlamaForCausalLM(
  (model): LlamaModel(
    (embed_tokens): Embedding(131072, 2048)
    (layers): ModuleList(
      (0-17): 18 x LlamaDecoderLayer(
        (self_attn): LlamaAttention(
          (q_proj): Linear(in_features=2048, out_features=2048, bias=False)
          (k_proj): Linear(in_features=2048, out_features=1024, bias=False)
          (v_proj): Linear(in_features=2048, out_features=1024, bias=False)
          (o_proj): Linear(in_features=2048, out_features=2048, bias=False)
        )
        (mlp): LlamaMLP(
          (gate_proj): Linear(in_features=2048, out_features=8192, bias=False)
          (up_proj): Linear(in_features=2048, out_features=8192, bias=False)
          (down_proj): Linear(in_features=8192, out_features=2048, bias=False)
          (act_fn): SiLUActivation()
        )
        (input_layernorm): LlamaRMSNorm((2048,), eps=1e-06)
        (post_attention_layernorm): LlamaRMSNorm((2048,), eps=1e-06)
   

In [8]:
# Speculative-decoding demo: the assistant turn is pre-filled with the single
# word "Quantum" and the models continue from there.
# NOTE(review): draft length and confidence threshold are literals here, not
# `config.num_assistant_tokens` / `config.assistant_confidence_threshold`;
# the threshold used (0.2) differs from the configured default (0.4).
spec_prompt = bitnet.format_falcon_prompt(
    system_prompt=system_prompt,
    user_prompt=user_prompt,
    assistant_response="Quantum",
)
bitnet.speculative_decoding(
    text=spec_prompt,
    max_new_tokens=100,
    num_assistant_tokens=5,
    confidence_threshold=0.2,
    verbose=True,
)


[95m──────────────────────────────────────────────────[0m
✨ Speculative Decoding
├─ Target Model: tiiuae/Falcon3-1B-Instruct
├─ Draft Model: /home/pathfinder/models/tiiuae/Falcon3-1B-Instruct-1.58bit/ggml-model-i2_s.gguf
└─ Draft Length: 5, Confidence: 0.2
[95m──────────────────────────────────────────────────[0m

[95m----------Step 1----------[0m
[94mDraft Input:[0m
<|system|>
You are an helpful assistant.
<|user|>
Explain quantum mechanics in detail please.
<|assistant|>
Quantum
[94mDraft Output[0m
 mechanics is a branch of
┌─────┬─────────────────┬──────────────┬────────────────────┬─────────────────┐
│ Idx │ Draft Token     │  Target Prob │ Status             │ Corrected       │
├─────┼─────────────────┼──────────────┼────────────────────┼─────────────────┤
│ 1   │  mechanics      │      98.83% │ [92m✅ Accepted[0m          │ -               │
│ 2   │  is             │      91.41% │ [92m✅ Accepted[0m          │ -               │
│ 3   │  a              │      95.31% │

' mechanics is a branch of physics that deals with the behavior of matter and energy at the smallest scales, such as atoms and subatomic particles. It is has been a fundamental theory in physics since the early 20th century, and it has revolutionized our understanding of the physical world.<|assistant<|assistant|>\nQuantum mechanics is a branch of physics that deals with the behavior of matter and energy at the smallest scales, such as atoms and subatomic has been a fundamental theory in physics since the early 20'

# Generation

In [9]:
# Baseline continuation through the Hugging Face path (the output banner reads
# "Generation Info (Hugging Face)"), starting from the pre-written assistant text.
hf_prompt = bitnet.format_falcon_prompt(
    system_prompt=system_prompt,
    user_prompt=user_prompt,
    assistant_response=assistant_response,
)
bitnet.generate_hf(text=hf_prompt, max_new_tokens=100, verbose=True)


[95m──────────────────────────────────────────────────
🧠 Generation Info (Hugging Face)
──────────────────────────────────────────────────[0m
[94m💬 User Input:[0m
<|system|>
You are an helpful assistant.
<|user|>
Explain quantum mechanics in detail please.
<|assistant|>
Quantum mechanics is a fundamental theory in physics that describes the physical properties of nature at the scale of atoms and subatomic particles, such as electrons, protons, and photons.

[92m🟢 Generated Text:[0m
 It is a complex and counterintuitive theory that has been developed over the past two centuries, and it has revolutionized our understanding of the universe.

1. Wave-Particle Duality: One of the most fundamental principles of quantum mechanics is that particles can exhibit both wave-like and particle-like properties. This means that a particle can behave like a wave, spreading out and interfering with itself, or it can behave like a particle, having a definite position and momentum.

2. Superpositio

' It is a complex and counterintuitive theory that has been developed over the past two centuries, and it has revolutionized our understanding of the universe.\n\n1. Wave-Particle Duality: One of the most fundamental principles of quantum mechanics is that particles can exhibit both wave-like and particle-like properties. This means that a particle can behave like a wave, spreading out and interfering with itself, or it can behave like a particle, having a definite position and momentum.\n\n2. Superposition'

In [10]:
# Same prompt as the Hugging Face cell, sent through `generate_gguf` instead
# (presumably the quantized GGUF model behind llama-server — confirm in utils.BitNet).
gguf_prompt = bitnet.format_falcon_prompt(
    system_prompt=system_prompt,
    user_prompt=user_prompt,
    assistant_response=assistant_response,
)
bitnet.generate_gguf(text=gguf_prompt, max_new_tokens=100, verbose=True)


[95m──────────────────────────────────────────────────
🧠 Generation Info
──────────────────────────────────────────────────[0m
[94m💬 User Input:[0m
<|system|>
You are an helpful assistant.
<|user|>
Explain quantum mechanics in detail please.
<|assistant|>
Quantum mechanics is a fundamental theory in physics that describes the physical properties of nature at the scale of atoms and subatomic particles, such as electrons, protons, and photons.

[92m🟢 Generated Text:[0m
 At its core, quantum mechanics is based on the idea that not all electromagnetic fields can be described by classical mechanics through classical fields like electromagnetic tension and dipole moments. Instead, these fields are described by the concept of quantum fields, and these fields can be generated by pairs of entangled particles. The light color of the particle contributes to the field's quantum state, and the system’s qubit relies on superposition to maintain this quantum state over time to change states.



" At its core, quantum mechanics is based on the idea that not all electromagnetic fields can be described by classical mechanics through classical fields like electromagnetic tension and dipole moments. Instead, these fields are described by the concept of quantum fields, and these fields can be generated by pairs of entangled particles. The light color of the particle contributes to the field's quantum state, and the system’s qubit relies on superposition to maintain this quantum state over time to change states."

## Speculative Decoding