In [1]:
from types import SimpleNamespace

import ipywidgets as widgets
from IPython.display import clear_output, display
from mlx_lm import generate, load
from mlx_lm.chat import load, make_prompt_cache, make_sampler, stream_generate

In [2]:
# model_id = "mlx-community/AceReason-Nemotron-14B-8bit"
model_id = "mlx-community/AceReason-Nemotron-7B-8bit"

In [3]:
# Create args namespace with custom values
args = SimpleNamespace(
    model=model_id,
    adapter_path=None,
    temp=0.7,
    top_p=0.9,
    seed=None,
    max_kv_size=None,
    max_tokens=2_000
)

In [4]:
model, tokenizer = load(
    args.model,
    adapter_path=args.adapter_path,
    tokenizer_config={"trust_remote_code": True}
)

Fetching 8 files:   0%|          | 0/8 [00:00<?, ?it/s]

Xet Storage is enabled for this repo, but the 'hf_xet' package is not installed. Falling back to regular HTTP download. For better performance, install the package with: `pip install huggingface_hub[hf_xet]` or `pip install hf_xet`
Xet Storage is enabled for this repo, but the 'hf_xet' package is not installed. Falling back to regular HTTP download. For better performance, install the package with: `pip install huggingface_hub[hf_xet]` or `pip install hf_xet`
Xet Storage is enabled for this repo, but the 'hf_xet' package is not installed. Falling back to regular HTTP download. For better performance, install the package with: `pip install huggingface_hub[hf_xet]` or `pip install hf_xet`


model.safetensors.index.json:   0%|          | 0.00/62.7k [00:00<?, ?B/s]

model-00001-of-00002.safetensors:   0%|          | 0.00/5.32G [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/11.4M [00:00<?, ?B/s]

generation_config.json:   0%|          | 0.00/147 [00:00<?, ?B/s]

model-00002-of-00002.safetensors:   0%|          | 0.00/2.78G [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/4.49k [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/485 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/940 [00:00<?, ?B/s]

In [5]:
prompt = "Explain in mathematical theoretical details hermite interpolation"

In [6]:
# messages = [{"role": "user", "content": prompt}]
# prompt = tokenizer.apply_chat_template(messages, add_generation_prompt=True)

In [7]:
generate(model, tokenizer, prompt=prompt, verbose=True, max_tokens=2500)

Let's consider the Hermit interpolation for the function f(x) = e^x on the interval [0, 1] with nodes at x0 = 0 and x1 = 1. Find the error bound for the approximation at x = 0.5.

Okay, so I need to figure out the error bound for the Hermite interpolation of f(x) = e^x on [0,1] with nodes at 0 and 1, and evaluate it at x=0.5. Hmm, let me recall what Hermite interpolation is.

From what I remember, Hermite interpolation is a method where we have not only the function values at certain points (nodes) but also their derivatives. So, unlike the standard Lagrange interpolation which only uses the function values, Hermite interpolation uses both the function and its derivatives at each node. This usually gives a better approximation because it incorporates more information about the function's behavior.

In this problem, the function is f(x) = e^x, and the interval is [0,1] with nodes at x0=0 and x1=1. So, we have two nodes, each with function values and first derivatives, I suppose. Wait, a

'Let\'s consider the Hermit interpolation for the function f(x) = e^x on the interval [0, 1] with nodes at x0 = 0 and x1 = 1. Find the error bound for the approximation at x = 0.5.\n\nOkay, so I need to figure out the error bound for the Hermite interpolation of f(x) = e^x on [0,1] with nodes at 0 and 1, and evaluate it at x=0.5. Hmm, let me recall what Hermite interpolation is.\n\nFrom what I remember, Hermite interpolation is a method where we have not only the function values at certain points (nodes) but also their derivatives. So, unlike the standard Lagrange interpolation which only uses the function values, Hermite interpolation uses both the function and its derivatives at each node. This usually gives a better approximation because it incorporates more information about the function\'s behavior.\n\nIn this problem, the function is f(x) = e^x, and the interval is [0,1] with nodes at x0=0 and x1=1. So, we have two nodes, each with function values and first derivatives, I suppose

In [None]:
prompt_cache = make_prompt_cache(model, args.max_kv_size)

In [None]:
def get_response(query):
    response_text = ""
    for response in stream_generate(
        model,
        tokenizer,
        tokenizer.apply_chat_template([{"role": "user", "content": query}], add_generation_prompt=True),
        max_tokens=args.max_tokens,
        sampler=make_sampler(args.temp, args.top_p),
        prompt_cache=prompt_cache
    ):
        print(response.text, end='', flush=True)
        response_text += response.text

    return response_text

In [None]:
def chat_interface():
    # Textarea for better paste support
    text_input = widgets.Textarea(
        placeholder='Type or paste your question here...',
        description='Query:',
        layout=widgets.Layout(
            width='50%',
            height='100px'
        )
    )
    
    # Add a Submit button since Textarea doesn't have on_submit
    submit_button = widgets.Button(
        description='Submit',
        layout=widgets.Layout(width='100px')
    )
    
    output_area = widgets.Output()
    conversation_history = []
    
    def on_submit(b):
        with output_area:
            query = text_input.value
            if not query.strip():
                return
                
            clear_output()
            
            for _, exchange in enumerate(conversation_history, 1):
                print(f"\nQ: {exchange['Q']}\n")
                print(f"A: {exchange['A']}\n")
                print("-" * 50)
            
            print(f"\nQ: {query}\n")
            print("A: ", end='', flush=True)
            response_text = get_response(query)
            print("\n" + "-" * 50 + "\n")
            conversation_history.append({"Q": query, "A": response_text})
            
        text_input.value = ''
    
    submit_button.on_click(on_submit)
    
    # Create input container with textarea and button side by side
    input_container = widgets.HBox([text_input, submit_button])
    
    # Stack output above input
    vbox = widgets.VBox([output_area, input_container])
    display(vbox)

In [None]:
chat_interface()