# 01-01 : Self-Contained Llamafile Example

Tested on AWS SageMaker with an `ml.t3.xlarge` instance.

```bash
conda create -n llamafile python=3.10
```

## 0. Install Dependencies

In [None]:
!pip install tqdm requests

### 0.1. Download Llamafile

In [None]:
import os

import requests
from tqdm import tqdm

# Direct download link for the llamafile and the local name it is saved under.
url = "https://huggingface.co/Mozilla/Mistral-7B-Instruct-v0.3-llamafile/resolve/main/Mistral-7B-Instruct-v0.3.Q6_K.llamafile?download=true"
output_path = "Mistral-7B-Instruct-v0.3.Q6_K.llamafile"

# SECURITY: never commit an access token to a notebook. The token is read from
# the HF_TOKEN environment variable instead; when it is not set the request is
# sent without an Authorization header.
hf_token = os.environ.get("HF_TOKEN")
headers = {"Authorization": f"Bearer {hf_token}"} if hf_token else {}

# The context manager guarantees the streamed connection is released, and the
# timeout prevents the cell from hanging forever on a stalled connection.
with requests.get(url, headers=headers, stream=True, timeout=30) as response:
    if response.status_code == 200:
        # content-length may be absent; tqdm then shows a counter without a total.
        total_size = int(response.headers.get('content-length', 0))
        # 1 MiB chunks: far fewer Python-level iterations than 1 KiB for a
        # multi-gigabyte file, while keeping memory use small.
        block_size = 1024 * 1024

        # Both the file and the progress bar are closed even if a write fails.
        with open(output_path, "wb") as file, \
                tqdm(total=total_size, unit='iB', unit_scale=True) as progress_bar:
            for data in response.iter_content(block_size):
                file.write(data)
                progress_bar.update(len(data))
    else:
        print(f"Failed to download file: {response.status_code}")

In [None]:
# !wget --header="Authorization: Bearer <YOUR_HF_TOKEN>" "https://huggingface.co/Mozilla/Mistral-7B-Instruct-v0.3-llamafile/resolve/main/Mistral-7B-Instruct-v0.3.Q6_K.llamafile?download=true"

In [None]:
# !mv 'Mistral-7B-Instruct-v0.3.Q6_K.llamafile?download=true' Mistral-7B-Instruct-v0.3.Q6_K.llamafile

In [None]:
# !chmod +x Mistral-7B-Instruct-v0.3.Q6_K.llamafile

### 0.2. Install Langchain

In [None]:
!pip install langchain-openai==0.1.16

## 1. Imports

In [None]:
import os
import subprocess
import stat
from typing import Optional
from langchain_openai import ChatOpenAI

## 2. Run Llamafile

In [None]:
# Resolve the llamafile relative to the notebook's working directory.
llamafile_name = 'Mistral-7B-Instruct-v0.3.Q6_K.llamafile'
cwd = os.getcwd()
executable_path = os.path.join(cwd, llamafile_name)

# Add the owner-execute bit when the current user cannot run the file yet.
already_executable = os.access(executable_path, os.X_OK)
if not already_executable:
    current_mode = os.stat(executable_path).st_mode
    os.chmod(executable_path, current_mode | stat.S_IEXEC)

# Command-line flags handed to the llamafile; port 8081 is the one the
# default base_url of create_chat points at.
arguments = ['--port', '8081', '--host', '0.0.0.0', '--nobrowser']

# Launch through bash without blocking the notebook; the Popen handle is kept
# so the process can be stopped later.
command = ['bash', executable_path, *arguments]
llamafile_process = subprocess.Popen(command)
print("Executable started in the background.")

## 3. Create LLM Client

In [None]:
def create_chat(
        base_url:str='http://localhost:8081/v1',
        model:str="mozilla/Mistral-7B-Instruct-v0.3-llamafile",
        temperature:float=0.001,
        max_retries:int=3,
        max_tokens: Optional[int] = None,
        timeout:int=20):
    """
    Build a ``ChatOpenAI`` client bound to the given OpenAI-compatible endpoint.

    Args:
        base_url: Root URL of the API the client sends requests to.
        model: Model identifier forwarded to the endpoint.
        temperature: Sampling temperature forwarded to the client.
        max_retries: Retry count forwarded to the client.
        max_tokens: Optional completion-length cap; ``None`` leaves it unset.
        timeout: Request timeout forwarded to the client.

    Returns:
        The configured ``ChatOpenAI`` instance.
    """
    # The API key is a fixed placeholder ("NA"), presumably because the local
    # server does not validate it; no extra model kwargs are supplied.
    client_settings = dict(
        base_url=base_url,
        api_key="NA",
        temperature=temperature,
        max_retries=max_retries,
        max_tokens=max_tokens,
        timeout=timeout,
        model=model,
        model_kwargs={},
    )
    return ChatOpenAI(**client_settings)

In [None]:
# Instantiate the chat client, overriding the default temperature and capping
# each completion at 512 tokens.
llm = create_chat(
    max_tokens=512,
    temperature=0.7,
)

## 4. Test LLM

In [None]:
# Send one prompt to the model and print the text of its reply.
# The result is bound to the same name that is printed, so the output is the
# model's answer and not a stale object from an earlier cell.
response = llm.invoke("Why is the sky blue")
print(response.content.strip())

## 5. Stop Llamafile

In [None]:
# Ask the llamafile server to shut down, then wait for it so the child process
# is reaped instead of lingering as a zombie. If it has not exited after the
# grace period, kill it outright.
llamafile_process.terminate()
try:
    llamafile_process.wait(timeout=10)
except subprocess.TimeoutExpired:
    llamafile_process.kill()
    llamafile_process.wait()