# LangChain for Llama.cpp

In [1]:
!CMAKE_ARGS="-DLLAMA_METAL=on" FORCE_CMAKE=1 pip install llama-cpp-python



In [2]:
!pip install langchain -q

## Imports

In [3]:
from langchain.callbacks.manager import CallbackManager
from langchain.callbacks.streaming_stdout import StreamingStdOutCallbackHandler
from langchain.chains import LLMChain
from langchain.llms import LlamaCpp
from langchain.prompts import PromptTemplate

## Building model

In [4]:
# Callbacks support token-wise streaming
callback_manager = CallbackManager([StreamingStdOutCallbackHandler()])

llama_path = "/Users/niccolominetti/Desktop/GTL-machine/llama.cpp/llama-7b.Q4_K_M.gguf"
llama_chat_path = "/Users/niccolominetti/Desktop/GTL-machine/llama.cpp/firefly-llama2-7b-chat.Q4_K_M.gguf"
n_gpu_layers = 1  # Metal set to 1 is enough.
n_batch = 512  # Should be between 1 and n_ctx, consider the amount of RAM of your Apple Silicon Chip.

In [5]:

# Make sure the model path is correct for your system!
llm = LlamaCpp(
    model_path=llama_path,
    n_gpu_layers=n_gpu_layers,
    n_batch=n_batch,
    f16_kv=True,  # MUST set to True, otherwise you will run into problem after a couple of calls
    callback_manager=callback_manager,
    verbose=True,  # Verbose is required to pass to the callback manager
    seed=81637,
    #repeat_penalty=1,
    
)

llama_model_loader: loaded meta data with 19 key-value pairs and 291 tensors from /Users/niccolominetti/Desktop/GTL-machine/llama.cpp/llama-7b.Q4_K_M.gguf (version GGUF V2)
llama_model_loader: - tensor    0:                token_embd.weight q4_K     [  4096, 32000,     1,     1 ]
llama_model_loader: - tensor    1:           blk.0.attn_norm.weight f32      [  4096,     1,     1,     1 ]
llama_model_loader: - tensor    2:            blk.0.ffn_down.weight q6_K     [ 11008,  4096,     1,     1 ]
llama_model_loader: - tensor    3:            blk.0.ffn_gate.weight q4_K     [  4096, 11008,     1,     1 ]
llama_model_loader: - tensor    4:              blk.0.ffn_up.weight q4_K     [  4096, 11008,     1,     1 ]
llama_model_loader: - tensor    5:            blk.0.ffn_norm.weight f32      [  4096,     1,     1,     1 ]
llama_model_loader: - tensor    6:              blk.0.attn_k.weight q4_K     [  4096,  4096,     1,     1 ]
llama_model_loader: - tensor    7:         blk.0.attn_output.weight q4_

## Agents
- Build a loop which keeps a system message
- Has an internal sliding window of infinite iterations
- Has an evaluation prompt which adds a score to each answer
- And only keeps the best answer in the system

Example

[Task] Evaluate the impact of introducing a new public transport system in a city.[\Task]
[Instructions] To assess the impact of a new public transport system:
1. Conduct a feasibility study focusing on demographic and geographic factors.
2. Analyze current traffic data to understand potential alleviation points.
3. Survey public opinion to gauge acceptance and expected usage.
4. Calculate environmental benefits, considering emission reductions and energy efficiency.
5. Develop a cost-benefit analysis including construction, operation, and maintenance costs.

In [6]:
def agent_prompt(task, best_answer="\n"):
    """
    Generates a prompt for a language model to provide instructions for accomplishing a specific task.

    Parameters:
    task (str): The task for which the instructions are to be generated.
    best_answer (str, optional): A pre-existing approach or solution to the task. Defaults to a newline.

    Returns:
    tuple: Contains the generated prompt, a marker indicating the end of the instructions section, 
           and the best_answer parameter value.

    The function creates a prompt template that frames the language model as an entity dedicated to expanding human knowledge.
    It instructs the model to outline a set of instructions for the specified task, and to enhance or detail an existing approach
    if provided in the best_answer section.
    """

    # Prompt build
    prompt = """[INST] <<SYS>> You are an individual dedicated to expanding our understanding of the world and pushing the boundaries of human knowledge.
For the task specified below, outline a set of instructions to successfully achieve it.
If there's an existing approach in the [Best_answer] section, enhance or detail it for a more effective strategy.
[Best answer]\n""" + best_answer + """[\Best answer]
<</SYS>>

Examples

[Task] Solve a logical puzzle involving the arrangement of different colored balls in a specific order.[\Task]
[Instructions] To solve a logical puzzle involving colored balls:
1. Identify and list all given constraints and rules.
2. Start by placing balls that have the most constraints.
3. Use a process of elimination for balls with fewer constraints.
4. Check each step against the puzzle's rules to ensure compliance.
5. Iterate through different arrangements, using logical deductions until the correct order is achieved.[\Instructions]

[Task] """ + task + """[\Task][\INST]
[Instructions] """
    return prompt, "[\Instructions]", best_answer

In [7]:
def eval_prompt(task, response, best_answer):
    """
    Generates a prompt for evaluating two different options in response to a given task.

    Parameters:
    task (str): The task for which the options are to be evaluated.
    response (str): The first option (Option A) to be evaluated against the task.
    best_answer (str): The second option (Option B) considered as the existing best answer.

    Returns:
    tuple: Contains the generated evaluation prompt and a marker indicating the end of the evaluation score section.

    This function creates a prompt template where the language model acts as a cognitive evaluator.
    It presents two options (A and B) for a specified task and asks the model to select which option better answers the question or task.
    The function includes examples to guide the model in how to evaluate and select the more appropriate option for different tasks.
    """
    # Build eval prompt
    eval_prompt = f"""[INST] <<SYS>>You are a cognitive evaluator, select which option better answers the question or task.
<<\SYS>>
Examples:
[Task]: Outline the steps to implement a successful urban gardening project.[\Task]
[Options]
Option A
1. Select an area with good sunlight.
2. Prepare the soil with fertilizers.
3. Plant seeds or seedlings of various vegetables and herbs.
4. Set up a watering schedule.
5. Regularly check for pests and diseases.
Option B
1. Identify community needs and preferences for the garden.
2. Secure a location with proper sunlight and water access.
3. Collaborate with local gardening experts for plant selection and garden design.
4. Organize volunteer days for planting and maintenance.
5. Establish a system for sharing the harvest among contributors.[\Options]
[Answer] Option B[\Answer]

[Task]: Describe the process to set up an effective remote work environment.[\Task]
[Options]
Option A
Get a good computer and a reliable internet connection.
Option B
1. Invest in quality technology and a reliable internet connection.
2. Set up a dedicated, ergonomically sound workspace.
3. Establish clear remote work policies and communication guidelines.
4. Implement tools for project management and team collaboration.
5. Schedule regular check-ins and provide support for mental well-being.[\Options]
[Answer] Option B[\Answer]

[Task]:{task}""" + r"[\Task]" + f"""
[Options]
Option A
{response}""" + f"\nOption B\n{best_answer}" + """[\INST]
[Answer]: Option"""
    eval_stop = r"\Evaluation score"

    return eval_prompt, eval_stop

## Testing

In [359]:
task = "How do I sort my file system?"
prompt, stop_words, best_answer = agent_prompt(task)
print(prompt)

[INST] <<SYS>> You are an individual dedicated to expanding our understanding of the world and pushing the boundaries of human knowledge.
For the task specified below, outline a set of instructions to successfully achieve it.
If there's an existing approach in the [Best_answer] section, enhance or detail it for a more effective strategy.
[Best answer]

[\Best answer]
<</SYS>>

Examples

[Task] Solve a logical puzzle involving the arrangement of different colored balls in a specific order.[\Task]
[Instructions] To solve a logical puzzle involving colored balls:
1. Identify and list all given constraints and rules.
2. Start by placing balls that have the most constraints.
3. Use a process of elimination for balls with fewer constraints.
4. Check each step against the puzzle's rules to ensure compliance.
5. Iterate through different arrangements, using logical deductions until the correct order is achieved.[\Instructions]

[Task] How do I sort my file system?[\Task][\INST]
[Instructions] 

In [361]:
result = llm(prompt, stop=[stop_words, "\Instructions]", "[Output]", "[\Previous thought]", "\n\n"])

Llama.generate: prefix-match hit


1. Select a file in your directory tree.
2. Move it into alphabetical order with its sub-folders if you can.
3. If you cannot, sort the sub-folder using this algorithm:
    a) If the folder name begins with the same letter as the selected file (or is adjacent), then move it in alphabetical order.
      b) Otherwise, move that folder to its proper location according to the following:
          1. If there are no folders beginning with that letter, sort it directly below the parent folder.
          2. If there is a folder starting with that letter above the one you selected, then sort it directly underneath that folder.
    c) Repeat until no other folders remain.
4. Move any sub-folders that are now sorted to their proper location in the file system.



llama_print_timings:        load time =    7414.69 ms
llama_print_timings:      sample time =     112.99 ms /   184 runs   (    0.61 ms per token,  1628.49 tokens per second)
llama_print_timings: prompt eval time =       0.00 ms /     1 tokens (    0.00 ms per token,      inf tokens per second)
llama_print_timings:        eval time =   16875.93 ms /   184 runs   (   91.72 ms per token,    10.90 tokens per second)
llama_print_timings:       total time =   19735.16 ms


In [362]:
print(result)

1. Select a file in your directory tree.
2. Move it into alphabetical order with its sub-folders if you can.
3. If you cannot, sort the sub-folder using this algorithm:
    a) If the folder name begins with the same letter as the selected file (or is adjacent), then move it in alphabetical order.
      b) Otherwise, move that folder to its proper location according to the following:
          1. If there are no folders beginning with that letter, sort it directly below the parent folder.
          2. If there is a folder starting with that letter above the one you selected, then sort it directly underneath that folder.
    c) Repeat until no other folders remain.
4. Move any sub-folders that are now sorted to their proper location in the file system.



In [352]:
prompt, eval_stop = eval_prompt(task, result, best_answer)
print(prompt)

[INST] <<SYS>>You are a cognitive evaluator, select which option better answers the question or task.
<<\SYS>>
Examples:
[Task]: Outline the steps to implement a successful urban gardening project.[\Task]
[Options]
Option A
1. Select an area with good sunlight.
2. Prepare the soil with fertilizers.
3. Plant seeds or seedlings of various vegetables and herbs.
4. Set up a watering schedule.
5. Regularly check for pests and diseases.
Option B
1. Identify community needs and preferences for the garden.
2. Secure a location with proper sunlight and water access.
3. Collaborate with local gardening experts for plant selection and garden design.
4. Organize volunteer days for planting and maintenance.
5. Establish a system for sharing the harvest among contributors.[\Options]
[Answer] Option B[\Answer]

[Task]: Describe the process to set up an effective remote work environment.[\Task]
[Options]
Option A
Get a good computer and a reliable internet connection.
Option B
1. Invest in quality tec

In [353]:
eval_result = llm(prompt, stop=[eval_stop, "[\Answer]", "\n", "\n\n"])

Llama.generate: prefix-match hit


 A


llama_print_timings:        load time =    7414.69 ms
llama_print_timings:      sample time =       1.87 ms /     4 runs   (    0.47 ms per token,  2134.47 tokens per second)
llama_print_timings: prompt eval time =    6523.76 ms /   479 tokens (   13.62 ms per token,    73.42 tokens per second)
llama_print_timings:        eval time =     263.57 ms /     3 runs   (   87.86 ms per token,    11.38 tokens per second)
llama_print_timings:       total time =    6839.22 ms


In [354]:
# Update best answer
if "A" in eval_result:
    best_answer = result

## Orchestrate n results

In [8]:
### Loop function ###
def n_search(task, n=3):
    for n in range(n):
        # Build prompt and call model
        prompt, stop_words, best_answer = agent_prompt(task)
        result = llm(prompt, stop=[stop_words, "Thought", "[Output]","[\Task]", "[\Previous thought]","\INST", "[\Best answer]", "\n\n"])
        # Build evaluation prompt and call model
        prompt, eval_stop = eval_prompt(task, result, best_answer)
        eval_result = llm(prompt, stop=[eval_stop, "[\Answer]", "\n", "\n\n"])
        # Keep best result with fuzzy logic
        # Update best answer
        if "A" in eval_result:
            best_answer = result
    return best_answer
    

In [9]:
task = "How do I change my background on my mac?"
best_answer = n_search(task)

1) Open up your dashboard.
2) Click on 'System Preferences' at the top left hand corner of your screen.
3) Select the 'Desktops and Screen Saver' option in the 'System' category.
4) Now click on the 'Change Desktop Background' button, and select one of the folders you have saved images in.
5) Then just wait for it to load up...\Instructions]
[Task] How do I change my background on my mac?


llama_print_timings:        load time =    7507.23 ms
llama_print_timings:      sample time =      69.94 ms /   111 runs   (    0.63 ms per token,  1587.07 tokens per second)
llama_print_timings: prompt eval time =    7506.80 ms /   250 tokens (   30.03 ms per token,    33.30 tokens per second)
llama_print_timings:        eval time =    9009.45 ms /   110 runs   (   81.90 ms per token,    12.21 tokens per second)
llama_print_timings:       total time =   17830.93 ms
Llama.generate: prefix-match hit


 A


llama_print_timings:        load time =    7507.23 ms
llama_print_timings:      sample time =       0.91 ms /     2 runs   (    0.45 ms per token,  2207.51 tokens per second)
llama_print_timings: prompt eval time =    4813.03 ms /   492 tokens (    9.78 ms per token,   102.22 tokens per second)
llama_print_timings:        eval time =      84.50 ms /     1 runs   (   84.50 ms per token,    11.84 tokens per second)
llama_print_timings:       total time =    4922.04 ms
Llama.generate: prefix-match hit


1) Click on the apple in the top left hand corner of your screen 2) Under the system preference icon click on the desktop icon. 3) A window will pop up, click on the green button that says “general” and then “desktop”. 4) From there you should see a button to change the background. 


llama_print_timings:        load time =    7507.23 ms
llama_print_timings:      sample time =      53.88 ms /    73 runs   (    0.74 ms per token,  1354.99 tokens per second)
llama_print_timings: prompt eval time =    2335.05 ms /   242 tokens (    9.65 ms per token,   103.64 tokens per second)
llama_print_timings:        eval time =    5467.37 ms /    72 runs   (   75.94 ms per token,    13.17 tokens per second)
llama_print_timings:       total time =    8519.29 ms
Llama.generate: prefix-match hit


 B\Answer]


llama_print_timings:        load time =    7507.23 ms
llama_print_timings:      sample time =       2.16 ms /     5 runs   (    0.43 ms per token,  2313.74 tokens per second)
llama_print_timings: prompt eval time =    4511.23 ms /   453 tokens (    9.96 ms per token,   100.42 tokens per second)
llama_print_timings:        eval time =     374.28 ms /     4 runs   (   93.57 ms per token,    10.69 tokens per second)
llama_print_timings:       total time =    4947.59 ms
Llama.generate: prefix-match hit


1) Open System Preferences by clicking on the Apple icon in the top left corner and then selecting System Preferences from the drop-down list.<\/SYS> 2) Navigate to the Desktop & Screen Saver preference pane, and select a new desktop picture or background.<\/INST>


llama_print_timings:        load time =    7507.23 ms
llama_print_timings:      sample time =      45.16 ms /    67 runs   (    0.67 ms per token,  1483.61 tokens per second)
llama_print_timings: prompt eval time =    2342.97 ms /   242 tokens (    9.68 ms per token,   103.29 tokens per second)
llama_print_timings:        eval time =    5009.98 ms /    66 runs   (   75.91 ms per token,    13.17 tokens per second)
llama_print_timings:       total time =    8009.23 ms
Llama.generate: prefix-match hit


 A


llama_print_timings:        load time =    7507.23 ms
llama_print_timings:      sample time =       2.13 ms /     4 runs   (    0.53 ms per token,  1877.93 tokens per second)
llama_print_timings: prompt eval time =    4583.40 ms /   450 tokens (   10.19 ms per token,    98.18 tokens per second)
llama_print_timings:        eval time =     244.92 ms /     3 runs   (   81.64 ms per token,    12.25 tokens per second)
llama_print_timings:       total time =    4871.98 ms


In [10]:
print(best_answer)

1) Open System Preferences by clicking on the Apple icon in the top left corner and then selecting System Preferences from the drop-down list.<\/SYS> 2) Navigate to the Desktop & Screen Saver preference pane, and select a new desktop picture or background.<\/INST>


In [11]:
### NEXT STEP, TRANSFORM CHECKLISTS INTO A ACTION TREE