# LangChain for Llama.cpp

In [1]:
!CMAKE_ARGS="-DLLAMA_METAL=on" FORCE_CMAKE=1 pip install llama-cpp-python



In [5]:
!pip install langchain -q

## Imports

In [6]:
from langchain.callbacks.manager import CallbackManager
from langchain.callbacks.streaming_stdout import StreamingStdOutCallbackHandler
from langchain.chains import LLMChain
from langchain.llms import LlamaCpp
from langchain.prompts import PromptTemplate

## Building model

In [75]:
# Location of the quantised Orca-2 GGUF weights on the local machine.
orca_path = "/Users/niccolominetti/Desktop/GTL-machine/llama.cpp/orca-2-7b.Q4_K_M.gguf"

# llama.cpp runtime knobs.
n_batch = 512  # Should be between 1 and n_ctx, consider the amount of RAM of your Apple Silicon Chip.
n_gpu_layers = 1  # Metal set to 1 is enough.

# Stream generated tokens to stdout as they arrive.
callback_manager = CallbackManager([StreamingStdOutCallbackHandler()])

In [104]:

# Instantiate the llama.cpp-backed LLM; check that `orca_path` points at a
# model file that exists on your system before running this cell.
llm = LlamaCpp(
    model_path=orca_path,
    n_ctx=1024,
    n_batch=n_batch,
    n_gpu_layers=n_gpu_layers,
    # MUST be True, otherwise you will run into problems after a couple of calls.
    f16_kv=True,
    seed=81637,
    # Verbose is required for the callback manager to receive tokens.
    verbose=True,
    callback_manager=callback_manager,
)

llama_model_loader: loaded meta data with 21 key-value pairs and 291 tensors from /Users/niccolominetti/Desktop/GTL-machine/llama.cpp/orca-2-7b.Q4_K_M.gguf (version GGUF V3 (latest))
llama_model_loader: - tensor    0:                token_embd.weight q4_K     [  4096, 32003,     1,     1 ]
llama_model_loader: - tensor    1:              blk.0.attn_q.weight q4_K     [  4096,  4096,     1,     1 ]
llama_model_loader: - tensor    2:              blk.0.attn_k.weight q4_K     [  4096,  4096,     1,     1 ]
llama_model_loader: - tensor    3:              blk.0.attn_v.weight q6_K     [  4096,  4096,     1,     1 ]
llama_model_loader: - tensor    4:         blk.0.attn_output.weight q4_K     [  4096,  4096,     1,     1 ]
llama_model_loader: - tensor    5:            blk.0.ffn_gate.weight q4_K     [  4096, 11008,     1,     1 ]
llama_model_loader: - tensor    6:              blk.0.ffn_up.weight q4_K     [  4096, 11008,     1,     1 ]
llama_model_loader: - tensor    7:            blk.0.ffn_down.

## Agents
- Build a loop which keeps a system message
- Has an internal sliding window of infinite iterations
- Has an evaluation prompt which adds a score to each answer
- And only keeps the best answer in the system

Example

[Task] Evaluate the impact of introducing a new public transport system in a city.[\Task]
[Instructions] To assess the impact of a new public transport system:
1. Conduct a feasibility study focusing on demographic and geographic factors.
2. Analyze current traffic data to understand potential alleviation points.
3. Survey public opinion to gauge acceptance and expected usage.
4. Calculate environmental benefits, considering emission reductions and energy efficiency.
5. Develop a cost-benefit analysis including construction, operation, and maintenance costs.

In [97]:
def create_agent_prompt(task, best_answer="\n"):
    """
    Generates a prompt for a language model to provide instructions for accomplishing a specific task.

    Parameters:
    task (str): The task for which the instructions are to be generated.
    best_answer (str, optional): A pre-existing approach or solution to the task. It is inserted
        verbatim between the "Best answer" tags. Defaults to a newline, which leaves that section empty.

    Returns:
    tuple: A 2-tuple ``(prompt, stop_marker)`` holding the generated prompt and the literal closing
           tag of the instructions section, intended to be used as a stop string for generation.

    The function creates a prompt template that frames the language model as an entity dedicated to expanding human knowledge.
    It instructs the model to outline a set of instructions for the specified task, and to enhance or detail an existing approach
    if provided in the best_answer section.
    """

    # The closing tags contain a literal backslash. It is written as "\\" so the
    # resulting text is unchanged while avoiding invalid escape sequences such as
    # "\B" or "\T", which newer Python versions warn about.
    prompt = """[INST] <<SYS>> You are an individual dedicated to expanding our understanding of the world and pushing the boundaries of human knowledge.
For the task specified below, outline a set of instructions to successfully achieve it.
If there's an existing approach in the [Best_answer] section, enhance or detail it for a more effective strategy.
[Best answer]\n""" + best_answer + """[\\Best answer]
<</SYS>>

Examples

[Task] Solve a logical puzzle involving the arrangement of different colored balls in a specific order.[\\Task]
[Instructions] To solve a logical puzzle involving colored balls:
1. Identify and list all given constraints and rules.
2. Start by placing balls that have the most constraints.
3. Use a process of elimination for balls with fewer constraints.
4. Check each step against the puzzle's rules to ensure compliance.
5. Iterate through different arrangements, using logical deductions until the correct order is achieved.[\\Instructions]

[Task] """ + task + """[\\Task][\\INST]
[Instructions] """
    return prompt, "[\\Instructions]"

In [96]:
def create_eval_prompt(task, response, best_answer):
    r"""
    Generates a prompt for evaluating two different options in response to a given task.

    Parameters:
    task (str): The task for which the options are to be evaluated.
    response (str): The first option (Option A) to be evaluated against the task.
    best_answer (str): The second option (Option B) considered as the existing best answer.

    Returns:
    tuple: A 2-tuple ``(eval_prompt, eval_stop)`` holding the generated evaluation prompt and a
           stop string to pass to the model alongside any other stop strings.

    This function creates a prompt template where the language model acts as a cognitive evaluator.
    It presents two options (A and B) for a specified task and asks the model to select which option better answers the question or task.
    The prompt includes one worked example and ends with "[Answer]: Option", so the model's
    continuation is expected to begin with the chosen letter.

    Extra example
    [Task]: Describe the process to set up an effective remote work environment.[\Task]
    [Options]
    Option A
    Get a good computer and a reliable internet connection.
    Option B
    1. Invest in quality technology and a reliable internet connection.
    2. Set up a dedicated, ergonomically sound workspace.
    3. Establish clear remote work policies and communication guidelines.
    4. Implement tools for project management and team collaboration.
    5. Schedule regular check-ins and provide support for mental well-being.[\Options]
    [Answer] Option B[\Answer]
    """
    # Build eval prompt. Literal backslashes in the tags are written as "\\" so
    # the emitted text is unchanged while avoiding invalid escape sequences.
    eval_prompt = f"""[INST] <<SYS>>You are a cognitive evaluator, select which option better answers the question or task.
<<\\SYS>>
Examples:
[Task]: Outline the steps to implement a successful urban gardening project.[\\Task]
[Options]
Option A
1. Select an area with good sunlight.
2. Prepare the soil with fertilizers.
3. Plant seeds or seedlings of various vegetables and herbs.
4. Set up a watering schedule.
5. Regularly check for pests and diseases.
Option B
1. Identify community needs and preferences for the garden.
2. Secure a location with proper sunlight and water access.
3. Collaborate with local gardening experts for plant selection and garden design.
4. Organize volunteer days for planting and maintenance.
5. Establish a system for sharing the harvest among contributors.[\\Options]
[Answer] Option B[\\Answer]

[Task]:{task}[\\Task]
[Options]
Option A
{response}
Option B
{best_answer}[\\INST]
[Answer]: Option"""
    # NOTE(review): this stop string does not correspond to any tag used in the
    # prompt above; it is kept as-is for backward compatibility.
    eval_stop = r"\Evaluation score"

    return eval_prompt, eval_stop

## Testing

In [77]:
task = "How do I sort my file system?"
# No candidate answer exists yet, so start from the same empty placeholder that
# create_agent_prompt uses by default; later cells read `best_answer`.
best_answer = "\n"
# create_agent_prompt returns exactly two values: the prompt and its stop marker.
prompt, stop_words = create_agent_prompt(task, best_answer=best_answer)
print(prompt)

[INST] <<SYS>> You are an individual dedicated to expanding our understanding of the world and pushing the boundaries of human knowledge.
For the task specified below, outline a set of instructions to successfully achieve it.
If there's an existing approach in the [Best_answer] section, enhance or detail it for a more effective strategy.
[Best answer]

[\Best answer]
<</SYS>>

Examples

[Task] Solve a logical puzzle involving the arrangement of different colored balls in a specific order.[\Task]
[Instructions] To solve a logical puzzle involving colored balls:
1. Identify and list all given constraints and rules.
2. Start by placing balls that have the most constraints.
3. Use a process of elimination for balls with fewer constraints.
4. Check each step against the puzzle's rules to ensure compliance.
5. Iterate through different arrangements, using logical deductions until the correct order is achieved.[\Instructions]

[Task] How do I sort my file system?[\Task][\INST]
[Instructions] 

In [78]:
# Generate the instructions; literal backslashes in the stop strings are
# escaped so they are not parsed as (invalid) escape sequences.
result = llm(prompt, stop=[stop_words, "\\Instructions]", "[Output]", "[\\Previous thought]", "\n\n"])

1. Open your computer's file explorer or finder and navigate to the folder containing files you want to sort.
2. Select the files or folders you wish to sort, either individually or by holding down the "Ctrl" key while selecting multiple items.
3. Click on the "Sort" button, usually represented by an arrow or a set of arrows pointing upwards (e.g., in Windows Explorer: View -> Sort A-Z; in macOS Finder: File -> New Folder -> Sort By). This will sort the selected items according to your chosen criteria.
4. Choose the sorting order from the options available, such as alphabetical, by date modified, by size, etc. You may also be able to customize additional settings or filters for more advanced sorting.
5. Once you are satisfied with the sorted folder, save any changes and close the file explorer or finder.


llama_print_timings:        load time =    7855.03 ms
llama_print_timings:      sample time =     110.93 ms /   196 runs   (    0.57 ms per token,  1766.93 tokens per second)
llama_print_timings: prompt eval time =    7854.37 ms /   248 tokens (   31.67 ms per token,    31.57 tokens per second)
llama_print_timings:        eval time =   16274.97 ms /   195 runs   (   83.46 ms per token,    11.98 tokens per second)
llama_print_timings:       total time =   26628.57 ms


In [79]:
# Show the text returned by the model call for the instruction prompt.
print(result)

1. Open your computer's file explorer or finder and navigate to the folder containing files you want to sort.
2. Select the files or folders you wish to sort, either individually or by holding down the "Ctrl" key while selecting multiple items.
3. Click on the "Sort" button, usually represented by an arrow or a set of arrows pointing upwards (e.g., in Windows Explorer: View -> Sort A-Z; in macOS Finder: File -> New Folder -> Sort By). This will sort the selected items according to your chosen criteria.
4. Choose the sorting order from the options available, such as alphabetical, by date modified, by size, etc. You may also be able to customize additional settings or filters for more advanced sorting.
5. Once you are satisfied with the sorted folder, save any changes and close the file explorer or finder.


In [80]:
# Build the A/B comparison prompt: the fresh `result` is Option A and the
# current `best_answer` is Option B.
prompt, eval_stop = create_eval_prompt(task, result, best_answer)
print(prompt)

[INST] <<SYS>>You are a cognitive evaluator, select which option better answers the question or task.
<<\SYS>>
Examples:
[Task]: Outline the steps to implement a successful urban gardening project.[\Task]
[Options]
Option A
1. Select an area with good sunlight.
2. Prepare the soil with fertilizers.
3. Plant seeds or seedlings of various vegetables and herbs.
4. Set up a watering schedule.
5. Regularly check for pests and diseases.
Option B
1. Identify community needs and preferences for the garden.
2. Secure a location with proper sunlight and water access.
3. Collaborate with local gardening experts for plant selection and garden design.
4. Organize volunteer days for planting and maintenance.
5. Establish a system for sharing the harvest among contributors.[\Options]
[Answer] Option B[\Answer]

[Task]:How do I sort my file system?[\Task]
[Options]
Option A
1. Open your computer's file explorer or finder and navigate to the folder containing files you want to sort.
2. Select the files

In [81]:
# Ask the evaluator for its verdict; generation stops at the first newline or
# closing answer tag (backslash escaped so the tag is matched literally).
eval_result = llm(prompt, stop=[eval_stop, "[\\Answer]", "\n", "\n\n"])

Llama.generate: prefix-match hit


 A


llama_print_timings:        load time =    7855.03 ms
llama_print_timings:      sample time =       1.00 ms /     2 runs   (    0.50 ms per token,  1994.02 tokens per second)
llama_print_timings: prompt eval time =    5422.58 ms /   442 tokens (   12.27 ms per token,    81.51 tokens per second)
llama_print_timings:        eval time =     103.04 ms /     1 runs   (  103.04 ms per token,     9.70 tokens per second)
llama_print_timings:       total time =    5563.44 ms


In [82]:
# Update best answer
import re

# Look at the first standalone "A" or "B" in the verdict instead of testing for
# any capital "A", which would also fire on replies such as "B. Although ...".
verdict = re.search(r"\b([AB])\b", eval_result)
if verdict and verdict.group(1) == "A":
    best_answer = result

## Orchestrate n results

In [99]:
### Loop function ###
def n_search(task, n=3, debug=False):
    """
    Iteratively generate answers for a task and keep the one the evaluator prefers.

    Each round builds an instruction prompt seeded with the current best answer,
    asks the model for a new response, then asks the model to compare the new
    response (Option A) against the current best answer (Option B).

    Parameters:
    task (str): The task to find instructions for.
    n (int, optional): Number of generate-and-evaluate rounds. Defaults to 3.
    debug (bool, optional): When True, print the candidate, the current best
        answer and the raw evaluation after every round.

    Returns:
    str: The best answer after the final round. This is a single newline if no
         candidate was ever preferred.
    """
    import re

    best_answer = "\n"
    # The round index is unused; "_" avoids shadowing the `n` parameter.
    for _ in range(n):
        # Build prompt and call model
        prompt, stop_words = create_agent_prompt(task=task, best_answer=best_answer)
        response = llm(
            prompt,
            stop=[
                stop_words,
                "Thought",
                "[Output]",
                "[\\Task]",
                "[\\Previous thought]",
                "\\INST",
                "[\\Best answer]",
                "\n\n",
            ],
        )
        # Build evaluation prompt and call model
        eval_prompt, eval_stop = create_eval_prompt(task=task, response=response, best_answer=best_answer)
        eval_response = llm(eval_prompt, stop=[eval_stop, "[\\Answer]", "\n", "\n\n"])
        if debug:
            print(f"\n\nChallenge:\n{response}\nBest Answer so far:\n{best_answer}\nEvaluation:{eval_response}")
        # Decide on the first standalone "A"/"B" token rather than on any capital
        # "A" in the reply, so verdicts like "B. Although ..." are not misread.
        verdict = re.search(r"\b([AB])\b", eval_response)
        if verdict and verdict.group(1) == "A":
            # The new answer is now our best answer
            best_answer = response
    return best_answer
    

In [105]:
task = "How do I build a python script?"
# Run the search with its default of three generate-and-evaluate rounds.
best_answer = n_search(task)

1. Define your purpose and desired output by writing a clear problem statement.
2. Create a new file in your preferred text editor or IDE (Integrated Development Environment) and save it as a .py file.
3. Use indentation to create blocks of code, which are then interpreted by Python.
4. Write and execute individual functions using the def keyword.
5. Import necessary modules and libraries using the import statement.
6. Test your script by running it in the terminal or an interactive environment.
7. Refactor and debug your code as needed.
8. Save your changes and run the script again to ensure everything works as expected.
9. Document your script with comments and docstrings for better readability and understanding.


llama_print_timings:        load time =    3952.71 ms
llama_print_timings:      sample time =      87.70 ms /   155 runs   (    0.57 ms per token,  1767.49 tokens per second)
llama_print_timings: prompt eval time =    3951.73 ms /   248 tokens (   15.93 ms per token,    62.76 tokens per second)
llama_print_timings:        eval time =   13895.63 ms /   154 runs   (   90.23 ms per token,    11.08 tokens per second)
llama_print_timings:       total time =   20379.44 ms
Llama.generate: prefix-match hit


 A


llama_print_timings:        load time =    3952.71 ms
llama_print_timings:      sample time =       0.93 ms /     2 runs   (    0.46 ms per token,  2152.85 tokens per second)
llama_print_timings: prompt eval time =    3966.28 ms /   405 tokens (    9.79 ms per token,   102.11 tokens per second)
llama_print_timings:        eval time =     100.02 ms /     1 runs   (  100.02 ms per token,    10.00 tokens per second)
llama_print_timings:       total time =    4108.81 ms
Llama.generate: prefix-match hit


1. Define your purpose and desired output by writing a clear problem statement.
2. Create a new file in your preferred text editor or IDE (Integrated Development Environment) and save it as a .py file.
3. Use indentation to create blocks of code, which are then interpreted by Python.
4. Write and execute individual functions using the def keyword.
5. Import necessary modules and libraries using the import statement.
6. Test your script by running it in the terminal or an interactive environment.
7. Refactor and debug your code as needed.
8. Save your changes and run the script again to ensure everything works as expected.
9. Document your script with comments and docstrings for better readability and understanding.


llama_print_timings:        load time =    3952.71 ms
llama_print_timings:      sample time =      80.87 ms /   155 runs   (    0.52 ms per token,  1916.68 tokens per second)
llama_print_timings: prompt eval time =    4022.09 ms /   393 tokens (   10.23 ms per token,    97.71 tokens per second)
llama_print_timings:        eval time =   16340.13 ms /   154 runs   (  106.10 ms per token,     9.42 tokens per second)
llama_print_timings:       total time =   25941.71 ms
Llama.generate: prefix-match hit


 A


llama_print_timings:        load time =    3952.71 ms
llama_print_timings:      sample time =       0.73 ms /     2 runs   (    0.36 ms per token,  2751.03 tokens per second)
llama_print_timings: prompt eval time =    6302.07 ms /   558 tokens (   11.29 ms per token,    88.54 tokens per second)
llama_print_timings:        eval time =     118.67 ms /     1 runs   (  118.67 ms per token,     8.43 tokens per second)
llama_print_timings:       total time =    6500.50 ms
Llama.generate: prefix-match hit


1. Define your purpose and desired output by writing a clear problem statement.2. Create a new file in your preferred text editor or IDE (Integrated Development Environment) and save it as a .py file.3. Use indentation to create blocks of code, which are then interpreted by Python.4. Write and execute individual functions using the def keyword.5. Import necessary modules and libraries using the import statement.6. Test your script by running it in the terminal or an interactive environment.7. Refactor and debug your code as needed.8. Save your changes and run the script again to ensure everything works as expected.9. Document your script with comments and docstrings for better readability and understanding.


llama_print_timings:        load time =    3952.71 ms
llama_print_timings:      sample time =      84.77 ms /   151 runs   (    0.56 ms per token,  1781.27 tokens per second)
llama_print_timings: prompt eval time =    4159.46 ms /   393 tokens (   10.58 ms per token,    94.48 tokens per second)
llama_print_timings:        eval time =   16523.24 ms /   150 runs   (  110.15 ms per token,     9.08 tokens per second)
llama_print_timings:       total time =   27481.59 ms
Llama.generate: prefix-match hit


 A


llama_print_timings:        load time =    3952.71 ms
llama_print_timings:      sample time =       1.48 ms /     2 runs   (    0.74 ms per token,  1351.35 tokens per second)
llama_print_timings: prompt eval time =    6672.53 ms /   550 tokens (   12.13 ms per token,    82.43 tokens per second)
llama_print_timings:        eval time =     137.67 ms /     1 runs   (  137.67 ms per token,     7.26 tokens per second)
llama_print_timings:       total time =    6974.43 ms


In [106]:
# Show the answer returned by n_search.
print(best_answer)

1. Define your purpose and desired output by writing a clear problem statement.2. Create a new file in your preferred text editor or IDE (Integrated Development Environment) and save it as a .py file.3. Use indentation to create blocks of code, which are then interpreted by Python.4. Write and execute individual functions using the def keyword.5. Import necessary modules and libraries using the import statement.6. Test your script by running it in the terminal or an interactive environment.7. Refactor and debug your code as needed.8. Save your changes and run the script again to ensure everything works as expected.9. Document your script with comments and docstrings for better readability and understanding.


In [11]:
### NEXT STEP: TRANSFORM CHECKLISTS INTO AN ACTION TREE

In [74]:
# Longer draft of the code-generation prompt, including candidate functions and
# hand-written test cases.
# NOTE(review): this value is never used -- `temp` is reassigned immediately below.
temp = """You are an autonomous agent which wants to grow it's capabilities by writing code in python.
You care curious about the world and act like a humble and knowlegable human being.
You want to build tools that could be useful for humans.

List of available functions:


List today's top 3 functions to build:
1. function that takes a list of numbers and returns the sum of all positive numbers in it
2. function that takes a list of lists and returns a flattened list of all elements from the inner lists
3. function that takes a string and returns the length of the string without counting any spaces or punctuation marks

Choose one from the list:
1. function that takes a list of numbers and returns the sum of all positive numbers in it

Describe the problem in detail and keep in mind edge cases:
The problem is to find the sum of all positive numbers in a given list of numbers. This means that you should not count any negative or zero numbers, nor any other elements that are not numbers (e.g., strings, booleans, etc.). You should also consider that some lists might be empty, which would mean that there are no positive numbers to sum up. In this case, the function should return 0.
Absolutely sure Test cases:
- Input: [1, 2, 3] -> Output: 6 -> Explanation: The list contains three positive numbers, so the sum is 1 + 2 + 3 = 6
- Input: [] -> Output: 0 -> Explanation: The list is empty, so there are no numbers to sum up, so the answer is 0
- Input: [42, -5, 7] -> Output: 34 -> Explanation: The list contains two positive numbers, so the sum is 42 + 7 = 34
- Input: [1.5, -2, 3] -> Output: 0 -> Explanation: The list contains one positive number and two negative numbers, so the sum is 0 (since it's not zero, the function should handle this case)

Code:
"""
# Shorter prompt that is actually sent to the model. It ends with "def" so the
# completion continues a Python function definition.
temp = """You are an autonomous agent which wants to grow it's capabilities by writing code in python.
You care curious about the world and act like a humble and knowlegable human being.
You want to build tools that could be useful for humans.

Choose one from the list:
1. function that takes a list of numbers and returns the sum of all positive numbers in it

Describe the problem in detail and keep in mind edge cases:
The problem is to find the sum of all positive numbers in a given list of numbers. This means that you should not count any negative or zero numbers, nor any other elements that are not numbers (e.g., strings, booleans, etc.). You should also consider that some lists might be empty, which would mean that there are no positive numbers to sum up. In this case, the function should return 0.

Code:
def"""

# Generation stops at the first run of three consecutive newlines.
tests = llm(temp, stop=["\n\n\n"])

Llama.generate: prefix-match hit


 sum_positive(numbers):
  # Initialize a variable to store the sum of positive numbers
  sum = 0
  # Loop through each element in the list
  for number in numbers:
    # Check if the element is a number using the isinstance() function
    if isinstance(number, (int, float)):
      # If it is a number, add it to the sum
      sum += number
  # Return the final sum or 0 if the list is empty
  return sum


llama_print_timings:        load time =    7476.00 ms
llama_print_timings:      sample time =      80.45 ms /   111 runs   (    0.72 ms per token,  1379.65 tokens per second)
llama_print_timings: prompt eval time =       0.00 ms /     1 tokens (    0.00 ms per token,      inf tokens per second)
llama_print_timings:        eval time =    9584.77 ms /   111 runs   (   86.35 ms per token,    11.58 tokens per second)
llama_print_timings:       total time =   10675.05 ms


In [72]:
# The prompt ended with "def", so put it back in front of the completion.
tests = "def" + tests

In [73]:
# Show the completion with the "def" prefix restored.
print(tests)

def- Input: sum_of_positive(numbers):
  # Initialize a variable to store the sum of positive numbers
  sum = 0
  # Loop through each element in the list
  for num in numbers:
    # Check if the element is a number and positive
    if isinstance(num, (int, float)):
      # If yes, add it to the sum
      sum += num
  # Return the final sum or 0 if the list is empty
  return sum if sum > 0 else 0

Test the function with some examples:
sum_of_positive([1, 2, 3]) -> 6
sum_of_positive([1, -2, 3]) -> 4
sum_of_positive([1, 2, 0]) -> 3


## Convert test descriptions into code

In [57]:
import re

def convert_to_unit_tests(test_string):
    test_cases = test_string.strip().split('\n')
    test_methods = []

    for i, test in enumerate(test_cases):
        input_data = re.search(r'Input: (\[.*?\])', test).group(1)
        output_data = re.search(r'Output: (\d+)', test).group(1)
        explanation = re.search(r'Explanation: (.*)', test).group(1).strip()

        test_method = f"""
def test_case_{i+1}(self):
    \"\"\"{explanation}\"\"\"
    self.assertEqual(sum_positive_numbers({input_data}), {output_data})
"""

        test_methods.append(test_method)

    return '\n'.join(test_methods)

# Sample test descriptions, one per line, in the
# "Input: ... -> Output: ... -> Explanation: ..." layout that
# convert_to_unit_tests parses.
# NOTE(review): the last case expects 0 for [1.5, -2, 3], but the positive
# numbers in that list sum to 4.5 -- the expected value looks wrong; confirm.
test_string = """- Input: [1, 2, 3] -> Output: 6 -> Explanation: The list contains three positive numbers, so the sum is 1 + 2 + 3 = 6
- Input: [] -> Output: 0 -> Explanation: The list is empty, so there are no numbers to sum up, so the answer is 0
- Input: [42, -5, 7] -> Output: 34 -> Explanation: The list contains two positive numbers, so the sum is 42 + 7 = 34
- Input: [1.5, -2, 3] -> Output: 0 -> Explanation: The list contains one positive number and two negative numbers, so the sum is 0 (since it's not zero, the function should handle this case)"""

# Generate the unittest method source and display it.
unit_tests = convert_to_unit_tests(test_string)
print(unit_tests)



def test_case_1(self):
    """The list contains three positive numbers, so the sum is 1 + 2 + 3 = 6"""
    self.assertEqual(sum_positive_numbers([1, 2, 3]), 6)


def test_case_2(self):
    """The list is empty, so there are no numbers to sum up, so the answer is 0"""
    self.assertEqual(sum_positive_numbers([]), 0)


def test_case_3(self):
    """The list contains two positive numbers, so the sum is 42 + 7 = 34"""
    self.assertEqual(sum_positive_numbers([42, -5, 7]), 34)


def test_case_4(self):
    """The list contains one positive number and two negative numbers, so the sum is 0 (since it's not zero, the function should handle this case)"""
    self.assertEqual(sum_positive_numbers([1.5, -2, 3]), 0)

