In [10]:
from llama_cpp import Llama, LlamaTokenizer
from transformers import AutoTokenizer

model = "../model/mistral-7b-instruct-v0.1.Q6_K.gguf"  # instruction model`

In [2]:
llm = Llama(
    model_path=model, 
    n_ctx=8192, 
    n_batch=512, 
    n_threads=7, 
    n_gpu_layers=2, 
    verbose=True, 
    seed=42
)
system = """
Follow the instructions below to complete the task.
"""

user = f"Create a python script to scan a directory and return the files of the directory that match a particular case "
f"insensitive string 'Mistral'. Add docstring as well."


llama_model_loader: loaded meta data with 20 key-value pairs and 291 tensors from ../model/mistral-7b-instruct-v0.1.Q6_K.gguf (version GGUF V2)
llama_model_loader: Dumping metadata keys/values. Note: KV overrides do not apply in this output.
llama_model_loader: - kv   0:                       general.architecture str              = llama
llama_model_loader: - kv   1:                               general.name str              = mistralai_mistral-7b-instruct-v0.1
llama_model_loader: - kv   2:                       llama.context_length u32              = 32768
llama_model_loader: - kv   3:                     llama.embedding_length u32              = 4096
llama_model_loader: - kv   4:                          llama.block_count u32              = 32
llama_model_loader: - kv   5:                  llama.feed_forward_length u32              = 14336
llama_model_loader: - kv   6:                 llama.rope.dimension_count u32              = 128
llama_model_loader: - kv   7:                 lla

"insensitive string 'Mistral'. Add docstring as well."

In [4]:
llm.tokenize((system+user).encode("utf-8", errors="ignore"))

[1,
 28705,
 13,
 28093,
 272,
 11382,
 3624,
 298,
 4160,
 272,
 3638,
 28723,
 13,
 3998,
 264,
 21966,
 6767,
 298,
 12794,
 264,
 9118,
 304,
 604,
 272,
 5373,
 302,
 272,
 9118,
 369,
 2918,
 264,
 2830,
 1222,
 28705]

In [5]:
LlamaTokenizer(llm).encode(system + user)

[1,
 28705,
 13,
 28093,
 272,
 11382,
 3624,
 298,
 4160,
 272,
 3638,
 28723,
 13,
 3998,
 264,
 21966,
 6767,
 298,
 12794,
 264,
 9118,
 304,
 604,
 272,
 5373,
 302,
 272,
 9118,
 369,
 2918,
 264,
 2830,
 1222,
 28705]

In [6]:
messages = [
    {"role": "user", "content": "What is your favourite condiment?"},
    {"role": "assistant", "content": "Well, I'm quite partial to a good squeeze of fresh lemon juice. It adds just the right amount of zesty flavour to whatever I'm cooking up in the kitchen!"},
    {"role": "user", "content": "Do you have mayonnaise recipes?"}
]

In [12]:
llm.create_chat_completion(

[{'role': 'user', 'content': 'What is your favourite condiment?'},
 {'role': 'assistant',
  'content': "Well, I'm quite partial to a good squeeze of fresh lemon juice. It adds just the right amount of zesty flavour to whatever I'm cooking up in the kitchen!"},
 {'role': 'user', 'content': 'Do you have mayonnaise recipes?'}]

In [13]:
llm.create_chat_completion(messages, max_tokens=4096)

Llama.generate: prefix-match hit

llama_print_timings:        load time =   12824.35 ms
llama_print_timings:      sample time =      17.72 ms /   210 runs   (    0.08 ms per token, 11853.02 tokens per second)
llama_print_timings: prompt eval time =       0.00 ms /     1 tokens (    0.00 ms per token,      inf tokens per second)
llama_print_timings:        eval time =   17551.17 ms /   210 runs   (   83.58 ms per token,    11.97 tokens per second)
llama_print_timings:       total time =   17906.92 ms /   211 tokens


{'id': 'chatcmpl-33d50ad9-6a6b-4c22-9e8f-9555e61da741',
 'object': 'chat.completion',
 'created': 1708546358,
 'model': '../model/mistral-7b-instruct-v0.1.Q6_K.gguf',
 'choices': [{'index': 0,
   'message': {'role': 'assistant',
    'content': "Of course! Here's a simple recipe for homemade mayonnaise that you can try:\n\nIngredients:\n\n* 2 large egg yolks\n* 1 tablespoon Dijon mustard\n* 1 tablespoon white wine vinegar\n* 1 tablespoon honey\n* 1 cup vegetable oil\n* Salt and pepper to taste\n\nInstructions:\n\n1. In a medium-sized bowl, whisk together the egg yolks, Dijon mustard, white wine vinegar, and honey until well combined.\n2. Slowly drizzle in the vegetable oil while whisking constantly until the mixture thickens and emulsifies.\n3. Season with salt and pepper to taste.\n4. Serve immediately or transfer to an airtight container and refrigerate until ready to use.\n\nThat's it! Enjoy your homemade mayonnaise on your favorite sandwich or as a dip for your veggies."},
   'finis

In [7]:
a = llm.create_completion(f"<s>[INST] {system} [/INST]</s>{user}", echo=True, stream=False, max_tokens=4096)['choices'][0]['text'] #.replace(message, '')


llama_print_timings:        load time =   12824.35 ms
llama_print_timings:      sample time =      14.32 ms /   144 runs   (    0.10 ms per token, 10055.87 tokens per second)
llama_print_timings: prompt eval time =   12823.71 ms /    43 tokens (  298.23 ms per token,     3.35 tokens per second)
llama_print_timings:        eval time =   12117.31 ms /   143 runs   (   84.74 ms per token,    11.80 tokens per second)
llama_print_timings:       total time =   25163.57 ms /   186 tokens


In [8]:
print(a)

<s>[INST] 
Follow the instructions below to complete the task.
 [/INST]</s>Create a python script to scan a directory and return the files of the directory that match a particular case 
insensitive pattern.

```python
import os
def find_files(directory, pattern):
    matching_files = []
    for file in os.listdir(directory):
        if file.lower().endswith(pattern.lower()):
            matching_files.append(file)
    return matching_files
```
Usage:
```python
files = find_files('directory', 'extension')
print(files)
```
Note: This will only work for directories with a single level directory structure. If the directory contains nested directories, you will need to modify the code to recursively search for files.


In [9]:
b = llm.create_completion(system+user, echo=True, stream=False, max_tokens=4096)['choices'][0]['text'] #.replace(message, '')
print(b)

Llama.generate: prefix-match hit

llama_print_timings:        load time =   12824.35 ms
llama_print_timings:      sample time =      58.06 ms /   574 runs   (    0.10 ms per token,  9886.15 tokens per second)
llama_print_timings: prompt eval time =    2665.45 ms /    33 tokens (   80.77 ms per token,    12.38 tokens per second)
llama_print_timings:        eval time =   41401.50 ms /   573 runs   (   72.25 ms per token,    13.84 tokens per second)
llama_print_timings:       total time =   45065.32 ms /   606 tokens



Follow the instructions below to complete the task.
Create a python script to scan a directory and return the files of the directory that match a particular case  sensitive pattern and have not been modified in the last 7 days.
1. First, we will import the os module which provides access to the operating system directory structure and the time module which provides time manipulation functions. We also import the shutil module for moving files from one place to another.
```python
import os
import time
import shutil
```
2. Next we define the function `scan_directory` which takes two arguments- `directory_path` and `file_pattern`. `directory_path` is the path of the directory whose files we want to scan and `file_pattern` is the pattern we want to search for in the file names. 
```python
def scan_directory(directory_path, file_pattern):
```
3. Inside the function `scan_directory`, we use a `for` loop to iterate over all the files in the specified directory using `os.listdir()` method. We

In [46]:
message = f"<s>[INST] {system} [/INST]</s>{user}"
output = llm.create_completion(message, echo=True, stream=False, max_tokens=4096)
output_cleaned = output['choices'][0]['text'].replace(message, '')


llama_print_timings:        load time =    9920.47 ms
llama_print_timings:      sample time =      23.09 ms /   224 runs   (    0.10 ms per token,  9699.07 tokens per second)
llama_print_timings: prompt eval time =    3743.07 ms /    40 tokens (   93.58 ms per token,    10.69 tokens per second)
llama_print_timings:        eval time =   15830.62 ms /   223 runs   (   70.99 ms per token,    14.09 tokens per second)
llama_print_timings:       total time =   19946.21 ms /   263 tokens


In [40]:
llm.model

4442618000

In [43]:
llm._model

<llama_cpp._internals._LlamaModel at 0x282cef350>

In [39]:
llm._model.tokenize(system+user, True, False)

<llama_cpp._internals._LlamaModel at 0x282cef350>

In [47]:
print(output['usage'])
print(output_cleaned)

{'prompt_tokens': 40, 'completion_tokens': 223, 'total_tokens': 263}


1. Import os module for operating system functionality.
2. Define the function scan_directory with two parameters: directory_path (string) and pattern (string).
3. Use the os.listdir() method to get a list of all files in the directory.
4. Use the filter() method to filter out the files that don't match the pattern.
5. Use the map() method to convert the file names into absolute paths using the join() method of the os module.
6. Return the list of absolute paths of the matching files.

Example:

```python
import os

def scan_directory(directory_path, pattern):
    matching_files = []
    for file in os.listdir(directory_path):
        if file.lower() == pattern.lower():
            matching_files.append(os.path.join(directory_path, file))
    return matching_files

print(scan_directory('path/to/directory', 'example'))
```


In [11]:
output.keys()

dict_keys(['id', 'object', 'created', 'model', 'choices', 'usage'])

In [12]:
output['usage']

{'prompt_tokens': 60, 'completion_tokens': 4096, 'total_tokens': 4156}

In [56]:
import os
from typing import List, Optional

In [57]:
def find_files(directory: str, file_search_term: Optional[str] = None) -> List[str]:
    """
    Scans a given directory and returns a list of files that match a given search term.
    :param directory: str - The path of the directory to be scanned
    :param file_search_term: Optional[str] - The search term to be matched in file names. If None, all files will be returned
    :return: List[str] - A list of file names matching the search term or all files in the directory if file_search_term is None
    """
    file_list = []
    if file_search_term is None:
        for file in os.listdir(directory):
            file_list.append(file)
    else:
        for root, dirs, files in os.walk(directory):
            for file in files:
                if file_search_term in file:
                    file_list.append(os.path.join(root, file))
    return file_list

In [59]:
find_files("model/", "Mistral")

[]

In [65]:
import os
import shutil
import re

def find_files_with_string(directory, pattern):
    """
    This function takes in two arguments, a directory and a pattern, and returns a list of all
    the files in that directory which contain the given pattern in their filename or content.
    """
    file_list = []
    for root, dirs, files in os.walk(directory):
        for filename in files:
            filepath = os.path.join(root, filename)
            if re.search(pattern, filename, re.IGNORECASE):
                file_list.append(filepath)
    return file_list

In [74]:
find_files_with_string("model/", "Mistral")

['model/mistral-7b-instruct-v0.1.Q6_K.gguf']