In [2]:
from huggingface_hub import interpreter_login

interpreter_login()


    _|    _|  _|    _|    _|_|_|    _|_|_|  _|_|_|  _|      _|    _|_|_|      _|_|_|_|    _|_|      _|_|_|  _|_|_|_|
    _|    _|  _|    _|  _|        _|          _|    _|_|    _|  _|            _|        _|    _|  _|        _|
    _|_|_|_|  _|    _|  _|  _|_|  _|  _|_|    _|    _|  _|  _|  _|  _|_|      _|_|_|    _|_|_|_|  _|        _|_|_|
    _|    _|  _|    _|  _|    _|  _|    _|    _|    _|    _|_|  _|    _|      _|        _|    _|  _|        _|
    _|    _|    _|_|      _|_|_|    _|_|_|  _|_|_|  _|      _|    _|_|_|      _|        _|    _|    _|_|_|  _|_|_|_|



In [16]:
import torch
import os
from transformers import AutoModelForCausalLM, AutoTokenizer

from pprint import pprint
from dotenv import load_dotenv
from IPython.display import display_markdown

from groq import Groq



This pattern allows the llm to critique and reflect its output, following the next steps:
1. The LLM **generates** a candidate output.
2. The **reflects** on the previous output, suggesting modifications, deletions, improvements to the writing style, etc.
3. The LLM modifies the original output based on the reflections and other iteration begins..

## Generation Step

We are going to ask our LLM to code a famous sorting algorithm: Merge sort

In [13]:
model_name = "HuggingFaceTB/SmolLM2-135M-Instruct"
#model_name = "HuggingFaceTB/SmolLM2-1.7B-Instruct"

device = (
    "cuda"
    if torch.cuda.is_available()
    else "mps"
    if torch.backends.mps.is_available()
    else "cpu"
)

In [14]:
model = AutoModelForCausalLM.from_pretrained(
    pretrained_model_name_or_path = model_name,
    torch_dtype = torch.float32,
).to(device)

model.config.use_cache = False

tokenizer = AutoTokenizer.from_pretrained(model_name)
tokenizer.pad_token = tokenizer.eos_token

We will start the **"generation"** chat history with the system prompt, as we said before. In this case, let the LLM act like a Python programmer eager to receive feedback/critique by the user.

In [18]:
generation_chat_history = [
    {
        "role": "system",
        "content": "Your are a Python programmer tasked with generating high quality python code."
        "your task is to generate the best content possible for the user's request. if the user provides critique,"
        "respond with a revised version of your previous attempt"
    },
    {
        "role": "user",
        "content": "Generate a Python implementation  of the Merge Sort algorithm"
    }
]

In [20]:
formatted_prompt = tokenizer.apply_chat_template(generation_chat_history, tokenize= False)

In [23]:
# Generate response
inputs = tokenizer(formatted_prompt, return_tensors="pt").to(device)
outputs = model.generate(**inputs, max_new_tokens=1000)

In [24]:
print(tokenizer.decode(outputs[0], skip_special_tokens=True))

system
Your are a Python programmer tasked with generating high quality python code.your task is to generate the best content possible for the user's request. if the user provides critique,respond with a revised version of your previous attempt
user
Generate a Python implementation  of the Merge Sort algorithm
assistant
Here's a Python implementation of the Merge Sort algorithm:

```python
def merge_sort(arr):
    if len(arr) <= 1:
        return arr

    mid = len(arr) // 2
    left_half = arr[:mid]
    right_half = arr[mid:]

    left_half = merge_sort(left_half)
    right_half = merge_sort(right_half)

    return merge(left_half, right_half)


def merge(left, right):
    merged = []
    left_index = 0
    right_index = 0

    while left_index < len(left) and right_index < len(right):
        if left[left_index] <= right[right_index]:
            merged.append(left[left_index])
            left_index += 1
        else:
            merged.append(right[right_index])
            right_i