# Crafting promt

The purpose of this notebook is to make the LLM craft a prompt for a later task. 

In [27]:
# Importing the necessary libraries
import os
import requests
from bs4 import BeautifulSoup
import json
from IPython.display import Markdown, display, update_display
import openai

In [28]:
# CONSTANTS
OLLAMA_API = "http://127.0.0.1:11434//api/chat"
HEADERS = {"Content-Type": "application/json"}
MODEL = "llama3.2"

In [29]:
# Crea una lista de mensajes utilizando el mismo formato que el API de OpenAI
messages = [{"role": "user", "content": "Hola, ¿cómo estás?"}]
payload = {
    "model": MODEL,
    "messages": messages,
    "stream": False
}

In [30]:
# Class for retrieve the Website content
class Website:
    """ Class to retrieve the content of a website """
    def __init__(self, url):
        self.url = url
        response = requests.get(url)
        self.body = response.text
        self.soup = BeautifulSoup(self.body, "html.parser")     
        self.title = self.soup.title.string if self.soup.title else None   

        # Eliminating irrelevant tags
        if self.soup.body:
            for irrelevant in self.soup(["script", "style", "img", "input"]):            
                irrelevant.decompose()
            self.text = self.soup.body.get_text(separator="\n", strip=True)
        else:
            self.text = ""
        
        # Extracting the links
        links = (link.get("href") for link in self.soup.find_all("a"))
        self.links = [link for link in links if link]

    # Method to get the content of the website
    def get_content(self):
        return f"Titulo de la Web: \n{self.title}\n\nTexto de la Web: \n{self.text}\n\nLinks de la Web: \n{self.links}"

Extract relevant links using JSON format

In [31]:
link_system_prompt = "You are provided a list of links from a website. \
You must decide which links are relevant for creating a brochure and which are not \
like links to About, FAQ, etc.\n"
link_system_prompt += "You must respond in a JSON format like this:.\n"                                        
link_system_prompt += """
{
    "links": [
        {"type": "About us page", "url": "https://url.completa/aqui/va/sobre/nosotros"},
        {"type": "Course pagge": "url": "https://otra.url.completa/courses"}
    ]
}
"""
print(link_system_prompt)

You are provided a list of links from a website. You must decide which links are relevant for creating a brochure and which are not like links to About, FAQ, etc.
You must respond in a JSON format like this:.

{
    "links": [
        {"type": "About us page", "url": "https://url.completa/aqui/va/sobre/nosotros"},
        {"type": "Course pagge": "url": "https://otra.url.completa/courses"}
    ]
}



In [32]:
def get_links_user_prompt(website):
    user_prompt = f"Aquí hay una lista de enlaces de la página web {website.url} - "
    user_prompt += "Por favor, decide cuáles de estos son enlaces web relevantes para un folleto sobre la empresa. Responde con la URL https completa en formato JSON. \
No incluyas Términos y Condiciones, Privacidad ni enlaces de correo electrónico.\n"
    user_prompt += "Links (puede que algunos sean links relativos):\n"
    user_prompt += "\n".join(website.links)
    return user_prompt

In [39]:
def get_links(url):
    
    website = Website(url)
    OLLAMA_API = "http://127.0.0.1:11434//api/chat"
    HEADERS = {"Content-Type": "application/json"}
    MODEL = "llama3.2"

    messages = [{"role": "user", "content": get_links_user_prompt(website)}]
    payload = {
        "model": MODEL,
        "messages": messages,
        "stream": False,
        "system": link_system_prompt,
        "format":"json"
    }
    
    response = requests.post(OLLAMA_API, json=payload, headers=HEADERS)
    if response.status_code == 200:
        response_data = response.json()
        if 'message' in response_data and 'content' in response_data['message']:
            print(response_data['message']['content'])
            return response_data
        else:
            print("Unexpected response structure:", response_data)
        
    else:
        print(f"Error: Request failed with status code {response.status_code}")            


response_1 = get_links("https://anthropic.com")
response_1

{

 "enlaces": [
 "https://claude.ai/",
 "https://www.anthropic.com/research#entry:8@1:url",
 "https://www.anthropic.com/claude",
 "https://claude.ai/",
 "https://www.anthropic.com/news/claude-3-7-sonnet",
 "https://www.anthropic.com/claude/sonnet",
 "https://www.anthropic.com/news/visible-extended-thinking",
 "https://www.anthropic.com/news/claude-for-enterprise",
 "https://www.anthropic.com/research/constitutional-ai-harmlessness-from-ai-feedback",
 "https://www.anthropic.com/news/core-views-on-ai-safety",
 "https://www.anthropic.com/jobs",
 "https://www.anthropic.com/team",
 "https://www.anthropic.com/enterprise",
 "https://docs.anthropic.com/pricing#anthropic-api",
 "https://console.anthropic.com/",
 "https://www.anthropic.com/news",
 "https://www.anthropic.com/customers",
 "https://www.anthropic.com/engineering",
 "https://www.anthropic.com/company",
 "https://www.anthropic.com/careers",
 "https://status.anthropic.com/",
 "https://supported-countries.anthropic.com",
 "https://docs

{'model': 'llama3.2',
 'created_at': '2025-03-08T20:12:11.1143028Z',
 'message': {'role': 'assistant',
  'content': '{\n\n "enlaces": [\n "https://claude.ai/",\n "https://www.anthropic.com/research#entry:8@1:url",\n "https://www.anthropic.com/claude",\n "https://claude.ai/",\n "https://www.anthropic.com/news/claude-3-7-sonnet",\n "https://www.anthropic.com/claude/sonnet",\n "https://www.anthropic.com/news/visible-extended-thinking",\n "https://www.anthropic.com/news/claude-for-enterprise",\n "https://www.anthropic.com/research/constitutional-ai-harmlessness-from-ai-feedback",\n "https://www.anthropic.com/news/core-views-on-ai-safety",\n "https://www.anthropic.com/jobs",\n "https://www.anthropic.com/team",\n "https://www.anthropic.com/enterprise",\n "https://docs.anthropic.com/pricing#anthropic-api",\n "https://console.anthropic.com/",\n "https://www.anthropic.com/news",\n "https://www.anthropic.com/customers",\n "https://www.anthropic.com/engineering",\n "https://www.anthropic.com/comp