### install

In [None]:
%pip install -qU langchain-ollama
%pip install -qU langchain-community beautifulsoup4
%pip install transformers


In [None]:
%pip install transformers


### docs


In [None]:

2024-12-15 21:12:44  [INFO] [LM STUDIO SERVER] Supported endpoints:
2024-12-15 21:12:44  [INFO] [LM STUDIO SERVER] ->	GET  http://localhost:1234/v1/models
2024-12-15 21:12:44  [INFO] [LM STUDIO SERVER] ->	POST http://localhost:1234/v1/chat/completions
2024-12-15 21:12:44  [INFO] [LM STUDIO SERVER] ->	POST http://localhost:1234/v1/completions
2024-12-15 21:12:44  [INFO] [LM STUDIO SERVER] ->	POST http://localhost:1234/v1/embeddings



### imports

In [5]:
from langchain.vectorstores import Chroma
from langchain_ollama import OllamaEmbeddings
from langchain_core.prompts import ChatPromptTemplate
from langchain_ollama.llms import OllamaLLM
import requests
import re
from bs4 import BeautifulSoup
from typing import List
import tiktoken
from langchain_community.document_loaders import RecursiveUrlLoader

# Base URL of the local LM Studio server; the endpoints used below live under /v1.
lm_studio_url = "http://localhost:1234/v1"

### testing code

In [None]:
# Crawl the llama3.2 tag listing with the loader's default settings.
# Options that can be passed here when tuning the crawl: max_depth, use_async,
# extractor, metadata_extractor, exclude_dirs, timeout, check_response_status,
# continue_on_failure, prevent_outside, base_url.
loader = RecursiveUrlLoader(url="https://ollama.com/library/llama3.2/tags")

docs = loader.load()

# Display the metadata recorded for the first loaded document.
docs[0].metadata


In [None]:
def bs4_extractor(html):
    """Return the text of an HTML document with blank-line runs collapsed.

    The markup is parsed with BeautifulSoup's "lxml" parser, every run of two
    or more consecutive newlines in the resulting text is reduced to exactly
    two, and leading/trailing whitespace is stripped.
    """
    page_text = BeautifulSoup(html, "lxml").text
    collapsed = re.sub(r"\n\n+", "\n\n", page_text)
    return collapsed.strip()

# Crawl the site again, this time converting each page's HTML to plain text
# through bs4_extractor.
loader = RecursiveUrlLoader(
    url="https://bflixhd.lol/home/",
    extractor=bs4_extractor,
)
docs = loader.load()

In [None]:
# Display the extracted content of the first loaded document.
docs[0].page_content


### get movies

In [22]:
def load_data_from_url(url, timeout=10):
    """Download a page and return its text content.

    Args:
        url: Address of the page to fetch.
        timeout: Seconds to wait for the server before giving up. Without a
            timeout ``requests`` waits indefinitely, which would hang the cell
            on an unresponsive host.

    Returns:
        The string produced by ``BeautifulSoup.get_text()`` for the page,
        parsed with the built-in "html.parser".

    Raises:
        requests.exceptions.RequestException: On connection problems, a
            timeout, or an HTTP error status (via ``raise_for_status``).
    """
    response = requests.get(url, timeout=timeout)
    response.raise_for_status()
    soup = BeautifulSoup(response.content, "html.parser")
    return soup.get_text()

def split_into_chunks(data, max_tokens, tokenizer):
    """Split text into consecutive pieces of at most ``max_tokens`` tokens.

    Args:
        data: Text to split.
        max_tokens: Maximum number of tokens per chunk; must be at least 1.
        tokenizer: Object exposing ``encode(text) -> sequence of token ids``
            and ``decode(token ids) -> text``.

    Returns:
        List of decoded chunk strings in original order. Empty when ``data``
        encodes to no tokens.

    Raises:
        ValueError: If ``max_tokens`` is smaller than 1. A zero step would
            otherwise fail inside ``range`` with an unrelated message, and a
            negative one would silently return no chunks at all.
    """
    if max_tokens < 1:
        raise ValueError(f"max_tokens must be a positive integer, got {max_tokens!r}")

    tokens = tokenizer.encode(data)
    # NOTE(review): slicing on token boundaries may cut a character that spans
    # several tokens; confirm the tokenizer's decode() tolerates partial input.
    return [
        tokenizer.decode(tokens[start:start + max_tokens])
        for start in range(0, len(tokens), max_tokens)
    ]


In [None]:
# Scores

# Settings for this run.
url = "https://bflixhd.lol/filters"  # Replace with the page to analyse
# An earlier prompt variant asked for title, release year, score/votes/views
# and synopsis instead.
# The prompt is assembled line by line so its leading newline and the trailing
# spaces on the last line stay visible and intact.
question = (
    "\n"
    "analise o no contexto dado.\n"
    "quantos filmes e séries tem no contexto e quais sao eles?\n"
    "Extraia os nomes de todos os filmes e series e seus scores.\n"
    "me de tambem o total de filmes e series.  \n"
)

MAX_CHUNK_TOKENS = 4096  # Upper bound on the number of tokens in one chunk

# Download the page and cut it into token-bounded chunks; on any failure the
# error is reported and the chunk list stays empty.
chunks = []
try:
    raw_data = load_data_from_url(url)
    tokenizer = tiktoken.get_encoding("gpt2")
    chunks = split_into_chunks(raw_data, MAX_CHUNK_TOKENS, tokenizer)
except Exception as e:
    print(f"Erro ao carregar ou processar os dados: {e}")
print(chunks, len(chunks), sep="\n")


In [None]:

# Chat-completions endpoint of the LM Studio server.
completions_url = f"{lm_studio_url}/chat/completions"

# Prompt layout: the page chunk first, then the question.
prompt_template = "{data}\n\n{question}"

model = "mistral-7b-instruct-v0.3"

# One shared session is used for all per-chunk requests.
with requests.Session() as session:
    # Model answers and per-chunk token counts, collected in chunk order.
    responses = []
    token_counts = []

    # The value printed is the number of chunks, so the label says so.
    print(f"Número de chunks: {len(chunks)}")

    for chunk in chunks:
        token_count = len(tokenizer.encode(chunk))
        print(token_count)
        token_counts.append(token_count)
        prompt = prompt_template.format(data=chunk, question=question)

        # Request body for the chat-completions endpoint.
        payload = {
            "model": model,
            "messages": [
                # NOTE(review): this instruction is sent with the "assistant"
                # role; confirm whether "system" was intended for this model.
                {"role": "assistant", "content": "Você é um assistente útil."},
                {"role": "user", "content": prompt}
            ]
        }

        try:
            response = session.post(completions_url, json=payload)

            # Turn HTTP error statuses into exceptions handled below.
            response.raise_for_status()

            # Tolerate a missing or empty "choices" list and a null "message"
            # or "content" instead of failing with IndexError/AttributeError.
            choices = response.json().get('choices') or [{}]
            content_response = (choices[0].get('message') or {}).get('content') or ''

            if content_response:
                print("Resposta do modelo:", content_response)
                responses.append(content_response)
            else:
                print("Resposta do modelo vazia ou malformada.")

        # ValueError covers a body that is not valid JSON on requests versions
        # whose JSON error is not a RequestException subclass.
        except (requests.exceptions.RequestException, ValueError) as e:
            print(f"Erro ao gerar resposta: {e}")

# Join the per-chunk answers into one string.
final_answer = " ".join(responses)

# Show the combined answer followed by the token count of every chunk.
print("LLM Final Answer:")
print(final_answer)

print("\nToken Counts per Chunk:")
print(token_counts)


#### managing tokens

In [None]:
import requests
from transformers import AutoTokenizer

# Request settings for the token-managed run.
model = "mistral-7b-instruct-v0.3"
completions_url = f"{lm_studio_url}/chat/completions"
prompt_template = "{data}\n\n{question}"

# Token budget: from the overall limit subtract the tokens of the fixed
# instruction and of the question, plus 512 tokens, the same value requested
# as "max_tokens" for the reply in the payload.
tokenizer = AutoTokenizer.from_pretrained("gpt2")
max_tokens = 4096
context_tokens = sum(
    len(tokenizer.encode(text))
    for text in ("Você é um assistente útil.", question)
)
max_chunk_size = max_tokens - context_tokens - 512

def process_chunk(session, chunk):
    """Query the model about one chunk, re-splitting it if it exceeds the budget.

    Args:
        session: ``requests.Session`` used to POST to ``completions_url``.
        chunk: Text placed in front of the question in the prompt.

    Returns:
        List of non-empty answer strings, one per sub-chunk that produced a
        reply. Sub-chunks whose request failed are reported and skipped.

    Raises:
        ValueError: If the module-level ``max_chunk_size`` is not positive,
            which means the fixed prompt and the reply allowance already use
            up the whole token budget.
    """
    if max_chunk_size < 1:
        raise ValueError(
            f"max_chunk_size must be positive, got {max_chunk_size}; "
            "the question and reply allowance leave no room for data"
        )

    # Encode once and slice the ids instead of re-encoding for every slice.
    token_ids = tokenizer.encode(chunk)
    if len(token_ids) > max_chunk_size:
        split_chunks = [
            tokenizer.decode(token_ids[i:i + max_chunk_size])
            for i in range(0, len(token_ids), max_chunk_size)
        ]
    else:
        split_chunks = [chunk]

    responses = []
    for sub_chunk in split_chunks:
        prompt = prompt_template.format(data=sub_chunk, question=question)
        payload = {
            "model": model,
            "messages": [
                # NOTE(review): this instruction is sent with the "assistant"
                # role; confirm whether "system" was intended for this model.
                {"role": "assistant", "content": "Você é um assistente útil."},
                {"role": "user", "content": prompt}
            ],
            "max_tokens": 512
        }
        try:
            response = session.post(completions_url, json=payload)
            response.raise_for_status()
            # Tolerate a missing or empty "choices" list and null fields.
            choices = response.json().get('choices') or [{}]
            content = (choices[0].get('message') or {}).get('content') or ''
            if content:
                responses.append(content)
        except (requests.exceptions.RequestException, ValueError) as e:
            # Stay best-effort, but report the failure instead of hiding it.
            print(f"Erro ao gerar resposta: {e}")
    return responses

# Send every chunk through process_chunk over one shared HTTP session,
# accumulating the answers in chunk order.
responses = []
with requests.Session() as session:
    for chunk in chunks:
        responses += process_chunk(session, chunk)

# Join the partial answers with single spaces and show the result.
final_answer = " ".join(responses)
print(final_answer)

#### code async

In [None]:
import httpx
from transformers import AutoTokenizer
import asyncio

# Request settings for the asynchronous run.
model = "mistral-7b-instruct-v0.3"
completions_url = f"{lm_studio_url}/chat/completions"
prompt_template = "{data}\n\n{question}"

# Token budget: from the overall limit subtract the tokens of the fixed
# instruction and of the question, plus 512 tokens, the same value requested
# as "max_tokens" for the reply in the payload.
tokenizer = AutoTokenizer.from_pretrained("gpt2")
max_tokens = 4096
context_tokens = sum(
    len(tokenizer.encode(text))
    for text in ("Você é um assistente útil.", question)
)
max_chunk_size = max_tokens - context_tokens - 512

async def process_chunk(session, chunk):
    """Asynchronously query the model about one chunk, re-splitting if needed.

    Args:
        session: ``httpx.AsyncClient`` used to POST to ``completions_url``.
        chunk: Text placed in front of the question in the prompt.

    Returns:
        List of non-empty answer strings, one per sub-chunk that produced a
        reply. Sub-chunks whose request failed are reported and skipped.

    Raises:
        ValueError: If the module-level ``max_chunk_size`` is not positive,
            which means the fixed prompt and the reply allowance already use
            up the whole token budget.
    """
    if max_chunk_size < 1:
        raise ValueError(
            f"max_chunk_size must be positive, got {max_chunk_size}; "
            "the question and reply allowance leave no room for data"
        )

    # Encode once and slice the ids instead of re-encoding for every slice.
    token_ids = tokenizer.encode(chunk)
    if len(token_ids) > max_chunk_size:
        split_chunks = [
            tokenizer.decode(token_ids[i:i + max_chunk_size])
            for i in range(0, len(token_ids), max_chunk_size)
        ]
    else:
        split_chunks = [chunk]

    responses = []
    for sub_chunk in split_chunks:
        prompt = prompt_template.format(data=sub_chunk, question=question)
        payload = {
            "model": model,
            "messages": [
                # NOTE(review): this instruction is sent with the "assistant"
                # role; confirm whether "system" was intended for this model.
                {"role": "assistant", "content": "Você é um assistente útil."},
                {"role": "user", "content": prompt}
            ],
            "max_tokens": 512
        }
        try:
            response = await session.post(completions_url, json=payload)
            response.raise_for_status()
            # Tolerate a missing or empty "choices" list and null fields.
            choices = response.json().get('choices') or [{}]
            content = (choices[0].get('message') or {}).get('content') or ''
            if content:
                responses.append(content)
        # httpx.HTTPError is the common base of RequestError (transport and
        # timeout failures) and HTTPStatusError (raised by raise_for_status),
        # so error statuses no longer escape and abort the other chunks.
        # ValueError covers a body that is not valid JSON.
        except (httpx.HTTPError, ValueError) as e:
            print(f"Erro ao gerar resposta: {e}")
    return responses

async def main():
    """Query the model for all chunks concurrently and print the joined answers.

    Every chunk is handed to ``process_chunk`` and the resulting coroutines are
    awaited together with ``asyncio.gather``, which returns results in the
    order of ``chunks``. The answers are joined with single spaces and printed.
    """
    # httpx applies a 5-second timeout by default, which is shorter than a
    # typical generation; keep only a connect limit so an unreachable server
    # fails fast while a slow reply is still awaited.
    timeout = httpx.Timeout(None, connect=10.0)

    responses = []
    async with httpx.AsyncClient(timeout=timeout) as session:
        tasks = [process_chunk(session, chunk) for chunk in chunks]
        results = await asyncio.gather(*tasks)
        for result in results:
            responses.extend(result)

    final_answer = " ".join(responses)
    print(final_answer)

# Run the pipeline. asyncio.run() raises RuntimeError when the current thread
# already has a running event loop, which is the case inside a Jupyter kernel.
# In that situation main() is executed on its own loop in a worker thread and
# the cell blocks until it finishes. (In a notebook, `await main()` is the
# equivalent shorthand.)
try:
    asyncio.get_running_loop()
except RuntimeError:
    # No loop is running in this thread (e.g. executed as a plain script).
    asyncio.run(main())
else:
    from concurrent.futures import ThreadPoolExecutor

    with ThreadPoolExecutor(max_workers=1) as pool:
        # result() waits for completion and re-raises any error from main().
        pool.submit(asyncio.run, main()).result()
