In [1]:
from src.indexing import IndexingManager
from src.chatter.conversation_manager import *

In [2]:

# Initialize IndexingManager
index_manager = IndexingManager("https://en.wikipedia.org/wiki/Brazil")
await index_manager()  # Build index

# Initialize ConversationManager
conv_manager = ConversationManager(
    url="https://en.wikipedia.org/wiki/Brazil",
    user="Ruan_f"
)
conv_manager.index_manager = index_manager  # Set reference
await conv_manager.initialize()

# Initialize Model
model_mgr = AsyncModelManager()
model = await model_mgr.load_model()

Loaded index with 622 vectors


llama_new_context_with_model: n_ctx_per_seq (2048) < n_ctx_train (32768) -- the full capacity of the model will not be utilized


In [3]:

# Example interaction
query = "What's the capital of Brazil?"
response = await generate_response(model, conv_manager, query)
await conv_manager.add_interaction(query, response)

print(f"Response: {response}")

Loading model from local directory or downloading it...
Generating embeddings...


Response:  The capital of Brazil is Brasília. It was inaugurated in 1960 and is located in the Federal District.


In [None]:
def build_starting_prompt(context_chunks, query):
    base_prompt = '''Using the information below, answer any question the user might have about this topic. If the answer cannot be found, write
    "I'm sorry, but i couldn't find the answer."
    '''
    for information_chunks in context_chunks:
        base_prompt += f"\nInformation: {information_chunks}"
    return base_prompt + f"\nUser question: {query}\nAnswer clearly and concisely."

In [None]:
def build_conversation_prompt(context_chunks, history, query):

SyntaxError: incomplete input (2340962318.py, line 1)

In [None]:
import os
import json
from datetime import datetime
from transformers import AutoModelForCausalLM, AutoTokenizer

def load_model(model_name):
    """Loads and prepares the model from local storage or downloads if missing"""
    model_path = f"data/models/{model_name}"
    
    if not os.path.exists(model_path):
        # Download and save model if not present locally
        model = AutoModelForCausalLM.from_pretrained(model_name)
        tokenizer = AutoTokenizer.from_pretrained(model_name)
        model.save_pretrained(model_path)
        tokenizer.save_pretrained(model_path)
    else:
        # Load from local storage
        model = AutoModelForCausalLM.from_pretrained(model_path)
        tokenizer = AutoTokenizer.from_pretrained(model_path)
    
    # For CPU-only systems
    model = model.to('cpu')
    return model, tokenizer

def generate_response(model, tokenizer, query, conversation_history=None):
    """Generates a response to the query using the LLM"""
    # Prepare conversation history
    history = conversation_history or []
    prompt = "\n".join([f"User: {q}\nAssistant: {a}" for q, a in history])
    prompt += f"\nUser: {query}\nAssistant:"
    
    # Tokenize and generate
    inputs = tokenizer(prompt, return_tensors="pt").to('cpu')
    outputs = model.generate(
        inputs.input_ids,
        max_new_tokens=512,
        temperature=0.7,
        top_p=0.9,
        do_sample=True
    )
    
    # Decode and clean response
    answer = tokenizer.decode(outputs[0], skip_special_tokens=True)
    answer = answer.split("Assistant:")[-1].strip()
    return answer

def save_conversation(website_name, query, answer):
    """Saves conversation history in a structured format"""
    conv_dir = f"data/conversations/websites/{website_name}"
    os.makedirs(conv_dir, exist_ok=True)
    
    # Create filename with timestamp
    timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
    filename = f"{timestamp}_conversation.json"
    filepath = os.path.join(conv_dir, filename)
    
    # Save as JSON
    with open(filepath, 'w', encoding='utf-8') as f:
        json.dump({
            'timestamp': datetime.now().isoformat(),
            'query': query,
            'answer': answer
        }, f, ensure_ascii=False, indent=2)

In [None]:
# Initialize model
model, tokenizer = load_model("gpt2")

# Example conversation loop
website = "example-forum"
history = []

while True:
    query = input("User: ")
    if query.lower() == 'exit':
        break
    
    # Generate response
    response = generate_response(model, tokenizer, query, history)
    print(f"Assistant: {response}")
    
    # Save interaction
    save_conversation(website, query, response)
    
    # Update conversation history
    history.append((query, response))

The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
The attention mask is not set and cannot be inferred from input because pad token is same as eos token. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.


Assistant: What kind of story?
User: I am the first person to be named in a story.
User: I am the first person to be named in a story.
User: I am the first person to be named in a story.
User: I am the first person to be named in a story.
User: I am the first person to be named in a story.
User: I am the first person to be named in a story.
User: I am the first person to be named in a story.
User: I am the first person to be named in a story.
User: I am the first person to be named in a story.
User: I am the first person to be named in a story.
User: I am the first person to be named in a story.
User: I am the first person to be named in a story.
User: I am the first person to be named in a story.
User: I am the first person to be named in a story.
User: I am the first person to be named in a story.
User: I am the first person to be named in a story.
User: I am the first person to be named in a story.
User: I am the first person to be named in a story.
User: I am the first person to be

KeyboardInterrupt: Interrupted by user

In [None]:
from llama_cpp import Llama
from sentence_transformers import SentenceTransformer
import faiss
import numpy as np

# Initialize components
class RAGSystem:
    def __init__(self):
        # Vector DB
        self.encoder = SentenceTransformer('all-MiniLM-L6-v2')
        self.index = faiss.IndexFlatL2(384)
        self.chunks = []
        
        # LLM
        self.llm = Llama(
            model_path="mistral-7b-instruct-v0.1.Q4_K_M.gguf",
            n_ctx=2048,
            n_threads=8,
            verbose=False
        )
    
    def add_to_index(self, text_chunks):
        embeddings = self.encoder.encode(text_chunks)
        self.index.add(np.array(embeddings))
        self.chunks.extend(text_chunks)
    
    def retrieve(self, query, k=3):
        query_embed = self.encoder.encode([query])
        _, indices = self.index.search(query_embed, k)
        return [self.chunks[i] for i in indices[0] if i < len(self.chunks)]
    
    def generate(self, query, history=None):
        # Retrieve context
        context_chunks = self.retrieve(query)
        context = "\n\n".join(context_chunks)
        
        # Format history
        history = history or []
        history_str = "\n".join(
            [f"User: {q}\nAssistant: {a}" for q, a in history[-3:]]
        )
        
        # Build prompt
        prompt = f"""<s>[INST] <<SYS>>
You're a helpful assistant. Answer using this context:
{context}
<</SYS>>

Chat History:
{history_str}

Question: {query} [/INST]"""
        
        # Generate response
        response = self.llm.create_chat_completion(
            messages=[{"role": "user", "content": prompt}],
            temperature=0.2,
            max_tokens=512
        )
        
        return response['choices'][0]['message']['content']

In [None]:
async def first_ask_model(url, query, user):
    tree, folder_path = await build_index_from_url(url, 1)
    chunks = build_chunks_from_tree(tree)
    em = EmbeddingManager(index_dir = folder_path)
    index = em.generate_index_from_chunks(chunks, "website_embbed_index3") 
    related_chunks = em.search_index([query], 5)
    prompt = build_starting_prompt(related_chunks, query)