# Try_2 LLAMA_3: question answering over the LangChain docs with Llama 3.1 (RAG)

## Imports

In [None]:
# pip install --upgrade --quiet langchain langchain-community langchainhub langchain-openai faiss-cpu 'huggingface_hub[cli,torch,tensorflow]' transformers vllm

In [1]:
import requests
from bs4 import BeautifulSoup
from transformers import pipeline, AutoModel
from langchain.embeddings import HuggingFaceEmbeddings
from langchain.vectorstores import Chroma
from langchain.text_splitter import RecursiveCharacterTextSplitter
from tqdm import tqdm  # For progress bar

In [3]:
# Load model directly
from transformers import AutoTokenizer, AutoModelForCausalLM, LlamaTokenizerFast, PreTrainedTokenizerFast

# Checkpoint id shared by the tokenizer and the model so the two cannot drift apart.
model_id = "unsloth/Meta-Llama-3.1-8B-bnb-4bit"

# local_files_only=True restricts the tokenizer load to files already on disk (no download).
tokenizer = PreTrainedTokenizerFast.from_pretrained(model_id, local_files_only=True)
# The model load is left free to fetch the checkpoint if it is not cached yet.
model = AutoModelForCausalLM.from_pretrained(model_id)

Unused kwargs: ['_load_in_4bit', '_load_in_8bit', 'quant_method']. These kwargs are not used in <class 'transformers.utils.quantization_config.BitsAndBytesConfig'>.
`low_cpu_mem_usage` was None, now set to True since model is quantized.


In [None]:
def scrape_langchain_docs(url, start_page=1, end_page=10, timeout=10):
    """Scrape the text of a run of numbered documentation pages.

    Pages are requested as ``{url}/p/{page_num}`` for every ``page_num`` from
    ``start_page`` to ``end_page`` inclusive. On each page the first
    ``<div class="markdown">`` element is located and its text is collected.

    Args:
        url: Base URL that the ``/p/<n>`` suffix is appended to.
        start_page: First page number to request (inclusive).
        end_page: Last page number to request (inclusive).
        timeout: Seconds to wait for each HTTP response. Without a timeout a
            stalled server would block the cell indefinitely.

    Returns:
        A list of strings, one per page that answered with a success status
        and contained a ``markdown`` div. Pages failing either check are
        skipped, so the list may be shorter than the page range (or empty).

    Raises:
        requests.RequestException: On connection failures or timeouts.
    """
    all_text = []
    for page_num in range(start_page, end_page + 1):
        page_url = f"{url}/p/{page_num}"  # Adjust URL format as needed
        response = requests.get(page_url, timeout=timeout)
        if not response.ok:
            # Do not parse the body of an error response (404, 500, ...) as documentation.
            continue
        soup = BeautifulSoup(response.text, 'html.parser')

        # Extract text content (modify selectors as needed)
        content_div = soup.find('div', class_='markdown')
        if content_div is not None:
            text = content_div.get_text(separator="\n", strip=True)
            all_text.append(text)
    return all_text

In [None]:
def create_langchain_vectordb(docs, embeddings_model, db_path="langchain_docs.db",
                              chunk_size=1000, chunk_overlap=200):
    """Split raw texts into chunks and index them in a persisted Chroma store.

    Args:
        docs: List of strings (one per scraped page) to split and index.
        embeddings_model: Embedding object handed to Chroma as ``embedding``.
        db_path: Directory passed to Chroma as ``persist_directory``.
        chunk_size: Maximum chunk length, measured with ``len`` (characters).
        chunk_overlap: Number of characters shared between adjacent chunks.

    Returns:
        The Chroma vector store built from the chunked documents.
    """
    text_splitter = RecursiveCharacterTextSplitter(
        chunk_size=chunk_size,
        chunk_overlap=chunk_overlap,
        length_function=len,
    )
    # create_documents wraps each chunk in a Document, which from_documents expects.
    chunks = text_splitter.create_documents(docs)

    vectordb = Chroma.from_documents(
        chunks,
        embedding=embeddings_model,
        persist_directory=db_path,
    )
    # NOTE(review): newer Chroma releases reportedly persist automatically when a
    # persist_directory is given - confirm whether this explicit call is still needed.
    vectordb.persist()
    return vectordb

In [None]:
# Checkpoint id of the generation model (the same checkpoint the tokenizer/model
# objects were loaded from earlier in the notebook).
model_name = "unsloth/Meta-Llama-3.1-8B-bnb-4bit"
# Hand the already-loaded objects to pipeline() instead of a model name, so the
# checkpoint is not loaded into memory a second time.
generator = pipeline('text-generation', model=model, tokenizer=tokenizer)
embeddings_model = HuggingFaceEmbeddings(model_name="sentence-transformers/all-mpnet-base-v2")

def get_context_from_docs(query, vectordb, k=3):
    """Return the text of the ``k`` chunks most similar to ``query``.

    Args:
        query: Text to search the vector store with.
        vectordb: Object exposing ``similarity_search(query, k=...)`` whose
            results carry a ``page_content`` attribute.
        k: Number of chunks to retrieve (defaults to 3).

    Returns:
        A list with the ``page_content`` of each hit, in the order returned
        by the store.
    """
    results = vectordb.similarity_search(query, k=k)
    return [doc.page_content for doc in results]

def answer_question(question, vectordb):
    """Answer a question with retrieval-augmented generation.

    Retrieves context chunks for ``question`` via ``get_context_from_docs``,
    places them in a prompt, and runs the module-level ``generator`` pipeline.

    Args:
        question: The user's question.
        vectordb: Vector store passed through to ``get_context_from_docs``.

    Returns:
        The generated answer text only (the prompt is not echoed back).
    """
    context_chunks = get_context_from_docs(question, vectordb)
    # Join the chunks into plain text; interpolating the list itself would put
    # its Python repr (brackets, quotes, escaped newlines) into the prompt.
    context = "\n\n".join(context_chunks)
    prompt = f"""
    Answer the following question based on the provided context:

    Context:
    {context}

    Question: {question}
    Answer:
    """
    outputs = generator(
        prompt,
        # max_length would count the prompt tokens too, and the retrieved
        # context alone can exceed 200 tokens; bound only the new tokens.
        max_new_tokens=200,
        # Return just the continuation rather than prompt + continuation.
        return_full_text=False,
    )
    return outputs[0]['generated_text'].strip()

## Main

In [None]:
if __name__ == "__main__":
    langchain_docs_url = "https://python.langchain.com/docs/get_started/introduction"  # Correct URL
    all_docs = scrape_langchain_docs(langchain_docs_url, start_page=1, end_page=5) # Scrape first 5 pages
    if not all_docs:
        # Fail here with a clear message instead of building an index over nothing.
        raise RuntimeError(
            f"No documentation text could be scraped from {langchain_docs_url}; "
            "check the URL format and the HTML selector in scrape_langchain_docs."
        )

    # Create the vector database (this may take a while depending on the data)
    vectordb = create_langchain_vectordb(all_docs, embeddings_model)

    # Interactive loop: a blank line, "exit", "quit", or an interrupt ends it,
    # so the cell can finish without killing the kernel.
    while True:
        try:
            user_question = input("Ask a question about LangChain (blank line to quit): ").strip()
        except (EOFError, KeyboardInterrupt):
            break
        if user_question.lower() in {"", "exit", "quit"}:
            break
        answer = answer_question(user_question, vectordb)
        print(answer)