In [None]:
# loader.py
import json
from langchain.docstore.document import Document

def load_service_docs(path="services.json"):
    """Load service records from a JSON file and wrap each one in a ``Document``.

    Args:
        path: Location of the JSON file. It is expected to hold a list of
            objects, each with ``"title"`` and ``"description"`` keys.

    Returns:
        A list with one ``Document`` per record. ``page_content`` is
        ``"<title>: <description>"`` and the metadata carries the title.

    Raises:
        OSError: If the file cannot be opened.
        json.JSONDecodeError: If the file does not contain valid JSON.
        KeyError: If a record lacks ``"title"`` or ``"description"``.
    """
    # Pin the encoding: without it open() falls back to the platform default,
    # which can mangle or reject non-ASCII titles/descriptions.
    with open(path, "r", encoding="utf-8") as f:
        services = json.load(f)

    return [
        Document(
            page_content=f"{s['title']}: {s['description']}",
            metadata={"title": s["title"]},
        )
        for s in services
    ]

In [2]:
# vector_store.py
from langchain.vectorstores import FAISS
from langchain.embeddings import HuggingFaceEmbeddings

def create_vectorstore(
    docs_path="services.json",
    index_dir="faiss_index",
    model_name="sentence-transformers/all-MiniLM-L6-v2",
):
    """Embed the service documents, build a FAISS index and save it to disk.

    All parameters default to the values that were previously hard-coded, so
    ``create_vectorstore()`` behaves as before apart from returning the store.

    Args:
        docs_path: JSON file handed to ``load_service_docs``.
        index_dir: Folder passed to ``save_local`` for the serialized index.
        model_name: Model identifier given to ``HuggingFaceEmbeddings``.

    Returns:
        The FAISS vector store that was built and saved, so callers can query
        it immediately without reloading it from ``index_dir``.
    """
    docs = load_service_docs(docs_path)
    embeddings = HuggingFaceEmbeddings(model_name=model_name)
    vectorstore = FAISS.from_documents(docs, embeddings)
    vectorstore.save_local(index_dir)
    return vectorstore


  from .autonotebook import tqdm as notebook_tqdm


In [None]:
## rag_chain.py
from langchain.llms import HuggingFacePipeline
from langchain.chains import RetrievalQA
from langchain.vectorstores import FAISS
from langchain.embeddings import HuggingFaceEmbeddings
from transformers import pipeline, AutoTokenizer, AutoModelForCausalLM 
from fastapi import FastAPI, Request
from fastapi.middleware.cors import CORSMiddleware

def load_rag_chain(
    model_id="meta-llama/Llama-2-7b-chat-hf",
    index_dir="faiss_index",
    embedding_model="sentence-transformers/all-MiniLM-L6-v2",
    max_new_tokens=300,
):
    """Build a RetrievalQA chain from a local text-generation model and a saved FAISS index.

    All parameters default to the values that were previously hard-coded, so
    ``load_rag_chain()`` behaves exactly as before.

    Note: the default ``model_id`` is a gated Hugging Face repository. Loading
    it raises ``OSError`` (HTTP 401) unless the environment is authenticated
    and the account has been granted access. Pass a different ``model_id`` to
    use a model you can access.

    Args:
        model_id: Identifier given to ``AutoTokenizer`` and
            ``AutoModelForCausalLM``.
        index_dir: Folder passed to ``FAISS.load_local``.
        embedding_model: Model identifier given to ``HuggingFaceEmbeddings``.
            NOTE(review): presumably this must match the model used when the
            index was built - confirm.
        max_new_tokens: Generation budget forwarded to the pipeline.

    Returns:
        The chain produced by ``RetrievalQA.from_chain_type`` with
        ``chain_type="stuff"``.
    """
    # Language model wrapped as a LangChain LLM.
    tokenizer = AutoTokenizer.from_pretrained(model_id)
    model = AutoModelForCausalLM.from_pretrained(model_id, device_map="auto", torch_dtype="auto")
    pipe = pipeline(
        "text-generation",
        model=model,
        tokenizer=tokenizer,
        max_new_tokens=max_new_tokens,
        do_sample=True,
    )
    llm = HuggingFacePipeline(pipeline=pipe)

    # Vector store previously written to index_dir.
    embeddings = HuggingFaceEmbeddings(model_name=embedding_model)
    db = FAISS.load_local(index_dir, embeddings)

    return RetrievalQA.from_chain_type(llm=llm, retriever=db.as_retriever(), chain_type="stuff")
# Application instance that the route decorators below attach to.
app = FastAPI()

# CORS is left fully open: every origin, method and header is accepted.
app.add_middleware(
    CORSMiddleware,
    allow_origins=["*"],
    allow_methods=["*"],
    allow_headers=["*"],
)

# Build the chain once, when this cell runs, so all requests share it.
rag_chain = load_rag_chain()

@app.post("/chat")
async def chat(request: Request):
    """Answer a chat message using the module-level ``rag_chain``.

    The request body is parsed as JSON and the question is read from its
    ``"message"`` key (empty string when the key is absent).

    Returns:
        ``{"reply": <chain output>}``.
    """
    import asyncio  # local import so this cell does not depend on another cell's imports

    data = await request.json()
    question = data.get("message", "")

    # rag_chain.run is a synchronous call. Invoking it directly inside this
    # coroutine would stall the event loop (and every other request) for the
    # whole generation, so hand it to the default executor and await it.
    # NOTE(review): concurrent requests can now reach the chain in parallel;
    # confirm the underlying pipeline tolerates that, or serialize with a lock.
    loop = asyncio.get_running_loop()
    response = await loop.run_in_executor(None, rag_chain.run, question)
    return {"reply": response}

OSError: You are trying to access a gated repo.
Make sure to have access to it at https://huggingface.co/meta-llama/Llama-2-7b-chat-hf.
401 Client Error. (Request ID: Root=1-68259010-5a717d97178bd1453bde0b30;9bcd8a6a-1455-417d-b3e6-7829542e6274)

Cannot access gated repo for url https://huggingface.co/meta-llama/Llama-2-7b-chat-hf/resolve/main/config.json.
Access to model meta-llama/Llama-2-7b-chat-hf is restricted. You must have access to it and be authenticated to access it. Please log in.

In [9]:
huggingface-cli login


SyntaxError: invalid syntax (3291835336.py, line 1)