In [None]:
!pip -q install streamlit langchain langchain-community langchain-google-genai \
               InstructorEmbedding sentence-transformers google-generativeai pyngrok

In [None]:
%%writefile app.py
import os
import streamlit as st
from langchain_google_genai import ChatGoogleGenerativeAI
from langchain_community.document_loaders.csv_loader import CSVLoader
from langchain_community.vectorstores import FAISS
from langchain_community.embeddings import HuggingFaceInstructEmbeddings
from langchain.prompts import PromptTemplate
from langchain.chains import RetrievalQA

# --- LLM ---------------------------------------------------------------------------
# Read the key from the GOOGLE_API_KEY environment variable so the real secret never
# has to be written into this file. The placeholder is kept only as a fallback and
# will be rejected by the API if it is actually used.
google_api = os.environ.get("GOOGLE_API_KEY") or "Your_API_Key"

# temperature=0.0 asks the model for its most likely completion, which suits FAQ
# answers that should be repeatable rather than creative.
google_llm = ChatGoogleGenerativeAI(
    model="gemini-1.5-flash",
    temperature=0.0,
    google_api_key=google_api,
)

# --- Embeddings (requires: pip install InstructorEmbedding sentence-transformers) ---
@st.cache_resource
def _load_instructor_embedding():
    """Build the Instructor embedding model once and reuse it across script reruns.

    Streamlit re-executes this file from the top on every widget interaction;
    without the cache the model would be constructed again each time.
    """
    return HuggingFaceInstructEmbeddings(model_name="hkunlp/instructor-base")


# Same module-level name as before, so every function below keeps working unchanged.
instructor_embedding = _load_instructor_embedding()

INDEX_DIR = "faiss_index"  # folder for FAISS index (index.faiss + index.pkl)
CSV_PATH = "codebasics_faqs_utf8.csv"  # make sure you've re-saved as UTF-8

def create_vectordb():
    """Build a FAISS index from the FAQ CSV and save it under INDEX_DIR.

    Every CSV row is loaded as one document whose "source" metadata comes from the
    row's "prompt" column. The documents are embedded with `instructor_embedding`
    and the resulting index is written to disk, replacing any previous one.

    Raises:
        ValueError: if the CSV yields no rows, since an index cannot be built
            from an empty document list.
    """
    # Decode explicitly as UTF-8 (the file is expected to be saved that way) instead
    # of relying on the platform's default encoding.
    loader = CSVLoader(file_path=CSV_PATH, source_column="prompt", encoding="utf-8")
    docs = loader.load()
    if not docs:
        raise ValueError(f"No rows loaded from {CSV_PATH}; cannot build an index.")

    vectordb = FAISS.from_documents(documents=docs, embedding=instructor_embedding)
    vectordb.save_local(INDEX_DIR)

def ensure_index():
    """Build the FAISS index unless a complete one already exists in INDEX_DIR.

    The saved index consists of two files, index.faiss and index.pkl. Both must be
    present for it to count as complete; if either is missing (for example after an
    interrupted save) the index is rebuilt.
    """
    required_files = ("index.faiss", "index.pkl")
    index_complete = all(
        os.path.isfile(os.path.join(INDEX_DIR, name)) for name in required_files
    )
    if not index_complete:
        create_vectordb()  # builds and saves ./faiss_index

def get_qa_chain():
    """Assemble the retrieval-QA chain backed by the FAISS index in INDEX_DIR.

    Returns:
        A RetrievalQA "stuff" chain that is queried with {"query": <question>} and
        configured to return the retrieved source documents alongside the answer.
    """
    # Loading the saved index deserializes index.pkl, which is why LangChain asks
    # for this explicit opt-in. Only load an index that this app wrote itself.
    faiss_store = FAISS.load_local(
        INDEX_DIR,
        instructor_embedding,
        allow_dangerous_deserialization=True,
    )

    # Restrict the model to the retrieved context and give it a fixed fallback reply.
    answer_prompt = PromptTemplate(
        input_variables=["context", "question"],
        template="""Given the following context and a question, generate an answer based on this context only.
In the answer, reuse as much text as possible from the "response" section of the source context.
If the answer is not found in the context, say "I don't know."

CONTEXT:
{context}

QUESTION:
{question}
""",
    )

    return RetrievalQA.from_chain_type(
        llm=google_llm,
        chain_type="stuff",
        # Plain top-4 similarity search; no score threshold that could drop every hit.
        retriever=faiss_store.as_retriever(
            search_type="similarity",
            search_kwargs={"k": 4},
        ),
        input_key="query",
        return_source_documents=True,
        chain_type_kwargs={"prompt": answer_prompt},
    )

if __name__ == "__main__":
    ensure_index()  # <-- builds index once if missing

    # `streamlit run app.py` also executes this file with __name__ == "__main__", and
    # re-executes it on every widget interaction. Run the command-line smoke test only
    # when no Streamlit runtime is active (plain `python app.py`), so the web UI does
    # not spend an extra LLM call on each rerun.
    from streamlit import runtime as _st_runtime

    if not _st_runtime.exists():
        qa = get_qa_chain()
        # invoke() is the supported entry point; calling the chain object directly
        # is deprecated in LangChain.
        result = qa.invoke({"query": "Should I learn PowerBI or Tableau?"})
        print(result["result"])
# ---------------- Streamlit UI ----------------
st.title("Codebasics Q&A 🌱")

# Manual (re)build of the vector index, e.g. after the CSV has been updated.
if st.button("Create Knowledgebase"):
    try:
        with st.spinner("Building vector index..."):
            create_vectordb()
        st.success(f"Vector index saved to ./{INDEX_DIR}")
    except Exception as e:
        st.error(f"Failed to create index: {e}")

question = st.text_input("Question:")
# Ignore empty and whitespace-only input so no LLM call is made for a blank question.
if question and question.strip():
    try:
        with st.spinner("Searching the knowledge base..."):
            chain = get_qa_chain()
            # The chain was built with input_key="query", so the input is a dict.
            # invoke() replaces the deprecated direct call on the chain object.
            response = chain.invoke({"query": question.strip()})
        st.header("Answer")
        st.write(response.get("result", ""))

        # Optional: show sources
        src_docs = response.get("source_documents") or []
        if src_docs:
            st.subheader("Sources")
            for i, d in enumerate(src_docs, 1):
                st.write(f"{i}. {d.metadata.get('source', 'N/A')}")
    except Exception as e:
        # Surface any failure (missing index, API error, ...) in the page itself.
        st.error(f"Q&A failed: {e}")


Overwriting app.py


In [None]:
# kill any previous server on 8501, then start Streamlit
!fuser -k 8501/tcp >/dev/null 2>&1 || true
import subprocess, time
p = subprocess.Popen(["streamlit", "run", "app.py", "--server.port=8501", "--server.headless=true"])
time.sleep(3)  # give it a moment to boot



In [None]:
from pyngrok import ngrok

# Tear down leftovers from earlier runs: every open tunnel first, then the agent itself.
stale_urls = [tunnel.public_url for tunnel in ngrok.get_tunnels()]
for stale_url in stale_urls:
    ngrok.disconnect(stale_url)
ngrok.kill()

# Open a fresh HTTP tunnel to the local Streamlit port and show where it is reachable.
# Note: despite its name, public_url holds the tunnel object returned by connect().
public_url = ngrok.connect(8501, "http")
print("Streamlit app:", public_url)



Streamlit app: NgrokTunnel: "https://nonprominently-unpredisposing-burton.ngrok-free.dev" -> "http://localhost:8501"
