In [1]:
import os
import torch
from dotenv import load_dotenv
from huggingface_hub import login
from langchain_huggingface import HuggingFaceEndpoint, HuggingFaceEmbeddings, ChatHuggingFace
from langchain_chroma import Chroma
from langchain_community.document_loaders import PyPDFLoader, DirectoryLoader
from langchain_text_splitters import RecursiveCharacterTextSplitter
from langchain_core.runnables import RunnablePassthrough
from langchain_core.output_parsers import StrOutputParser
from langchain_core.prompts import ChatPromptTemplate
import gradio as gr
import chromadb

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
# --- 1. SETUP & AUTH ---
# Read environment variables (python-dotenv picks up a local .env file when one
# exists) and log in to the Hugging Face Hub if a token is configured.
load_dotenv()
hf_token = os.getenv("HUGGINGFACEHUB_API_TOKEN")
if hf_token:
    login(token=hf_token)
    print("‚úÖ Authenticated")
else:
    # The LLM cell reads this same variable for its endpoint token, so say so
    # now instead of letting the problem surface later as an API error.
    print("WARNING: HUGGINGFACEHUB_API_TOKEN is not set; skipping Hugging Face login.")

‚úÖ Authenticated


In [3]:
# --- 2. DOCUMENT LOADING, SPLITTING & VECTOR STORE HELPERS ---
def load_docs(directory):
    """Load the PDF files found in ``directory``.

    If ``directory`` does not exist it is created first and a hint is printed;
    the loader is still run afterwards on the (new) directory.

    Args:
        directory: Path of the folder to scan with the ``./*.pdf`` glob.

    Returns:
        Whatever ``DirectoryLoader.load()`` produces for the matched files,
        each one parsed with ``PyPDFLoader``.
    """
    directory_missing = not os.path.exists(directory)
    if directory_missing:
        os.makedirs(directory)
        print(f"üìÅ Created directory '{directory}'. Add your PDFs there!")

    pdf_loader = DirectoryLoader(directory, glob="./*.pdf", loader_cls=PyPDFLoader)
    loaded_documents = pdf_loader.load()
    return loaded_documents

def split_docs(documents, chunk_size=500, chunk_overlap=50):
    """Split loaded documents into smaller, overlapping chunks.

    Args:
        documents: Documents to split (e.g. the result of ``load_docs``).
        chunk_size: Maximum chunk length handed to
            ``RecursiveCharacterTextSplitter``. Defaults to 500.
        chunk_overlap: Overlap between consecutive chunks handed to
            ``RecursiveCharacterTextSplitter``. Defaults to 50.

    Returns:
        The chunked documents returned by ``split_documents``.
    """
    text_splitter = RecursiveCharacterTextSplitter(
        chunk_size=chunk_size,
        chunk_overlap=chunk_overlap,
    )
    return text_splitter.split_documents(documents)

def create_vector_store(chunks, persist_directory="../db_resume", collection_name="resumes_collection"):
    """Rebuild the persistent Chroma collection from ``chunks``.

    Any existing collection with the same name is dropped first, so running
    the notebook again does not accumulate duplicate entries.

    Args:
        chunks: Split documents to embed and index.
        persist_directory: Folder where the Chroma database is stored.
        collection_name: Name of the collection to (re)create.

    Returns:
        The ``Chroma`` vector store built by ``Chroma.from_documents``.
    """
    embeddings = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")

    # One persistent client is used both for the cleanup and for the store
    # itself, instead of opening a second client on the same directory.
    client = chromadb.PersistentClient(path=persist_directory)

    # Best-effort cleanup of the previous run's data. `Exception` rather than
    # a bare `except`, so Ctrl-C / SystemExit are not swallowed.
    # NOTE(review): the error raised for a missing collection appears to differ
    # between chromadb releases, so a narrower type is not used here; confirm.
    try:
        client.delete_collection(collection_name)
    except Exception:
        pass  # Collection didn't exist yet, that's fine

    return Chroma.from_documents(
        documents=chunks,
        embedding=embeddings,
        client=client,
        collection_name=collection_name,
    )

In [4]:

# --- 3. LLM CONFIGURATION (Cloud Version) ---
model_id = "meta-llama/Llama-3.2-3B-Instruct"

print(f"üåê Connecting to Hugging Face API for {model_id}...")

# Hosted endpoint for `model_id`; per the original author's note, nothing is
# downloaded here - this only sets up the connection object.
raw_llm = HuggingFaceEndpoint(
    huggingfacehub_api_token=os.getenv("HUGGINGFACEHUB_API_TOKEN"),
    repo_id=model_id,
    task="text-generation",
    temperature=0.1,
    max_new_tokens=512,
)

# Chat wrapper around the raw endpoint; the original author added it to get
# past a "conversational" task error.
llm = ChatHuggingFace(llm=raw_llm)
print("‚úÖ Cloud LLM Connection Ready!")

üåê Connecting to Hugging Face API for meta-llama/Llama-3.2-3B-Instruct...
‚úÖ Cloud LLM Connection Ready!


In [5]:
# --- 4. RAG CHAIN LOGIC ---
def _format_docs(docs):
    """Render retrieved documents as plain text, each labelled with its source."""
    return "\n\n".join(
        f"[Source: {doc.metadata.get('source', 'unknown')}]\n{doc.page_content}"
        for doc in docs
    )


def ask_question(query, vector_db, llm, k=5):
    """Answer ``query`` using the resumes stored in ``vector_db``.

    Args:
        query: The user's question.
        vector_db: Vector store exposing ``as_retriever``.
        llm: Chat model placed after the prompt in the chain.
        k: Number of chunks to retrieve per question. Defaults to 5.

    Returns:
        The model's answer as a string (via ``StrOutputParser``).
    """
    retriever = vector_db.as_retriever(search_kwargs={"k": k})
    prompt = ChatPromptTemplate.from_template(
        "You are an expert HR assistant. Answer based ONLY on the provided resumes:\n"
        "Context: {context}\n\n"
        "Question: {input}"
    )
    rag_chain = (
        # Convert the retrieved documents to text before they reach the prompt;
        # otherwise the template is filled with the str() of a list of
        # Document objects. The source label keeps each excerpt attributable.
        {"context": retriever | _format_docs, "input": RunnablePassthrough()}
        | prompt | llm | StrOutputParser()
    )
    return rag_chain.invoke(query)

In [6]:
# --- 5. EXECUTION ---
# Load the PDFs, split them into chunks and index them. `vector_db` is left as
# None whenever there is nothing to index, which the UI callback checks for.
RESUME_DIR = "../resumes"

print("‚è≥ Processing Resumes...")
docs = load_docs(RESUME_DIR)
chunks = split_docs(docs) if docs else []
if chunks:
    vector_db = create_vector_store(chunks)
    # `docs` can hold several entries per file (PyPDFLoader is documented to
    # emit one per page), so count distinct source files for the resume total.
    resume_count = len({doc.metadata.get("source") for doc in docs})
    print(
        f"‚úÖ Loaded {resume_count} resumes "
        f"({len(docs)} pages, {len(chunks)} chunks) into the database."
    )
elif docs:
    # PDFs were found but produced no chunks (e.g. no extractable text), so
    # there is nothing to index.
    print(f"No extractable text found in the PDFs under {RESUME_DIR}.")
    vector_db = None
else:
    print(f"‚ùå No resumes found in {RESUME_DIR} folder.")
    vector_db = None

‚è≥ Processing Resumes...


Failed to send telemetry event ClientStartEvent: capture() takes 1 positional argument but 3 were given
Failed to send telemetry event ClientStartEvent: capture() takes 1 positional argument but 3 were given
Failed to send telemetry event ClientCreateCollectionEvent: capture() takes 1 positional argument but 3 were given


‚úÖ Loaded 5 resumes into the database.


In [7]:
# --- 6. WEB UI ---
def screen_resume(user_question):
    """Gradio callback: answer ``user_question`` from the indexed resumes.

    Args:
        user_question: Text typed into the question box.

    Returns:
        The answer text, or a human-readable message when the question is
        empty, no resumes are indexed, or the chain raises an exception.
    """
    # Reject empty / whitespace-only input before spending a retrieval and an
    # LLM call on it.
    if not user_question or not user_question.strip():
        return "Please enter a question."
    if vector_db is None:
        return "Please add PDF resumes to the '../resumes' folder and restart."
    try:
        return ask_question(user_question.strip(), vector_db, llm)
    except Exception as e:
        # Show the failure in the UI rather than letting the callback raise.
        return f"Error: {str(e)}"

with gr.Blocks(theme=gr.themes.Soft()) as demo:
    # Layout, top to bottom: title, question box, trigger button, report box.
    gr.Markdown("# üìÑ Cloud-Powered Resume Screener")
    question = gr.Textbox(
        label="Question",
        placeholder="Which candidate is best for Python?",
    )
    submit_btn = gr.Button("Analyze", variant="primary")
    output = gr.Textbox(label="AI Report", lines=10)

    # Clicking the button passes the question text to screen_resume and puts
    # its return value into the report box.
    submit_btn.click(fn=screen_resume, inputs=question, outputs=output)

# share=False: no public link is requested for the app.
demo.launch(share=False)

  with gr.Blocks(theme=gr.themes.Soft()) as demo:


* Running on local URL:  http://127.0.0.1:7861
* To create a public link, set `share=True` in `launch()`.




Failed to send telemetry event CollectionQueryEvent: capture() takes 1 positional argument but 3 were given
