In [1]:
import os
import torch
from dotenv import load_dotenv
from huggingface_hub import login
from langchain_huggingface import HuggingFaceEndpoint, HuggingFaceEmbeddings, ChatHuggingFace
from langchain_pinecone import PineconeVectorStore
from pinecone import Pinecone, ServerlessSpec
from langchain_community.document_loaders import PyPDFLoader, DirectoryLoader
from langchain_text_splitters import RecursiveCharacterTextSplitter
from langchain_core.runnables import RunnablePassthrough
from langchain_core.output_parsers import StrOutputParser
from langchain_core.prompts import ChatPromptTemplate
import gradio as gr

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
# --- 1. SETUP & AUTH ---
load_dotenv()
hf_token = os.getenv("HUGGINGFACEHUB_API_TOKEN")
pinecone_api_key = os.getenv("PINECONE_API_KEY")
if hf_token:
    login(token=hf_token)
    print("‚úÖ Authenticated")

# NEW: Initialize Pinecone Client and Index Name globally
pc = Pinecone(api_key=pinecone_api_key)
index_name = "resumes-index"  # Make sure this matches your Pinecone dashboard!

‚úÖ Authenticated


In [3]:
# --- 2. FUNCTIONS ---
def load_docs(directory):
    if not os.path.exists(directory):
        os.makedirs(directory)
    loader = DirectoryLoader(directory, glob="./*.pdf", loader_cls=PyPDFLoader)
    return loader.load()

def split_docs(documents):
    text_splitter = RecursiveCharacterTextSplitter(chunk_size=500, chunk_overlap=50)
    return text_splitter.split_documents(documents)

def create_vector_store(chunks):
    embeddings = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")
    
    # Check if index exists, if not, create it (Optional but recommended)
    if index_name not in [idx.name for idx in pc.list_indexes()]:
        pc.create_index(
            name=index_name,
            dimension=384, 
            metric='cosine',
            spec=ServerlessSpec(cloud='aws', region='us-east-1')
        )

    vector_store = PineconeVectorStore.from_documents(
        documents=chunks,
        embedding=embeddings,
        index_name=index_name
    )
    return vector_store

In [4]:

# --- 3. LLM CONFIGURATION (Cloud Version) ---
model_id = "meta-llama/Llama-3.2-3B-Instruct"

print(f"üåê Connecting to Hugging Face API for {model_id}...")

# This does NOT download the model; it just creates a connection
raw_llm = HuggingFaceEndpoint(
    repo_id=model_id,
    task="text-generation",
    max_new_tokens=512,
    temperature=0.1,
    huggingfacehub_api_token=os.getenv("HUGGINGFACEHUB_API_TOKEN"),
)
# This wrapper solves the "conversational" task error
llm = ChatHuggingFace(llm=raw_llm)
print("‚úÖ Cloud LLM Connection Ready!")

üåê Connecting to Hugging Face API for meta-llama/Llama-3.2-3B-Instruct...
‚úÖ Cloud LLM Connection Ready!


In [5]:
# FIXED: Defined the prompt globally so ask_question can see it
prompt = ChatPromptTemplate.from_template(
    "You are an expert HR assistant. Answer based ONLY on the provided resumes:\n"
    "Context: {context}\n\n"
    "Question: {input}"
)

def ask_question(query, vector_db, llm):
    # FIXED: Use the vector_db passed from the UI
    retriever = vector_db.as_retriever(search_kwargs={"k": 5})
    
    rag_chain = (
        {"context": retriever, "input": RunnablePassthrough()}
        | prompt 
        | llm 
        | StrOutputParser()
    )
    return rag_chain.invoke(query)

In [6]:
# --- 5. EXECUTION ---
print("‚è≥ Processing Resumes...")
docs = load_docs("../resumes")
if docs:
    chunks = split_docs(docs)
    vector_db = create_vector_store(chunks)
    print(f"‚úÖ Loaded {len(docs)} resumes into Pinecone.")
else:
    print("‚ùå No resumes found. Loading existing index...")
    # Initialize vector_db even if no new docs are uploaded
    embeddings = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")
    vector_db = PineconeVectorStore(index_name=index_name, embedding=embeddings)

‚è≥ Processing Resumes...
‚úÖ Loaded 5 resumes into Pinecone.


In [7]:
# --- 6. WEB UI ---
def screen_resume(user_question):
    try:
        # FIXED: ask_question now receives the correct number of arguments
        return ask_question(user_question, vector_db, llm)
    except Exception as e:
        return f"Error: {str(e)}"

with gr.Blocks(theme=gr.themes.Soft()) as demo:
    gr.Markdown("# üìÑ Cloud-Powered Resume Screener (Pinecone Edition)")
    question = gr.Textbox(label="Question", placeholder="Which candidate is best for Python?")
    submit_btn = gr.Button("Analyze", variant="primary")
    output = gr.Textbox(label="AI Report", lines=10)
    submit_btn.click(fn=screen_resume, inputs=question, outputs=output)

demo.launch(share=False)

  with gr.Blocks(theme=gr.themes.Soft()) as demo:


* Running on local URL:  http://127.0.0.1:7860
* To create a public link, set `share=True` in `launch()`.


