In [10]:
import os
import re

from langchain_community.docstore.document import Document
from langchain_community.document_loaders import PyPDFLoader, TextLoader
from langchain_community.embeddings import HuggingFaceEmbeddings
from langchain_community.vectorstores import FAISS, Chroma
from langchain_text_splitters import RecursiveCharacterTextSplitter

# NOTE(review): later cells reference `ollama` (LLMGenerator) and `st`
# (ResumeRAGPipeline, presumably `streamlit`), neither of which is imported in
# the visible cells. Confirm they are imported before those classes are used.

In [11]:
# Step 1: DATA LOADING

class DataLoader:
    """Turns the raw inputs (job text, resume file) into lists of documents."""

    def load_job_description(self, job_text):
        """Wrap the job description in a single-element list of ``Document``.

        Args:
            job_text: The job description as plain text.

        Returns:
            A list holding one ``Document`` whose ``page_content`` is ``job_text``.
        """
        return [Document(page_content=job_text)]

    def load_resume(self, file):
        """Load a resume as documents, using the PDF loader for ``.pdf`` files.

        Args:
            file: One of
                * a filesystem path (``str`` or ``os.PathLike``),
                * an open file object whose ``.name`` is an existing path, or
                * an in-memory upload exposing ``.name`` plus ``getvalue()`` or
                  ``read()`` (presumably a Streamlit ``UploadedFile`` - confirm
                  against the caller).

        Returns:
            Whatever the selected loader's ``load()`` returns.

        Raises:
            AttributeError: If ``file`` is not a path and has no ``.name``.
        """
        import tempfile  # only needed when an upload has to be spilled to disk

        if isinstance(file, (str, os.PathLike)):
            return self._load_path(os.fspath(file))

        name = file.name
        in_memory = hasattr(file, "getvalue")
        if not in_memory and os.path.isfile(name):
            # A real file handle: the loaders want its path, not the handle.
            return self._load_path(name)

        # The loaders read from a path, so in-memory content is written to a
        # temporary file that keeps the original extension (it drives the
        # PDF-vs-text decision) and is removed once loading has finished.
        data = file.getvalue() if in_memory else file.read()
        if isinstance(data, str):
            data = data.encode("utf-8")
        suffix = os.path.splitext(name)[1]
        with tempfile.NamedTemporaryFile(delete=False, suffix=suffix) as tmp:
            tmp.write(data)
        try:
            return self._load_path(tmp.name)
        finally:
            os.remove(tmp.name)

    def _load_path(self, path):
        """Pick the loader from the (case-insensitive) extension and load ``path``."""
        if path.lower().endswith(".pdf"):
            document_loader = PyPDFLoader(path)
        else:
            document_loader = TextLoader(path)
        return document_loader.load()

# Test
loader = DataLoader()
print("‚úÖ CELL 2: Data Loader Ready!")

‚úÖ CELL 2: Data Loader Ready!


In [12]:
# *CELL 3: TEXT CHUNKING


class TextChunker:
    """Splits documents into overlapping chunks with a recursive character splitter."""

    def __init__(self, chunk_size=300, chunk_overlap=50):
        """Create the splitter.

        Args:
            chunk_size: Maximum chunk size handed to the splitter. Defaults to
                300, the value the notebook marks as resume-optimized.
            chunk_overlap: Overlap between consecutive chunks. Defaults to 50.
        """
        self.splitter = RecursiveCharacterTextSplitter(
            chunk_size=chunk_size, chunk_overlap=chunk_overlap
        )

    def chunk_documents(self, docs):
        """Return the chunks produced by the splitter's ``split_documents(docs)``."""
        return self.splitter.split_documents(docs)

# Test  
chunker = TextChunker()
print("‚úÖ CELL 3: Text Chunker Ready!")

‚úÖ CELL 3: Text Chunker Ready!


In [13]:
## *CELL 4: EMBEDDING MANAGER 

class EmbeddingManager:
    """Owns the HuggingFace embedding model used by the rest of the notebook."""

    def __init__(self):
        """Instantiate the embeddings for the all-MiniLM-L6-v2 model."""
        # Freely available sentence-transformers model.
        model_id = "sentence-transformers/all-MiniLM-L6-v2"
        self.embeddings = HuggingFaceEmbeddings(model_name=model_id)

    def get_embeddings(self):
        """Return the embeddings object created in ``__init__``."""
        embedding_model = self.embeddings
        return embedding_model

# Test
embedder = EmbeddingManager()
print("‚úÖ CELL 4: Embeddings Ready!")

‚úÖ CELL 4: Embeddings Ready!


In [14]:
# *CELL 5: VECTOR STORE (FAISS)

class VectorStore:
    """Builds FAISS vector stores from document chunks and queries them."""

    def __init__(self, embedding_manager=None):
        """Set up the embedding source.

        Args:
            embedding_manager: Optional object exposing ``get_embeddings()``.
                Passing an existing manager avoids constructing a second
                ``EmbeddingManager``; when omitted a new one is created, as before.
        """
        if embedding_manager is None:
            embedding_manager = EmbeddingManager()
        self.embedding_manager = embedding_manager
        # Kept for compatibility; nothing in this class assigns it afterwards.
        self.job_store = None

    def create_vectorstore(self, chunks):
        """Index ``chunks`` in a new FAISS store.

        Args:
            chunks: Non-empty sequence of documents to embed and index.

        Returns:
            The store returned by ``FAISS.from_documents``.

        Raises:
            ValueError: If ``chunks`` is empty, since there is nothing to index.
        """
        if not chunks:
            raise ValueError("Cannot build a vector store from an empty list of chunks.")
        return FAISS.from_documents(chunks, self.embedding_manager.get_embeddings())

    def similarity_search(self, vectorstore, query, k=5):
        """Return ``vectorstore.similarity_search(query, k=k)`` (``k`` defaults to 5)."""
        return vectorstore.similarity_search(query, k=k)

# Test
vector_store = VectorStore()
print("‚úÖ CELL 5: Vector Store Ready!")

‚úÖ CELL 5: Vector Store Ready!


In [15]:
## *CELL 6: PROMPT + CONTEXT RETRIEVAL

class PromptRetriever:
    """Retrieves context from a vector store and renders the screening prompt."""

    def get_context(self, vectorstore, query="skills experience python ml langchain"):
        """Return the text of the top 5 matches for ``query``, one match per line."""
        hits = vectorstore.similarity_search(query, k=5)
        snippets = []
        for hit in hits:
            snippets.append(hit.page_content)
        return "\n".join(snippets)

    def get_prompt(self, job_desc, context):
        """Fill the screening prompt with the job description and retrieved context."""
        template = """
AI RESUME SCREENER (Krish Naik RAG)

JOB REQUIREMENTS:
{job_desc}

RESUME MATCHES:
{context}

Score 0-100:
Skills: 40pts | Experience: 30pts | Projects: 20pts | Education: 10pts

**FORMAT:**
**SCORE:** XX/100
**VERDICT:** Hire/Maybe/Reject
**STRENGTHS:** [3 bullets]
**GAPS:** [3 bullets]
"""
        return template.format(job_desc=job_desc, context=context)

# Test
retriever = PromptRetriever()
print("‚úÖ CELL 6: Prompt + Retriever Ready!")

‚úÖ CELL 6: Prompt + Retriever Ready!


In [16]:
## *CELL 7: LLM GENERATOR

class LLMGenerator:
    """Sends a prompt to an Ollama chat model and returns the reply text.

    NOTE(review): relies on a module-level ``ollama`` name that is not imported
    in the visible cells - confirm it is imported before ``generate`` is called.
    """

    def __init__(self, model='mistral:7b'):
        """Remember which model to query.

        Args:
            model: Model tag passed to ``ollama.chat``. Defaults to
                ``'mistral:7b'``, the value that was previously hard-coded.
        """
        self.model = model

    def generate(self, prompt):
        """Send ``prompt`` as a single user message and return the reply content.

        Args:
            prompt: Text placed in the ``content`` field of the user message.

        Returns:
            ``response['message']['content']`` from the chat call.
        """
        messages = [{'role': 'user', 'content': prompt}]
        response = ollama.chat(model=self.model, messages=messages)
        return response['message']['content']

# Test
generator = LLMGenerator()
print("‚úÖ CELL 7: LLM Generator Ready!")

‚úÖ CELL 7: LLM Generator Ready!


In [17]:
## *CELL 8: COMPLETE RAG PIPELINE

class ResumeRAGPipeline:
    """End-to-end resume screening: load, chunk, index, retrieve, prompt, generate.

    NOTE(review): status messages go through a module-level ``st`` (presumably
    ``streamlit``) that is not imported in the visible cells - confirm it is
    imported before this class is instantiated.
    """

    def __init__(self):
        """Create one instance of every pipeline stage and report readiness."""
        self.loader = DataLoader()
        self.chunker = TextChunker()
        self.vector_store = VectorStore()
        self.retriever = PromptRetriever()
        self.generator = LLMGenerator()
        st.success("üöÄ COMPLETE 8-STEP RAG PIPELINE READY!")

    def run_pipeline(self, job_desc, resume_file):
        """Screen one resume against a job description.

        The resume is what gets chunked, embedded and searched. The prompt
        already carries the full job description, so the retrieved context has
        to come from the resume. Previously the job chunks were indexed and the
        resume chunks were discarded, so the resume never reached the model.

        Args:
            job_desc: Job description text; also used as the retrieval query.
            resume_file: Resume input forwarded to ``self.loader.load_resume``.

        Returns:
            The text returned by ``self.generator.generate``.

        Raises:
            ValueError: If chunking the resume yields no chunks.
        """
        # STEP 1: Load Data
        st.info("üì• 1. LOADING DATA...")
        resume_docs = self.loader.load_resume(resume_file)

        # STEP 2: Chunk
        st.info("‚úÇÔ∏è 2. CHUNKING...")
        resume_chunks = self.chunker.chunk_documents(resume_docs)
        if not resume_chunks:
            raise ValueError(
                "No text could be extracted from the resume; nothing to screen."
            )

        # STEP 3-4: Embeddings + Vector Store
        st.info("üî¢ 3-4. EMBEDDING + VECTOR STORE...")
        resume_vectorstore = self.vector_store.create_vectorstore(resume_chunks)

        # STEP 5-6: Retrieve Context
        st.info("üîç 5-6. RETRIEVING CONTEXT...")
        if job_desc and job_desc.strip():
            # Search the resume for the passages closest to this job.
            context = self.retriever.get_context(resume_vectorstore, query=job_desc)
        else:
            # Blank job description: fall back to the retriever's default query.
            context = self.retriever.get_context(resume_vectorstore)
        prompt = self.retriever.get_prompt(job_desc, context)

        # STEP 7: Generate
        st.info("ü§ñ 7-8. AI GENERATION...")
        return self.generator.generate(prompt)

print("‚úÖ CELL 8: RAG Pipeline Complete!")


‚úÖ CELL 8: RAG Pipeline Complete!
