In [1]:
#!pip install langchain langchain-community
#!pip install --upgrade langchain langchain-community
#!pip install -U langchain langchain-ollama
#!pip install fpdf
#!pip install langchain_huggingface




[notice] A new release of pip is available: 25.1.1 -> 25.2
[notice] To update, run: python.exe -m pip install --upgrade pip


In [2]:
from langchain_ollama import OllamaLLM

# Notebook-wide LLM handle: the "llama3" model accessed via the Ollama integration.
llm = OllamaLLM(
    model="llama3",
)

In [3]:
#llm.invoke("What are 3 high-demand tech careers in 2025?")

In [4]:
import os

from fpdf import FPDF

# Build a one-page sample PDF so the later cells have a document to index.
output_path = "data/test_guide.pdf"

# pdf.output() writes straight to the given path, so the target folder must
# already exist; create it here so this cell also works on a fresh checkout.
os.makedirs(os.path.dirname(output_path), exist_ok=True)

pdf = FPDF()
pdf.add_page()
pdf.set_font("Arial", size=12)

# Width 0 lets the cell extend to the right margin; 10 is the line height.
pdf.multi_cell(0, 10, "This is a sample career guide. Data science, cloud computing, and cybersecurity are top career paths in 2025.")

pdf.output(output_path)

print("✅ Sample PDF created!")

✅ Sample PDF created!


In [5]:
import os

from langchain_community.vectorstores import FAISS

from langchain_huggingface import HuggingFaceEmbeddings

from langchain.document_loaders import PyPDFLoader

from langchain.text_splitter import RecursiveCharacterTextSplitter

def load_or_create_vectorstore(data_dir="data", db_path="embeddings/career_db",
                               model_name="all-MiniLM-L6-v2"):
    """Return a FAISS vector store, loading it from disk or building it from PDFs.

    If a saved index is found under ``db_path`` it is loaded and returned
    immediately; the PDFs in ``data_dir`` are NOT re-read in that case, so
    delete ``db_path`` to force a rebuild after the documents change.
    Otherwise every PDF in ``data_dir`` is loaded, split into overlapping
    chunks, embedded, saved to ``db_path`` and returned.

    Args:
        data_dir: Folder scanned (non-recursively) for ``.pdf`` files.
        db_path: Folder where the FAISS index is loaded from / saved to.
        model_name: HuggingFace embedding model name. Presumably this must be
            the same model the saved index was built with - confirm before
            changing it for an existing ``db_path``.

    Returns:
        The loaded or newly created FAISS vector store.

    Raises:
        ValueError: If no PDF content could be loaded from ``data_dir``.
        FileNotFoundError: If ``data_dir`` does not exist (from ``os.listdir``).
    """
    # Check for the index file itself rather than just the folder, so an empty
    # or half-written folder triggers a rebuild instead of a load failure.
    # "index.faiss" is the file name FAISS.save_local writes by default.
    index_file = os.path.join(db_path, "index.faiss")

    if os.path.exists(index_file):
        print(" Found existing FAISS DB. Loading...")

        # SECURITY: the saved store includes a pickle file, and unpickling can
        # execute arbitrary code. Only load a db_path this notebook created.
        return FAISS.load_local(
            db_path,
            HuggingFaceEmbeddings(model_name=model_name),
            allow_dangerous_deserialization=True,
        )

    print(" No DB found. Loading PDFs and creating embeddings...")

    # Load PDFs. Sorting makes the chunk order reproducible across runs
    # (os.listdir order is arbitrary); lower() also accepts ".PDF" files.
    documents = []
    for filename in sorted(os.listdir(data_dir)):
        if filename.lower().endswith(".pdf"):
            loader = PyPDFLoader(os.path.join(data_dir, filename))
            documents.extend(loader.load())

    if not documents:
        # Report the folder that was actually scanned, not a hard-coded name.
        raise ValueError(f" No PDFs found in the '{data_dir}' folder!")

    # Split into chunks; the overlap keeps context that straddles a boundary.
    splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=200)
    docs = splitter.split_documents(documents)

    print(f" Loaded and split into {len(docs)} chunks.")

    # Create the FAISS vector store and persist it for the next run.
    embeddings = HuggingFaceEmbeddings(model_name=model_name)
    db = FAISS.from_documents(docs, embeddings)
    db.save_local(db_path)

    print(" FAISS DB created and saved.")

    return db

In [8]:
from langchain.chains import RetrievalQA

from langchain_ollama import OllamaLLM

# Vector store: loaded from disk when a saved one exists, otherwise built.
db = load_or_create_vectorstore()

# Language model that will generate the answers.
llm = OllamaLLM(model="llama3")

# Retriever over the vector store, configured with k=3 results per query.
retriever = db.as_retriever(search_kwargs={"k": 3})

# Retrieval QA chain; source documents are returned alongside the answer.
qa_chain = RetrievalQA.from_chain_type(llm=llm, retriever=retriever, return_source_documents=True)

# Sample query
query = "What are the most important tech careers in 2025?"
response = qa_chain.invoke({"query": query})

answer = response["result"]
print(" Answer:\n", answer)

 Found existing FAISS DB. Loading...
 Answer:
 According to the given context, the most important tech careers in 2025 are:

Data Science
Cloud Computing
Cybersecurity
