In [8]:
# with Ollama

import hashlib
import os

from dotenv import load_dotenv

# langchain and community/partner packages
import pinecone
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_community.document_loaders import PyPDFDirectoryLoader
from langchain_core.output_parsers import StrOutputParser
from langchain_core.prompts import PromptTemplate
from langchain_core.runnables import RunnablePassthrough
from langchain_huggingface import HuggingFaceEmbeddings
from langchain_community.chat_models import ChatOllama  # <-- IMPORT OLLAMA
from langchain_pinecone import PineconeVectorStore

In [9]:
# Load environment variables from a local .env file, if one is present.
# Only the Pinecone key is needed here; the LLM is served by a local Ollama model.
env_loaded = load_dotenv()
if env_loaded:
    print("Loaded environment variables.")
else:
    # load_dotenv() reports False when it found nothing to load; the key may
    # still be present in the process environment, so this is not an error.
    print("No variables loaded from a .env file; using the existing process environment.")

Loaded environment variables.


In [10]:
def read_doc(directory):
    """Load the PDFs found in ``directory`` using ``PyPDFDirectoryLoader``.

    Args:
        directory: Path of the folder to scan for PDF files.

    Returns:
        The result of ``PyPDFDirectoryLoader(directory).load()``.

    Raises:
        FileNotFoundError: If ``directory`` does not exist.
        NotADirectoryError: If ``directory`` exists but is not a directory.
    """
    # Fail fast with a clear message instead of leaving a bad path to the
    # loader, so a typo is not mistaken for "no documents".
    if not os.path.exists(directory):
        raise FileNotFoundError(f"Document directory not found: {directory!r}")
    if not os.path.isdir(directory):
        raise NotADirectoryError(f"Expected a directory of PDFs, got: {directory!r}")

    file_loader = PyPDFDirectoryLoader(directory)
    return file_loader.load()

In [11]:
def chunk_data(docs, chunk_size=800, chunk_overlap=50):
    """Split ``docs`` into smaller segments with a ``RecursiveCharacterTextSplitter``.

    Args:
        docs: Documents to split; handed unchanged to ``split_documents``.
        chunk_size: Forwarded to the splitter as ``chunk_size``.
        chunk_overlap: Forwarded to the splitter as ``chunk_overlap``.

    Returns:
        The result of the splitter's ``split_documents(docs)`` call.
    """
    splitter_options = {"chunk_size": chunk_size, "chunk_overlap": chunk_overlap}
    splitter = RecursiveCharacterTextSplitter(**splitter_options)
    segments = splitter.split_documents(docs)
    return segments

In [12]:
def format_docs(docs):
    """Concatenate the ``page_content`` of each item in ``docs``.

    Args:
        docs: Iterable of objects exposing a ``page_content`` string.

    Returns:
        A single string with the page contents separated by a blank line
        (an empty string when ``docs`` is empty).
    """
    separator = "\n\n"
    page_texts = [document.page_content for document in docs]
    return separator.join(page_texts)

In [13]:
# Document loading and processing: read the PDFs under "documents" and split
# them into chunks with the default chunk_data() settings.
print("\nReading and chunking documents...")
raw_docs = read_doc("documents")
documents = chunk_data(docs=raw_docs)
# Fail fast on an empty corpus: with zero chunks nothing would be indexed and
# the retriever would have no context to hand to the LLM.
if not documents:
    raise ValueError(
        "No document segments were produced; check that 'documents' contains readable PDFs."
    )
print(f"Loaded and chunked {len(documents)} document segments.")


Reading and chunking documents...
Loaded and chunked 9 document segments.


In [14]:
# Embeddings model (unchanged by the switch to Ollama)
print("\nInitializing embeddings model...")
embedding_model_name = "sentence-transformers/all-mpnet-base-v2"
embeddings = HuggingFaceEmbeddings(model_name=embedding_model_name)
# Embed a throwaway sentence so the vector size can be read off the result;
# vector_test is reused later to size the Pinecone index.
vector_test = embeddings.embed_query("This is a test.")
vector_dimension = len(vector_test)
print(f"Embeddings initialized. Vector dimension: {vector_dimension}")



Initializing embeddings model...
Embeddings initialized. Vector dimension: 768


In [15]:
# Pinecone setup: make sure the index exists, then upsert the chunks into it.
print("\nSetting up Pinecone vector store...")
api_key = os.getenv("PINECONE_API_KEY")
if not api_key:
    raise ValueError("PINECONE_API_KEY not found")
pc = pinecone.Pinecone(api_key=api_key)
index_name = "langchainvector"
if index_name not in pc.list_indexes().names():
    print(f"Creating index '{index_name}'...")
    pc.create_index(
        name=index_name,
        dimension=len(vector_test),  # index dimension must equal the embedding size
        metric="cosine",
        spec=pinecone.ServerlessSpec(cloud="aws", region="us-west-2"),
    )
else:
    print(f"Index '{index_name}' already exists.")

# Derive a deterministic ID for every chunk from its source, page and text.
# Without explicit ids each run would upsert the same chunks again under new
# IDs, so re-running this cell would fill the index with duplicates and the
# retriever could return the same passage several times.
# NOTE(review): vectors written by earlier runs (with generated IDs) are not
# removed by this change; clear the index once if duplicates already exist.
chunks_by_id = {}
for doc in documents:
    fingerprint = "|".join(
        [
            str(doc.metadata.get("source", "")),
            str(doc.metadata.get("page", "")),
            doc.page_content,
        ]
    )
    chunk_id = hashlib.sha256(fingerprint.encode("utf-8")).hexdigest()
    # setdefault keeps the first occurrence so IDs stay unique within the batch
    chunks_by_id.setdefault(chunk_id, doc)

vectorstore = PineconeVectorStore.from_documents(
    list(chunks_by_id.values()),
    embeddings,
    index_name=index_name,
    ids=list(chunks_by_id.keys()),
)
print("Success! Vector store is ready.")


Setting up Pinecone vector store...
Index 'langchainvector' already exists.
Success! Vector store is ready.


In [16]:
# --- Setup RAG Chain with Ollama ---
print("\nBuilding the RAG chain using a local Ollama model...")

# Step 1 - retriever: fetch the 2 best-matching chunks from the vector store
retriever = vectorstore.as_retriever(search_kwargs={'k': 2})

# Step 2 - LLM served by Ollama.
# The model has to be available locally first:
#   ollama pull llama3:8b-instruct-q4_K_M
llm = ChatOllama(model="llama3:8b-instruct-q4_K_M", temperature=0.7)

# Step 3 - prompt with {context} and {question} placeholders
template = (
    "Use the following pieces of context to answer the question. "
    "If you don't know the answer, just say that you don't know.\n\n"
    "Context: {context}\n\n"
    "Question: {question}\n\n"
    "Helpful Answer:"
)
prompt = PromptTemplate.from_template(template)

# Step 4 - chain: the query is passed through unchanged as "question", while
# "context" is the retrieved chunks joined into one string by format_docs.
chain_inputs = {
    "context": retriever | format_docs,
    "question": RunnablePassthrough(),
}
rag_chain = chain_inputs | prompt | llm | StrOutputParser()
print("Retriever and RAG chain created successfully.")


Building the RAG chain using a local Ollama model...
Retriever and RAG chain created successfully.


In [19]:
# --- Invoke the Chain ---
# Run one query through the RAG chain and echo both the question and the answer.
our_query = "What are the skills"
print(f"\n--- Invoking RAG chain with query: '{our_query}' ---")
answer = rag_chain.invoke(our_query)
print(f"\n--- Question ---\n{our_query}")
print(f"\n--- Answer ---\n{answer}")


--- Invoking RAG chain with query: 'What are the skills' ---

--- Question ---
What are the skills

--- Answer ---
Based on the provided context, here are the skills mentioned:

**Data Science & Machine Learning**

* Deep Learning (CNN, RNN, Transfer Learning)
* Machine Learning
* Data Science
* Data Visualization
* SQL

**Computer Science Basics**

* Data Structures & Algorithms (DSA)
* Linux Commands
* Operating System (OS)

**Web Development**

* HTML
* CSS

**MERN Stack**

* MongoDB
* ExpressJS
* React
* NodeJS

**Version Control**

* Git
* GitHub

**Soft Skills**

* Presentation
* Teamwork & Collaboration
* Time Management
* Adaptability
* Problem Solving

**Frameworks & Libraries**

* TensorFlow
* Keras
* PyTorch
* Scikit-learn
* Pandas
* NumPy
* Matplotlib

Additionally, the context mentions that a comprehensive project was developed for implementing and simulating various CPU scheduling algorithms, demonstrating a foundational understanding of operating system principles.
