In [2]:
import os
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_community.vectorstores import FAISS
from langchain_community.embeddings import HuggingFaceEmbeddings
from langchain.chains import ConversationalRetrievalChain
# from langchain_community.llms import HuggingFacePipeline
# from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline
from langchain.prompts import PromptTemplate

from langchain_community.llms import Ollama


In [3]:
# --- Step 1: Data Ingestion & Preprocessing ---
def load_and_preprocess_blogs(blog_dir="blogs"):
    raw_texts = []
    for filename in os.listdir(blog_dir):
        if filename.endswith(".txt"):
            filepath = os.path.join(blog_dir, filename)
            with open(filepath, "r", encoding="utf-8") as f:
                content = f.read()
                # Assuming the first line is the title
                lines = content.split('\n', 1)
                title = lines[0].replace("Title: ", "").strip() if lines else "No Title"
                text_content = lines[1].strip() if len(lines) > 1 else content.strip()
                raw_texts.append({"title": title, "text": text_content})
    return raw_texts

def chunk_texts(raw_texts, chunk_size=200, chunk_overlap=20):
    text_splitter = RecursiveCharacterTextSplitter(
        chunk_size=chunk_size,
        chunk_overlap=chunk_overlap,
        length_function=len,
        add_start_index=True,
    )
    documents = []
    for item in raw_texts:
        chunks = text_splitter.create_documents([item["text"]])
        for i, chunk in enumerate(chunks):
            # Add title and chunk index to metadata
            chunk.metadata = {"title": item["title"], "chunk_index": i}
            documents.append(chunk)
    return documents

In [4]:
# Load and chunk the data
blog_data = load_and_preprocess_blogs()
documents = chunk_texts(blog_data)

print(f"Loaded {len(blog_data)} blog posts.")
print(f"Generated {len(documents)} document chunks.")
# Print a sample chunk to verify
if documents:
    print("\nSample Chunk:")
    print(documents[0].page_content)
    print(documents[0].metadata)

Loaded 2 blog posts.
Generated 31 document chunks.

Sample Chunk:
When people talk about high performance, they often focus on time management, discipline, or motivation. But the real engine behind consistent output is not time — it’s energy. Energy fuels our
{'title': 'Energy Mastery: The Secret to Sustainable High Performance', 'chunk_index': 0}


In [None]:
# --- Step 2: Embedding + Vector Store ---
EMBEDDING_MODEL_NAME = "sentence-transformers/all-MiniLM-L6-v2"
vector_store_path = "faiss_index"

def create_and_save_vector_store(documents, model_name=EMBEDDING_MODEL_NAME, path=vector_store_path):
    print(f"\nLoading embeddings model: {model_name}...")
    embeddings = HuggingFaceEmbeddings(model_name=model_name)
    print("Creating FAISS vector store...")
    vector_store = FAISS.from_documents(documents, embeddings)
    print(f"Saving FAISS index to {path}...")
    vector_store.save_local(path)
    print("FAISS index saved.")
    return vector_store

def load_vector_store(model_name=EMBEDDING_MODEL_NAME, path=vector_store_path):
    print(f"\nLoading embeddings model: {model_name}...")
    embeddings = HuggingFaceEmbeddings(model_name=model_name)
    print(f"Loading FAISS index from {path}...")
    vector_store = FAISS.load_local(path, embeddings, allow_dangerous_deserialization=True) # Added for safety, if loading an existing index
    print("FAISS index loaded.")
    return vector_store

# Create and save the vector store (run this once to generate the index)
faiss_vector_store = create_and_save_vector_store(documents)



Loading embeddings model: sentence-transformers/all-MiniLM-L6-v2...


  embeddings = HuggingFaceEmbeddings(model_name=model_name)
  from .autonotebook import tqdm as notebook_tqdm


Creating FAISS vector store...


  return forward_call(*args, **kwargs)


Saving FAISS index to faiss_index...
FAISS index saved.


In [None]:
# --- Step 3: LangChain Integration ---
"""LLM_MODEL_NAME = "gpt2"

# Initialize the LLM
print(f"\nLoading LLM model: {LLM_MODEL_NAME}...")
try:
    tokenizer = AutoTokenizer.from_pretrained(LLM_MODEL_NAME)
    model = AutoModelForCausalLM.from_pretrained(LLM_MODEL_NAME)
    # Ensure pad_token_id is set for GPT2 tokenizer
    if tokenizer.pad_token is None:
        tokenizer.pad_token = tokenizer.eos_token # Or any other suitable token
    pipe = pipeline(
        "text-generation",
        model=model,
        tokenizer=tokenizer,
        max_new_tokens=250, # Increased for potentially longer answers
        temperature=0.7,
        top_p=0.95,
        repetition_penalty=1.15
    )
    llm = HuggingFacePipeline(pipeline=pipe)
    print("LLM model loaded.")
except Exception as e:
    print(f"Error loading LLM model {LLM_MODEL_NAME}: {e}")
    print("Attempting to load a smaller alternative (e.g., 'distilgpt2').")
    LLM_MODEL_NAME = "distilgpt2"
    tokenizer = AutoTokenizer.from_pretrained(LLM_MODEL_NAME)
    model = AutoModelForCausalLM.from_pretrained(LLM_MODEL_NAME)
    if tokenizer.pad_token is None:
        tokenizer.pad_token = tokenizer.eos_token
    pipe = pipeline(
        "text-generation",
        model=model,
        tokenizer=tokenizer,
        max_new_tokens=250,
        temperature=0.7,
        top_p=0.95,
        repetition_penalty=1.15
    )
    llm = HuggingFacePipeline(pipeline=pipe)
    print(f"Successfully loaded {LLM_MODEL_NAME} as a fallback.")"""



# Initialize Mistral via Ollama
llm = Ollama(model="mistral")
print("Mistral via Ollama loaded.")

# Create a FAISS Retriever
retriever = faiss_vector_store.as_retriever()

# Define the prompt template
prompt_template = """You are a helpful assistant. Use the context below to answer the user's question.
If the answer is not in the provided context, politely state that you don't have enough information.

Context:
{context}

Question:
{question}

Answer:"""
QA_CHAIN_PROMPT = PromptTemplate(input_variables=["context", "question"], template=prompt_template)

# Create the ConversationalRetrievalChain
qa_chain = ConversationalRetrievalChain.from_llm(
    llm=llm,
    retriever=retriever,
    combine_docs_chain_kwargs={"prompt": QA_CHAIN_PROMPT}
)

print("\nLangChain ConversationalRetrievalChain initialized.")

# --- Sample Queries ---
chat_history = []

def get_answer_from_rag(query, history):
    result = qa_chain.invoke({"question": query, "chat_history": history})
    return result["answer"]

print("\n--- Testing RAG Pipeline ---")

# Question 1: "What is energy mastery?"
query1 = "What is energy mastery?"
print(f"\nQuestion 1: {query1}")
response1 = get_answer_from_rag(query1, chat_history)
print(f"Answer: {response1}")
chat_history.append((query1, response1))

# Question 2: "How do high performers avoid burnout?"
query2 = "How do high performers avoid burnout?"
print(f"\nQuestion 2: {query2}")
response2 = get_answer_from_rag(query2, chat_history)
print(f"Answer: {response2}")
chat_history.append((query2, response2))

Mistral via Ollama loaded.

LangChain ConversationalRetrievalChain initialized.

--- Testing RAG Pipeline ---

Question 1: What is energy mastery?


  llm = Ollama(model="mistral")
  return forward_call(*args, **kwargs)


Answer:  Energy mastery refers to a way of managing one's energy effectively, rather than simply focusing on working longer hours. It involves doing less, but better, by investing in recovery between periods of intense cognitive effort, making informed decisions that affect energy levels, and scheduling tasks during the time when energy is highest, often mornings.

Question 2: How do high performers avoid burnout?


  return forward_call(*args, **kwargs)


Answer:  High performers use a method that involves structuring their weeks with cycles of push (working intensely) and pause (resting), recognizing rest as fuel, not weakness. They also focus on maintaining emotional hygiene through practices like journaling, coaching, mindfulness, and open communication with friends to prevent emotional bottlenecks.
