In [3]:
# Step 1: Environment Setup
!pip install langchain huggingface_hub faiss-cpu sentence-transformers

# Step 2: Import Required Modules
from pathlib import Path

from langchain.chains import LLMChain, SequentialChain
from langchain.document_loaders import TextLoader
from langchain.embeddings import HuggingFaceEmbeddings
from langchain.llms import HuggingFaceHub
from langchain.prompts import PromptTemplate
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.vectorstores import FAISS

# Step 3: Data Preparation & Vector DB Setup
# Load domain-specific documents.
# A failed load surfaces only as "RuntimeError: Error loading domain_docs.txt"
# (see this cell's recorded output), which does not say what went wrong.
# Check for the file up front so a missing file produces an actionable
# message, and pin the encoding rather than relying on the platform default.
# NOTE(review): assumes the documents are UTF-8 encoded — confirm.
docs_path = Path("domain_docs.txt")
if not docs_path.is_file():
    raise FileNotFoundError(
        f"{docs_path} was not found in {Path.cwd()}; place the domain "
        "documents there or update the path before building the vector store."
    )
loader = TextLoader(str(docs_path), encoding="utf-8")
documents = loader.load()

# Break the loaded documents into overlapping chunks (chunk_size=1000,
# chunk_overlap=200) so each piece can be embedded and retrieved on its own.
text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=200)
texts = text_splitter.split_documents(documents)

# Embed the chunks with the all-mpnet-base-v2 sentence-transformers model,
# index them in a FAISS vector store, and expose that store as a retriever
# configured with k=3 as its search setting.
embeddings = HuggingFaceEmbeddings(model_name="sentence-transformers/all-mpnet-base-v2")
vectorstore = FAISS.from_documents(texts, embeddings)
retriever = vectorstore.as_retriever(search_kwargs={"k": 3})

# Step 4: LLM Initialization
# Mistral-7B-Instruct served through the Hugging Face Hub wrapper.
# The generation budget is passed as `max_new_tokens`, the length parameter
# documented for the text-generation task; `max_length` is not listed for
# that task, so the intended 2000-token budget was presumably not applied.
# NOTE(review): confirm the hosted endpoint accepts a budget this large.
# NOTE(review): nothing in this file sets a Hugging Face API token —
# presumably it is expected in the environment; verify before running.
llm = HuggingFaceHub(
    repo_id="mistralai/Mistral-7B-Instruct-v0.2",
    model_kwargs={"temperature": 0.7, "max_new_tokens": 2000},
)

# Step 5: Prompt Engineering
# Stage 1 prompt: turn the question plus retrieved context into an outline.
outline_template = (
    "Generate a structured outline for answering this question:\n"
    "Question: {question}\n"
    "Context: {context}\n"
    "Create a comprehensive outline that addresses all key aspects."
)
outline_prompt = PromptTemplate(
    template=outline_template,
    input_variables=["question", "context"],
)

# Stage 2 prompt: expand an outline into a full draft, using the same context.
draft_template = (
    "Develop a detailed response based on the following outline:\n"
    "Outline: {outline}\n"
    "Context: {context}\n"
    "Write a complete draft answer with technical details and examples:"
)
draft_prompt = PromptTemplate(
    template=draft_template,
    input_variables=["outline", "context"],
)

# Stage 3 prompt: polish a draft into the final response, using the same context.
revise_template = (
    "Refine this draft into a polished response:\n"
    "Draft: {draft}\n"
    "Context: {context}\n"
    "Improve coherence, flow, and technical accuracy. "
    "Add transitional phrases and ensure logical structure:"
)
revise_prompt = PromptTemplate(
    template=revise_template,
    input_variables=["draft", "context"],
)

# Step 6: Chain Construction
# One LLMChain per stage, all sharing the same LLM. Each stage writes its
# result under an output key that matches the variable name the next stage's
# prompt reads: "outline" -> "draft" -> "final_answer".
outline_chain, draft_chain, revise_chain = (
    LLMChain(llm=llm, prompt=stage_prompt, output_key=stage_key)
    for stage_prompt, stage_key in (
        (outline_prompt, "outline"),
        (draft_prompt, "draft"),
        (revise_prompt, "final_answer"),
    )
)

# Step 7: Sequential Chain Integration
# Run the three stages in order. The caller supplies "question" and "context";
# only the last stage's "final_answer" is declared as an output.
full_chain = SequentialChain(
    chains=[outline_chain, draft_chain, revise_chain],
    input_variables=["question", "context"],
    output_variables=["final_answer"],
    verbose=True,
)

# Step 8: Retrieval-Augmented Generation Pipeline
def generate_answer(question):
    """Answer *question* using retrieved context and the three-stage chain.

    Looks up documents for the question with the module-level ``retriever``,
    joins their ``page_content`` with blank lines to form the context, runs
    ``full_chain`` on the question and context, and returns the value stored
    under ``'final_answer'`` in the chain's result.
    """
    retrieved = retriever.get_relevant_documents(question)
    passages = (doc.page_content for doc in retrieved)

    chain_inputs = {
        "question": question,
        "context": "\n\n".join(passages),
    }
    outputs = full_chain(chain_inputs)
    return outputs["final_answer"]

# Step 9: Example Usage
# Run the pipeline once on a sample question and print the final answer.
question = (
    "Explain the multi-stage fermentation process used in "
    "industrial biotechnology?"
)
answer = generate_answer(question)
print(answer)



RuntimeError: Error loading domain_docs.txt