In [20]:
!pip install -U langchain-community



In [None]:
# Step 1: Install required libraries
# Run this in a notebook cell; from a terminal, drop the leading "!" and run the pip command directly
!pip install -q python-pptx langchain sentence-transformers faiss-cpu transformers accelerate bitsandbytes langchain-huggingface flask edge-tts pyngrok

# Step 2: Import necessary modules
from flask import Flask, request, jsonify, send_file
from pptx import Presentation
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.vectorstores import FAISS
from langchain.embeddings import HuggingFaceEmbeddings
from langchain.chains import RetrievalQA
from langchain_huggingface import HuggingFacePipeline
from langchain.schema import Document, BaseRetriever
from langchain.prompts import PromptTemplate
from transformers import AutoTokenizer, pipeline, AutoModelForSeq2SeqLM, BitsAndBytesConfig
from sentence_transformers import CrossEncoder
from typing import List
import torch
import edge_tts
import io
import os
import asyncio
from pyngrok import ngrok

# Initialize Flask app
app = Flask(__name__)

# Create uploads directory (relative to the current working directory);
# exist_ok=True makes re-running this cell/script harmless.
os.makedirs('uploads', exist_ok=True)

# Step 3: Determine device (GPU if available, else CPU)
device = "cuda" if torch.cuda.is_available() else "cpu"

# Initialize core components with device specification
# - embeddings: sentence-transformers model used to embed chunks for the FAISS index
# - cross_encoder: scores (query, passage) pairs; used by RerankingRetriever to rerank
# - text_splitter: splits slide text into chunks of up to 300 characters with 50 overlap
embeddings = HuggingFaceEmbeddings(model_name="sentence-transformers/all-mpnet-base-v2", model_kwargs={"device": device})
cross_encoder = CrossEncoder("cross-encoder/ms-marco-MiniLM-L-6-v2", device=device)
text_splitter = RecursiveCharacterTextSplitter(chunk_size=300, chunk_overlap=50)

# Initialize language model with 8-bit quantization and reduced beams
# NOTE(review): 8-bit loading goes through bitsandbytes, which presumably needs a
# CUDA GPU; the CPU fallback chosen for `device` above may not apply to this model
# -- confirm before running on a CPU-only host.
model_name = "google/flan-t5-xl"
tokenizer = AutoTokenizer.from_pretrained(model_name)
quantization_config = BitsAndBytesConfig(load_in_8bit=True)
model = AutoModelForSeq2SeqLM.from_pretrained(
    model_name,
    device_map="auto",
    quantization_config=quantization_config
)
# NOTE(review): `temperature` is passed without `do_sample=True`; with beam search
# it is presumably ignored (transformers may warn about it) -- confirm whether
# sampling was intended or the argument should be dropped.
pipe = pipeline(
    "text2text-generation",
    model=model,
    tokenizer=tokenizer,
    max_new_tokens=512,
    temperature=0.1,
    num_beams=2  # Reduced from 5 to 2 for faster generation
)
# Wrap the transformers pipeline so it can be used as a LangChain LLM
llm = HuggingFacePipeline(pipeline=pipe)

# Global variable for QA chain
# Stays None until /upload has processed a presentation; /ask checks it.
qa_chain = None

# Step 4: Define custom retriever class for improved accuracy
class RerankingRetriever(BaseRetriever):
    """Two-stage retriever: first-pass retrieval followed by cross-encoder reranking.

    Candidates come from ``faiss_retriever``; each (query, passage) pair is then
    scored by ``cross_encoder`` and only the ``m`` best-scoring documents are
    returned, highest score first.
    """

    faiss_retriever: BaseRetriever  # First-stage retriever supplying candidates
    cross_encoder: CrossEncoder  # Model that scores (query, passage) pairs
    m: int = 3  # Number of top documents to return

    def _get_relevant_documents(self, query: str) -> List[Document]:
        """Return up to ``m`` documents for ``query``, ordered by relevance.

        Each returned document gets a ``relevance_score`` entry (a plain
        ``float``) written into its metadata.
        """
        # Retrieve initial documents using FAISS
        docs = self.faiss_retriever.invoke(query)
        if not docs:
            # Nothing to rerank; avoid calling the cross-encoder on an empty batch.
            return []

        # Rerank using cross-encoder for better relevance
        pairs = [[query, doc.page_content] for doc in docs]
        scores = self.cross_encoder.predict(pairs)
        for doc, score in zip(docs, scores):
            # Store a builtin float so the metadata stays JSON-serialisable
            # (the cross-encoder returns NumPy scalars).
            doc.metadata['relevance_score'] = float(score)

        # Return top m documents sorted by relevance
        ranked = sorted(
            docs,
            key=lambda doc: doc.metadata['relevance_score'],
            reverse=True,
        )
        return ranked[:self.m]

# Step 5: Flask routes
@app.route('/upload', methods=['POST'])
def upload_ppt():
    """Accept a .pptx upload, index its slide text and (re)build the QA chain.

    Expects a multipart form with a ``file`` part. On success the module-level
    ``qa_chain`` is replaced with a chain backed by the uploaded presentation.

    Returns:
        A JSON body and status code: 200 on success, 400 when the file is
        missing, unnamed, not a .pptx, or contains no extractable text.
    """
    global qa_chain
    if 'file' not in request.files:
        return jsonify({'error': 'No file uploaded'}), 400

    file = request.files['file']
    if not file.filename:
        return jsonify({'error': 'No selected file'}), 400

    # The filename is client-controlled: keep only its final path component so
    # names like "../../etc/x.pptx" cannot escape the uploads directory.
    safe_name = os.path.basename(file.filename.replace('\\', '/'))
    if not safe_name.lower().endswith('.pptx'):
        return jsonify({'error': 'Invalid file format. Only .pptx accepted'}), 400

    # Save and process PPT
    ppt_path = os.path.join('uploads', safe_name)
    file.save(ppt_path)

    # Extract text from PPT slides (one Document per slide that has text)
    presentation = Presentation(ppt_path)
    documents = []
    for slide_number, slide in enumerate(presentation.slides, start=1):
        slide_text = [
            shape.text
            for shape in slide.shapes
            if hasattr(shape, "text") and shape.text.strip()
        ]
        if slide_text:
            documents.append(Document(
                page_content="\n".join(slide_text),
                metadata={"source": ppt_path, "slide": slide_number}
            ))

    # Split text into manageable chunks
    chunks = text_splitter.split_documents(documents)
    if not chunks:
        # An empty chunk list cannot be indexed; report it as a client error
        # instead of letting the vector-store construction fail with a 500.
        return jsonify({'error': 'No text content found in the presentation'}), 400

    # Create vector store and QA chain
    vector_store = FAISS.from_documents(chunks, embeddings)
    faiss_retriever = vector_store.as_retriever(search_kwargs={"k": 5})  # Reduced from 10 to 5
    reranking_retriever = RerankingRetriever(
        faiss_retriever=faiss_retriever,
        cross_encoder=cross_encoder,
        m=3
    )

    # Define prompt template for accurate and detailed answers
    prompt_template = PromptTemplate(
        input_variables=["context", "question"],
        template="""Using the information provided in the context from a PowerPoint presentation, please provide a detailed and accurate answer to the following question. If the context doesn't contain the necessary information, clearly state that the answer cannot be determined from the provided content.

Context: {context}

Question: {question}

Answer:"""
    )

    # Set up the QA chain without returning source documents
    qa_chain = RetrievalQA.from_chain_type(
        llm=llm,
        chain_type="stuff",
        retriever=reranking_retriever,
        return_source_documents=False,  # Set to False to speed up processing
        chain_type_kwargs={"prompt": prompt_template}
    )

    return jsonify({'message': 'PowerPoint processed successfully'}), 200

@app.route('/ask', methods=['POST'])
def ask_question():
    """Answer a question about the uploaded presentation and return it as speech.

    Expects a JSON body of the form ``{"question": "<text>"}``. The question is
    run through the module-level ``qa_chain`` and the answer text is
    synthesised with Edge TTS.

    Returns:
        An MP3 attachment (``answer.mp3``) on success, or a JSON error with
        status 400 when no presentation has been processed yet or the request
        carries no usable question.
    """
    if qa_chain is None:
        return jsonify({'error': 'No PowerPoint processed yet'}), 400

    # silent=True yields None for a missing/malformed JSON body instead of
    # raising, so every bad request gets the same JSON error response.
    data = request.get_json(silent=True)
    question = data.get('question') if isinstance(data, dict) else None
    if not isinstance(question, str) or not question.strip():
        return jsonify({'error': 'No question provided'}), 400

    # Process the question and get the answer
    result = qa_chain.invoke({"query": question})
    answer = result['result']

    # Generate audio response using Edge TTS
    async def generate_audio():
        """Stream the synthesised speech for ``answer`` into an in-memory buffer."""
        communicate = edge_tts.Communicate(answer, "en-IN-PrabhatNeural")
        audio_bytes = io.BytesIO()
        async for chunk in communicate.stream():
            if chunk["type"] == "audio":
                audio_bytes.write(chunk["data"])
        audio_bytes.seek(0)
        return audio_bytes

    # asyncio.run creates a fresh event loop and closes it afterwards, so no
    # loop is leaked per request.
    audio_bytes = asyncio.run(generate_audio())

    # Send audio file as response
    return send_file(
        audio_bytes,
        mimetype="audio/mpeg",
        as_attachment=True,
        download_name="answer.mp3"
    )

# Step 6: Configure and start ngrok for public access
# The auth token is a credential and must never be committed to source.
# Supply it through the NGROK_AUTHTOKEN environment variable; any token that
# was previously hard-coded here should be treated as leaked and revoked.
ngrok_token = os.environ.get("NGROK_AUTHTOKEN")
if ngrok_token:
    ngrok.set_auth_token(ngrok_token)
# With no token in the environment, fall through to whatever ngrok
# configuration is already present on this machine.
public_url = ngrok.connect(5000).public_url
print(f'Public URL: {public_url}')

# Step 7: Run Flask app
if __name__ == '__main__':
    app.run(host='0.0.0.0', port=5000)

  Preparing metadata (setup.py) ... [?25l[?25hdone
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m472.8/472.8 kB[0m [31m5.9 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m30.7/30.7 MB[0m [31m17.2 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m76.1/76.1 MB[0m [31m7.0 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m363.4/363.4 MB[0m [31m3.5 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m13.8/13.8 MB[0m [31m60.0 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m24.6/24.6 MB[0m [31m41.1 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m883.7/883.7 kB[0m [31m22.2 MB/s[0m eta [36m0:00:00[0m
[2K   [91m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m[90m╺[0m[90m━[0m [32m634.5/664.8 MB[0m [31m26.5 MB

ModuleNotFoundError: No module named 'pptx'