<a href="https://colab.research.google.com/github/ShailenderGoyal/Enigmatrix_Salesforce_Hackathon/blob/main/colab%20notebooks/enigmatrixPsH1.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:

# Remove the preinstalled PyTorch stack and NumPy so GPU-compatible builds (with Triton support) can be reinstalled below
!pip uninstall -y torch torchvision torchaudio numpy

# Install GPU-compatible PyTorch
!pip install --no-cache-dir --upgrade torch torchvision torchaudio --index-url https://download.pytorch.org/whl/cu118

# Install all required and compatible packages
!pip install --no-cache-dir \
  transformers==4.35.2 \
  sentence-transformers==2.3.1 \
  faiss-cpu==1.7.4 \
  fastapi==0.105.0 \
  uvicorn==0.24.0.post1 \
  python-multipart==0.0.6 \
  pyngrok==7.0.0 \
  langchain==0.0.350 \
  langchain-community==0.0.13 \
  pillow==10.0.1 \
  numpy==1.26.4 \
  psutil

# Restart the runtime after running this cell so the newly installed packages take effect





In [None]:

import os
from pyngrok import ngrok


# ngrok auth token registered with pyngrok below; it is intentionally blank
# in the notebook and must be filled in before the tunnel cell is run.
NGROK_AUTH_TOKEN = ""  # Replace this with your own token
# Hand the token to pyngrok so later ngrok.connect() calls can use it.
ngrok.set_auth_token(NGROK_AUTH_TOKEN)

In [None]:
!pip install -q --upgrade google-generativeai


In [None]:
from langchain_community.vectorstores.faiss import FAISS
from langchain_community.embeddings import HuggingFaceEmbeddings
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.schema import Document as LangchainDocument

import google.generativeai as genai
import os
import importlib
import datetime

# Add your Gemini API key (note: this assignment overwrites any GOOGLE_API_KEY already set in the environment)
os.environ["GOOGLE_API_KEY"] = ""

class RAGGeminiSystem:
    """Retrieval-augmented question answering backed by FAISS and Gemini.

    Documents are split into overlapping character chunks, embedded with a
    sentence-transformers model and kept in a FAISS vector store. Questions
    are answered by retrieving the most similar chunks and handing them to a
    Gemini model as context.
    """

    # Reply used when Gemini produces no usable text.
    _FALLBACK_ANSWER = "I don't have enough information to answer that question."

    def __init__(self):
        """Create the text splitter and configure the Gemini client.

        The embedding model and the vector store are created lazily on first
        use.

        Raises:
            RuntimeError: If the Gemini client cannot be configured.
        """
        self.embedding_model = None
        self.vector_store = None

        self.embedding_model_name = "sentence-transformers/all-MiniLM-L6-v2"
        self.text_splitter = RecursiveCharacterTextSplitter(
            chunk_size=500,
            chunk_overlap=50,
            length_function=len
        )

        self._setup_gemini_api()
        print("RAG-Gemini system initialized.")

    def _setup_gemini_api(self):
        """Configure ``genai`` from ``GOOGLE_API_KEY`` and build the model handle.

        Raises:
            RuntimeError: If the key is missing or configuration fails; the
                original exception is attached as ``__cause__``.
        """
        try:
            api_key = os.getenv("GOOGLE_API_KEY")
            if not api_key:
                raise ValueError("Missing GOOGLE_API_KEY environment variable.")
            genai.configure(api_key=api_key)

            self.gemini_model = genai.GenerativeModel("models/gemini-2.0-flash-lite")
        except Exception as e:
            # Chain the cause so the underlying failure stays visible in tracebacks.
            raise RuntimeError(f"Gemini API initialization failed: {str(e)}") from e

    def _load_embedding_model(self):
        """Instantiate the embedding model on first use (no-op afterwards).

        Raises:
            RuntimeError: If constructing the embeddings raises ``ImportError``.
        """
        if self.embedding_model is None:
            print("🔄 Loading embedding model...")
            try:
                importlib.invalidate_caches()
                self.embedding_model = HuggingFaceEmbeddings(
                    model_name=self.embedding_model_name
                )
                print("Embedding model loaded.")
            except ImportError as e:
                raise RuntimeError(
                    "sentence-transformers is not installed. Run: pip install sentence-transformers"
                ) from e

    @staticmethod
    def _extract_text(response):
        """Return the stripped text of a Gemini response, or ``""`` if unusable.

        The ``text`` accessor may raise ``ValueError`` (e.g. when the response
        carries no text part); that case is treated like an empty reply.
        """
        try:
            text = getattr(response, "text", None)
        except ValueError:
            return ""
        return text.strip() if isinstance(text, str) else ""

    def add_documents(self, documents):
        """Chunk, embed and index documents.

        Args:
            documents: Iterable of dicts with a ``"content"`` string and a
                ``"metadata"`` dict.

        Returns:
            A dict with ``"status"`` (``"success"`` or ``"error"``) and a
            human-readable ``"message"``. Errors are reported in the result
            rather than raised.
        """
        try:
            self._load_embedding_model()

            all_chunks = []
            for doc in documents:
                for chunk in self.text_splitter.split_text(doc["content"]):
                    all_chunks.append(
                        # Copy the metadata so chunks of one document do not
                        # share (and accidentally co-mutate) a single dict.
                        LangchainDocument(page_content=chunk, metadata=dict(doc["metadata"]))
                    )

            if not all_chunks:
                return {"status": "error", "message": "No valid document content to index."}

            if self.vector_store is None:
                self.vector_store = FAISS.from_documents(all_chunks, self.embedding_model)
            else:
                self.vector_store.add_documents(all_chunks)

            return {
                "status": "success",
                "message": f"Added {len(documents)} documents with {len(all_chunks)} total chunks."
            }

        except Exception as e:
            return {"status": "error", "message": f"Failed to add documents: {str(e)}"}

    def answer_question(self, question, top_k=3, store_response=True):
        """Answer a question using retrieved context and Gemini.

        Args:
            question: The user's question.
            top_k: Number of chunks to retrieve as context.
            store_response: When true, a genuine Gemini answer is indexed back
                into the vector store. The canned fallback reply is never
                indexed, so it cannot pollute later retrievals.

        Returns:
            On success, a dict with ``"status"``, ``"answer"`` and
            ``"sources"`` (content and metadata of the retrieved chunks).
            On failure, a dict with ``"status": "error"`` and ``"message"``.
        """
        try:
            if self.vector_store is None:
                return {"status": "error", "message": "Knowledge base is empty."}

            self._load_embedding_model()

            docs = self.vector_store.similarity_search(question, k=top_k)
            combined_context = "\n\n".join(doc.page_content for doc in docs)

            prompt = (
                f"You are a helpful assistant with access to the following context:\n\n"
                f"{combined_context}\n\n"
                f"Based on the above information, answer the following question and respond cleanly without any markup symbols:\n"
                f"{question}"
            )

            # Use Gemini to generate response
            response = self.gemini_model.generate_content(prompt)
            generated = self._extract_text(response)
            answer = generated or self._FALLBACK_ANSWER

            # Optionally add Gemini's answer to the vector DB. Indexing is
            # best-effort: its outcome does not change the returned result.
            if store_response and generated:
                self.add_documents([{
                    "content": generated,
                    "metadata": {
                        "source": "gemini_response",
                        "question": question,
                        "timestamp": datetime.datetime.now().isoformat()
                    }
                }])

            return {
                "status": "success",
                "answer": answer,
                "sources": [
                    {"content": src.page_content, "metadata": src.metadata} for src in docs
                ]
            }

        except Exception as e:
            return {"status": "error", "message": f"Failed to answer question: {str(e)}"}


In [None]:
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM
from langchain.text_splitter import RecursiveCharacterTextSplitter
import torch
import gc

class TextSummarizer:
    """Chunked abstractive summarizer built on a Hugging Face seq2seq model.

    The tokenizer and model are loaded on demand for each ``summarize`` call
    and released afterwards to keep memory usage low between requests.
    """

    def __init__(self, model_name="philschmid/bart-large-cnn-samsum"):
        """Remember the model name; nothing is downloaded or loaded here."""
        self.model_name = model_name
        self.tokenizer = None
        self.model = None
        print(f"Summarizer initialized with model '{self.model_name}' - model will be loaded when needed")

    def _load_model(self):
        """Load tokenizer and model if absent, moving the model to CUDA when available."""
        if self.tokenizer is None or self.model is None:
            print("Loading summarization model...")
            self.tokenizer = AutoTokenizer.from_pretrained(self.model_name)
            self.model = AutoModelForSeq2SeqLM.from_pretrained(self.model_name)
            if torch.cuda.is_available():
                self.model = self.model.to("cuda")
            print("Model loaded")

    def _unload_model(self):
        """Drop tokenizer and model references and release cached memory."""
        self.tokenizer = None
        self.model = None
        gc.collect()
        if torch.cuda.is_available():
            torch.cuda.empty_cache()

    def _smart_chunk(self, text, chunk_size=512, chunk_overlap=50):
        """Split ``text`` into overlapping chunks, preferring paragraph and sentence breaks.

        ``chunk_size`` and ``chunk_overlap`` are passed straight to the
        splitter, which prefers the earlier separators in the list.
        """
        splitter = RecursiveCharacterTextSplitter(
            chunk_size=chunk_size,
            chunk_overlap=chunk_overlap,
            separators=["\n\n", "\n", ".", "!", "?", ",", " "],
        )
        return splitter.split_text(text)

    def summarize(self, text, max_length=150, min_length=40):
        """Summarize ``text`` chunk by chunk and join the partial summaries.

        Args:
            text: Text to summarize.
            max_length: ``max_length`` passed to ``generate`` for each chunk.
            min_length: ``min_length`` passed to ``generate`` for each chunk.

        Returns:
            On success, a dict with ``"status": "success"``, the joined
            ``"summary"`` and a ``"note"`` reporting the chunk count.
            On failure, a dict with ``"status": "error"`` and ``"message"``.
            Blank input yields an empty summary without loading the model.
        """
        # Nothing to summarize: skip the expensive model load/unload cycle.
        if isinstance(text, str) and not text.strip():
            return {
                "status": "success",
                "summary": "",
                "note": "Processed in 0 chunk(s) using LangChain chunking"
            }

        try:
            self._load_model()

            chunks = self._smart_chunk(text)
            use_cuda = torch.cuda.is_available()  # hoisted: constant for the whole call
            summaries = []

            for chunk in chunks:
                inputs = self.tokenizer(chunk, return_tensors="pt", truncation=True, max_length=512)
                if use_cuda:
                    inputs = {k: v.to("cuda") for k, v in inputs.items()}

                # Pass the full encoding so the attention mask reaches generate().
                summary_ids = self.model.generate(
                    **inputs,
                    max_length=max_length,
                    min_length=min_length,
                    num_beams=4,
                    length_penalty=2.0,
                    early_stopping=True
                )

                summaries.append(
                    self.tokenizer.decode(summary_ids[0], skip_special_tokens=True)
                )

            return {
                "status": "success",
                "summary": " ".join(summaries),
                "note": f"Processed in {len(chunks)} chunk(s) using LangChain chunking"
            }

        except Exception as e:
            return {
                "status": "error",
                "message": str(e)
            }

        finally:
            # Single cleanup point for both the success and the error path.
            self._unload_model()


In [None]:
!pip install -q sentence-transformers


In [None]:
from langchain_community.vectorstores.faiss import FAISS
from langchain_community.embeddings import HuggingFaceEmbeddings
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.schema import Document as LangchainDocument

from transformers import AutoTokenizer, AutoModelForQuestionAnswering
import torch
import torch.nn.functional as F
import gc
import importlib


class RAGSystem:
    """Retrieval-augmented extractive QA using FAISS and a SQuAD-tuned model.

    Documents are chunked, embedded and stored in a FAISS vector store. A
    question is answered by retrieving the most similar chunks and running an
    extractive question-answering model over them. The QA model is loaded per
    request and released afterwards.
    """

    def __init__(self):
        """Set model names, the text splitter and the target device.

        No model is loaded here; loading happens on first use.
        """
        self.embedding_model = None
        self.tokenizer = None
        self.qa_model = None
        self.vector_store = None

        self.embedding_model_name = "sentence-transformers/all-MiniLM-L6-v2"
        self.qa_model_name = "distilbert-base-cased-distilled-squad"

        self.text_splitter = RecursiveCharacterTextSplitter(
            chunk_size=500,
            chunk_overlap=50,
            length_function=len
        )

        self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

        print("RAG system initialized - models will be loaded when needed")

    def _load_embedding_model(self):
        """Instantiate the embedding model on first use (no-op afterwards).

        Raises:
            RuntimeError: If constructing the embeddings raises ``ImportError``.
        """
        if self.embedding_model is None:
            print("Loading embedding model...")
            try:
                importlib.invalidate_caches()
                self.embedding_model = HuggingFaceEmbeddings(
                    model_name=self.embedding_model_name,
                    model_kwargs={"device": self.device}
                )
            except ImportError as e:
                raise RuntimeError(
                    "sentence-transformers is not installed. "
                    "Install it using: pip install sentence-transformers"
                ) from e
            print("Embedding model loaded")

    def _load_qa_model(self):
        """Load the QA tokenizer and model onto ``self.device`` if absent."""
        if self.qa_model is None or self.tokenizer is None:
            print("Loading QA model...")
            self.tokenizer = AutoTokenizer.from_pretrained(self.qa_model_name)
            self.qa_model = AutoModelForQuestionAnswering.from_pretrained(self.qa_model_name)
            self.qa_model.to(self.device)
            print("QA model loaded")

    def _unload_qa_model(self):
        """Drop the QA model and tokenizer and release cached memory."""
        self.qa_model = None
        self.tokenizer = None
        gc.collect()
        if torch.cuda.is_available():
            torch.cuda.empty_cache()

    def add_documents(self, documents):
        """Chunk, embed and index documents.

        Args:
            documents: Iterable of dicts with a ``"content"`` string and a
                ``"metadata"`` dict.

        Returns:
            A dict with ``"status"`` (``"success"`` or ``"error"``) and a
            human-readable ``"message"``. Errors are reported in the result
            rather than raised.
        """
        try:
            self._load_embedding_model()

            # Flatten and chunk text
            all_chunks = []
            for doc in documents:
                for chunk in self.text_splitter.split_text(doc["content"]):
                    all_chunks.append(
                        # Copy the metadata so chunks of one document do not
                        # share (and accidentally co-mutate) a single dict.
                        LangchainDocument(page_content=chunk, metadata=dict(doc["metadata"]))
                    )

            if not all_chunks:
                return {"status": "error", "message": "No valid document content to index."}

            if self.vector_store is None:
                self.vector_store = FAISS.from_documents(all_chunks, self.embedding_model)
            else:
                self.vector_store.add_documents(all_chunks)

            return {
                "status": "success",
                "message": f"Added {len(documents)} documents with {len(all_chunks)} total chunks."
            }

        except Exception as e:
            return {
                "status": "error",
                "message": f"Failed to add documents: {str(e)}"
            }

    def answer_question(self, question, top_k=3):
        """Extract an answer span for ``question`` from the retrieved chunks.

        Args:
            question: The user's question.
            top_k: Number of chunks to retrieve as context.

        Returns:
            On success, a dict with ``"status"``, ``"answer"`` and
            ``"sources"`` (content and metadata of the retrieved chunks).
            On failure, a dict with ``"status": "error"`` and ``"message"``.
            The QA model is unloaded before returning in every case.
        """
        try:
            if self.vector_store is None:
                return {"status": "error", "message": "Knowledge base is empty."}

            self._load_embedding_model()

            docs = self.vector_store.similarity_search(question, k=top_k)
            combined_context = " ".join(doc.page_content for doc in docs)

            self._load_qa_model()

            inputs = self.tokenizer(
                question,
                combined_context,
                return_tensors="pt",
                max_length=512,
                # Only ever trim the context; the question must stay intact.
                truncation="only_second",
                padding=True
            )
            inputs = {k: v.to(self.device) for k, v in inputs.items()}

            with torch.no_grad():
                outputs = self.qa_model(**inputs)

            answer_start = int(torch.argmax(outputs.start_logits))
            answer_end = int(torch.argmax(outputs.end_logits)) + 1

            # Decode the predicted span, dropping special tokens so markers
            # such as [CLS]/[SEP] never leak into the answer. An inverted or
            # special-token-only span decodes to an empty string.
            span_ids = inputs["input_ids"][0][answer_start:answer_end]
            answer = self.tokenizer.decode(span_ids, skip_special_tokens=True)

            if not answer.strip():
                answer = "I don't have enough information to answer that question."

            return {
                "status": "success",
                "answer": answer,
                "sources": [
                    {"content": src.page_content, "metadata": src.metadata} for src in docs
                ]
            }

        except Exception as e:
            return {"status": "error", "message": str(e)}

        finally:
            self._unload_qa_model()


In [None]:
import sentence_transformers
print("sentence-transformers version:", sentence_transformers.__version__)


In [None]:
from fastapi import FastAPI, File, UploadFile, Form, HTTPException, Body
from fastapi.middleware.cors import CORSMiddleware
from pydantic import BaseModel
from typing import List, Dict, Any, Optional
import uvicorn
import json
import gc
import torch

# The FastAPI application object that all routes below are registered on.
app = FastAPI(title="Personalized Learning Assistant API")

# Enable CORS for every origin, method and header.
# NOTE(review): a wildcard origin combined with allow_credentials=True is very
# permissive — confirm this is intended outside of a demo/notebook setting.
app.add_middleware(
    CORSMiddleware,
    allow_origins=["*"],
    allow_credentials=True,
    allow_methods=["*"],
    allow_headers=["*"],
)

# Module-level placeholders for the service objects; they stay None until the
# startup handler assigns the real instances.

summarizer = None
rag_system = None
gemini_rag_system = None

@app.on_event("startup")
async def startup_event():
    """Create the module-level service objects when the server starts.

    Ensures ``sentence-transformers`` is importable (installing it with pip
    only when it is actually missing), then instantiates the summarizer and
    both RAG systems. Any exception raised by those constructors propagates
    to the caller.
    """
    import importlib.util
    import subprocess
    import sys

    global summarizer, rag_system, gemini_rag_system

    # Only shell out to pip when the package cannot be found: running an
    # install on every startup is slow and needs network access for nothing.
    if importlib.util.find_spec("sentence_transformers") is None:
        completed = subprocess.run(
            [sys.executable, "-m", "pip", "install", "sentence-transformers"],
            check=False,
        )
        if completed.returncode != 0:
            # Best-effort: report the failure instead of silently ignoring it;
            # embedding-model loading will surface a clear error later.
            print(
                "Warning: installing sentence-transformers failed "
                f"(pip exit code {completed.returncode})"
            )
        # Make a freshly installed package visible to the import system.
        importlib.invalidate_caches()

    summarizer = TextSummarizer()
    rag_system = RAGSystem()
    gemini_rag_system = RAGGeminiSystem()
    print("API initialized - models will be loaded on demand")

# Define request models
class Document(BaseModel):
    """Request schema for a single document submitted for indexing."""

    # Raw text of the document; the RAG systems split this into chunks.
    content: str
    # Arbitrary key/value metadata attached to every chunk of the document.
    metadata: Dict[str, Any]

class QuestionRequest(BaseModel):
    """Request schema for the question-answering endpoints."""

    # The question text forwarded to the RAG system's answer_question().
    question: str

class SummarizeRequest(BaseModel):
    """Request schema for the /summarize endpoint."""

    # Text to be summarized.
    text: str
    # Forwarded to the summarizer as max_length (defaults to 150).
    max_length: Optional[int] = 150
    # Forwarded to the summarizer as min_length (defaults to 40).
    min_length: Optional[int] = 40

# Define API endpoints
@app.post("/rag/add_documents")
async def add_documents(documents: List[Document]):
    """Index the posted documents in the local RAG system.

    Frees Python and CUDA caches first, converts each request model to a
    plain dict and returns the RAG system's status dict unchanged.
    """
    # Reclaim memory before the indexing work starts.
    gc.collect()
    gpu_present = torch.cuda.is_available()
    if gpu_present:
        torch.cuda.empty_cache()

    payload = []
    for item in documents:
        payload.append(item.dict())

    return rag_system.add_documents(payload)

@app.post("/rag/answer")
async def answer_question(request: QuestionRequest):
    """Answer a question with the local RAG system.

    Frees Python and CUDA caches first, then returns the RAG system's result
    dict unchanged.
    """
    # Reclaim memory before the QA model gets loaded.
    gc.collect()
    gpu_present = torch.cuda.is_available()
    if gpu_present:
        torch.cuda.empty_cache()

    query = request.question
    return rag_system.answer_question(query)

@app.post("/gemini_rag/add_documents")
async def add_documents_gemini(documents: List[Document]):
    """Index the posted documents in the Gemini-backed RAG system.

    Frees Python and CUDA caches first, converts each request model to a
    plain dict and returns the RAG system's status dict unchanged.
    """
    # Reclaim memory before the indexing work starts.
    gc.collect()
    gpu_present = torch.cuda.is_available()
    if gpu_present:
        torch.cuda.empty_cache()

    payload = []
    for item in documents:
        payload.append(item.dict())

    return gemini_rag_system.add_documents(payload)

@app.post("/gemini_rag/answer")
async def answer_question_gemini(request: QuestionRequest):
    """Answer a question with the Gemini-backed RAG system.

    Frees Python and CUDA caches first, then returns the RAG system's result
    dict unchanged.
    """
    # Reclaim memory before retrieval runs.
    gc.collect()
    gpu_present = torch.cuda.is_available()
    if gpu_present:
        torch.cuda.empty_cache()

    query = request.question
    return gemini_rag_system.answer_question(query)

@app.post("/summarize")
async def summarize_text(request: SummarizeRequest):
    """Summarize the posted text.

    Frees Python and CUDA caches first, then forwards the text and both
    length limits to the summarizer and returns its result dict unchanged.
    """
    # Reclaim memory before the summarization model gets loaded.
    gc.collect()
    gpu_present = torch.cuda.is_available()
    if gpu_present:
        torch.cuda.empty_cache()

    length_limits = {
        "max_length": request.max_length,
        "min_length": request.min_length,
    }
    return summarizer.summarize(request.text, **length_limits)


In [None]:
!pip install nest_asyncio

In [None]:
# Start the FastAPI server and expose it through an ngrok tunnel
import nest_asyncio
import uvicorn

# Open an ngrok tunnel to local port 8000 (the port uvicorn binds below).
# NOTE(review): ngrok.connect presumably returns a tunnel object rather than a
# plain string, so the printed text may contain more than the URL — confirm.
public_url = ngrok.connect(8000)
print(f"Public URL: {public_url}")

# Apply nest_asyncio to allow nested event loops; the notebook presumably
# already runs one, which uvicorn.run would otherwise conflict with.
nest_asyncio.apply()

# Start the FastAPI server on all interfaces; this call does not return until
# the server stops.
uvicorn.run(app, host="0.0.0.0", port=8000)

