<a href="https://colab.research.google.com/github/Osama2321/Research_paper_app/blob/main/Research_Paper_app.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import streamlit as st
import pdfplumber
import faiss
import numpy as np
from langchain.text_splitter import RecursiveCharacterTextSplitter
from sentence_transformers import SentenceTransformer
from groq import Groq
import os

# Groq API key: a key that is already exported in the environment wins; the
# placeholder below is only a fallback for when none is set. Assigning
# unconditionally would overwrite a real key with the placeholder string.
os.environ.setdefault("GROQ_API_KEY", "your_groq_key_here")

# Sentence-embedding model, loaded once at import time and used by both
# store_in_faiss() and query_faiss() so chunks and queries share one model.
_EMBEDDING_MODEL_NAME = "all-MiniLM-L6-v2"
embedding_model = SentenceTransformer(_EMBEDDING_MODEL_NAME)

# Function to Extract Text from PDF
def extract_text_from_pdf(pdf_path):
    """Return the concatenated text of all pages of a PDF.

    Args:
        pdf_path: Passed unchanged to ``pdfplumber.open``.

    Returns:
        The text of each page followed by a newline, joined in page order,
        with leading and trailing whitespace stripped. Pages for which
        ``extract_text()`` returns an empty/falsy value are skipped, so a
        PDF without extractable text yields ``""``.
    """
    with pdfplumber.open(pdf_path) as document:
        # Extraction must happen while the document is still open.
        page_texts = [page.extract_text() for page in document.pages]

    non_empty = (f"{content}\n" for content in page_texts if content)
    return "".join(non_empty).strip()

# Function to Chunk Text
def chunk_text(text, chunk_size=500, overlap=50):
    """Split *text* into chunks with ``RecursiveCharacterTextSplitter``.

    Args:
        text: The string to split.
        chunk_size: Forwarded as the splitter's ``chunk_size``.
        overlap: Forwarded as the splitter's ``chunk_overlap``.

    Returns:
        Whatever ``split_text`` returns for *text* (the chunks, in order).
    """
    splitter_options = {"chunk_size": chunk_size, "chunk_overlap": overlap}
    return RecursiveCharacterTextSplitter(**splitter_options).split_text(text)

# Function to Store Chunks in FAISS
def store_in_faiss(chunks):
    """Embed the text chunks and add them to a new flat L2 FAISS index.

    Args:
        chunks: Non-empty sequence of strings. Row ``i`` of the index holds
            the embedding of ``chunks[i]``.

    Returns:
        A tuple ``(index, chunks)``: the populated ``faiss.IndexFlatL2`` and
        the same ``chunks`` object that was passed in.

    Raises:
        ValueError: If ``chunks`` is empty. Without this check an empty
            input fails later, while reading the embedding dimension, with
            an error that does not point at the real cause.
    """
    if not chunks:
        raise ValueError(
            "Cannot build a FAISS index: no text chunks were provided."
        )

    # FAISS expects a C-contiguous float32 matrix; make that explicit rather
    # than relying on the encoder's default output dtype and layout.
    embeddings = np.ascontiguousarray(
        embedding_model.encode(chunks), dtype=np.float32
    )
    index = faiss.IndexFlatL2(embeddings.shape[1])
    index.add(embeddings)
    return index, chunks

# Function to Retrieve Relevant Chunks
def query_faiss(query, index, chunks, top_k=3):
    """Return the chunks nearest to *query*, joined into one context string.

    Args:
        query: The question text to embed and search with.
        index: FAISS index whose row ``i`` corresponds to ``chunks[i]``.
        chunks: The chunk strings that were indexed.
        top_k: Maximum number of chunks to retrieve.

    Returns:
        The retrieved chunks joined with single spaces, nearest first, or
        ``""`` when there is nothing to retrieve (empty index, no chunks, or
        ``top_k <= 0``).
    """
    # Never request more neighbours than exist: FAISS pads missing results
    # with label -1, and chunks[-1] would silently repeat the last chunk.
    k = min(top_k, index.ntotal, len(chunks))
    if k <= 0:
        return ""

    query_embedding = np.asarray(
        embedding_model.encode([query]), dtype=np.float32
    )
    _distances, indices = index.search(query_embedding, k)

    # Still drop any out-of-range label so a padded -1 can never be used as
    # a Python negative index.
    retrieved_texts = [chunks[i] for i in indices[0] if 0 <= i < len(chunks)]
    return " ".join(retrieved_texts)

# Function to Get Response from Groq API
def get_groq_summary(query, context):
    """Ask the Groq chat model to answer *query* using *context*.

    Args:
        query: The user's question; placed after the context in the user
            message.
        context: Retrieved text, sent as ``Context: ...`` ahead of the query.

    Returns:
        The content of the first choice of the chat completion, or a fixed
        error string when the ``GROQ_API_KEY`` environment variable is
        missing or empty (no request is made in that case).
    """
    groq_key = os.environ.get("GROQ_API_KEY")
    if not groq_key:
        return "Error: Groq API Key not found. Set it in your environment variables."

    groq_client = Groq(api_key=groq_key)

    system_message = {
        "role": "system",
        "content": "You are an AI that summarizes research papers and extracts key points.",
    }
    user_message = {
        "role": "user",
        "content": f"Context: {context}\n\n{query}",
    }

    response = groq_client.chat.completions.create(
        messages=[system_message, user_message],
        model="llama-3.3-70b-versatile",
    )
    first_choice = response.choices[0]
    return first_choice.message.content

# Streamlit UI
# Page flow: upload a PDF -> extract, chunk and index its text -> answer a
# question about it with retrieved context -> offer the answer as a download.
# Streamlit re-runs this script top to bottom on every widget interaction.
st.set_page_config(page_title="RAG Research Paper App", layout="wide")
st.title("📄 RAG Research Paper Summarizer")

uploaded_file = st.file_uploader("Upload Research Paper (PDF)", type="pdf")

if uploaded_file:
    st.success("✅ File uploaded successfully!")

    # Extract and Process Text
    # The upload is passed on as a file-like object (pdfplumber.open accepts
    # either a path or a file object) instead of being copied to a fixed
    # "temp.pdf", which concurrent sessions would overwrite for each other.
    text = extract_text_from_pdf(uploaded_file)
    chunks = chunk_text(text)

    if not chunks:
        # E.g. a scanned, image-only PDF: there is nothing to embed, and
        # building the index from an empty chunk list would crash.
        st.error("❌ No extractable text was found in this PDF.")
        st.stop()

    index, stored_chunks = store_in_faiss(chunks)

    st.success("✅ Text extracted and indexed successfully!")

    # User Query
    query = st.text_input("🔎 Ask something about this paper:")

    if st.button("Generate Summary"):
        if not query.strip():
            # Avoid spending an API call on an empty question.
            st.warning("⚠️ Please enter a question before generating a summary.")
        else:
            context = query_faiss(query, index, stored_chunks)
            summary = get_groq_summary(query, context)

            st.subheader("📌 Summary:")
            st.write(summary)

            # Create download button
            # The string is handed over directly: writing it to
            # "summary.txt" first used the platform default encoding, which
            # can fail on non-ASCII model output, and left a stray file.
            st.download_button(
                label="📥 Download Summary",
                data=summary,
                file_name="research_summary.txt",
                mime="text/plain",
            )




In [None]:
# Fetch ipv4.icanhazip.com quietly (-q) and write the response to stdout
# (-O -); the recorded output below is an IPv4 address.
# NOTE(review): presumably this address is what localtunnel asks for as the
# tunnel password when the app is opened — confirm.
!wget -q -O - ipv4.icanhazip.com

34.125.171.119


In [1]:
# Start the Streamlit app in the background (&) and expose local port 8501
# through localtunnel.
# NOTE(review): assumes app.py contains the code from the first cell; no cell
# visible here writes that file — confirm. 8501 is presumably the port
# Streamlit serves on by default.
!streamlit run app.py & npx localtunnel --port 8501