In [7]:
# %pip installs into the running kernel's environment. Versions are pinned to the ones the
# saved install log shows; langchain is held to the 0.3 series that log resolved against.
# TODO: sentence-transformers / transformers versions are not recorded in the saved output — pin once known.
%pip install -q streamlit==1.45.1 PyPDF2==3.0.1 sentence-transformers transformers groq==0.28.0 "langchain>=0.3,<0.4" langchain-community==0.3.25 faiss-cpu==1.11.0



Collecting streamlit
  Downloading streamlit-1.45.1-py3-none-any.whl.metadata (8.9 kB)
Collecting PyPDF2
  Downloading pypdf2-3.0.1-py3-none-any.whl.metadata (6.8 kB)
Collecting groq
  Downloading groq-0.28.0-py3-none-any.whl.metadata (15 kB)
Collecting langchain-community
  Downloading langchain_community-0.3.25-py3-none-any.whl.metadata (2.9 kB)
Collecting faiss-cpu
  Downloading faiss_cpu-1.11.0-cp311-cp311-manylinux_2_28_x86_64.whl.metadata (4.8 kB)
Collecting watchdog<7,>=2.1.5 (from streamlit)
  Downloading watchdog-6.0.0-py3-none-manylinux2014_x86_64.whl.metadata (44 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m44.3/44.3 kB[0m [31m3.5 MB/s[0m eta [36m0:00:00[0m
Collecting pydeck<1,>=0.8.0b4 (from streamlit)
  Downloading pydeck-0.9.1-py2.py3-none-any.whl.metadata (4.1 kB)
Collecting langchain-core<1.0.0,>=0.3.58 (from langchain)
  Downloading langchain_core-0.3.65-py3-none-any.whl.metadata (5.8 kB)
Collecting dataclasses-json<0.7,>=0.5.7 (from langcha

In [None]:
import os
from getpass import getpass

# Never store a real key in the notebook: reuse one already present in the
# environment, otherwise prompt for it without echoing the input.
if not os.environ.get("GROQ_API_KEY"):
    os.environ["GROQ_API_KEY"] = getpass("Enter your Groq API key: ")

In [None]:
%%writefile app.py
import os
from groq import Groq
from langchain_community.embeddings import HuggingFaceEmbeddings
from langchain_community.vectorstores import FAISS
from langchain.text_splitter import RecursiveCharacterTextSplitter
from PyPDF2 import PdfReader
import streamlit as st
from tempfile import NamedTemporaryFile

# Build the shared Groq client from the GROQ_API_KEY environment variable
groq_api_key = os.environ.get("GROQ_API_KEY")
client = Groq(api_key=groq_api_key)

# Function to extract text from a PDF
def extract_text_from_pdf(pdf_file_path):
    """Return the text of every page of a PDF, concatenated in page order.

    Args:
        pdf_file_path: Location of the PDF; passed straight to ``PdfReader``.

    Returns:
        str: The page texts joined with no separator. Pages that yield no
        text contribute an empty string.
    """
    pdf_reader = PdfReader(pdf_file_path)
    # `or ""` keeps a page whose extraction yields None from raising TypeError;
    # join avoids repeated string reallocation on long documents.
    return "".join(page.extract_text() or "" for page in pdf_reader.pages)

# Function to split text into chunks
def chunk_text(text, chunk_size=500, chunk_overlap=50):
    """Split ``text`` with a ``RecursiveCharacterTextSplitter``.

    Args:
        text: The string to split.
        chunk_size: Forwarded to the splitter as ``chunk_size``.
        chunk_overlap: Forwarded to the splitter as ``chunk_overlap``.

    Returns:
        Whatever the splitter's ``split_text`` returns for ``text``.
    """
    splitter_options = {"chunk_size": chunk_size, "chunk_overlap": chunk_overlap}
    splitter = RecursiveCharacterTextSplitter(**splitter_options)
    pieces = splitter.split_text(text)
    return pieces

# Function to create embeddings and store them in FAISS
def create_embeddings_and_store(chunks, model_name="sentence-transformers/all-mpnet-base-v2"):
    """Embed text chunks and index them in a FAISS vector store.

    Args:
        chunks: Iterable of text strings to embed.
        model_name: Model identifier handed to ``HuggingFaceEmbeddings``.
            Defaults to the model this app has always used.

    Returns:
        The vector store produced by ``FAISS.from_texts``.

    Raises:
        ValueError: If ``chunks`` is empty, e.g. a PDF with no extractable text.
    """
    chunks = list(chunks or [])
    if not chunks:
        # Fail with a readable message instead of an opaque error from inside the index build.
        raise ValueError("Cannot build a vector store: no text chunks were provided.")
    embeddings = HuggingFaceEmbeddings(model_name=model_name)
    vector_db = FAISS.from_texts(chunks, embedding=embeddings)
    return vector_db

# Function to query the vector database and interact with Groq
def query_vector_db(query, vector_db, k=3, model="llama3-8b-8192"):
    """Answer ``query`` with the Groq chat API, grounded in retrieved chunks.

    Args:
        query: The user's question; used for retrieval and as the user message.
        vector_db: Object exposing ``similarity_search(query, k=...)`` whose
            results carry a ``page_content`` attribute.
        k: Number of chunks to retrieve for the context (default 3).
        model: Groq model identifier for the completion (default unchanged
            from the original hard-coded value).

    Returns:
        The ``content`` of the first choice in the chat completion.
    """
    # Retrieve relevant documents
    docs = vector_db.similarity_search(query, k=k)
    context = "\n".join(doc.page_content for doc in docs)

    # The retrieved context goes in the system message; the question is sent as-is.
    chat_completion = client.chat.completions.create(
        messages=[
            {
                "role": "system",
                "content": f"Use the following context:\n{context}"
            },
            {
                "role": "user",
                "content": query
            },
        ],
        model=model,
    )
    return chat_completion.choices[0].message.content

# Streamlit app
st.title("RAG-Based Application")

# Upload PDF
uploaded_file = st.file_uploader("Upload a PDF document", type="pdf")

if uploaded_file:
    # Streamlit re-runs this whole script on every interaction (including each
    # query), so the index is built once per uploaded file and kept in session
    # state. The key is (name, size); a different file with the same name and
    # size would reuse the old index.
    file_key = (uploaded_file.name, uploaded_file.size)
    if st.session_state.get("indexed_file_key") != file_key:
        # extract_text_from_pdf takes a file path, so spill the upload to disk.
        with NamedTemporaryFile(delete=False, suffix=".pdf") as temp_file:
            temp_file.write(uploaded_file.getvalue())
            pdf_path = temp_file.name

        # Extract text
        try:
            text = extract_text_from_pdf(pdf_path)
        finally:
            # delete=False would otherwise leave one temp file behind per upload.
            os.remove(pdf_path)
        if not text or not text.strip():
            st.error("No extractable text was found in this PDF.")
            st.stop()
        st.write("PDF Text Extracted Successfully!")

        # Chunk text
        chunks = chunk_text(text)
        st.write("Text Chunked Successfully!")

        # Generate embeddings and store in FAISS
        st.session_state["vector_db"] = create_embeddings_and_store(chunks)
        st.session_state["indexed_file_key"] = file_key
        st.write("Embeddings Generated and Stored Successfully!")

    vector_db = st.session_state["vector_db"]

    # User query input
    user_query = st.text_input("Enter your query:")
    if user_query:
        response = query_vector_db(user_query, vector_db)
        st.write("Response from LLM:")
        st.write(response)



2025-06-14 08:07:01.933 
  command:

    streamlit run /usr/local/lib/python3.11/dist-packages/colab_kernel_launcher.py [ARGUMENTS]


In [10]:
# Print this machine's public IPv4 address as reported by icanhazip.com.
# NOTE(review): presumably needed for the localtunnel access prompt in the next cell — confirm.
!wget -q -O - ipv4.icanhazip.com

34.53.108.54


In [None]:
# Start Streamlit in the background, then expose the same port through localtunnel.
# The port is passed explicitly so the server and the tunnel cannot end up on different ports.
!streamlit run app.py --server.port 8501 & npx localtunnel --port 8501


Collecting usage statistics. To deactivate, set browser.gatherUsageStats to false.
[0m
[1G[0K⠙[1G[0K⠹[1G[0K⠸[1G[0K⠼[1G[0K⠴[1G[0K⠦[1G[0K[0m
[34m[1m  You can now view your Streamlit app in your browser.[0m
[0m
[34m  Local URL: [0m[1mhttp://localhost:8501[0m
[34m  Network URL: [0m[1mhttp://172.28.0.12:8501[0m
[34m  External URL: [0m[1mhttp://34.53.108.54:8501[0m
[0m
your url is: https://light-snakes-shave.loca.lt
