In [None]:
pip install streamlit

In [None]:
pip install langchain-community==0.0.19

In [None]:
pip install langchain

In [None]:
pip install langchain-groq

In [None]:
pip install pyngrok

In [None]:
pip install faiss-cpu

In [None]:
pip install unstructured

In [None]:
# Source of the Streamlit app, written to app.py below.
#
# The app lets the user enter up to three article URLs and a Groq API key in
# the sidebar, builds a FAISS index over the article text, and answers
# questions against that index with RetrievalQAWithSourcesChain.
#
# This is a regular (non-raw) string, so backslashes meant for app.py must be
# doubled here (see the text splitter separators). Do not use triple double
# quotes inside the app source; they would terminate this literal.
app_py_content = """
import os
import streamlit as st
import time
import pickle
from langchain_groq import ChatGroq
from langchain.chains import RetrievalQAWithSourcesChain
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_community.document_loaders import UnstructuredURLLoader
from langchain_community.embeddings import HuggingFaceEmbeddings
from langchain_community.vectorstores import FAISS

st.title("News Article RAG (Retrieval-Augmented Generation) Tool")
st.sidebar.title("News Article URLs")

# Sidebar for URLs
urls = []
for i in range(3):
    url = st.sidebar.text_input(f"URL {i+1}")
    urls.append(url)

process_url_clicked = st.sidebar.button("Process URLs")
file_path = "faiss_store.pkl"

# Single-element status area. Every call on this placeholder REPLACES what it
# currently shows, so it is used for progress/status messages only. The
# question box below is a separate widget so that it never wipes out a status
# message and is never wiped out by one.
main_placeholder = st.empty()

# Get Groq API Key from user
groq_api_key = st.sidebar.text_input("Enter your Groq API Key", type="password")

# Initialize ChatGroq if API key is provided
llm = None
if groq_api_key:
    llm = ChatGroq(
        groq_api_key=groq_api_key,
        model_name="llama-3.3-70b-versatile"
    )

if process_url_clicked:
    # Strip first and validate the stripped list, so whitespace-only inputs
    # are rejected here instead of reaching the loader as an empty URL list.
    valid_urls = [url.strip() for url in urls if url.strip()]

    if not groq_api_key:
        st.error("Please enter your Groq API Key in the sidebar.")
    elif not valid_urls:
        st.error("Please enter at least one URL.")
    else:
        try:
            # Load data
            main_placeholder.text("Data Loading...Started...")
            loader = UnstructuredURLLoader(urls=valid_urls)
            data = loader.load()
            main_placeholder.text(f"Data Loading...Completed. Loaded {len(data)} documents.")

            # Split data
            text_splitter = RecursiveCharacterTextSplitter(
                separators=['\\n\\n', '\\n', '.', ','],
                chunk_size=1000
            )
            docs = text_splitter.split_documents(data)
            if not docs:
                # Building an index from zero chunks fails with an unhelpful
                # low-level error; report the real cause instead.
                raise ValueError("No text could be extracted from the provided URLs.")
            main_placeholder.text(f"Text Splitting...Completed. Created {len(docs)} chunks.")

            # Create embeddings and save to FAISS index
            main_placeholder.text("Building Embedding Vector...Started...")
            embeddings = HuggingFaceEmbeddings(model_name="all-MiniLM-L6-v2")
            vectorstore = FAISS.from_documents(docs, embeddings)
            main_placeholder.text("Embedding Vector Building...Completed.")

            # Save the FAISS index to a pickle file
            with open(file_path, "wb") as f:
                pickle.dump(vectorstore, f)

            main_placeholder.success("Processing complete! FAISS index saved. You can now ask questions.")

        except Exception as e:
            main_placeholder.error(f"An error occurred: {str(e)}")
            st.error("Please check your URLs and try again.")

# Rendered as its own widget (not inside main_placeholder) so the status
# message above stays visible and the box stays on screen after a query.
query = st.text_input("Ask a question about the articles:")

if query:
    if not groq_api_key:
        st.error("Please enter your Groq API Key in the sidebar.")
    elif os.path.exists(file_path):
        # SECURITY NOTE: pickle.load can execute arbitrary code. This file is
        # written by this app above; never point file_path at untrusted data.
        with open(file_path, "rb") as f:
            vectorstore = pickle.load(f)
        # The file handle is released before the slow LLM round-trip.
        main_placeholder.info("FAISS index loaded. Generating answer...")
        chain = RetrievalQAWithSourcesChain.from_llm(llm=llm, retriever=vectorstore.as_retriever())
        result = chain({"question": query})
        st.header("Answer")
        st.write(result["answer"])

        # Display sources if available
        sources = result.get("sources", "")
        if sources:
            st.subheader("Sources:")
            st.write(sources)
    else:
        main_placeholder.warning("FAISS index not found. Please process URLs first by clicking 'Process URLs'.")
"""

# Explicit encoding so the generated file does not depend on the platform's
# default locale encoding.
with open("app.py", "w", encoding="utf-8") as f:
    f.write(app_py_content)

print("app.py created successfully.")

In [None]:
import subprocess

# Port the Streamlit server listens on; the ngrok cell must tunnel to the same port.
STREAMLIT_PORT = 8502
# Combined stdout/stderr of the Streamlit server is written here.
STREAMLIT_LOG_PATH = "nohup.out"

# Kill any previously running streamlit processes to ensure a clean start.
# pkill exits non-zero when nothing matched, which is expected on a first run,
# hence check=False.
subprocess.run(["pkill", "streamlit"], check=False)

# Launch Streamlit directly (no intermediate shell) so that process.pid is the
# PID of the Streamlit server itself rather than of a shell that exits at once.
# start_new_session=True detaches the server from the kernel's session so it
# keeps running in the background across kernel interrupts.
with open(STREAMLIT_LOG_PATH, "wb") as log_file:
    process = subprocess.Popen(
        ["streamlit", "run", "app.py", "--server.port", str(STREAMLIT_PORT)],
        stdout=log_file,
        stderr=subprocess.STDOUT,
        start_new_session=True,
    )

print(f"Streamlit app started in background (PID: {process.pid}). Check nohup.out for logs.")
print("You can now access your Streamlit app using the ngrok Public URL you received from the next cell.")

In [None]:
import os
from getpass import getpass

from pyngrok import ngrok

# Terminate open tunnels if any exist
ngrok.kill()

# Never hardcode the ngrok auth token in the notebook: it is a credential and
# ends up in version control and in every shared copy of this file.
# The token is read from the NGROK_AUTH_TOKEN environment variable, falling
# back to a hidden interactive prompt.
# NOTE: any token that was previously committed in this notebook must be
# treated as compromised and revoked from the ngrok dashboard.
ngrok_auth_token = (os.environ.get("NGROK_AUTH_TOKEN") or "").strip()
if not ngrok_auth_token:
    ngrok_auth_token = getpass("Enter your ngrok auth token: ").strip()
if not ngrok_auth_token:
    raise ValueError("An ngrok auth token is required to open the tunnel.")
ngrok.set_auth_token(ngrok_auth_token)

# Streamlit will run on port 8502
streamlit_port = 8502

# Open a tunnel to the Streamlit port
tunnel = ngrok.connect(streamlit_port)
print(f"Streamlit Public URL: {tunnel.public_url}")
print("Access your Streamlit app using the URL above.")