In [None]:
import os
from getpass import getpass

# Provide the OpenAI key through the environment so that the Streamlit process
# started later from this notebook inherits it.
# - A key that is already set is left untouched (no clobbering with a placeholder).
# - Otherwise the key is read interactively, so it is never stored in the notebook.
if not os.environ.get("OPENAI_API_KEY"):
    entered_key = getpass("OpenAI API key: ").strip()
    if not entered_key:
        raise ValueError("OPENAI_API_KEY is required but no key was entered.")
    os.environ["OPENAI_API_KEY"] = entered_key

In [None]:
%%writefile app.py
import os
import streamlit as st
from langchain_openai import OpenAIEmbeddings, OpenAI
from langchain.chains import RetrievalQAWithSourcesChain
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_community.document_loaders import UnstructuredURLLoader
from langchain_community.vectorstores import FAISS

# Page chrome: main title plus a sidebar that hosts the URL inputs.
st.title('News Articles Research Tool 📈')
st.sidebar.title('News Article URLs')

# You must set OPENAI_API_KEY in the notebook (os.environ) before launching this app.
llm = OpenAI(temperature=0.2)
embeddings = OpenAIEmbeddings()

INDEX_DIR = "faiss_index"  # folder that will hold index.faiss + index.pkl
MAX_URLS = 3               # number of URL input boxes shown in the sidebar

# --- Sidebar: collect up to MAX_URLS URLs ---
# Entries are stripped so whitespace-only or padded input is not treated as a URL,
# and repeated entries are kept once so the same article is not indexed twice.
urls = []
for i in range(MAX_URLS):
    url = st.sidebar.text_input(f'URL {i+1}', key=f'url_{i}').strip()
    if url and url not in urls:
        urls.append(url)

process_url_clicked = st.sidebar.button('Process URL')

# Placeholder that the processing step overwrites with progress messages.
status = st.empty()

# --- Build the index when the sidebar button is pressed ---
# Pipeline: load the pages -> split into overlapping chunks -> embed into FAISS
# -> persist under INDEX_DIR. Any failure is reported through st.error.
if process_url_clicked:
    if not urls:
        st.warning("Please enter at least one URL.")
    else:
        try:
            # 1) Load data
            status.text("Data Loading... Started ✅")
            loader = UnstructuredURLLoader(urls=urls)
            data = loader.load()
            # Stop early with a clear message if nothing was loaded (for example,
            # every URL failed to fetch) rather than failing later in FAISS.
            if not data:
                raise ValueError("no content could be loaded from the provided URLs")

            # 2) Split text
            status.text("Text Splitting... Started ✅")
            splitter = RecursiveCharacterTextSplitter(
                separators=['\n\n', '\n', '.', ','],
                chunk_size=1000,
                chunk_overlap=200,
            )
            docs = splitter.split_documents(data)
            # An index cannot be built from zero chunks.
            if not docs:
                raise ValueError("the loaded pages contained no text to index")

            # 3) Build embeddings + FAISS
            status.text("Building FAISS index... Started ✅")
            vectorstore = FAISS.from_documents(docs, embeddings)

            # 4) Save to disk. The folder receives index.faiss plus index.pkl;
            #    the latter is a pickle, so only load indexes this app created.
            vectorstore.save_local(INDEX_DIR)
            status.text(f"Index saved to ./{INDEX_DIR} 🎉")

        except Exception as e:
            st.error(f"Error while processing: {e}")

# --- Ask a question ---
# Answers the question against the index saved under INDEX_DIR and lists the
# sources reported by the chain. Errors are shown through st.error.
query = st.text_input("Question:")

if query:
    if not os.path.isdir(INDEX_DIR):
        st.error("No index found. Please process URLs first.")
    else:
        try:
            # Reload the FAISS store from disk.
            # SECURITY: this unpickles index.pkl, which can execute arbitrary code.
            # It is acceptable only because the index was written by this app;
            # never point INDEX_DIR at files from an untrusted source.
            vectorstore = FAISS.load_local(
                INDEX_DIR,
                embeddings,
                allow_dangerous_deserialization=True  # needed because metadata is pickled internally
            )

            chain = RetrievalQAWithSourcesChain.from_llm(
                llm=llm,
                retriever=vectorstore.as_retriever()
            )
            # Use invoke(); calling the chain object directly is deprecated.
            result = chain.invoke({"question": query}, return_only_outputs=True)

            st.header("Answer")
            st.write(result.get("answer", ""))

            # Show each non-blank line of the chain's "sources" output separately.
            sources = result.get("sources", "")
            if sources:
                st.subheader("Sources:")
                for line in str(sources).splitlines():
                    if line.strip():
                        st.write(line.strip())

        except Exception as e:
            st.error(f"Error during Q&A: {e}")


Writing app.py


In [None]:
# kill any previous server on 8501, then start Streamlit
!fuser -k 8501/tcp >/dev/null 2>&1 || true
import subprocess, time
p = subprocess.Popen(["streamlit", "run", "app.py", "--server.port=8501", "--server.headless=true"])
time.sleep(3)  # give it a moment to boot

In [None]:
from pyngrok import ngrok

# Tear down leftovers from a previous run: disconnect every tunnel that is
# still open, then stop the ngrok agent process itself.
stale_tunnel_urls = [tunnel.public_url for tunnel in ngrok.get_tunnels()]
for stale_url in stale_tunnel_urls:
    ngrok.disconnect(stale_url)
ngrok.kill()

# Open a fresh HTTP tunnel to the local Streamlit port and show where it lives.
public_url = ngrok.connect(addr=8501, proto="http")
print("Streamlit app:", public_url)

Streamlit app: NgrokTunnel: "https://nonprominently-unpredisposing-burton.ngrok-free.dev" -> "http://localhost:8501"
