# Streamlit-based RAG with Wikipedia Data

In [19]:
!pip install streamlit wikipedia-api sentence-transformers pyngrok




Loading and preprocessing the dataset

In [20]:
import pandas as pd

# Load the contacts dataset. One text column is turned into the list of
# documents that later cells embed and search.
DATA_PATH = "/content/Contacts.csv"  # Modify with your dataset path
TEXT_COLUMN = "Acquisition Source"   # Modify column name as needed

df = pd.read_csv(DATA_PATH)

# Check dataset structure.
# NOTE: head() also prints the Email and Phone columns; clear this output
# before sharing the notebook if the file holds real contact data.
print(df.head())

if TEXT_COLUMN not in df.columns:
    raise KeyError(
        f"Column {TEXT_COLUMN!r} not found in {DATA_PATH}; "
        f"available columns: {list(df.columns)}"
    )

# Missing cells are read as float NaN, which a text encoder cannot process.
# They are replaced with empty strings (instead of dropping the rows) so that
# position i in `documents` still corresponds to row i of `df`.
documents = df[TEXT_COLUMN].fillna("").astype(str).tolist()


                                     ID                       Email Country  \
0  5602e338-32d3-4832-a15e-fa745b2cc9fa     pameladavis@example.com      US   
1  4266b97b-d11a-4b4f-8172-269e65bc4620         james65@example.net      FR   
2  b4a42fad-b3ec-4ae1-a080-c7f462f55c0d       spencer51@example.org     GER   
3  f6227877-85a8-4f50-aa21-5f19cac2970f  samanthaburton@example.net     GER   
4  f18ad397-6e77-48fa-86e7-d13d1c872f2f            ufox@example.com      FR   

           City             Phone Firstname   Birthdate  Postal Code  \
0     Ericatown   +1-969-273-1117    Nicole  1942-11-03        11832   
1  Cassandraton      516-659-4429   Matthew  1995-06-14        60795   
2  Deborahburgh      881-295-6723    Sandra  1943-06-24        60638   
3   Melissaport     (280)633-2086   Rebecca  1998-11-03        70089   
4   New Cynthia  001-911-890-9417    Gloria  1956-11-30        59814   

  Acquisition Source           Created At           Updated At  
0          promoMail  2021-

In [24]:
!pip install faiss-cpu



Collecting faiss-cpu
  Downloading faiss_cpu-1.10.0-cp311-cp311-manylinux_2_28_x86_64.whl.metadata (4.4 kB)
Downloading faiss_cpu-1.10.0-cp311-cp311-manylinux_2_28_x86_64.whl (30.7 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m30.7/30.7 MB[0m [31m60.7 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: faiss-cpu
Successfully installed faiss-cpu-1.10.0


In [25]:
from sentence_transformers import SentenceTransformer
import faiss
import numpy as np

# Sentence encoder used for the corpus here and for queries in later cells
model = SentenceTransformer("all-MiniLM-L6-v2")

# One dense vector per entry of `documents`, returned as a NumPy array
embeddings = model.encode(documents, convert_to_numpy=True)

# Flat L2 index sized to the embedding width, then filled with the corpus vectors
embedding_dim = embeddings.shape[1]
index = faiss.IndexFlatL2(embedding_dim)
index.add(embeddings)

# Persist the index and the raw embeddings in the working directory
faiss.write_index(index, "contacts_data.faiss")
np.save("contacts_embeddings.npy", embeddings)


In [26]:
def retrieve_top_contacts(query, top_k=5):
    """
    Retrieve the documents closest to a free-text query.

    The query is encoded with the notebook-level ``model``, looked up in the
    notebook-level FAISS ``index``, and the hit positions are mapped back to
    entries of ``documents``.

    Args:
        query: Text to search for.
        top_k: Maximum number of hits to return.

    Returns:
        A list with at most ``top_k`` entries of ``documents``, in the order
        the index returned them. Empty when ``top_k`` is not positive.
    """
    if top_k <= 0:
        return []

    query_embedding = model.encode([query], convert_to_numpy=True)
    _, indices = index.search(query_embedding, top_k)  # Search top_k results

    # FAISS pads the result with -1 when the index holds fewer than top_k
    # vectors. A negative position would silently wrap around to the end of
    # the list, so only positions that really exist in `documents` are kept.
    return [documents[i] for i in indices[0] if 0 <= i < len(documents)]

# Try the retriever on a sample question and list the hits, one per line
query = "What are the major logistics risks this quarter?"
results = retrieve_top_contacts(query)
if results:
    print(*results, sep="\n")


promoMail
promoMail
promoMail
promoMail
promoMail


In [27]:
import streamlit as st
import wikipediaapi
from sentence_transformers import SentenceTransformer
from pyngrok import ngrok
import os
import pandas as pd

# Function to fetch Wikipedia summary
def fetch_wikipedia_summary(query):
    """
    Look up ``query`` as an English Wikipedia page title and return its summary.

    Returns the page's ``summary`` when the page exists, otherwise a fixed
    "not found" message.
    """
    client = wikipediaapi.Wikipedia(
        language='en',
        user_agent="Mozilla/5.0 (compatible; MyBot/0.1; +http://mybot.com/info)",
    )
    article = client.page(query)

    # Guard clause: report a missing page before touching the summary
    if not article.exists():
        return "No Wikipedia page found for the given query."
    return article.summary


In [28]:
# Instantiate the sentence-embedding encoder and bind it to `model`
model = SentenceTransformer("all-MiniLM-L6-v2")


In [41]:
%%writefile app.py
import streamlit as st
from sentence_transformers import SentenceTransformer
import faiss
import numpy as np
import pandas as pd

# Load FAISS index and dataset
df = pd.read_csv("/content/Contacts.csv")  # Modify dataset path
documents = df["Acquisition Source"].tolist()

# Load pre-trained model
model = SentenceTransformer("all-MiniLM-L6-v2")

# Load FAISS index and embeddings
index = faiss.read_index("contacts_data.faiss")

# Streamlit UI
st.title("📊 AI-Powered Supply Chain Contacts Search")
st.write("Enter a query to retrieve the most relevant contacts.")

query = st.text_input("Enter your query:")
if st.button("Search Contacts"):
    if query:
        query_embedding = model.encode([query], convert_to_numpy=True)
        _, indices = index.search(query_embedding, 5)  # Top 5 results
        results = [documents[i] for i in indices[0]]

        st.subheader("🔍 Top Contacts:")
        for i, res in enumerate(results):
            st.write(f"**{i+1}.** {res}")

    else:
        st.warning("Please enter a valid query.")



Overwriting app.py


In [44]:
# Run the Streamlit app
!streamlit run app.py &


Collecting usage statistics. To deactivate, set browser.gatherUsageStats to false.
[0m
[0m
[34m[1m  You can now view your Streamlit app in your browser.[0m
[0m
[34m  Local URL: [0m[1mhttp://localhost:8502[0m
[34m  Network URL: [0m[1mhttp://172.28.0.12:8502[0m
[34m  External URL: [0m[1mhttp://34.90.141.181:8502[0m
[0m
[34m  Stopping...[0m


In [45]:
# Kill any existing ngrok and Streamlit processes
!pkill -f streamlit
!pkill -f ngrok

# Re-run ngrok authentication (Kills if any other site is running, to avoid traffic)
!ngrok authtoken 2sh8tZRTzmljkt4YiY7hv3Qv1SI_fY1azBK9xS6q9eqN2vzp

# Start Streamlit again
!nohup streamlit run app.py --server.port 8501 &

# Reconnect ngrok to expose Streamlit
from pyngrok import ngrok
import time

time.sleep(5)  # Wait for Streamlit to start
public_url = ngrok.connect(8501)
print(f" New Public Link: {public_url}")


Authtoken saved to configuration file: /root/.config/ngrok/ngrok.yml
nohup: appending output to 'nohup.out'
 New Public Link: NgrokTunnel: "https://371c-34-90-141-181.ngrok-free.app" -> "http://localhost:8501"
