In [22]:
import streamlit as st
import pandas as pd
import time
import re
from seleniumbase import SB

# --- 1. CLEANING FUNCTIONS ---
def clean_price(price_str):
    """Extract the first numeric amount from a raw price string.

    Commas (thousands separators) are removed before matching, so
    ``"Rs 1,200 / Piece"`` yields ``1200.0``.

    Args:
        price_str: Raw price text scraped from a listing.

    Returns:
        The first number found, as a ``float``; ``None`` when ``price_str``
        is not a string or contains no digits.
    """
    # Non-string input (None, NaN, numbers) has no text to parse. Checking the
    # type explicitly avoids a bare ``except`` that would also swallow
    # KeyboardInterrupt and SystemExit.
    if not isinstance(price_str, str):
        return None
    match = re.search(r'\d+(?:\.\d+)?', price_str.replace(',', ''))
    return float(match.group()) if match else None

def extract_unit(price_str):
    """Extract the unit that follows the first ``/`` in a price string.

    Args:
        price_str: Raw price text scraped from a listing, e.g. ``"Rs 45 / kg"``.

    Returns:
        The first word after the slash, capitalized (``"Kg"``);
        ``"Unit/Request"`` when the string has no ``/<word>`` part;
        ``"N/A"`` when ``price_str`` is not a string.
    """
    # Explicit type check instead of a bare ``except``: only non-string input
    # can make the regex search fail, and nothing else should be swallowed.
    if not isinstance(price_str, str):
        return "N/A"
    match = re.search(r'/\s*(\w+)', price_str)
    # ``\w+`` cannot capture whitespace, so no strip() is needed.
    return match.group(1).capitalize() if match else "Unit/Request"

# --- 2. THE SCRAPER ENGINE ---
def run_scraper(query):
    """Scrape IndiaMart search results for ``query`` into a DataFrame.

    Opens the search page in a headless undetected-Chrome session, scrolls
    three times so more products load, then builds one record for every
    element whose text contains the rupee sign and that sits inside a
    recognisable listing container.

    Args:
        query: Free-text product search term.

    Returns:
        A de-duplicated ``pandas.DataFrame`` with the columns ``Product``,
        ``Price``, ``Seller``, ``Location`` and ``Link``. When at least one
        listing was found it also carries ``Numeric Price`` and ``Unit`` and
        is sorted by unit, then by ascending price. When nothing was found
        the frame is empty.
    """
    # Function-scope import keeps this block self-contained.
    from urllib.parse import quote_plus

    # Rupee sign (U+20B9) written as an escape so the XPath keeps matching
    # even if this file is re-saved or re-read with the wrong encoding.
    rupee = "\u20b9"

    # NOTE(review): find_element returns the first match in document order,
    # which for the ancestor axis appears to be the OUTERMOST matching div.
    # If listings start sharing one link/seller, append "[1]" to pick the
    # nearest ancestor instead - confirm against the live page markup.
    card_xpath = (
        './ancestor::div[contains(@class, "card") or '
        'contains(@class, "lst") or contains(@class, "item")]'
    )
    link_xpath = './/a[contains(@href, "indiamart.com/proddetail")]'
    seller_xpath = './/div[contains(@class, "comp")] | .//a[contains(@class, "ls_nm")]'
    location_xpath = './/span[contains(@class, "city")] | .//span[contains(@class, "loc")]'

    with SB(uc=True, headless=True) as sb:  # Headless=True for web app feel
        # quote_plus encodes spaces as '+' and also escapes '&', '#', '%',
        # etc., which would otherwise corrupt the query string.
        url = f"https://dir.indiamart.com/search.mp?ss={quote_plus(query)}"
        sb.uc_open_with_reconnect(url, reconnect_time=4)

        # Auto-scroll to load products
        for _ in range(3):
            sb.execute_script("window.scrollBy(0, 1000);")
            time.sleep(1)

        listings = []
        price_elements = sb.find_elements(f'//*[contains(text(), "{rupee}")]')

        for price_el in price_elements:
            # Best effort: any price element that is not part of a complete
            # listing (no card, product link or seller) is skipped.
            try:
                raw_price = price_el.text.strip()
                card = price_el.find_element('xpath', card_xpath)
                link_el = card.find_element('xpath', link_xpath)

                # Read the text once; fall back to a placeholder when empty.
                name = link_el.text or "Product"
                seller = card.find_element('xpath', seller_xpath).text

                # Location is optional; a missing one must not drop the row.
                try:
                    location = card.find_element('xpath', location_xpath).text
                except Exception:
                    location = "Not Specified"

                listings.append({
                    "Product": name.strip(),
                    "Price": raw_price,
                    "Seller": seller.strip(),
                    "Location": location.strip(),
                    "Link": link_el.get_attribute("href"),
                })
            except Exception:
                # Narrower than a bare ``except`` so Ctrl-C / SystemExit
                # still stop the scrape.
                continue

        df = pd.DataFrame(listings).drop_duplicates()
        if not df.empty:
            df['Numeric Price'] = df['Price'].apply(clean_price)
            df['Unit'] = df['Price'].apply(extract_unit)
            df = df.sort_values(by=['Unit', 'Numeric Price'], ascending=[True, True])
        return df

# --- 3. STREAMLIT UI ---
st.set_page_config(page_title="IndiaMart Price Tracker", layout="wide")

# Symbols are written as escapes so they survive the file being re-saved or
# re-read with a legacy encoding.
RUPEE = "\u20b9"            # Indian rupee sign
PACKAGE_EMOJI = "\U0001F4E6"
INBOX_EMOJI = "\U0001F4E5"

st.title(f"{PACKAGE_EMOJI} IndiaMart Product Comparison Tool")
st.write(f"Developed for Dad by Aryan | {time.strftime('%Y')}")

# Sidebar Filters
with st.sidebar:
    st.header("Search Settings")
    search_query = st.text_input("Enter Product Name:", placeholder="e.g. PVC Pipes")
    search_button = st.button("Search & Compare")
    st.info("The tool will automatically sort by cheapest price and group by units (Kg/Piece).")

# st.button is True only for the single rerun right after the click. Every
# other widget interaction (city filter, download) reruns the script with
# search_button == False, so the scrape result is kept in session state and
# rendered from there; otherwise the table would vanish on the first filter
# change.
if search_button and search_query:
    with st.status(f"Searching for '{search_query}'...", expanded=True) as status:
        st.session_state["scrape_results"] = run_scraper(search_query)
        st.session_state["scrape_query"] = search_query
        status.update(label="Search Complete!", state="complete", expanded=False)

data = st.session_state.get("scrape_results")
result_query = st.session_state.get("scrape_query", "")


def _price_label(value):
    """Format a price for a metric tile; 'N/A' when no numeric price exists."""
    return f"{RUPEE}{value}" if pd.notna(value) else "N/A"


if data is not None:
    if not data.empty:
        # Metrics
        numeric_prices = data['Numeric Price']
        col1, col2, col3 = st.columns(3)
        col1.metric("Total Sellers Found", len(data))
        col2.metric("Min Price", _price_label(numeric_prices.min()))
        col3.metric("Max Price", _price_label(numeric_prices.max()))

        # Searchable Table
        st.subheader("Results Comparison")

        # Filtering by Location
        locations = ["All"] + sorted(data['Location'].unique().tolist())
        selected_loc = st.selectbox("Filter by City:", locations)

        filtered_df = data if selected_loc == "All" else data[data['Location'] == selected_loc]

        # Display with links
        st.dataframe(
            filtered_df[['Product', 'Numeric Price', 'Unit', 'Seller', 'Location', 'Link']],
            column_config={
                "Link": st.column_config.LinkColumn("Product Link")
            },
            use_container_width=True,
            hide_index=True
        )

        # Download Button
        csv = filtered_df.to_csv(index=False).encode('utf-8')
        # Collapse spaces, slashes and other unsafe characters so the query
        # yields a valid file name on every platform.
        safe_name = re.sub(r'[^\w-]+', '_', result_query.strip().lower()).strip('_') or "results"
        st.download_button(
            label=f"{INBOX_EMOJI} Download Data as CSV",
            data=csv,
            file_name=f"indiamart_{safe_name}.csv",
            mime='text/csv',
        )
    else:
        st.error("No data found. Please try a different search term or check the logs.")


✅ Professional report saved: Dad_IndiaMart_Pro_Report.xlsx
