In [1]:
import subprocess
import sys
import time
#import webbrowser

# Streamlit app code
# Source of the Streamlit app; it is written verbatim to first_data.py below.
# The literal is a raw string so that backslash escapes inside the app code
# (for example the tab separator) reach the generated file unchanged instead
# of being expanded by this cell first.
first_data = r'''
"""Universal Data Pipeline: load, inspect, clean and plot tabular data."""
import io
import json
import os

import pandas as pd
import plotly.express as px
import requests
import streamlit as st
from bs4 import BeautifulSoup

# ------------------------------------
# STREAMLIT UI HEADER
# ------------------------------------
st.set_page_config(page_title="Universal Data Pipeline", layout="wide")
st.title("📊 Universal Data Pipeline")
st.markdown("""
Welcome! Upload your data, clean it, explore it, and visualize it — all in one app.
""")

# Seconds to wait for a remote server; keeps a dead URL from hanging the app.
REQUEST_TIMEOUT = 15

# Readers keyed by lower-cased file extension.
FILE_READERS = {
    ".csv": pd.read_csv,
    ".tsv": lambda source: pd.read_csv(source, sep="\t"),
    ".xlsx": pd.read_excel,
    ".json": pd.read_json,
}

# ------------------------------------
# STEP 1: Load Data (from file, API, URL, etc.)
# ------------------------------------
st.header("📥 Step 1: Load Your Data")

# File uploader
file = st.file_uploader("Upload data file (.csv, .tsv, .xlsx, .json):", type=['csv', 'tsv', 'xlsx', 'json'])

data_source = st.radio("Or choose another source:", ["None", "Paste API URL", "Paste Web URL (HTML table)"])

# Stays None until one of the sources loads successfully; every later step
# keys off this value.
df = None

if file:
    # Lower-case the extension so "DATA.CSV" is treated like "data.csv".
    ext = os.path.splitext(file.name)[1].lower()
    reader = FILE_READERS.get(ext)
    if reader is None:
        st.error(f"❌ Unsupported file type: {ext or 'no extension'}")
    else:
        # UI boundary: report any parse failure instead of crashing the page.
        try:
            df = reader(file)
        except Exception as exc:
            st.error(f"❌ Could not read uploaded file. ({exc})")
        else:
            st.success("✅ Data loaded successfully from uploaded file.")

elif data_source == "Paste API URL":
    api_url = st.text_input("Paste API Endpoint:")
    if api_url:
        try:
            response = requests.get(api_url, timeout=REQUEST_TIMEOUT)
            # Do not turn an HTTP error page into a DataFrame.
            response.raise_for_status()
            df = pd.DataFrame(response.json())
        except Exception as exc:
            st.error(f"❌ Could not load data from API. ({exc})")
        else:
            st.success("✅ Data loaded successfully from API.")

elif data_source == "Paste Web URL (HTML table)":
    web_url = st.text_input("Paste Webpage URL:")
    if web_url:
        try:
            page = requests.get(web_url, timeout=REQUEST_TIMEOUT)
            page.raise_for_status()
            soup = BeautifulSoup(page.text, "lxml")
            # read_html expects a path, URL or file-like object; passing the
            # markup as a plain string is deprecated, so wrap it in StringIO.
            tables = pd.read_html(io.StringIO(str(soup)))
            df = tables[0]
        except Exception as exc:
            st.error(f"❌ Could not parse HTML table. ({exc})")
        else:
            st.success("✅ Data loaded successfully from HTML table.")

else:
    st.info("⬆️ Upload a file or choose another data source to get started.")

# ------------------------------------
# STEP 2: Understand the Data
# ------------------------------------
if df is not None:
    st.header("🔍 Step 2: Understand Your Data")

    st.write("Preview of Data:")
    st.dataframe(df.head())

    st.subheader("Summary Info:")
    # DataFrame.info prints its report and returns None, so capture the
    # text in a buffer and render that.
    info_buffer = io.StringIO()
    df.info(buf=info_buffer)
    st.text(info_buffer.getvalue())
    st.write("Shape:", df.shape)
    st.write("Column Types:")
    st.write(df.dtypes)
    st.write("Missing Values:")
    st.write(df.isnull().sum())

    domain = st.selectbox("Which field does your data relate to?", ["General", "Health", "Finance", "Education", "E-commerce", "Other"])
    st.info(f"You're working with: **{domain}** data")

    # ------------------------------------
    # STEP 3: Clean Data
    # ------------------------------------
    st.header("🧹 Step 3: Clean Your Data")

    # Handle nulls
    null_strategy = st.radio("How do you want to handle missing values?", ["Drop rows", "Forward Fill", "Backward Fill", "Do nothing"])
    if null_strategy == "Drop rows":
        df = df.dropna()
        st.success("✅ Null rows dropped.")
    elif null_strategy == "Forward Fill":
        df = df.ffill()
        st.success("✅ Forward fill applied.")
    elif null_strategy == "Backward Fill":
        df = df.bfill()
        st.success("✅ Backward fill applied.")

    # Handle duplicates
    try:
        has_duplicates = bool(df.duplicated().any())
    except TypeError:
        # Cells holding lists or dicts (common with JSON sources) cannot be
        # hashed, so duplicate detection is not possible for this data.
        has_duplicates = None

    if has_duplicates is None:
        st.info("⚠️ Duplicate check skipped: some columns contain nested values.")
    elif has_duplicates:
        duplicate_action = st.radio("Duplicates detected. What do you want to do?", ["Drop duplicates", "Keep them"])
        if duplicate_action == "Drop duplicates":
            df = df.drop_duplicates()
            st.success("✅ Duplicates removed.")
        else:
            st.info("⚠️ Duplicates retained.")
    else:
        st.info("✅ No duplicate rows found.")

    # Download cleaned data
    st.download_button("Download Cleaned Data", df.to_csv(index=False), file_name="cleaned_data.csv", mime="text/csv")

    # ------------------------------------
    # STEP 4: Visualize Data
    # ------------------------------------
    st.header("📈 Step 4: Visualize Your Data")

    # "number" covers every integer and float width, not only the 64-bit ones.
    numeric_cols = df.select_dtypes(include="number").columns.tolist()
    categorical_cols = df.select_dtypes(include=['object', 'category']).columns.tolist()

    plot_type = st.selectbox("Choose the type of plot:", [
        "Histogram",
        "Box Plot",
        "Bar Chart (Categorical)",
        "Scatter Plot",
        "Line Plot",
    ])

    # Every plot except the bar chart needs a numeric column; the bar chart
    # needs a categorical one. Warn instead of failing on an empty choice.
    if plot_type != "Bar Chart (Categorical)" and not numeric_cols:
        st.warning("⚠️ This plot needs at least one numeric column.")

    elif plot_type == "Bar Chart (Categorical)" and not categorical_cols:
        st.warning("⚠️ This plot needs at least one categorical column.")

    elif plot_type == "Histogram":
        col = st.selectbox("Select numeric column for histogram:", numeric_cols)
        fig = px.histogram(df, x=col, title=f"Histogram of {col}")
        st.plotly_chart(fig, use_container_width=True)

    elif plot_type == "Box Plot":
        col = st.selectbox("Select numeric column for box plot:", numeric_cols)
        fig = px.box(df, y=col, title=f"Box Plot of {col}")
        st.plotly_chart(fig, use_container_width=True)

    elif plot_type == "Bar Chart (Categorical)":
        col = st.selectbox("Select categorical column:", categorical_cols)
        top_n = st.slider("Top N categories to display:", min_value=5, max_value=30, value=10)
        counts = df[col].value_counts().head(top_n).reset_index()
        counts.columns = [col, "count"]
        fig = px.bar(counts, x=col, y="count", title=f"Top {top_n} categories in {col}")
        st.plotly_chart(fig, use_container_width=True)

    elif plot_type == "Scatter Plot":
        x_col = st.selectbox("Select X-axis (numeric):", numeric_cols)
        y_col = st.selectbox("Select Y-axis (numeric):", numeric_cols)
        color_col = st.selectbox("Optional: Color by (categorical):", ["None"] + categorical_cols)
        if color_col != "None":
            fig = px.scatter(df, x=x_col, y=y_col, color=color_col, title=f"{y_col} vs {x_col} colored by {color_col}")
        else:
            fig = px.scatter(df, x=x_col, y=y_col, title=f"{y_col} vs {x_col}")
        st.plotly_chart(fig, use_container_width=True)

    elif plot_type == "Line Plot":
        x_col = st.selectbox("Select X-axis (date or numeric):", df.columns)
        y_col = st.selectbox("Select Y-axis (numeric):", numeric_cols)
        fig = px.line(df.sort_values(by=x_col), x=x_col, y=y_col, title=f"{y_col} Over {x_col}")
        st.plotly_chart(fig, use_container_width=True)
else:
    st.warning("⚠️ No data loaded yet.")
'''

# Save the app source next to the notebook. The encoding is explicit because
# the source contains emoji that a platform's default encoding may not be
# able to write.
with open("first_data.py", "w", encoding="utf-8") as f:
    f.write(first_data)

print("✅ Streamlit app saved as first_data.py")

# Launch through the current interpreter so the app runs in the same
# environment as this notebook and does not depend on a `streamlit`
# executable being discoverable on PATH.
process = subprocess.Popen([sys.executable, "-m", "streamlit", "run", "first_data.py"])

# Give the server a moment to start, then report an early exit rather than
# leaving a dead server unnoticed.
time.sleep(5)
if process.poll() is not None:
    print(f"❌ Streamlit exited early with code {process.returncode}")

✅ Streamlit app saved as first_data.py
