# In [None]:
import streamlit as st
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt

from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder, StandardScaler
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import classification_report, accuracy_score

# Page chrome: wide layout plus the app heading.
st.set_page_config(layout="wide")
st.title("🔍 ML Pipeline: From Data Input to Model Deployment")

# STEP 1: Upload dataset
# Only CSV files are offered by the picker; the rest of the script is gated
# on this value being truthy.
uploaded_file = st.file_uploader(
    label="📂 Upload your dataset (CSV format only)",
    type=["csv"],
)

def _encode_categoricals(features):
    """Label-encode every non-numeric column of *features*.

    Values are cast to ``str`` before encoding so that mixed-type columns
    (for example text plus NaN) can be sorted by the encoder, and so that
    the raw text typed into the prediction form can be looked up with the
    very same encoder later on.

    Returns:
        A tuple ``(encoded, encoders)`` where *encoded* is an encoded copy
        of *features* and *encoders* maps each encoded column name to its
        fitted ``LabelEncoder``.
    """
    encoded = features.copy()
    encoders = {}
    for col in encoded.select_dtypes(exclude=np.number).columns:
        encoder = LabelEncoder()
        encoded[col] = encoder.fit_transform(encoded[col].astype(str))
        encoders[col] = encoder
    return encoded, encoders


def _build_input_frame(raw_inputs, encoders):
    """Turn raw text-box values into a one-row, model-ready DataFrame.

    Args:
        raw_inputs: ``{column: text}`` in the same column order the model
            was trained on.
        encoders: ``{column: fitted LabelEncoder}`` for categorical columns;
            every other column is parsed as a finite float.

    Returns:
        A tuple ``(frame, problems)``. *problems* is a list of user-facing
        messages; *frame* is ``None`` whenever that list is non-empty.
    """
    row = {}
    problems = []
    for col, text in raw_inputs.items():
        encoder = encoders.get(col)
        if encoder is not None:
            try:
                # Reuse the training-time encoder so codes match the model.
                row[col] = encoder.transform([text])[0]
            except ValueError:
                problems.append(f"'{text}' is not a known category for '{col}'.")
            continue
        try:
            number = float(text)
        except ValueError:
            problems.append(f"'{col}' needs a numeric value.")
            continue
        if not np.isfinite(number):
            problems.append(f"'{col}' needs a finite numeric value.")
            continue
        row[col] = number
    if problems:
        return None, problems
    return pd.DataFrame([row], columns=list(raw_inputs)), problems


if uploaded_file:
    try:
        df = pd.read_csv(uploaded_file)
    except (pd.errors.EmptyDataError, pd.errors.ParserError, UnicodeDecodeError) as exc:
        st.error(f"Could not read the uploaded CSV: {exc}")
        st.stop()

    st.subheader("📊 Raw Dataset Preview")
    st.write(df.head())

    # STEP 2: Data Cleaning
    st.subheader("🧹 Data Cleaning")
    st.write("Missing values in each column:")
    st.write(df.isnull().sum())

    if st.checkbox("Drop rows with missing values"):
        df = df.dropna()
        st.success("Missing rows removed!")

    # STEP 3: Data Visualization
    st.subheader("📈 Exploratory Data Analysis")
    st.write("Select columns to visualize")

    numeric_cols = df.select_dtypes(include=np.number).columns.tolist()
    selected_col = st.selectbox("Choose a numeric column", numeric_cols)

    if selected_col is not None:
        fig, ax = plt.subplots()
        sns.histplot(df[selected_col], kde=True, ax=ax)
        st.pyplot(fig)
        # The script reruns on every interaction; close the figure so
        # Matplotlib does not accumulate one open figure per rerun.
        plt.close(fig)

    # STEP 4: Preprocessing
    st.subheader("⚙️ Data Preprocessing")

    # A target, at least one feature, and enough rows to split are required.
    if df.shape[1] < 2 or len(df) < 2:
        st.error("The dataset needs at least two columns and two rows to train a model.")
        st.stop()

    target_col = st.selectbox("🎯 Select Target Column", df.columns)
    y = df[target_col]

    # Encode categorical features, keeping the fitted encoders so the
    # prediction form can apply exactly the same mapping.
    X, feature_encoders = _encode_categoricals(df.drop(columns=[target_col]))

    # Encode target if needed; the encoder is kept to decode predictions.
    target_encoder = None
    if not pd.api.types.is_numeric_dtype(y):
        target_encoder = LabelEncoder()
        y = target_encoder.fit_transform(y.astype(str))

    # Scaling features
    # NOTE(review): the scaler is fit on the full dataset before the split.
    # Tree ensembles are insensitive to feature scale, but fit it on the
    # training split only if the estimator is ever swapped.
    scaler = StandardScaler()
    X_scaled = scaler.fit_transform(X)

    st.success("✅ Preprocessing complete!")

    # STEP 5: Model Training
    st.subheader("🤖 Model Development")

    test_size = st.slider("Select test size", 0.1, 0.5, 0.2)
    random_state = st.number_input(
        "Random state (for reproducibility)",
        min_value=0,
        max_value=2**32 - 1,
        value=42,
        step=1,
    )
    seed = int(random_state)

    X_train, X_test, y_train, y_test = train_test_split(
        X_scaled, y, test_size=test_size, random_state=seed
    )

    # Seed the forest too: the whole script reruns when "Predict" is pressed,
    # and an unseeded forest would differ from the one evaluated above.
    model = RandomForestClassifier(random_state=seed)
    model.fit(X_train, y_train)

    y_pred = model.predict(X_test)

    # STEP 6: Evaluation
    st.subheader("📋 Model Evaluation")
    st.write("Accuracy:", accuracy_score(y_test, y_pred))
    st.text("Classification Report:")
    st.text(classification_report(y_test, y_pred))

    # Predict with user input
    st.subheader("📝 Make a Prediction")
    input_data = {}
    for col in X.columns:
        input_data[col] = st.text_input(f"Enter value for {col}")

    if st.button("Predict"):
        input_df, problems = _build_input_frame(input_data, feature_encoders)
        if problems:
            for message in problems:
                st.error(message)
        else:
            input_df_scaled = scaler.transform(input_df)
            prediction = model.predict(input_df_scaled)
            if target_encoder is not None:
                # Show the original label rather than its integer code.
                prediction = target_encoder.inverse_transform(prediction)
            st.success(f"🎉 Predicted class: {prediction[0]}")
