# In [1]:
# app.py
import streamlit as st
import pandas as pd
import numpy as np
from sklearn.impute import SimpleImputer

# Page-level configuration; this is the first Streamlit call in the script.
st.set_page_config(layout="wide", page_title="Data Cleaning App")

# Heading plus a one-line description of what the app offers.
st.title("🔧 Data Cleaning App")
st.write(
    "Upload your CSV file and perform basic cleaning operations like "
    "handling missing values, outliers, and data type conversion."
)

# --- Pure helpers (no Streamlit calls) used by the workflow below ---
def _fill_missing(frame: pd.DataFrame, method: str) -> pd.DataFrame:
    """Return *frame* with missing values handled according to *method*.

    *method* is one of the labels offered in the "Missing Value Handling"
    select box. Any unrecognised label (including "None") returns *frame*
    unchanged. A new DataFrame is returned instead of mutating in place, so
    the result does not depend on chained ``inplace=True`` assignment.
    """
    if method == "Drop Rows":
        return frame.dropna()
    if method == "Fill with Mean":
        return frame.fillna(frame.mean(numeric_only=True))
    if method == "Fill with Median":
        return frame.fillna(frame.median(numeric_only=True))
    if method == "Fill with Mode":
        fill_values = {}
        for column in frame.columns:
            modes = frame[column].mode()
            # An all-NaN column has no mode; leave it untouched instead of
            # failing on an empty lookup.
            if not modes.empty:
                fill_values[column] = modes.iloc[0]
        return frame.fillna(fill_values) if fill_values else frame
    return frame


def _iqr_bounds(values: pd.Series, k: float = 1.5) -> tuple[float, float]:
    """Return the ``(lower, upper)`` fences ``Q1 - k*IQR`` and ``Q3 + k*IQR``."""
    q1 = values.quantile(0.25)
    q3 = values.quantile(0.75)
    iqr = q3 - q1
    return q1 - k * iqr, q3 + k * iqr


# Upload CSV
uploaded_file = st.file_uploader("Upload a CSV file", type=["csv"])
if uploaded_file is not None:
    try:
        df = pd.read_csv(uploaded_file)
    except (pd.errors.EmptyDataError, pd.errors.ParserError, UnicodeDecodeError) as exc:
        # Report unreadable uploads in the UI instead of showing a traceback.
        st.error(f"Could not read the uploaded file as CSV: {exc}")
        st.stop()

    st.write("### Preview of Dataset", df.head())

    # Show missing value info
    st.write("### Missing Value Summary")
    st.write(df.isnull().sum())

    st.subheader("🧹 Missing Value Handling")
    missing_option = st.selectbox(
        "Choose a method to handle missing values",
        ["None", "Drop Rows", "Fill with Mean", "Fill with Median", "Fill with Mode"],
    )
    df = _fill_missing(df, missing_option)

    # Outlier detection
    st.subheader("📊 Outlier Handling (IQR Method)")
    # "number" covers every numeric width, not only int64/float64.
    numeric_cols = df.select_dtypes(include="number").columns.tolist()
    # The leading None entry (rendered as "None") lets the user skip outlier
    # removal; without it rows would be dropped as soon as the page renders.
    outlier_column = st.selectbox(
        "Select column to detect outliers",
        [None] + numeric_cols,
        format_func=lambda name: "None" if name is None else str(name),
    )

    if outlier_column is not None:
        lower, upper = _iqr_bounds(df[outlier_column])

        st.write(f"Removing rows outside {lower:.2f} to {upper:.2f}")
        column_values = df[outlier_column]
        # Missing values are not outliers: keep those rows so that choosing
        # "None" for missing-value handling is respected.
        df = df[column_values.between(lower, upper) | column_values.isna()]

    # Data type conversion
    st.subheader("🔄 Data Type Conversion")
    for col in df.columns:
        current_dtype = df[col].dtype
        st.write(f"{col} - Current: {current_dtype}")
        new_dtype = st.selectbox(
            f"Convert {col} to:",
            ["No Change", "int", "float", "str", "category"],
            # Namespaced key so a column name cannot collide with another
            # widget's key.
            key=f"dtype_{col}",
        )
        if new_dtype != "No Change":
            try:
                df[col] = df[col].astype(new_dtype)
            except (ValueError, TypeError, OverflowError) as e:
                # e.g. casting text or NaN to int; the column keeps its dtype.
                st.error(f"Error converting {col}: {e}")

    # Show cleaned data
    st.write("### Cleaned Data Preview", df.head())

    # Download option
    @st.cache_data
    def convert_df(df: pd.DataFrame) -> bytes:
        """Serialize *df* to CSV (without the index) as UTF-8 bytes."""
        return df.to_csv(index=False).encode('utf-8')

    csv = convert_df(df)
    st.download_button("📥 Download Cleaned CSV", data=csv, file_name='cleaned_data.csv', mime='text/csv')


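# Captured notebook output (appears to be a truncated Streamlit log emitted
# because the script was executed from a Jupyter kernel rather than launched
# with `streamlit run`):
#
# 2025-06-29 21:51:39.251
#   command:
#
#     streamlit run C:\Users\addal\AppData\Local\Programs\Python\Python311\Lib\site-packages\ipykernel_launcher.py [ARGUMENTS]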
