<a href="https://colab.research.google.com/github/KashifAliLashari/datalysis/blob/main/datalysis.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import streamlit as st
import sys
import importlib
import io
import numpy as np
import pandas as pd
import matplotlib
import matplotlib.pyplot as plt
import seaborn as sns
import plotly.express as px

st.set_page_config(
    page_title="Datalysis",
    page_icon="📋",
    layout="wide",
)

# Seed the navigation state the first time a session runs the script;
# "home" is the landing page that the page dispatch below checks for.
if "page" not in st.session_state:
    st.session_state["page"] = "home"

def go_home():
    """Point the navigation state back at the "home" page."""
    st.session_state["page"] = "home"

# App heading. This call sits outside the page dispatch, so it is issued on
# every run regardless of which page is active.
st.title("Datalysis")

# Import a module by name, surfacing any import failure in the Streamlit UI
def import_and_log(module_name):
    """Import ``module_name`` and return the resulting module object.

    When the import raises ``ImportError`` the failure is reported through
    ``st.error`` and ``st.stop()`` is called; nothing is logged beyond that.
    """
    try:
        imported = importlib.import_module(module_name)
    except ImportError as exc:
        st.error(f"Failed to import {module_name}: {exc!s}")
        st.stop()
    else:
        return imported

# Import necessary modules: each alias is rebound to whatever import_and_log
# returns for the matching dotted module name (same order as the aliases).
np, pd, matplotlib, plt, sns, px = (
    import_and_log(dotted_name)
    for dotted_name in (
        "numpy",
        "pandas",
        "matplotlib",
        "matplotlib.pyplot",
        "seaborn",
        "plotly.express",
    )
)

# File uploader limited to the "csv" type. The result is compared against
# None by the home-page logic before it is read.
uploaded_file = st.file_uploader("Choose a CSV file", type="csv")

# Function to check file size
def check_file_size(file, *, max_size=5 * 1024 * 1024):
    """Return whether ``file`` holds at most ``max_size`` bytes.

    The size is measured by seeking to the end of the stream. The stream is
    always rewound to offset 0 afterwards so the caller can read it from the
    beginning.

    Args:
        file: Seekable file-like object (must support ``seek`` and ``tell``).
        max_size: Largest accepted size in bytes. Defaults to 5 MB.

    Returns:
        ``True`` when the file is within the limit. ``False`` otherwise, after
        the problem has been reported with ``st.error``.
    """
    file.seek(0, io.SEEK_END)
    file_size = file.tell()
    file.seek(0)

    if file_size > max_size:
        bytes_per_mb = 1024 * 1024
        st.error(
            f"File size exceeds the limit of {max_size / bytes_per_mb:g} MB. "
            f"Your file is {file_size / bytes_per_mb:.2f} MB."
        )
        return False
    return True

# Enhanced data exploration function
def explore_dataset(df):
    """Render an exploratory summary of ``df`` on the current Streamlit page.

    Three sections are written in order: a head/tail preview, a per-column
    details table, and a missing-data table accompanied by a bar chart when at
    least one column has missing values.

    Args:
        df: pandas DataFrame to summarise.
    """
    # Dataset Preview: first and last ten rows side by side
    st.subheader("📋 Dataset Preview")
    head_col, tail_col = st.columns(2)
    with head_col:
        st.dataframe(df.head(10))
    with tail_col:
        st.dataframe(df.tail(10))

    # Column Details (shown in the left half of the page only)
    st.subheader("📝 Column Details")
    missing_counts = df.isnull().sum()
    details_col, _ = st.columns(2)
    with details_col:
        column_details = pd.DataFrame({
            'Column Name': df.columns,
            # Plain strings rather than dtype objects, so the table contains
            # only simple values for display.
            'Data Type': df.dtypes.astype(str),
            'Non-Null Count': df.count(),
            'Unique Values': df.nunique(),
            'Missing Values': missing_counts,
        })
        st.dataframe(column_details)

    # Missing Data Analysis
    st.subheader("❓ Missing Data Analysis")
    # A frame with zero rows gives 0/0 = NaN here; report that as 0 % missing.
    missing_percentages = (missing_counts / len(df) * 100).fillna(0).round(2)
    missing_df = pd.DataFrame({
        'Column': missing_percentages.index,
        'Missing %': missing_percentages.values
    }).sort_values('Missing %', ascending=False)

    table_col, chart_col = st.columns(2)
    with table_col:
        st.dataframe(missing_df)

    with chart_col:
        if (missing_percentages > 0).any():
            fig, ax = plt.subplots(figsize=(10, 6))
            missing_percentages.plot(kind='bar', ax=ax)
            # Configure this figure's own axes instead of pyplot's implicit
            # "current" figure.
            ax.set_title('Missing Values Percentage by Column')
            ax.set_xlabel('Columns')
            ax.set_ylabel('Missing %')
            plt.setp(ax.get_xticklabels(), rotation=45, ha='right')
            fig.tight_layout()
            st.pyplot(fig)
            # pyplot keeps every figure it creates alive until it is closed;
            # close it so figures do not pile up across reruns.
            plt.close(fig)

# Main app logic
def _open_selected_visualization():
    """Navigate to the page currently chosen in the "viz_option" radio.

    Used as a button ``on_click`` callback so the new page is already stored
    in session state when the script reruns. Assigning it after
    ``st.button`` returns would only take effect on the following interaction.
    """
    st.session_state.page = st.session_state.viz_option


def _numeric_columns(df):
    """Return the labels of all numeric columns of ``df``."""
    return df.select_dtypes(include="number").columns


def _show_matplotlib_figure(fig):
    """Render ``fig`` and close it so figures do not accumulate over reruns."""
    st.pyplot(fig)
    plt.close(fig)


def _render_home():
    """Render the upload page: dataset info, exploration and page chooser."""
    if uploaded_file is None:
        st.write("Please upload a CSV file to begin.")
        return
    if not check_file_size(uploaded_file):
        return

    try:
        # Read the CSV file and keep it for the visualization pages
        df = pd.read_csv(uploaded_file)
        st.session_state["df"] = df

        # Display basic information about the dataset
        st.write("Dataset Info:")
        st.write(f"Number of rows: {df.shape[0]}")
        st.write(f"Number of columns: {df.shape[1]}")

        # Automatically generate data exploration insights
        if st.button("Explore Dataset", key="explore_button"):
            explore_dataset(df)

        # Visualization options; the choice is read back through its key
        st.radio(
            "Choose a visualization:",
            tuple(_VISUALIZATION_PAGES),
            key="viz_option"
        )
        st.button(
            "Generate Visualization",
            key="generate_button",
            on_click=_open_selected_visualization,
        )
    except Exception as e:
        # UI boundary: report the problem instead of showing a traceback.
        st.error(f"An error occurred: {str(e)}")
        st.error("Please check your CSV file and try again.")


def _render_data_overview(df):
    """Show summary statistics and a box plot of the numeric columns."""
    st.write(df.describe())
    if df.select_dtypes(include="number").empty:
        # Plotting a frame without numeric data raises, so skip the chart.
        st.info("No numeric data available for a box plot.")
        return
    fig, ax = plt.subplots(figsize=(10, 6))
    df.plot(kind='box', ax=ax)
    _show_matplotlib_figure(fig)


def _render_correlation_heatmap(df):
    """Show a heatmap of the pairwise correlations of the numeric columns."""
    # Restrict to numeric columns first: correlating a frame that still
    # contains text columns raises on recent pandas versions.
    correlations = df.select_dtypes(include="number").corr()
    if correlations.empty:
        st.info("No numeric columns available for a correlation heatmap.")
        return
    fig, ax = plt.subplots(figsize=(10, 8))
    sns.heatmap(correlations, annot=True, cmap='coolwarm', ax=ax)
    _show_matplotlib_figure(fig)


def _render_distribution_plot(df):
    """Let the user pick two numeric columns and draw a histogram."""
    numeric_cols = _numeric_columns(df)
    if numeric_cols.empty:
        st.warning("The dataset has no numeric columns to plot.")
        return
    x_col = st.selectbox("Select X-axis:", numeric_cols)
    y_col = st.selectbox("Select Y-axis:", numeric_cols)
    if st.button("Generate Plot"):
        fig = px.histogram(df, x=x_col, y=y_col, marginal="box")
        st.plotly_chart(fig)


def _render_scatter_plot(df):
    """Let the user pick two numeric columns and draw a scatter plot."""
    numeric_cols = _numeric_columns(df)
    if numeric_cols.empty:
        st.warning("The dataset has no numeric columns to plot.")
        return
    x_col = st.selectbox("Select X-axis:", numeric_cols)
    y_col = st.selectbox("Select Y-axis:", numeric_cols)
    if st.button("Generate Plot"):
        try:
            fig = px.scatter(df, x=x_col, y=y_col, trendline="ols")
        except ImportError:
            # The OLS trendline depends on the optional statsmodels package;
            # fall back to a plain scatter plot when it is not installed.
            st.warning("Trendline unavailable (statsmodels is not installed).")
            fig = px.scatter(df, x=x_col, y=y_col)
        st.plotly_chart(fig)


# Navigation for different visualization pages: page name -> (title, renderer).
# The keys double as the radio options on the home page, in this order.
_VISUALIZATION_PAGES = {
    "Data Overview": ("📋 Data Overview", _render_data_overview),
    "Correlation Heatmap": ("🔍 Correlation Heatmap", _render_correlation_heatmap),
    "Distribution Plot": ("📋 Distribution Plot", _render_distribution_plot),
    "Scatter Plot": ("📋 Scatter Plot", _render_scatter_plot),
}

if st.session_state.page == "home":
    _render_home()
elif st.session_state.page in _VISUALIZATION_PAGES:
    page_title, render_page = _VISUALIZATION_PAGES[st.session_state.page]
    st.title(page_title)
    df = st.session_state.get("df")
    # Without a stored dataset only the Back button is shown.
    if df is not None:
        render_page(df)
    st.button("Back", on_click=go_home)
