<a href="https://colab.research.google.com/github/KashifAliLashari/datalysis/blob/main/datalysis.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import streamlit as st
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import plotly.express as px
import io

# One label is shared by the browser-tab title and the on-page heading
_APP_TITLE = "Dataset Visualizer"
st.set_page_config(page_title=_APP_TITLE)
st.title(_APP_TITLE)

# CSV upload widget; the result is compared against None further down
# to decide whether there is a file to process yet
uploaded_file = st.file_uploader("Choose a CSV file", type="csv")

# Function to check file size
def check_file_size(file, *, max_size=1 * 1024 * 1024):
    """Return whether ``file`` is no larger than ``max_size`` bytes.

    The size is measured by seeking to the end of the stream, so ``file``
    must be seekable. The stream is rewound to the start afterwards so the
    caller can read it from the beginning.

    Args:
        file: Seekable file-like object (here, the Streamlit upload).
        max_size: Largest accepted size in bytes. Defaults to 1 MB.

    Returns:
        True when the file size is at most ``max_size``. Otherwise False,
        after reporting the limit and the actual size through ``st.error``.
    """
    file.seek(0, io.SEEK_END)
    file_size = file.tell()
    file.seek(0)

    if file_size <= max_size:
        return True

    # ":g" drops the trailing ".0", so the default limit still reads "1 MB"
    limit_mb = max_size / 1024 / 1024
    st.error(
        f"File size exceeds the limit of {limit_mb:g} MB. "
        f"Your file is {file_size / 1024 / 1024:.2f} MB."
    )
    return False

# Main app logic
def _load_csv(file):
    """Parse ``file`` as CSV and return the DataFrame, or None on failure.

    Parsing problems (empty file, malformed rows, undecodable bytes) are
    reported to the user with ``st.error`` instead of a traceback.
    """
    try:
        return pd.read_csv(file)
    except (pd.errors.EmptyDataError, pd.errors.ParserError, UnicodeDecodeError) as exc:
        st.error(f"Could not read the CSV file: {exc}")
        return None


def _show_matplotlib(fig):
    """Display a Matplotlib figure and offer it as a PNG download."""
    buf = io.BytesIO()
    # Save through the figure object rather than plt.savefig: pyplot's
    # "current figure" is process-global state and may not be this figure.
    fig.savefig(buf, format="png")
    st.pyplot(fig)
    # pyplot keeps every figure it created until it is closed; without this
    # each rerun of the script would leave another figure in memory.
    plt.close(fig)
    st.download_button(
        label="Download Visualization",
        data=buf.getvalue(),
        file_name="visualization.png",
        mime="image/png",
    )


def _show_plotly(fig):
    """Display a Plotly figure and offer it as an HTML download."""
    st.plotly_chart(fig)
    # A Plotly figure is not a Matplotlib figure, so plt.savefig cannot
    # capture it. HTML export needs no extra package; the plotly.js library
    # is referenced from the CDN to keep the file small.
    st.download_button(
        label="Download Visualization",
        data=fig.to_html(include_plotlyjs="cdn"),
        file_name="visualization.html",
        mime="text/html",
    )


def _run_visualizer(df):
    """Show dataset info, the visualization picker and the chosen chart."""
    # Display basic information about the dataset
    st.write("Dataset Info:")
    st.write(f"Number of rows: {df.shape[0]}")
    st.write(f"Number of columns: {df.shape[1]}")

    # Visualization options
    viz_option = st.radio(
        "Choose a visualization:",
        ("Data Overview", "Correlation Heatmap", "Distribution Plot", "Scatter Plot"),
    )

    # "number" matches every numeric dtype, not only float64/int64
    numeric_df = df.select_dtypes(include="number")
    numeric_cols = list(numeric_df.columns)

    # The column pickers are created before the button check: st.button is
    # only True on the rerun triggered by the click, so widgets created
    # inside that branch disappear as soon as the user changes them.
    selected_col = x_col = y_col = None
    if numeric_cols:
        if viz_option == "Distribution Plot":
            selected_col = st.selectbox("Select a numeric column:", numeric_cols)
        elif viz_option == "Scatter Plot":
            x_col = st.selectbox("Select X-axis:", numeric_cols)
            # Default Y to the second numeric column when there is one, so
            # the initial plot is not a column plotted against itself.
            y_col = st.selectbox(
                "Select Y-axis:",
                numeric_cols,
                index=min(1, len(numeric_cols) - 1),
            )

    if not st.button("Generate Visualization"):
        return

    if viz_option == "Data Overview":
        st.write(df.describe())

    # Every chart below needs numeric data
    if numeric_df.empty:
        st.warning("This dataset has no numeric data to plot.")
        return

    if viz_option == "Data Overview":
        fig, ax = plt.subplots(figsize=(10, 6))
        numeric_df.plot(kind="box", ax=ax)
        _show_matplotlib(fig)

    elif viz_option == "Correlation Heatmap":
        fig, ax = plt.subplots(figsize=(10, 8))
        # Correlate numeric columns only: DataFrame.corr raises on string
        # columns in pandas versions where numeric_only defaults to False.
        sns.heatmap(numeric_df.corr(), annot=True, cmap="coolwarm", ax=ax)
        _show_matplotlib(fig)

    elif viz_option == "Distribution Plot":
        _show_plotly(px.histogram(df, x=selected_col, marginal="box"))

    elif viz_option == "Scatter Plot":
        try:
            fig = px.scatter(df, x=x_col, y=y_col, trendline="ols")
        except ImportError:
            # The OLS trendline relies on the optional statsmodels package
            st.warning("statsmodels is not installed; showing the scatter plot without a trendline.")
            fig = px.scatter(df, x=x_col, y=y_col)
        _show_plotly(fig)


if uploaded_file is None:
    st.write("Please upload a CSV file to begin.")
elif check_file_size(uploaded_file):
    # Read the CSV file
    df = _load_csv(uploaded_file)
    if df is not None:
        _run_visualizer(df)