## Data Quality Dashboard in Python

**Description**: Create a basic dashboard using a Python library (e.g., Plotly Dash) to visualize data quality metrics for a given dataset.

In [1]:
import pandas as pd
import dash
from dash import dcc, html
import plotly.express as px
from dash.dependencies import Input, Output
import dash_bootstrap_components as dbc

# Load the dataset to profile (point this at your own CSV file)
df = pd.read_csv('your_dataset.csv')  # Replace with your dataset path

# Data quality metrics consumed by the dashboard layout
missing_values = df.isna().sum()                          # null cells per column
missing_percent = missing_values.div(len(df)).mul(100)    # same counts as a percentage of rows
data_types = df.dtypes.value_counts()                     # number of columns per dtype
duplicate_count = df.duplicated().sum()                   # rows flagged by DataFrame.duplicated()

# Create the Dash application with the Bootstrap theme and the browser-tab title
app = dash.Dash(
    __name__,
    external_stylesheets=[dbc.themes.BOOTSTRAP],
    title="Data Quality Dashboard",
)

def _metric_card(title, value):
    """Return a column wrapping a card that shows `title` above the formatted `value`."""
    card_body = dbc.CardBody([
        html.H5(title, className="card-title"),
        html.P(f"{value}"),
    ])
    return dbc.Col([dbc.Card([card_body])])


# Headline counts for the first row, listed in display order.
_headline_metrics = [
    ("Total Rows", len(df)),
    ("Total Columns", df.shape[1]),
    ("Duplicate Rows", duplicate_count),
]

# Bar chart: percentage of missing values for each column.
_missing_fig = px.bar(
    x=missing_values.index,
    y=missing_percent,
    labels={'x': 'Columns', 'y': 'Missing (%)'},
    title="Missing Data Percentage by Column",
)

# Pie chart: number of columns per dtype (dtype names rendered as strings).
_dtype_fig = px.pie(
    names=data_types.index.astype(str),
    values=data_types.values,
    title="Data Types Distribution",
)

# Plain-text table of describe() computed over all columns.
_summary_text = df.describe(include='all').to_string()

# Page structure: title, metric cards, two charts, then the summary table.
app.layout = dbc.Container([
    html.H1("Data Quality Dashboard", className="text-center my-4"),
    dbc.Row(
        [_metric_card(title, value) for title, value in _headline_metrics],
        className="mb-4",
    ),
    dbc.Row([dbc.Col([dcc.Graph(figure=_missing_fig)])]),
    dbc.Row([dbc.Col([dcc.Graph(figure=_dtype_fig)])]),
    dbc.Row([
        dbc.Col([
            html.H5("Summary Statistics"),
            html.Pre(_summary_text),
        ])
    ]),
], fluid=True)

# Start the development server when this cell/script is executed directly.
if __name__ == "__main__":
    # `app.run` is the current entry point; `app.run_server` is its older
    # name and is no longer usable on recent Dash releases. Look up `run`
    # first so `run_server` is only touched on old versions that lack `run`.
    start_server = getattr(app, "run", None) or app.run_server
    start_server(debug=True)


ModuleNotFoundError: No module named 'dash'