## Data Quality Dashboard in Python

**Description**: Create a basic dashboard using a Python library (e.g., Plotly Dash) to visualize data quality metrics for a given dataset.

In [1]:
import pandas as pd
import numpy as np
from io import StringIO
from dash import Dash, dcc, html
import plotly.graph_objs as go

# Inline sample dataset used to drive the dashboard. It deliberately mixes
# literal "NaN" tokens and empty fields so that several columns contain
# missing values once parsed.
csv_data = """
ID,Name,Age,Salary,Department,JoinDate
1,Alice,30,70000,HR,2015-08-01
2,Bob,NaN,85000,Engineering,2016-06-15
3,Charlie,25,,Marketing,2017-01-10
4,David,45,120000,Engineering,
5,Eva,35,NaN,HR,2018-03-05
6,Frank,,95000,Marketing,2016-11-23
7,Grace,29,70000,Engineering,2019-07-12
8,Hank,NaN,75000,HR,2020-05-21
9,Ivy,41,110000,Marketing,2017-09-30
10,Jack,38,105000,Engineering,2018-12-01
"""

# Parse the text through an in-memory buffer. The literal starts with a blank
# line, which pandas skips with its default settings, so the header row is
# still picked up correctly.
with StringIO(csv_data) as csv_buffer:
    df = pd.read_csv(csv_buffer)

# Calculate Data Quality Metrics
def calculate_metrics(df):
    """Summarise basic data-quality indicators for every column of ``df``.

    Args:
        df: The pandas DataFrame to inspect.

    Returns:
        A dict of three pandas Series, each indexed by column name:
        ``'missing'`` (count of null cells), ``'unique'`` (number of distinct
        values as reported by ``DataFrame.nunique``) and ``'dtype'`` (the
        column dtype rendered as a string).
    """
    null_counts = df.isnull().sum()
    distinct_counts = df.nunique()
    dtype_names = df.dtypes.astype(str)
    return {
        'missing': null_counts,
        'unique': distinct_counts,
        'dtype': dtype_names,
    }

# Compute the per-column metrics once; every panel below reads from this dict.
metrics = calculate_metrics(df)


def _preview_table(frame, limit=5):
    """Build an HTML table showing at most ``limit`` leading rows of ``frame``."""
    column_names = frame.columns
    header = html.Thead([
        html.Tr([html.Th(name) for name in column_names])
    ])

    body_rows = []
    for position in range(min(len(frame), limit)):
        # Fetch the row once and read each cell from it; cells are rendered
        # via str(), so missing values appear as the text "nan".
        record = frame.iloc[position]
        body_rows.append(
            html.Tr([html.Td(str(record[name])) for name in column_names])
        )

    return html.Table(
        [header, html.Tbody(body_rows)],
        style={'width': '100%', 'border': '1px solid black', 'borderCollapse': 'collapse'},
    )


def _bar_figure(counts, bar_color, y_label):
    """Build a single-trace bar chart from a Series of per-column counts."""
    bars = go.Bar(x=counts.index, y=counts.values, marker_color=bar_color)
    axes = go.Layout(yaxis_title=y_label, xaxis_title="Columns")
    return go.Figure(data=[bars], layout=axes)


# Initialize Dash app
app = Dash(__name__)

# The page is static: title, dataset preview, two bar charts and a dtype list,
# all computed once at import time from ``df`` and ``metrics``.
app.layout = html.Div([
    html.H1("Data Quality Dashboard", style={'textAlign': 'center'}),

    html.H2("Dataset Preview"),
    dcc.Markdown("Showing first 5 rows of the dataset:"),
    html.Div([_preview_table(df)], style={'marginBottom': '30px'}),

    html.H2("Missing Values Per Column"),
    dcc.Graph(
        figure=_bar_figure(metrics['missing'], 'crimson', "Count of Missing Values")
    ),

    html.H2("Unique Values Per Column"),
    dcc.Graph(
        figure=_bar_figure(metrics['unique'], 'mediumseagreen', "Count of Unique Values")
    ),

    html.H2("Data Types of Columns"),
    html.Ul([
        html.Li(f"{name}: {kind}") for name, kind in metrics['dtype'].items()
    ]),
])

# Start the Dash development server only when this file is executed directly.
if __name__ == '__main__':
    # ``app.run`` is the supported entry point; the older ``app.run_server``
    # alias is deprecated and no longer available in Dash 3.x.
    # debug=True turns on Dash's development tooling.
    app.run(debug=True)


ModuleNotFoundError: No module named 'dash'