## Data Quality Dashboard in Python

**Description**: Create a basic dashboard using a Python library (e.g., Plotly Dash) to visualize data quality metrics for a given dataset.

In [None]:
# Write your code from here
# Import required libraries
import dash
from dash import dcc, html, Input, Output
import plotly.express as px
import pandas as pd
import numpy as np

# Sample data generation
def generate_sample_data(n_rows=100, seed=None):
    """Build a synthetic customer table with randomly injected missing values.

    Args:
        n_rows: Number of rows to generate. Defaults to 100.
        seed: Optional seed for the random generator. Pass an int to get the
            same table on every call; ``None`` (the default) draws fresh
            randomness each time.

    Returns:
        A DataFrame with the columns customer_id, name, email, phone, age,
        join_date and purchase_amount. The email, phone, age and
        purchase_amount cells are None with a probability of roughly
        20%, 15%, 10% and 5% respectively.
    """
    # A local generator keeps the data reproducible when a seed is given and
    # avoids touching NumPy's global random state.
    rng = np.random.default_rng(seed)
    ids = range(1, n_rows + 1)

    data = {
        'customer_id': ids,
        'name': [f'Customer_{i}' for i in ids],
        'email': [
            f'user{i}@example.com' if rng.random() > 0.2 else None
            for i in ids
        ],
        'phone': [
            f'555-{rng.integers(1000, 9999)}' if rng.random() > 0.15 else None
            for _ in ids
        ],
        # Cast to built-in int/float so the cells hold plain Python scalars.
        'age': [
            int(rng.integers(18, 80)) if rng.random() > 0.1 else None
            for _ in ids
        ],
        'join_date': pd.date_range('2020-01-01', periods=n_rows, freq='D').tolist(),
        'purchase_amount': [
            round(float(rng.uniform(10, 500)), 2) if rng.random() > 0.05 else None
            for _ in ids
        ],
    }
    return pd.DataFrame(data)

# Build the dataset once at import time; the layout and the callbacks below
# read this module-level frame.
df = generate_sample_data()

# Initialize the Dash app
app = dash.Dash(__name__)

# Calculate data quality metrics
def calculate_metrics(dataframe):
    """Summarise basic data quality metrics for a DataFrame.

    Args:
        dataframe: The pandas DataFrame to inspect.

    Returns:
        A dict with four entries:
            total_records: number of rows.
            missing_values: mapping of column name -> count of null cells.
            completeness: mapping of column name -> fraction of non-null
                cells, between 0 and 1.
            data_types: mapping of column name -> dtype name as a string.
    """
    total = len(dataframe)
    # Count nulls once and reuse the result for both metrics.
    missing = dataframe.isnull().sum()

    if total:
        completeness = (1 - missing / total).to_dict()
    else:
        # An empty frame has no missing cells; report it as fully complete
        # instead of dividing by zero and producing NaN.
        completeness = {col: 1.0 for col in dataframe.columns}

    return {
        'total_records': total,
        'missing_values': missing.to_dict(),
        'completeness': completeness,
        'data_types': dataframe.dtypes.astype(str).to_dict(),
    }

# Metrics are computed once at import time from the module-level frame.
metrics = calculate_metrics(df)

# App layout: a title, an overview panel with the column selector next to the
# missing-values chart, a row with the distribution and completeness charts,
# and a preview table at the bottom.
app.layout = html.Div([
    html.H1("Data Quality Dashboard", style={'textAlign': 'center'}),

    html.Div([
        html.Div([
            html.H3("Dataset Overview"),
            html.P(f"Total Records: {metrics['total_records']}"),
            html.P(f"Columns: {len(df.columns)}"),
            dcc.Dropdown(
                id='column-selector',
                options=[{'label': col, 'value': col} for col in df.columns],
                value=df.columns[0],
                multi=False,
                # The callbacks index df with the selected value, so a cleared
                # selection (None) would raise; keep one column always chosen.
                clearable=False
            )
        ], style={'width': '30%', 'display': 'inline-block', 'padding': '20px'}),

        html.Div([
            html.H3("Missing Values by Column"),
            dcc.Graph(id='missing-values-chart')
        ], style={'width': '65%', 'display': 'inline-block', 'float': 'right'})
    ]),

    html.Div([
        html.Div([
            html.H3("Data Distribution"),
            dcc.Graph(id='data-distribution')
        ], style={'width': '48%', 'display': 'inline-block', 'padding': '10px'}),

        html.Div([
            html.H3("Data Completeness"),
            dcc.Graph(id='completeness-chart')
        ], style={'width': '48%', 'display': 'inline-block', 'float': 'right', 'padding': '10px'})
    ]),

    html.Div([
        html.H3("Data Preview"),
        # Filled in by the preview callback.
        html.Div(id='data-preview')
    ], style={'padding': '20px'})
])

# Callbacks
@app.callback(Output('missing-values-chart', 'figure'), Input('column-selector', 'value'))
def update_missing_values(selected_column):
    """Return a bar chart of the null count for every column of ``df``.

    ``selected_column`` only triggers the callback; the chart always covers
    all columns.
    """
    null_totals = df.isnull().sum()
    axis_titles = {'x': 'Columns', 'y': 'Missing Values Count'}
    return px.bar(
        x=null_totals.index,
        y=null_totals.values,
        title="Missing Values Across All Columns",
        labels=axis_titles,
    )

@app.callback(
    Output('data-distribution', 'figure'),
    Input('column-selector', 'value')
)
def update_distribution(selected_column):
    """Return a distribution figure for the selected column of ``df``.

    Numeric columns are drawn as a box plot; every other dtype (strings,
    datetimes, categoricals, ...) is drawn as a histogram.

    Args:
        selected_column: Column name chosen in the dropdown, or None when the
            selection has been cleared.

    Returns:
        A Plotly figure, or ``dash.no_update`` when no column is selected so
        the current figure is left in place.
    """
    if selected_column is None:
        # Nothing to index df with; keep whatever is currently displayed.
        return dash.no_update

    title = f"Distribution of {selected_column}"
    # Ask pandas whether the column is numeric instead of comparing the dtype
    # against string names, which misses string, categorical and tz-aware
    # datetime dtypes and would send them to a box plot.
    if pd.api.types.is_numeric_dtype(df[selected_column]):
        return px.box(df, y=selected_column, title=title)
    return px.histogram(df, x=selected_column, title=title)

@app.callback(Output('completeness-chart', 'figure'), Input('column-selector', 'value'))
def update_completeness(selected_column):
    """Return a bar chart of per-column completeness as a percentage.

    The fractions come from the module-level ``metrics``; ``selected_column``
    only triggers the callback.
    """
    column_names = []
    percentages = []
    for name, fraction in metrics['completeness'].items():
        column_names.append(name)
        percentages.append(fraction*100)

    axis_titles = {'x': 'Columns', 'y': 'Completeness (%)'}
    return px.bar(
        x=column_names,
        y=percentages,
        title="Data Completeness by Column",
        labels=axis_titles,
    )

@app.callback(
    Output('data-preview', 'children'),
    Input('column-selector', 'value')
)
def update_preview(selected_column):
    """Return an HTML table showing the first 10 values of the selected column.

    Args:
        selected_column: Column name chosen in the dropdown, or None when the
            selection has been cleared.

    Returns:
        An ``html.Table`` with one header cell and one row per value, or
        ``dash.no_update`` when no column is selected.
    """
    if selected_column is None:
        # Nothing to index df with; keep the current preview.
        return dash.no_update

    # Iterate the values directly: integer-position lookups such as
    # ``row[0]`` on a label-indexed Series are deprecated in pandas.
    preview_values = df[selected_column].head(10)
    return html.Table([
        html.Thead(html.Tr([html.Th(selected_column)])),
        html.Tbody([
            html.Tr([html.Td(str(value))])
            for value in preview_values
        ])
    ])

# Start the app with debug mode enabled, but only when this file is executed
# as a script (not when it is imported).
if __name__ == '__main__':
    app.run(debug=True)
