## Data Quality Dashboard in Python

**Description**: Create a basic dashboard using a Python library (e.g., Plotly Dash) to visualize data quality metrics for a given dataset.

In [None]:
import dash
from dash import dcc
from dash import html
from dash.dependencies import Input, Output
import plotly.graph_objects as go
import pandas as pd
import numpy as np

def calculate_missing_percentage(data):
    """
    Calculates the percentage of missing values in a numpy array.

    Missing values are detected with ``pd.isna``, so NaN, None and NaT all
    count as missing, and object-dtype arrays (which make ``np.isnan`` raise
    a TypeError) are supported as well as numeric ones.

    Args:
        data (numpy.ndarray): The input numpy array.

    Returns:
        float: The percentage (0-100) of missing elements in the array.
               Returns 0.0 if the input is not a numpy array or is empty.
    """
    if not isinstance(data, np.ndarray):
        print("Error: Input must be a numpy array.")
        return 0.0

    if data.size == 0:
        print("Warning: Input array is empty.")
        return 0.0

    # pd.isna works element-wise on any dtype, unlike np.isnan which only
    # accepts numeric (and datetime) arrays.
    missing_count = pd.isna(data).sum()

    # Convert the numpy scalar to a built-in float to match the documented
    # return type.
    return float(missing_count / data.size * 100)

def calculate_overall_data_quality(dimension_scores):
    """
    Computes an overall data quality score based on six key dimensions,
    assuming each contributes equally.

    Args:
        dimension_scores (list): A list of six numbers (int, float, or the
            NumPy integer/floating scalar equivalents) representing the
            scores of the six dimensions. Each score should be between 0 and
            100, inclusive.

    Returns:
        float: The overall data quality score, which is the average of the
            six dimension scores. Returns None if the input is invalid
            (wrong container type, wrong length, non-numeric score, NaN, or
            a score outside 0-100).
    """
    if not isinstance(dimension_scores, list) or len(dimension_scores) != 6:
        print("Error: Input must be a list of six numbers.")
        return None

    # NumPy scalars are accepted next to built-in numbers because scores are
    # often derived from array computations, and np.integer types are not
    # subclasses of the built-in int.
    numeric_types = (int, float, np.integer, np.floating)

    for score in dimension_scores:
        # NaN fails both range comparisons, so it is rejected here too.
        if not isinstance(score, numeric_types) or not 0 <= score <= 100:
            print("Error: Each dimension score must be a number between 0 and 100 inclusive.")
            return None

    # Normalize to a built-in float regardless of the scalar types supplied.
    return float(sum(dimension_scores) / len(dimension_scores))

# Demo input: a 4x4 grid in which three cells are NaN.
# Swap this for your real data loading.
data = np.array([
    [1, 2, np.nan, 4],
    [5, np.nan, 7, 8],
    [9, 10, 11, 12],
    [np.nan, 14, 15, 16],
])

# Completeness is the only dimension derived from the data: it is the share
# of cells that are NOT missing.
missing_percentage = calculate_missing_percentage(data)
completeness_score = 100 - missing_percentage

# The remaining dimensions are hard-coded example values; replace them with
# real metric calculations.
uniqueness_score, validity_score, accuracy_score = 85.0, 92.0, 98.0
consistency_score, timeliness_score = 78.0, 95.0

dimension_scores = [
    completeness_score,
    uniqueness_score,
    validity_score,
    accuracy_score,
    consistency_score,
    timeliness_score,
]
overall_quality_score = calculate_overall_data_quality(dimension_scores)

# One row per dimension, followed by a final 'Overall' row.
df = pd.DataFrame({
    'Dimension': [
        'Completeness',
        'Uniqueness',
        'Validity',
        'Accuracy',
        'Consistency',
        'Timeliness',
        'Overall',
    ],
    'Score': [*dimension_scores, overall_quality_score],
})

app = dash.Dash(__name__)

# Every row except the trailing 'Overall' entry feeds the bar chart.
_dimension_rows = df.iloc[:-1]

# Bar chart: one bar per individual dimension.
_bar_figure = {
    'data': [
        {
            'x': _dimension_rows['Dimension'],
            'y': _dimension_rows['Score'],
            'type': 'bar',
            'name': 'Dimension Scores',
        },
    ],
    'layout': {
        'title': 'Data Quality Scores by Dimension',
        'xaxis': {'title': 'Dimension'},
        'yaxis': {'title': 'Score (0-100)'},
        'height': 500,
    },
}

# Background bands of the gauge: five 20-point-wide steps from 0 to 100,
# each with its own colour.
_band_colors = ["#B22222", "#DC143C", "#FF4500", "#FFA500", "#228B22"]
_gauge_steps = [
    {'range': [lower, lower + 20], 'color': color}
    for lower, color in zip(range(0, 100, 20), _band_colors)
]

# Gauge chart: the overall score as a number plus a dial, with a black
# threshold line drawn at the score itself.
_gauge_figure = {
    'data': [
        go.Indicator(
            mode="gauge+number",
            value=overall_quality_score,
            domain={'x': [0, 1], 'y': [0, 1]},
            gauge={
                'axis': {'range': [0, 100]},
                'bar': {'color': "#FF5E57"},
                'steps': _gauge_steps,
                'threshold': {
                    'line': {'color': "black", 'width': 4},
                    'value': overall_quality_score,
                },
            },
        ),
    ],
    'layout': {
        'title': "Overall Data Quality Score",
        'height': 400,
    },
}

# Page structure: centred heading and subtitle, then the two charts.
app.layout = html.Div([
    html.H1('Data Quality Dashboard', style={'textAlign': 'center'}),
    html.Div('Visualization of Data Quality Metrics', style={'textAlign': 'center'}),
    dcc.Graph(id='data-quality-bar-chart', figure=_bar_figure),
    dcc.Graph(id='overall-data-quality-gauge-chart', figure=_gauge_figure),
])

# Start the server on port 8051 only when this file is executed directly.
# debug=True is meant for local development.
if __name__ == "__main__":
    app.run(port=8051, debug=True)
