# In [6]:
# Ques_2.ipynb
# Data Quality Dashboard using Streamlit

import streamlit as st
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

# Page configuration: browser-tab title and full-width layout.
st.set_page_config(page_title="Data Quality Dashboard", layout="wide")

st.title("Data Quality Dashboard")

# File uploader restricted to .csv files.
uploaded_file = st.file_uploader("Upload your CSV file", type=["csv"])

if uploaded_file is not None:
    # An upload can be empty, malformed, or in an unexpected encoding. Report
    # that on the page and halt this run instead of surfacing a raw traceback.
    try:
        df = pd.read_csv(uploaded_file)
    except (pd.errors.EmptyDataError, pd.errors.ParserError, UnicodeDecodeError) as exc:
        st.error(f"Could not read the uploaded CSV file: {exc}")
        st.stop()

    st.write("### Data Preview")
    st.dataframe(df.head())

    # Data Quality Functions
    def calculate_completeness(column):
        """Return the fraction of entries in *column* that are not null.

        Args:
            column: pandas Series to score.

        Returns:
            A value between 0 and 1, or NaN when the column has no rows.
        """
        total = len(column)
        if total == 0:
            # 0 / 0 would only reach NaN through a NumPy RuntimeWarning; return
            # NaN explicitly so an empty column is simply marked as undefined.
            return np.nan
        return column.notnull().sum() / total

    def calculate_uniqueness(column):
        """Return the ratio of distinct non-null values to total rows.

        Null entries are not counted as a distinct value (pandas ``nunique``
        default) but do count toward the row total.

        Args:
            column: pandas Series to score.

        Returns:
            A value between 0 and 1, or NaN when the column has no rows.
        """
        total = len(column)
        if total == 0:
            # nunique() is a plain int, so dividing by a zero length would
            # raise ZeroDivisionError; an empty column has no defined score.
            return np.nan
        return column.nunique() / total

    def calculate_consistency(column):
        """Return the share held by the most frequent value of a text column.

        Values are compared after stripping surrounding whitespace and
        lower-casing. Null entries are ignored, and non-string values in an
        object column are compared by their string form.

        Args:
            column: pandas Series to score.

        Returns:
            A value between 0 and 1, or NaN when the column is not text-like
            or holds no non-null values.
        """
        # Inspect the dtype rather than the values: besides plain object
        # columns this also accepts pandas' dedicated string dtypes, which
        # never compare equal to 'object'.
        is_text = column.dtype == 'object' or pd.api.types.is_string_dtype(column.dtype)
        if not is_text:
            return np.nan

        present = column.dropna()
        if present.empty:
            return np.nan

        # astype(str) keeps the .str accessor usable when an object column
        # holds non-string values (e.g. booleans next to missing entries).
        normalized = present.astype(str).str.strip().str.lower()
        return normalized.value_counts(normalize=True).max()

    def calculate_validity(column):
        """Return the validity score for *column*.

        No validation rules are defined yet, so the score is always NaN and
        the argument is not inspected.
        """
        # Placeholder until column-specific rules are defined.
        placeholder_score = np.nan
        return placeholder_score

    # Calculate Metrics
    # Each scorer is paired with the label it is reported under; the order
    # fixes both the evaluation order and the column order of the result.
    scorers = (
        ('Completeness', calculate_completeness),
        ('Uniqueness', calculate_uniqueness),
        ('Consistency', calculate_consistency),
        ('Validity', calculate_validity),
    )

    metrics = []
    for col in df.columns:
        series = df[col]
        row = {'Column': col}
        for label, scorer in scorers:
            row[label] = scorer(series)

        # The DQI is the plain mean of whichever scores are defined (not NaN);
        # it is NaN itself when none of them are.
        defined_scores = [row[label] for label, _ in scorers if not pd.isna(row[label])]
        if defined_scores:
            row['DQI'] = sum(defined_scores) / len(defined_scores)
        else:
            row['DQI'] = np.nan

        metrics.append(row)

    dqi_df = pd.DataFrame(metrics)

    st.write("### Data Quality Metrics")
    # Apply the two-decimal format to the score columns only: 'Column' holds
    # the column names as text, and "{:.2f}" raises ValueError on strings.
    score_columns = [name for name in dqi_df.columns if name != 'Column']
    st.dataframe(dqi_df.style.format("{:.2f}", subset=score_columns))

    # Bar Chart
    st.write("### DQI Visualization")
    fig, ax = plt.subplots(figsize=(10, 5))
    dqi_df.sort_values("DQI", ascending=False).plot.bar(x='Column', y='DQI', ax=ax, legend=False, color="skyblue")
    # Label through the Axes object rather than pyplot's implicit current axes.
    ax.set_ylabel("DQI Score")
    ax.set_title("Data Quality Index per Column")
    st.pyplot(fig)
    # Figures created with plt.subplots stay registered with pyplot until they
    # are closed, so release this one once it has been rendered.
    plt.close(fig)

else:
    st.info("Please upload a CSV file to begin.")

# Notebook output: ModuleNotFoundError: No module named 'streamlit'