In [None]:
# Import python packages
import streamlit as st
import pandas as pd
import altair as alt
from IPython.display import display, Markdown

# We can also use Snowpark for our analyses!
from snowflake.snowpark.context import get_active_session
# Session handle obtained from Snowpark's get_active_session(); it is not
# referenced by any of the cells visible in this part of the notebook.
session = get_active_session()


In [None]:
-- Read every column and every row of RECEIPTS (no filter, no limit).
-- NOTE(review): presumably this cell's result is what the later
-- `receipts.to_pandas()` call consumes; confirm the cell name.
SELECT *
FROM RECEIPTS;

In [None]:
-- Read every column and every row of USERS (no filter, no limit).
-- NOTE(review): presumably this cell's result is what the later
-- `users.to_pandas()` call consumes; confirm the cell name.
SELECT *
FROM USERS;

In [None]:
-- Read every column and every row of BRANDS (no filter, no limit).
-- NOTE(review): presumably this cell's result is what the later
-- `brands.to_pandas()` call consumes; confirm the cell name.
SELECT *
FROM BRANDS;

In [None]:
-- Read every column and every row of REWARDS_RECEIPT_ITEM_LIST (no filter, no limit).
-- NOTE(review): presumably this cell's result is what the later
-- `item.to_pandas()` call consumes; confirm the cell name.
SELECT *
FROM REWARDS_RECEIPT_ITEM_LIST;

In [None]:
# Materialize each of the four query results as a pandas DataFrame.
# NOTE(review): `receipts`, `users`, `brands` and `item` are not defined in the
# visible cells; presumably they are the named SQL cells above. Confirm.
receipts_df, users_df, brands_df, item_df = (
    result.to_pandas() for result in (receipts, users, brands, item)
)

In [None]:

def check_data_quality(df):
    """Summarize missing values, per-column duplicates and dtypes of a DataFrame.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame to inspect. It is only read, never modified.

    Returns
    -------
    dict
        Three DataFrames keyed by report title:

        * ``"Missing Values Report"``: one row per column that has at least
          one null, with columns ``'Column'``, ``'Missing Values'`` and
          ``'Missing Percent (%)'``.
        * ``"Duplicates Report"``: one row per column that contains repeated
          values, with columns ``'Column'`` and ``'Duplicate Count'``. Both
          columns are present even when no column has duplicates.
        * ``"Data Types Report"``: one row per column, with columns
          ``'Column'`` and ``'Data Type'``.
    """
    # Missing values report: count nulls once and derive the percentage from it.
    missing_data = df.isnull().sum()
    missing_percent = (missing_data / len(df)) * 100
    missing_report = pd.DataFrame({
        'Column': missing_data.index,
        'Missing Values': missing_data.values,
        'Missing Percent (%)': missing_percent.values,
    })
    # Keep only the columns that actually have nulls.
    missing_report = missing_report[missing_report['Missing Values'] > 0]

    # Duplicates report: each column is scanned a single time.
    duplicates_data = []
    for column in df.columns:
        duplicate_count = df[column].duplicated().sum()
        if duplicate_count > 0:
            duplicates_data.append({'Column': column, 'Duplicate Count': duplicate_count})
    # Explicit columns keep the schema stable when the list is empty.
    duplicates_report = pd.DataFrame(duplicates_data, columns=['Column', 'Duplicate Count'])

    # Data types report
    data_types_report = pd.DataFrame({'Column': df.columns, 'Data Type': df.dtypes.values})

    return {
        "Missing Values Report": missing_report,
        "Duplicates Report": duplicates_report,
        "Data Types Report": data_types_report
    }

def generate_quality_report(dfs, df_names):
    """Run ``check_data_quality`` on each frame and key the results by name.

    ``dfs`` and ``df_names`` are paired positionally with ``zip``, so pairing
    stops at the shorter of the two. Returns a dict mapping each name to the
    report dict produced for its frame; a repeated name keeps the last report.
    """
    return {
        label: check_data_quality(frame)
        for frame, label in zip(dfs, df_names)
    }


# Frames to audit, paired positionally with the labels used in the output
dfs = [receipts_df, users_df, brands_df, item_df]
df_names = ['Receipts', 'Users', 'Brands', 'Item Lists']

# Build one set of quality reports per frame
report = generate_quality_report(dfs, df_names)

# Walk the tables in insertion order; under each table heading, print the
# report title and render the report frame with rich display
for df_name in report:
    reports = report[df_name]
    print(f"Data Quality Report for {df_name}\n")
    for report_name in reports:
        report_df = reports[report_name]
        print(report_name)
        display(report_df)
        print()