In [None]:
import pandas as pd
import os
from dotenv import load_dotenv
from sqlalchemy import create_engine

# Read configuration: load_dotenv() runs first, then the connection string
# is looked up in the process environment.
load_dotenv()
DATABASE_URL = os.environ.get("DATABASE_URL")

# Fail fast when the connection string is missing or empty.
if DATABASE_URL is None or DATABASE_URL == "":
    raise ValueError("DATABASE_URL not found in .env file")

# Single SQLAlchemy engine reused by the query helpers in later cells.
engine = create_engine(DATABASE_URL)

def get_engine():
    """Return the shared ``engine`` object built by ``create_engine(DATABASE_URL)``."""
    return engine

## Load Tables and Check Nulls

In [3]:
def load_table(table_name: str) -> pd.DataFrame:
    """Read every row of a database table into a DataFrame.

    Args:
        table_name: Name of the table to read, optionally qualified with
            dots (e.g. ``schema.table``). Every dot-separated part must be
            a plain identifier (letters, digits and underscores, not
            starting with a digit).

    Returns:
        The result of ``SELECT * FROM <table_name>`` run against the shared
        ``engine``.

    Raises:
        ValueError: If ``table_name`` is not a string made of plain
            identifiers.
    """
    # The name is spliced into the SQL text (identifiers cannot be passed as
    # bound parameters), so anything that is not a bare identifier is rejected
    # before the query is built.
    if not isinstance(table_name, str) or not all(
        part.isidentifier() for part in table_name.split(".")
    ):
        raise ValueError(f"Invalid table name: {table_name!r}")
    return pd.read_sql(f"SELECT * FROM {table_name}", engine)

def check_nulls(df: pd.DataFrame) -> pd.Series:
    """Count missing values in each column of ``df``.

    Returns a Series indexed by column name whose values are the number of
    null entries in that column.
    """
    missing_mask = df.isna()
    return missing_mask.sum()

# Tables to profile, and the rule printed/written after each table's section.
tables = ["users", "products", "orders", "order_items"]
separator = "-" * 40

# Rebuilt from scratch on every run so re-executing the cell never duplicates
# entries in the saved report.
report_lines = []

for table in tables:
    df = load_table(table)

    # Compute each statistic once and reuse it for both the report and the
    # console output.
    row_count = len(df)
    null_counts = check_nulls(df)

    report_lines.extend(
        [
            f"TABLE: {table}",
            f"Total rows: {row_count}",
            "Null values per column:",
            str(null_counts),
            separator,
        ]
    )

    print(f"Table {table}: {row_count} rows")
    print(null_counts)
    print(separator)

Table users: 8 rows
user_id          0
role_id          0
full_name        0
email            0
password_hash    0
is_active        0
created_at       0
updated_at       0
deleted_at       8
deleted_by       8
dtype: int64
----------------------------------------
Table products: 83 rows
product_id      0
name            0
description    30
price           0
stock           0
is_active       0
created_at      0
updated_at      0
deleted_at     25
deleted_by     81
category        5
dtype: int64
----------------------------------------
Table products: 83 rows
product_id      0
name            0
description    30
price           0
stock           0
is_active       0
created_at      0
updated_at      0
deleted_at     25
deleted_by     81
category        5
dtype: int64
----------------------------------------
Table orders: 67 rows
order_id         0
user_id          0
status           0
total_amount     0
created_at       0
updated_at       0
is_active        0
deleted_at      67
deleted_by

## Save Report

In [4]:
# Write the collected report lines to disk as a single newline-joined text file.
report_path = "../reports/data_quality_report.txt"

# Create the target directory first so a fresh checkout without ../reports
# does not fail with FileNotFoundError.
os.makedirs(os.path.dirname(report_path), exist_ok=True)

# Explicit encoding keeps the file identical across platforms.
with open(report_path, "w", encoding="utf-8") as report_file:
    report_file.write("\n".join(report_lines))

print("Data quality report generated successfully.")

Data quality report generated successfully.
