In [0]:
from pyspark.sql import DataFrame
from pyspark.sql.functions import *
from typing import List, Dict

In [0]:
def null_check(df: DataFrame) -> DataFrame:
    """
    Returns a DataFrame with null counts for each column.

    Every column ``c`` of ``df`` produces one output column named
    ``<c>_null_count``.

    Column names are backtick-quoted before being resolved, so a top-level
    column whose name contains dots or other special characters (for example
    ``"a.b"``) is looked up by its literal name instead of being parsed as a
    struct-field path.
    """
    null_counts = []
    for c in df.columns:
        # Embedded backticks are escaped by doubling them inside the quotes.
        quoted = "`" + c.replace("`", "``") + "`"
        # when() yields a non-null literal only for null cells, and count()
        # ignores nulls, so the aggregate is the number of null cells.
        null_counts.append(
            count(when(col(quoted).isNull(), c)).alias(f"{c}_null_count")
        )
    return df.select(null_counts)

In [0]:
def count_rows(df: DataFrame) -> int:
    """
    Counts the rows of the given DataFrame and returns that total.
    """
    total_rows = df.count()
    return total_rows

In [0]:
def check_value_range(df: DataFrame, column: str, min_val: float, max_val: float) -> DataFrame:
    """
    Selects the rows whose value in `column` is strictly below `min_val`
    or strictly above `max_val`; values equal to either bound are in range.

    NOTE(review): under SQL null semantics a null in `column` satisfies
    neither comparison, so such rows would not be returned -- confirm that
    this is the intended treatment of missing values.
    """
    value = col(column)
    below_minimum = value < min_val
    above_maximum = value > max_val
    return df.filter(below_minimum | above_maximum)

In [0]:
def check_duplicates(df: DataFrame, subset: List[str]) -> DataFrame:
    """
    Groups the DataFrame by the `subset` columns and keeps only the groups
    that occur more than once.

    The result holds the grouping columns plus a `count` column with the
    number of rows in each duplicated group.
    """
    grouped = df.groupBy(subset)
    occurrences = grouped.count()
    # Only key combinations seen more than once count as duplicates.
    return occurrences.filter("count > 1")

In [0]:
def check_column_types(df: DataFrame) -> Dict[str, str]:
    """
    Builds a dictionary mapping each column name in the DataFrame's schema
    to the `simpleString()` form of its data type.
    """
    type_by_column = {}
    for schema_field in df.schema.fields:
        type_by_column[schema_field.name] = schema_field.dataType.simpleString()
    return type_by_column

In [0]:
def check_unique_values(df: DataFrame, column: str) -> DataFrame:
    """
    Computes how often each distinct value of `column` occurs.

    The result has one row per distinct value with its `count`, ordered
    from the most frequent value to the least frequent.
    """
    per_value = df.groupBy(column).count()
    most_frequent_first = per_value.orderBy("count", ascending=False)
    return most_frequent_first
