# Functions

In [31]:
from datetime import date
from datetime import datetime
from typing import Union, List, Optional

import pandas as pd
from sqlalchemy import create_engine

# Connecting to the PostgreSQL Database

In [32]:
def get_db_engine(db_url: Optional[str] = None):
    """
    Create and return a SQLAlchemy engine for the Data_Asset_Linkage database.

    The connection URL is resolved in this order:
    1. the ``db_url`` argument, when given and non-empty;
    2. the ``DATA_ASSET_LINKAGE_DB_URL`` environment variable, when set and non-empty;
    3. the original local development URL (kept so existing callers are unaffected).

    Parameters:
    - db_url: Optional SQLAlchemy connection URL overriding the default

    Returns:
    - The engine object produced by sqlalchemy.create_engine
    """
    import os  # local import keeps this block self-contained

    # NOTE(review): the fallback embeds credentials in source; prefer supplying
    # the URL through the environment variable outside local development.
    default_url = "postgresql+psycopg2://postgres:password@localhost:5432/Data_Asset_Linkage"
    url = db_url or os.environ.get("DATA_ASSET_LINKAGE_DB_URL") or default_url
    return create_engine(url)

# LEVEL 0: Making Functions to fetch the data from the tables 

In [39]:
# Get Customer information Function 
def get_customers(
    customer_id: Union[str, List[str], None] = None,
    name: Union[str, List[str], None] = None,
    city: Union[str, List[str], None] = None,
    update_date: Union[str, date, List[Union[str, date]], None] = None,
    exact_match: bool = True,
    min_update_date: Union[str, date, None] = None,
    max_update_date: Union[str, date, None] = None
) -> pd.DataFrame:
    """
    Retrieve rows from the ``customer`` table using optional, combinable filters.

    Every supplied filter is ANDed together; a list/tuple value for a single
    filter is ORed across its elements. An empty list/tuple matches no rows.

    Parameters:
    - customer_id: Single ID or list of IDs (always exact match)
    - name: Single name or list of names (exact or partial match, see exact_match)
    - city: Single city or list of cities (exact or partial match, see exact_match)
    - update_date: Single date or list of dates (str 'YYYY-MM-DD' or date object)
    - exact_match: False switches name/city to case-insensitive substring
      matching via ILIKE (default True)
    - min_update_date: Minimum date filter (inclusive); falsy values are ignored
    - max_update_date: Maximum date filter (inclusive); falsy values are ignored

    Returns:
    - Pandas DataFrame with matching customer records

    Raises:
    - ValueError: if a date string is not in 'YYYY-MM-DD' format
    """

    # SQL fragments are collected and joined once at the end.
    clauses = ["SELECT * FROM customer WHERE 1=1"]
    params = {}

    def to_date(value):
        """Parse 'YYYY-MM-DD' strings into date objects; pass other values through."""
        if isinstance(value, str):
            return datetime.strptime(value, '%Y-%m-%d').date()
        return value

    def add_condition(field, value, exact=True, is_date=False):
        """Append one AND-ed filter on `field`, OR-ing over list/tuple values."""
        values = list(value) if isinstance(value, (list, tuple)) else [value]
        if not values:
            # An OR over zero alternatives is false; emitting "AND ()" would be
            # a SQL syntax error, so force an empty result instead.
            clauses.append("AND 1=0")
            return

        operator = "=" if exact else "ILIKE"
        conditions = []
        for i, val in enumerate(values):
            # Each field is filtered at most once per call, so field + index is unique.
            param_name = f"{field}_{i}"
            if is_date:
                val = to_date(val)
            elif not exact:
                val = f"%{val}%"  # substring match for text fields
            params[param_name] = val
            conditions.append(f"{field} {operator} %({param_name})s")
        clauses.append("AND (" + " OR ".join(conditions) + ")")

    # Add filters
    if customer_id is not None:
        add_condition("customer_id", customer_id, exact=True)

    if name is not None:
        add_condition("name", name, exact=exact_match)

    if city is not None:
        add_condition("city", city, exact=exact_match)

    if update_date is not None:
        add_condition("update_date", update_date, exact=True, is_date=True)

    # Date range filters
    if min_update_date:
        params['min_date'] = to_date(min_update_date)
        clauses.append("AND update_date >= %(min_date)s")

    if max_update_date:
        params['max_date'] = to_date(max_update_date)
        clauses.append("AND update_date <= %(max_date)s")

    query = " ".join(clauses)

    # Execute the query
    engine = get_db_engine()
    try:
        with engine.connect() as conn:
            if params:
                # The %(name)s placeholders are bound by the raw DBAPI cursor.
                with conn.connection.cursor() as cursor:
                    cursor.execute(query, params)
                    columns = [desc[0] for desc in cursor.description]
                    data = cursor.fetchall()
                    return pd.DataFrame(data, columns=columns)
            else:
                return pd.read_sql(query, conn)
    finally:
        # The engine is created per call, so release its pool when done.
        engine.dispose()

In [51]:
# Get Accounts information Function
def get_accounts(
    account_no: Union[str, List[str], None] = None,
    account_type: Union[str, List[str], None] = None,
    customer_id: Union[str, List[str], None] = None,
    account_status: Union[str, List[str], None] = None,
    activation_date: Union[str, date, List[Union[str, date]], None] = None,
    exact_match: bool = True,
    min_activation_date: Union[str, date, None] = None,
    max_activation_date: Union[str, date, None] = None
) -> pd.DataFrame:
    """
    Retrieve rows from the ``accounts`` table using optional, combinable filters.

    Every supplied filter is ANDed together; a list/tuple value for a single
    filter is ORed across its elements. An empty list/tuple matches no rows.

    Parameters:
    - account_no: Single or list of account numbers (always exact match)
    - account_type: Single or list of account types (exact or partial, see exact_match)
    - customer_id: Single or list of customer IDs (always exact match)
    - account_status: Single or list of statuses (exact or partial, see exact_match)
    - activation_date: Single or list of activation dates (str 'YYYY-MM-DD' or date object)
    - exact_match: False switches account_type/account_status to
      case-insensitive substring matching via ILIKE (default True)
    - min_activation_date: Inclusive minimum activation date; falsy values are ignored
    - max_activation_date: Inclusive maximum activation date; falsy values are ignored

    Returns:
    - Pandas DataFrame with matching account records

    Raises:
    - ValueError: if a date string is not in 'YYYY-MM-DD' format
    """

    # SQL fragments are collected and joined once at the end.
    clauses = ["SELECT * FROM accounts WHERE 1=1"]
    params = {}

    def to_date(value):
        """Parse 'YYYY-MM-DD' strings into date objects; pass other values through."""
        if isinstance(value, str):
            return datetime.strptime(value, '%Y-%m-%d').date()
        return value

    def add_condition(field, value, exact=True, is_date=False):
        """Append one AND-ed filter on `field`, OR-ing over list/tuple values."""
        values = list(value) if isinstance(value, (list, tuple)) else [value]
        if not values:
            # An OR over zero alternatives is false; emitting "AND ()" would be
            # a SQL syntax error, so force an empty result instead.
            clauses.append("AND 1=0")
            return

        operator = "=" if exact else "ILIKE"
        conditions = []
        for i, val in enumerate(values):
            # Each field is filtered at most once per call, so field + index is unique.
            param_name = f"{field}_{i}"
            if is_date:
                val = to_date(val)
            elif not exact:
                val = f"%{val}%"  # substring match for text fields
            params[param_name] = val
            conditions.append(f"{field} {operator} %({param_name})s")
        clauses.append("AND (" + " OR ".join(conditions) + ")")

    # Apply filters
    if account_no is not None:
        add_condition("account_no", account_no, exact=True)
    if account_type is not None:
        add_condition("account_type", account_type, exact=exact_match)
    if customer_id is not None:
        add_condition("customer_id", customer_id, exact=True)
    if account_status is not None:
        add_condition("account_status", account_status, exact=exact_match)
    if activation_date is not None:
        add_condition("activation_date", activation_date, exact=True, is_date=True)

    # Date range filters
    if min_activation_date:
        params['min_date'] = to_date(min_activation_date)
        clauses.append("AND activation_date >= %(min_date)s")
    if max_activation_date:
        params['max_date'] = to_date(max_activation_date)
        clauses.append("AND activation_date <= %(max_date)s")

    query = " ".join(clauses)

    # Execute the query
    engine = get_db_engine()
    try:
        with engine.connect() as conn:
            if params:
                # The %(name)s placeholders are bound by the raw DBAPI cursor.
                with conn.connection.cursor() as cursor:
                    cursor.execute(query, params)
                    columns = [desc[0] for desc in cursor.description]
                    data = cursor.fetchall()
                    return pd.DataFrame(data, columns=columns)
            else:
                return pd.read_sql(query, conn)
    finally:
        # The engine is created per call, so release its pool when done.
        engine.dispose()


In [52]:
# Get Transaction information Function
def get_transactions(
    transaction_id: Union[int, List[int], None] = None,
    account_no: Union[str, List[str], None] = None,
    customer_id: Union[str, List[str], None] = None,
    amount: Union[float, List[float], None] = None,
    min_amount: Optional[float] = None,
    max_amount: Optional[float] = None,
    transaction_time: Union[str, datetime, List[Union[str, datetime]], None] = None,
    min_transaction_time: Union[str, datetime, None] = None,
    max_transaction_time: Union[str, datetime, None] = None
) -> pd.DataFrame:
    """
    Retrieve rows from the ``transactions`` table using optional, combinable filters.

    Every supplied filter is ANDed together; a list/tuple value for a single
    filter is ORed across its elements. An empty list/tuple matches no rows.
    All value filters use exact equality.

    Parameters:
    - transaction_id: single ID or list
    - account_no: single or list of account numbers
    - customer_id: single or list of customer IDs
    - amount: exact amount or list of exact amounts
    - min_amount: inclusive lower bound for amount
    - max_amount: inclusive upper bound for amount
    - transaction_time: exact timestamp or list of timestamps
      (str 'YYYY-MM-DD HH:MM:SS' or datetime object)
    - min_transaction_time: inclusive lower bound; falsy values are ignored
    - max_transaction_time: inclusive upper bound; falsy values are ignored

    Returns:
    - Pandas DataFrame with matching transactions

    Raises:
    - ValueError: if a timestamp string is not in 'YYYY-MM-DD HH:MM:SS' format
    """

    # SQL fragments are collected and joined once at the end.
    clauses = ["SELECT * FROM transactions WHERE 1=1"]
    params = {}

    def to_timestamp(value):
        """Parse 'YYYY-MM-DD HH:MM:SS' strings into datetimes; pass other values through."""
        if isinstance(value, str):
            return datetime.strptime(value, '%Y-%m-%d %H:%M:%S')
        return value

    def add_condition(field, value, is_date=False):
        """Append one AND-ed equality filter on `field`, OR-ing over list/tuple values."""
        values = list(value) if isinstance(value, (list, tuple)) else [value]
        if not values:
            # An OR over zero alternatives is false; emitting "AND ()" would be
            # a SQL syntax error, so force an empty result instead.
            clauses.append("AND 1=0")
            return

        conditions = []
        for i, val in enumerate(values):
            # Each field is filtered at most once per call, so field + index is unique.
            param_name = f"{field}_{i}"
            params[param_name] = to_timestamp(val) if is_date else val
            conditions.append(f"{field} = %({param_name})s")
        clauses.append("AND (" + " OR ".join(conditions) + ")")

    if transaction_id is not None:
        add_condition("transaction_id", transaction_id)
    if account_no is not None:
        add_condition("account_no", account_no)
    if customer_id is not None:
        add_condition("customer_id", customer_id)
    if amount is not None:
        add_condition("amount", amount)

    # Amount range: compared against None so that a bound of 0 is honoured.
    if min_amount is not None:
        clauses.append("AND amount >= %(min_amount)s")
        params['min_amount'] = min_amount
    if max_amount is not None:
        clauses.append("AND amount <= %(max_amount)s")
        params['max_amount'] = max_amount

    if transaction_time is not None:
        add_condition("transaction_time", transaction_time, is_date=True)

    # Timestamp range filters
    if min_transaction_time:
        params['min_time'] = to_timestamp(min_transaction_time)
        clauses.append("AND transaction_time >= %(min_time)s")
    if max_transaction_time:
        params['max_time'] = to_timestamp(max_transaction_time)
        clauses.append("AND transaction_time <= %(max_time)s")

    query = " ".join(clauses)

    # Execute query
    engine = get_db_engine()
    try:
        with engine.connect() as conn:
            if params:
                # The %(name)s placeholders are bound by the raw DBAPI cursor.
                with conn.connection.cursor() as cursor:
                    cursor.execute(query, params)
                    columns = [desc[0] for desc in cursor.description]
                    data = cursor.fetchall()
                    return pd.DataFrame(data, columns=columns)
            else:
                return pd.read_sql(query, conn)
    finally:
        # The engine is created per call, so release its pool when done.
        engine.dispose()

# LEVEL 1: Merging/Joining The Tables 

In [53]:
def get_cust_acc(
    join_type: str = "left",
    join_id: str = "customer_id",
    customer_filters: Optional[dict] = None,
    account_filters: Optional[dict] = None
) -> pd.DataFrame:
    """
    Fetch customers and accounts and merge them on a shared key column.

    Parameters:
    - join_type: pandas merge strategy ('left', 'inner', ...); lower-cased before use
    - join_id: column name used as the join key on both sides
    - customer_filters: keyword arguments forwarded to get_customers
    - account_filters: keyword arguments forwarded to get_accounts

    Returns:
    - Pandas DataFrame of customers (left) merged with accounts (right);
      overlapping non-key columns from accounts carry a '_dup' suffix
    """
    left_frame = get_customers(**(customer_filters if customer_filters else {}))
    right_frame = get_accounts(**(account_filters if account_filters else {}))

    combined = left_frame.merge(
        right_frame,
        how=join_type.lower(),
        left_on=join_id,
        right_on=join_id,
        suffixes=('', '_dup'),
    )

    # Remove a suffixed copy of the join key if the merge produced one.
    return combined.drop(columns=[f"{join_id}_dup"], errors="ignore")


In [54]:
def get_cust_tran(
    join_type: str = "left",
    join_id: str = "customer_id",
    customer_filters: Optional[dict] = None,
    transaction_filters: Optional[dict] = None
) -> pd.DataFrame:
    """
    Fetch customers and transactions and merge them on a shared key column.

    Parameters:
    - join_type: pandas merge strategy ('left', 'inner', ...); lower-cased before use
    - join_id: column name used as the join key on both sides
    - customer_filters: keyword arguments forwarded to get_customers
    - transaction_filters: keyword arguments forwarded to get_transactions

    Returns:
    - Pandas DataFrame of customers (left) merged with transactions (right);
      overlapping non-key columns from transactions carry a '_dup' suffix
    """
    left_frame = get_customers(**(customer_filters if customer_filters else {}))
    right_frame = get_transactions(**(transaction_filters if transaction_filters else {}))

    combined = left_frame.merge(
        right_frame,
        how=join_type.lower(),
        left_on=join_id,
        right_on=join_id,
        suffixes=('', '_dup'),
    )

    # Remove a suffixed copy of the join key if the merge produced one.
    return combined.drop(columns=[f"{join_id}_dup"], errors="ignore")


In [55]:
def get_acc_tran(
    join_type: str = "left",
    join_id: str = "account_no",
    account_filters: Optional[dict] = None,
    transaction_filters: Optional[dict] = None
) -> pd.DataFrame:
    """
    Fetch accounts and transactions and merge them on a shared key column.

    Parameters:
    - join_type: pandas merge strategy ('left', 'inner', ...); lower-cased before use
    - join_id: column name used as the join key on both sides
    - account_filters: keyword arguments forwarded to get_accounts
    - transaction_filters: keyword arguments forwarded to get_transactions

    Returns:
    - Pandas DataFrame of accounts (left) merged with transactions (right);
      overlapping non-key columns from transactions carry a '_dup' suffix
    """
    left_frame = get_accounts(**(account_filters if account_filters else {}))
    right_frame = get_transactions(**(transaction_filters if transaction_filters else {}))

    combined = left_frame.merge(
        right_frame,
        how=join_type.lower(),
        left_on=join_id,
        right_on=join_id,
        suffixes=('', '_dup'),
    )

    # Remove a suffixed copy of the join key if the merge produced one.
    return combined.drop(columns=[f"{join_id}_dup"], errors="ignore")


# LEVEL 2: Super Function

In [63]:
def get_cust_acc_tran(
    join_type: str = "left",
    customer_filters: Optional[dict] = None,
    account_filters: Optional[dict] = None,
    transaction_filters: Optional[dict] = None,
    customer_account_key: str = "customer_id",
    account_transaction_key: str = "account_no"
) -> pd.DataFrame:
    """
    Join customer, accounts, and transactions into a single DataFrame.

    Customers are merged with accounts first; the result is then merged with
    transactions. The same join type is used for both merges.

    Parameters:
    - join_type: Type of SQL-style join ('left', 'inner', etc.); case-insensitive
    - customer_filters: dict to filter customer records (passed to get_customers)
    - account_filters: dict to filter account records (passed to get_accounts)
    - transaction_filters: dict to filter transaction records (passed to get_transactions)
    - customer_account_key: key column to join customers and accounts
    - account_transaction_key: key column to join accounts and transactions

    Returns:
    - A merged pandas DataFrame combining customer, account, and transaction data.
      Columns shared by customers and accounts get '_cust'/'_acc' suffixes;
      transaction columns that clash with the intermediate result get '_tran'.
    """

    # Normalise once so 'LEFT'/'Inner' behave like the two-table join helpers.
    how = join_type.lower()

    # Get filtered DataFrames
    customer_df = get_customers(**(customer_filters or {}))
    account_df = get_accounts(**(account_filters or {}))
    transaction_df = get_transactions(**(transaction_filters or {}))

    # Join Customer + Account
    cust_acc_df = pd.merge(
        customer_df,
        account_df,
        how=how,
        left_on=customer_account_key,
        right_on=customer_account_key,
        suffixes=('_cust', '_acc')
    )

    # Avoid a duplicate customer/account key column in the final join, but never
    # drop it when it is also the key the transaction merge needs.
    if customer_account_key != account_transaction_key:
        transaction_df = transaction_df.drop(columns=[customer_account_key], errors='ignore')

    # Join with Transaction
    full_df = pd.merge(
        cust_acc_df,
        transaction_df,
        how=how,
        left_on=account_transaction_key,
        right_on=account_transaction_key,
        suffixes=('', '_tran')
    )

    return full_df
