# In [None]:
import pandas as pd

def _dtype_label(expected_type) -> str:
    """Return the name used to compare an expected type against a dtype string."""
    if isinstance(expected_type, str):
        return expected_type
    if isinstance(expected_type, type):
        # Classes such as ``int`` or ``float`` compare by their bare name.
        return expected_type.__name__
    # Anything else (e.g. a dtype object) compares by its string form.
    return str(expected_type)


def detect_schema_mismatch(expected_schema: dict, actual_df: pd.DataFrame) -> dict:
    """
    Detect mismatches between expected schema and actual DataFrame schema.

    A column's type is accepted when the dtype name of the actual column
    starts with the expected type name, ignoring case. An expected 'int'
    therefore accepts 'int64' or 'int32', while an expected 'int64' does
    not accept 'uint64'.

    Args:
    - expected_schema: dict with column names as keys and data types as values.
      Values are normally dtype-name strings; classes are compared by their
      ``__name__`` and any other object by its ``str()`` form.
    - actual_df: pandas DataFrame of the actual data

    Returns:
    - dict with keys 'missing_columns', 'extra_columns', 'type_mismatches'.
      'missing_columns' and 'extra_columns' are sets of column names.
      'type_mismatches' maps a column name to a dict holding the 'expected'
      value exactly as given in the schema and the 'actual' dtype string.
      If a column label occurs more than once in the DataFrame, the entry
      describes the last occurrence whose dtype does not match.
    """
    expected_cols = set(expected_schema)
    actual_cols = set(actual_df.columns)

    type_mismatches = {}
    # Walking ``dtypes`` (rather than selecting each column) keeps the result
    # in DataFrame column order and copes with duplicated column labels,
    # where ``actual_df[col]`` would be a DataFrame with no ``.dtype``.
    for col, dtype in actual_df.dtypes.items():
        if col not in expected_schema:
            continue
        expected_type = expected_schema[col]
        actual_type = str(dtype)
        wanted = _dtype_label(expected_type).lower()
        # Prefix match, not substring match: 'int64' must not accept 'uint64'.
        if not actual_type.lower().startswith(wanted):
            type_mismatches[col] = {'expected': expected_type, 'actual': actual_type}

    return {
        'missing_columns': expected_cols - actual_cols,
        'extra_columns': actual_cols - expected_cols,
        'type_mismatches': type_mismatches
    }


# Demonstration: check a small in-memory frame against a target schema.
expected_schema = dict(
    id='int64',
    name='object',
    age='int64',
    salary='float64',
)

# In real use the frame would be loaded from storage instead, for example
# with pd.read_csv('your_data.csv').

# Toy data chosen to trip the checks: 'age' holds strings rather than
# integers, 'bonus' is not part of the schema, and 'salary' is absent.
data = dict(
    id=[1, 2, 3],
    name=['Alice', 'Bob', 'Charlie'],
    age=['25', '30', '35'],
    bonus=[1000, 1500, 1200],
)
actual_df = pd.DataFrame(data=data)

mismatches = detect_schema_mismatch(
    expected_schema=expected_schema,
    actual_df=actual_df,
)
print("Missing columns:", mismatches['missing_columns'])
print("Extra columns:", mismatches['extra_columns'])
print("Type mismatches:", mismatches['type_mismatches'])