#### Upload Data from S3 to PostgreSQL

In [None]:
import boto3
import pandas as pd
import psycopg2
from io import StringIO
from sqlalchemy import create_engine, text
import glob
import os
from dotenv import load_dotenv
import logging
from typing import Dict, Any

import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import warnings
# NOTE(review): this installs a blanket "ignore" filter, so every warning
# category (including deprecation warnings from pandas / SQLAlchemy) is hidden
# for the rest of the session. Consider narrowing it to specific categories.
warnings.filterwarnings('ignore')

# For display: lift the column limit so wide DataFrames are shown in full
pd.set_option('display.max_columns', None)

In [16]:
# Logging setup: timestamped "<time> - <level> - <message>" records at INFO
# level and above, plus a module-level logger shared by the pipeline code.
logging.basicConfig(
    format='%(asctime)s - %(levelname)s - %(message)s',
    level=logging.INFO,
)
logger = logging.getLogger(__name__)

class DataPipeline:
    def __init__(self, bucket: str = "finriskai", prefix: str = "datasets/"):
        """Initialize the data pipeline with configurations.

        Args:
            bucket: Name of the S3 bucket that holds the source CSV files.
                Defaults to the bucket the pipeline was originally written for.
            prefix: Key prefix inside the bucket. It is concatenated verbatim
                with the file names, so it should end with "/".

        Raises:
            Exception: Whatever ``_create_db_connection`` raises when the
                database engine cannot be built.
        """
        # Environment must be populated before the DB settings are read.
        load_dotenv()
        self.engine = self._create_db_connection()
        self.bucket = bucket
        self.prefix = prefix
        # Depends on self.bucket / self.prefix, so it has to come last.
        self.files = self._get_s3_file_paths()
        
    def _create_db_connection(self):
        """Create PostgreSQL connection engine."""
        try:
            db_url = (
                f"postgresql://{os.getenv('DB_USER')}:{os.getenv('DB_PASSWORD')}@"
                f"{os.getenv('DB_HOST')}:{os.getenv('DB_PORT')}/{os.getenv('DB_NAME')}"
            )
            engine = create_engine(db_url)
            logger.info("Database connection established successfully")
            return engine
        except Exception as e:
            logger.error(f"Failed to create database connection: {e}")
            raise
    
    def _get_s3_file_paths(self) -> Dict[str, str]:
        """Define S3 file paths."""
        return {
            "applications": f"s3://{self.bucket}/{self.prefix}credit_applications.csv",
            "bureau": f"s3://{self.bucket}/{self.prefix}credit_bureau_data.csv",
            "profiles": f"s3://{self.bucket}/{self.prefix}customer_profiles.csv",
            "predictions": f"s3://{self.bucket}/{self.prefix}model_predictions.csv",
            "transactions": f"s3://{self.bucket}/{self.prefix}transaction_data.csv",
        }
    
    def load_data_from_s3(self) -> Dict[str, pd.DataFrame]:
        """Load all datasets from S3 into DataFrames with error handling."""
        dataframes = {}
        storage_options = {"anon": False}
        
        for name, path in self.files.items():
            try:
                logger.info(f"Loading {name} from S3...")
                df = pd.read_csv(path, storage_options=storage_options)
                logger.info(f"Successfully loaded {name}: {len(df)} rows")
                dataframes[name] = df
            except Exception as e:
                logger.error(f"Failed to load {name} from S3: {e}")
                raise
        
        return dataframes
    
    def analyze_data_ranges(self, dataframes: Dict[str, pd.DataFrame]):
        """Analyze data ranges to set appropriate constraints."""
        if 'profiles' in dataframes:
            df = dataframes['profiles']
            logger.info("Data range analysis for profiles:")
            logger.info(f"Behavioral score range: {df['behavioral_score'].min():.2f} - {df['behavioral_score'].max():.2f}")
            logger.info(f"Credit score range: {df['credit_score'].min()} - {df['credit_score'].max()}")
            logger.info(f"Customer age range: {df['customer_age'].min()} - {df['customer_age'].max()}")
            
        if 'bureau' in dataframes:
            df = dataframes['bureau']
            logger.info("Data range analysis for bureau:")
            logger.info(f"Credit utilization range: {df['credit_utilization'].min():.3f} - {df['credit_utilization'].max():.3f}")
            logger.info(f"Payment history range: {df['payment_history'].min():.3f} - {df['payment_history'].max():.3f}")

    def drop_tables(self):
        """Remove all pipeline tables so they can be rebuilt from scratch.

        Every ``DROP TABLE IF EXISTS ... CASCADE`` runs inside a single
        ``engine.begin()`` transaction. Fact tables are listed before the
        dimension tables.

        Raises:
            Exception: Any database error is logged and re-raised.
        """
        doomed_tables = (
            "fact_transactions",
            "fact_predictions",
            "fact_applications",
            "dim_bureau",
            "dim_customer_profiles",
        )

        try:
            with self.engine.begin() as connection:
                for name in doomed_tables:
                    logger.info(f"Dropping table: {name}")
                    statement = text(f"DROP TABLE IF EXISTS {name} CASCADE;")
                    connection.execute(statement)
                logger.info("All tables dropped successfully")
        except Exception as e:
            logger.error(f"Failed to drop tables: {e}")
            raise

    def create_tables(self):
        """Create the two dimension tables and three fact tables if absent.

        All ``CREATE TABLE IF NOT EXISTS`` statements run in one
        ``engine.begin()`` transaction, in dict insertion order. That order
        matters: ``dim_customer_profiles`` must exist before the fact tables
        whose ``customer_id`` foreign keys reference it.

        Because of ``IF NOT EXISTS``, an already existing table keeps its old
        definition; constraint changes only take effect after the table has
        been dropped.

        Raises:
            Exception: Any database error is logged and re-raised.
        """
        table_schemas = {
            # Customer dimension; parent of every fact table's foreign key.
            # NOTE(review): the run output captured with this notebook shows a
            # source row with customer_age = 132 being rejected by the
            # customer_age CHECK below - decide whether the data or the bound
            # should change.
            "dim_customer_profiles": """
                CREATE TABLE IF NOT EXISTS dim_customer_profiles (
                    customer_id VARCHAR PRIMARY KEY,
                    customer_age INT CHECK (customer_age > 0 AND customer_age <= 120),
                    annual_income NUMERIC(15,2) CHECK (annual_income >= 0),
                    employment_status VARCHAR(50),
                    account_tenure INT CHECK (account_tenure >= 0),
                    product_holdings INT CHECK (product_holdings >= 0),
                    relationship_value NUMERIC(15,2),
                    risk_segment VARCHAR(50),
                    behavioral_score NUMERIC(10,2) CHECK (behavioral_score >= 0),
                    credit_score INT CHECK (credit_score >= 0 AND credit_score <= 1000),
                    city VARCHAR(100),
                    last_activity_date DATE,
                    created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
                    updated_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP
                );
            """,

            # Credit-bureau dimension, one row per customer_id. It declares no
            # foreign key to dim_customer_profiles.
            "dim_bureau": """
                CREATE TABLE IF NOT EXISTS dim_bureau (
                    customer_id VARCHAR PRIMARY KEY,
                    credit_score INT CHECK (credit_score >= 0 AND credit_score <= 1000),
                    credit_history_length INT CHECK (credit_history_length >= 0),
                    number_of_accounts INT CHECK (number_of_accounts >= 0),
                    total_credit_limit NUMERIC(15,2) CHECK (total_credit_limit >= 0),
                    credit_utilization NUMERIC(6,3) CHECK (credit_utilization >= 0),
                    payment_history NUMERIC(6,3) CHECK (payment_history >= 0),
                    public_records INT CHECK (public_records >= 0),
                    created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP
                );
            """,

            # Loan applications; rows are deleted together with their customer.
            "fact_applications": """
                CREATE TABLE IF NOT EXISTS fact_applications (
                    application_id VARCHAR PRIMARY KEY,
                    customer_id VARCHAR,
                    application_date DATE NOT NULL,
                    loan_amount NUMERIC(15,2) CHECK (loan_amount > 0),
                    loan_purpose VARCHAR(50),
                    employment_status VARCHAR(50),
                    annual_income NUMERIC(15,2) CHECK (annual_income >= 0),
                    debt_to_income_ratio NUMERIC(6,3) CHECK (debt_to_income_ratio >= 0),
                    credit_score INT CHECK (credit_score >= 0 AND credit_score <= 1000),
                    application_status VARCHAR(20),
                    default_flag INT CHECK (default_flag IN (0, 1)),
                    created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
                    FOREIGN KEY (customer_id) REFERENCES dim_customer_profiles(customer_id) ON DELETE CASCADE
                );
            """,

            # Model predictions; model_features is stored as JSONB.
            "fact_predictions": """
                CREATE TABLE IF NOT EXISTS fact_predictions (
                    prediction_id VARCHAR PRIMARY KEY,
                    model_version VARCHAR(50) NOT NULL,
                    customer_id VARCHAR,
                    prediction_date DATE NOT NULL,
                    prediction_type VARCHAR(30),
                    risk_score NUMERIC(10,2) CHECK (risk_score >= 0),
                    fraud_probability NUMERIC(6,3) CHECK (fraud_probability >= 0 AND fraud_probability <= 1),
                    model_features JSONB,
                    prediction_explanation TEXT,
                    business_decision VARCHAR(50),
                    actual_outcome VARCHAR(50),
                    created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
                    FOREIGN KEY (customer_id) REFERENCES dim_customer_profiles(customer_id) ON DELETE CASCADE
                );
            """,

            # Transactions; amount carries no sign constraint, unlike
            # loan_amount in fact_applications.
            "fact_transactions": """
                CREATE TABLE IF NOT EXISTS fact_transactions (
                    transaction_id VARCHAR PRIMARY KEY,
                    customer_id VARCHAR,
                    transaction_date TIMESTAMP NOT NULL,
                    amount NUMERIC(15,2) NOT NULL,
                    merchant_category VARCHAR(50),
                    transaction_type VARCHAR(30),
                    location VARCHAR(100),
                    device_info VARCHAR(200),
                    fraud_flag INT CHECK (fraud_flag IN (0, 1)),
                    investigation_status VARCHAR(30),
                    created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
                    FOREIGN KEY (customer_id) REFERENCES dim_customer_profiles(customer_id) ON DELETE CASCADE
                );
            """
        }

        try:
            # One transaction for all statements: either every table is
            # created or none is.
            with self.engine.begin() as conn:
                for table_name, schema in table_schemas.items():
                    logger.info(f"Creating table: {table_name}")
                    conn.execute(text(schema))
                logger.info("All tables created successfully")
        except Exception as e:
            logger.error(f"Failed to create tables: {e}")
            raise
    
    def create_indexes(self):
        """Add the secondary indexes used by common lookups.

        Each index is created with ``CREATE INDEX IF NOT EXISTS`` inside a
        single ``engine.begin()`` transaction, so repeated calls are harmless.

        Raises:
            Exception: Any database error is logged and re-raised.
        """
        # (index name, table, indexed column)
        index_specs = (
            ("idx_applications_customer_id", "fact_applications", "customer_id"),
            ("idx_applications_date", "fact_applications", "application_date"),
            ("idx_applications_status", "fact_applications", "application_status"),
            ("idx_predictions_customer_id", "fact_predictions", "customer_id"),
            ("idx_predictions_date", "fact_predictions", "prediction_date"),
            ("idx_predictions_type", "fact_predictions", "prediction_type"),
            ("idx_transactions_customer_id", "fact_transactions", "customer_id"),
            ("idx_transactions_date", "fact_transactions", "transaction_date"),
            ("idx_transactions_fraud", "fact_transactions", "fraud_flag"),
            ("idx_profiles_risk_segment", "dim_customer_profiles", "risk_segment"),
            ("idx_profiles_city", "dim_customer_profiles", "city"),
        )
        statements = [
            f"CREATE INDEX IF NOT EXISTS {index_name} ON {table}({column});"
            for index_name, table, column in index_specs
        ]

        try:
            with self.engine.begin() as connection:
                for ddl in statements:
                    connection.execute(text(ddl))
                logger.info("Indexes created successfully")
        except Exception as e:
            logger.error(f"Failed to create indexes: {e}")
            raise
    
    def load_data_to_postgres(self, dataframes: Dict[str, pd.DataFrame]):
        """Load DataFrames to PostgreSQL with error handling and data validation."""
        table_mapping = {
            "profiles": "dim_customer_profiles",
            "bureau": "dim_bureau",
            "applications": "fact_applications",
            "predictions": "fact_predictions",
            "transactions": "fact_transactions"
        }
        
        # Load dimension tables first (to satisfy foreign key constraints)
        dimension_order = ["profiles", "bureau"]
        fact_order = ["applications", "predictions", "transactions"]
        
        try:
            # Load dimensions first
            for df_name in dimension_order:
                if df_name in dataframes:
                    table_name = table_mapping[df_name]
                    df = dataframes[df_name]
                    
                    # Debug: Check for any problematic values
                    logger.info(f"Checking data quality for {df_name}...")
                    if df_name == "profiles":
                        # Check for null values in key columns
                        null_counts = df.isnull().sum()
                        if null_counts.sum() > 0:
                            logger.warning(f"Found null values in {df_name}: {null_counts[null_counts > 0].to_dict()}")
                        
                        # Check data types
                        logger.info(f"Data types: {df.dtypes.to_dict()}")
                        
                        # Sample problematic rows
                        if 'behavioral_score' in df.columns:
                            extreme_scores = df[df['behavioral_score'] < 0]
                            if len(extreme_scores) > 0:
                                logger.warning(f"Found {len(extreme_scores)} rows with negative behavioral_score")
                    
                    logger.info(f"Loading {len(df)} records into {table_name}")
                    
                    # Try loading in smaller chunks to isolate problematic rows
                    chunk_size = 100
                    total_rows = len(df)
                    
                    for i in range(0, total_rows, chunk_size):
                        chunk = df.iloc[i:i+chunk_size]
                        try:
                            chunk.to_sql(table_name, con=self.engine, if_exists="append", 
                                       index=False, method='multi')
                            if (i + chunk_size) % 1000 == 0:
                                logger.info(f"Loaded {min(i + chunk_size, total_rows)} / {total_rows} rows")
                        except Exception as chunk_error:
                            logger.error(f"Error loading chunk {i}-{i+chunk_size}: {chunk_error}")
                            logger.error(f"Problematic data sample:\n{chunk.head()}")
                            raise
                    
                    logger.info(f"Successfully loaded {df_name}")
            
            # Load fact tables
            for df_name in fact_order:
                if df_name in dataframes:
                    table_name = table_mapping[df_name]
                    df = dataframes[df_name]
                    
                    logger.info(f"Loading {len(df)} records into {table_name}")
                    df.to_sql(table_name, con=self.engine, if_exists="append", 
                             index=False, method='multi', chunksize=1000)
                    logger.info(f"Successfully loaded {df_name}")
                    
        except Exception as e:
            logger.error(f"Failed to load data to PostgreSQL: {e}")
            raise
    
    def validate_data_load(self):
        """Log the row count of every pipeline table.

        This is a reporting step only: counts are written to the logger and
        are not compared against any expected value.

        Raises:
            Exception: Any database error is logged and re-raised.
        """
        tables = (
            "dim_customer_profiles",
            "dim_bureau",
            "fact_applications",
            "fact_predictions",
            "fact_transactions",
        )

        try:
            with self.engine.connect() as connection:
                for table in tables:
                    count_sql = f"SELECT COUNT(*) as count FROM {table};"
                    row = connection.execute(text(count_sql)).fetchone()
                    logger.info(f"{table}: {row[0]} records loaded")
        except Exception as e:
            logger.error(f"Failed to validate data load: {e}")
            raise
    
    def run_pipeline(self, force_recreate=True):
        """Execute the complete data pipeline."""
        try:
            logger.info("Starting data pipeline...")
            
            # Step 1: Load data from S3
            dataframes = self.load_data_from_s3()
            
            # Step 2: Analyze data ranges (optional but helpful for debugging)
            self.analyze_data_ranges(dataframes)
            
            # Step 3: Drop existing tables if force_recreate is True
            if force_recreate:
                logger.info("Force recreate enabled - dropping existing tables")
                self.drop_tables()
            
            # Step 4: Create tables
            self.create_tables()
            
            # Step 5: Create indexes
            self.create_indexes()
            
            # Step 6: Load data to PostgreSQL
            self.load_data_to_postgres(dataframes)
            
            # Step 7: Validate data load
            self.validate_data_load()
            
            logger.info("Data pipeline completed successfully!")
            
        except Exception as e:
            logger.error(f"Data pipeline failed: {e}")
            raise

In [17]:
# Usage
# Entry point: runs only when this code is executed directly, not on import.
if __name__ == "__main__":
    # Building the pipeline reads the .env settings and creates the DB engine.
    pipeline = DataPipeline()
    # Set force_recreate=True to drop and recreate tables with new constraints
    # NOTE: destructive - every pipeline table is dropped (CASCADE) and reloaded.
    pipeline.run_pipeline(force_recreate=True)

2025-08-16 19:34:30,249 - INFO - Database connection established successfully
2025-08-16 19:34:30,251 - INFO - Starting data pipeline...
2025-08-16 19:34:30,254 - INFO - Loading applications from S3...
2025-08-16 19:34:55,515 - INFO - Successfully loaded applications: 100000 rows
2025-08-16 19:34:55,518 - INFO - Loading bureau from S3...
2025-08-16 19:34:58,565 - INFO - Successfully loaded bureau: 25000 rows
2025-08-16 19:34:58,566 - INFO - Loading profiles from S3...
2025-08-16 19:35:03,524 - INFO - Successfully loaded profiles: 25000 rows
2025-08-16 19:35:03,527 - INFO - Loading predictions from S3...
2025-08-16 19:35:23,265 - INFO - Successfully loaded predictions: 50000 rows
2025-08-16 19:35:23,267 - INFO - Loading transactions from S3...
2025-08-16 19:35:36,504 - INFO - Successfully loaded transactions: 100000 rows
2025-08-16 19:35:36,505 - INFO - Data range analysis for profiles:
2025-08-16 19:35:36,511 - INFO - Behavioral score range: 2.19 - 927.24
2025-08-16 19:35:36,513 - INFO

IntegrityError: (psycopg2.errors.CheckViolation) new row for relation "dim_customer_profiles" violates check constraint "dim_customer_profiles_customer_age_check"
DETAIL:  Failing row contains (CUST_000054, 132, 147795.43, Retired, 2, 5, 88677.26, Prime, 429.58, 850, Sheffield, 2025-07-18, 2025-08-16 19:35:50.259432, 2025-08-16 19:35:50.259432).

[SQL: INSERT INTO dim_customer_profiles (customer_id, customer_age, annual_income, employment_status, account_tenure, product_holdings, relationship_value, risk_segment, behavioral_score, credit_score, city, last_activity_date) VALUES (%(customer_id_m0)s, %(customer_age_m0)s, %(annual_income_m0)s, %(employment_status_m0)s, %(account_tenure_m0)s, %(product_holdings_m0)s, %(relationship_value_m0)s, %(risk_segment_m0)s, %(behavioral_score_m0)s, %(credit_score_m0)s, %(city_m0)s, %(last_activity_date_m0)s), (%(customer_id_m1)s, %(customer_age_m1)s, %(annual_income_m1)s, %(employment_status_m1)s, %(account_tenure_m1)s, %(product_holdings_m1)s, %(relationship_value_m1)s, %(risk_segment_m1)s, %(behavioral_score_m1)s, %(credit_score_m1)s, %(city_m1)s, %(last_activity_date_m1)s), (%(customer_id_m2)s, %(customer_age_m2)s, %(annual_income_m2)s, %(employment_status_m2)s, %(account_tenure_m2)s, %(product_holdings_m2)s, %(relationship_value_m2)s, %(risk_segment_m2)s, %(behavioral_score_m2)s, %(credit_score_m2)s, %(city_m2)s, %(last_activity_date_m2)s), (%(customer_id_m3)s, %(customer_age_m3)s, %(annual_income_m3)s, %(employment_status_m3)s, %(account_tenure_m3)s, %(product_holdings_m3)s, %(relationship_value_m3)s, %(risk_segment_m3)s, %(behavioral_score_m3)s, %(credit_score_m3)s, %(city_m3)s, %(last_activity_date_m3)s), (%(customer_id_m4)s, %(customer_age_m4)s, %(annual_income_m4)s, %(employment_status_m4)s, %(account_tenure_m4)s, %(product_holdings_m4)s, %(relationship_value_m4)s, %(risk_segment_m4)s, %(behavioral_score_m4)s, %(credit_score_m4)s, %(city_m4)s, %(last_activity_date_m4)s), (%(customer_id_m5)s, %(customer_age_m5)s, %(annual_income_m5)s, %(employment_status_m5)s, %(account_tenure_m5)s, %(product_holdings_m5)s, %(relationship_value_m5)s, %(risk_segment_m5)s, %(behavioral_score_m5)s, %(credit_score_m5)s, %(city_m5)s, %(last_activity_date_m5)s), (%(customer_id_m6)s, %(customer_age_m6)s, %(annual_income_m6)s, %(employment_status_m6)s, %(account_tenure_m6)s, 
%(product_holdings_m6)s, %(relationship_value_m6)s, %(risk_segment_m6)s, %(behavioral_score_m6)s, %(credit_score_m6)s, %(city_m6)s, %(last_activity_date_m6)s), (%(customer_id_m7)s, %(customer_age_m7)s, %(annual_income_m7)s, %(employment_status_m7)s, %(account_tenure_m7)s, %(product_holdings_m7)s, %(relationship_value_m7)s, %(risk_segment_m7)s, %(behavioral_score_m7)s, %(credit_score_m7)s, %(city_m7)s, %(last_activity_date_m7)s), (%(customer_id_m8)s, %(customer_age_m8)s, %(annual_income_m8)s, %(employment_status_m8)s, %(account_tenure_m8)s, %(product_holdings_m8)s, %(relationship_value_m8)s, %(risk_segment_m8)s, %(behavioral_score_m8)s, %(credit_score_m8)s, %(city_m8)s, %(last_activity_date_m8)s), (%(customer_id_m9)s, %(customer_age_m9)s, %(annual_income_m9)s, %(employment_status_m9)s, %(account_tenure_m9)s, %(product_holdings_m9)s, %(relationship_value_m9)s, %(risk_segment_m9)s, %(behavioral_score_m9)s, %(credit_score_m9)s, %(city_m9)s, %(last_activity_date_m9)s), (%(customer_id_m10)s, %(customer_age_m10)s, %(annual_income_m10)s, %(employment_status_m10)s, %(account_tenure_m10)s, %(product_holdings_m10)s, %(relationship_value_m10)s, %(risk_segment_m10)s, %(behavioral_score_m10)s, %(credit_score_m10)s, %(city_m10)s, %(last_activity_date_m10)s), (%(customer_id_m11)s, %(customer_age_m11)s, %(annual_income_m11)s, %(employment_status_m11)s, %(account_tenure_m11)s, %(product_holdings_m11)s, %(relationship_value_m11)s, %(risk_segment_m11)s, %(behavioral_score_m11)s, %(credit_score_m11)s, %(city_m11)s, %(last_activity_date_m11)s), (%(customer_id_m12)s, %(customer_age_m12)s, %(annual_income_m12)s, %(employment_status_m12)s, %(account_tenure_m12)s, %(product_holdings_m12)s, %(relationship_value_m12)s, %(risk_segment_m12)s, %(behavioral_score_m12)s, %(credit_score_m12)s, %(city_m12)s, %(last_activity_date_m12)s), (%(customer_id_m13)s, %(customer_age_m13)s, %(annual_income_m13)s, %(employment_status_m13)s, %(account_tenure_m13)s, %(product_holdings_m13)s, 
%(relationship_value_m13)s, %(risk_segment_m13)s, %(behavioral_score_m13)s, %(credit_score_m13)s, %(city_m13)s, %(last_activity_date_m13)s), (%(customer_id_m14)s, %(customer_age_m14)s, %(annual_income_m14)s, %(employment_status_m14)s, %(account_tenure_m14)s, %(product_holdings_m14)s, %(relationship_value_m14)s, %(risk_segment_m14)s, %(behavioral_score_m14)s, %(credit_score_m14)s, %(city_m14)s, %(last_activity_date_m14)s), (%(customer_id_m15)s, %(customer_age_m15)s, %(annual_income_m15)s, %(employment_status_m15)s, %(account_tenure_m15)s, %(product_holdings_m15)s, %(relationship_value_m15)s, %(risk_segment_m15)s, %(behavioral_score_m15)s, %(credit_score_m15)s, %(city_m15)s, %(last_activity_date_m15)s), (%(customer_id_m16)s, %(customer_age_m16)s, %(annual_income_m16)s, %(employment_status_m16)s, %(account_tenure_m16)s, %(product_holdings_m16)s, %(relationship_value_m16)s, %(risk_segment_m16)s, %(behavioral_score_m16)s, %(credit_score_m16)s, %(city_m16)s, %(last_activity_date_m16)s), (%(customer_id_m17)s, %(customer_age_m17)s, %(annual_income_m17)s, %(employment_status_m17)s, %(account_tenure_m17)s, %(product_holdings_m17)s, %(relationship_value_m17)s, %(risk_segment_m17)s, %(behavioral_score_m17)s, %(credit_score_m17)s, %(city_m17)s, %(last_activity_date_m17)s), (%(customer_id_m18)s, %(customer_age_m18)s, %(annual_income_m18)s, %(employment_status_m18)s, %(account_tenure_m18)s, %(product_holdings_m18)s, %(relationship_value_m18)s, %(risk_segment_m18)s, %(behavioral_score_m18)s, %(credit_score_m18)s, %(city_m18)s, %(last_activity_date_m18)s), (%(customer_id_m19)s, %(customer_age_m19)s, %(annual_income_m19)s, %(employment_status_m19)s, %(account_tenure_m19)s, %(product_holdings_m19)s, %(relationship_value_m19)s, %(risk_segment_m19)s, %(behavioral_score_m19)s, %(credit_score_m19)s, %(city_m19)s, %(last_activity_date_m19)s), (%(customer_id_m20)s, %(customer_age_m20)s, %(annual_income_m20)s, %(employment_status_m20)s, %(account_tenure_m20)s, %(product_holdings_m20)s, 
%(relationship_value_m20)s, %(risk_segment_m20)s, %(behavioral_score_m20)s, %(credit_score_m20)s, %(city_m20)s, %(last_activity_date_m20)s), (%(customer_id_m21)s, %(customer_age_m21)s, %(annual_income_m21)s, %(employment_status_m21)s, %(account_tenure_m21)s, %(product_holdings_m21)s, %(relationship_value_m21)s, %(risk_segment_m21)s, %(behavioral_score_m21)s, %(credit_score_m21)s, %(city_m21)s, %(last_activity_date_m21)s), (%(customer_id_m22)s, %(customer_age_m22)s, %(annual_income_m22)s, %(employment_status_m22)s, %(account_tenure_m22)s, %(product_holdings_m22)s, %(relationship_value_m22)s, %(risk_segment_m22)s, %(behavioral_score_m22)s, %(credit_score_m22)s, %(city_m22)s, %(last_activity_date_m22)s), (%(customer_id_m23)s, %(customer_age_m23)s, %(annual_income_m23)s, %(employment_status_m23)s, %(account_tenure_m23)s, %(product_holdings_m23)s, %(relationship_value_m23)s, %(risk_segment_m23)s, %(behavioral_score_m23)s, %(credit_score_m23)s, %(city_m23)s, %(last_activity_date_m23)s), (%(customer_id_m24)s, %(customer_age_m24)s, %(annual_income_m24)s, %(employment_status_m24)s, %(account_tenure_m24)s, %(product_holdings_m24)s, %(relationship_value_m24)s, %(risk_segment_m24)s, %(behavioral_score_m24)s, %(credit_score_m24)s, %(city_m24)s, %(last_activity_date_m24)s), (%(customer_id_m25)s, %(customer_age_m25)s, %(annual_income_m25)s, %(employment_status_m25)s, %(account_tenure_m25)s, %(product_holdings_m25)s, %(relationship_value_m25)s, %(risk_segment_m25)s, %(behavioral_score_m25)s, %(credit_score_m25)s, %(city_m25)s, %(last_activity_date_m25)s), (%(customer_id_m26)s, %(customer_age_m26)s, %(annual_income_m26)s, %(employment_status_m26)s, %(account_tenure_m26)s, %(product_holdings_m26)s, %(relationship_value_m26)s, %(risk_segment_m26)s, %(behavioral_score_m26)s, %(credit_score_m26)s, %(city_m26)s, %(last_activity_date_m26)s), (%(customer_id_m27)s, %(customer_age_m27)s, %(annual_income_m27)s, %(employment_status_m27)s, %(account_tenure_m27)s, %(product_holdings_m27)s, 
%(relationship_value_m27)s, %(risk_segment_m27)s, %(behavioral_score_m27)s, %(credit_score_m27)s, %(city_m27)s, %(last_activity_date_m27)s), (%(customer_id_m28)s, %(customer_age_m28)s, %(annual_income_m28)s, %(employment_status_m28)s, %(account_tenure_m28)s, %(product_holdings_m28)s, %(relationship_value_m28)s, %(risk_segment_m28)s, %(behavioral_score_m28)s, %(credit_score_m28)s, %(city_m28)s, %(last_activity_date_m28)s), (%(customer_id_m29)s, %(customer_age_m29)s, %(annual_income_m29)s, %(employment_status_m29)s, %(account_tenure_m29)s, %(product_holdings_m29)s, %(relationship_value_m29)s, %(risk_segment_m29)s, %(behavioral_score_m29)s, %(credit_score_m29)s, %(city_m29)s, %(last_activity_date_m29)s), (%(customer_id_m30)s, %(customer_age_m30)s, %(annual_income_m30)s, %(employment_status_m30)s, %(account_tenure_m30)s, %(product_holdings_m30)s, %(relationship_value_m30)s, %(risk_segment_m30)s, %(behavioral_score_m30)s, %(credit_score_m30)s, %(city_m30)s, %(last_activity_date_m30)s), (%(customer_id_m31)s, %(customer_age_m31)s, %(annual_income_m31)s, %(employment_status_m31)s, %(account_tenure_m31)s, %(product_holdings_m31)s, %(relationship_value_m31)s, %(risk_segment_m31)s, %(behavioral_score_m31)s, %(credit_score_m31)s, %(city_m31)s, %(last_activity_date_m31)s), (%(customer_id_m32)s, %(customer_age_m32)s, %(annual_income_m32)s, %(employment_status_m32)s, %(account_tenure_m32)s, %(product_holdings_m32)s, %(relationship_value_m32)s, %(risk_segment_m32)s, %(behavioral_score_m32)s, %(credit_score_m32)s, %(city_m32)s, %(last_activity_date_m32)s), (%(customer_id_m33)s, %(customer_age_m33)s, %(annual_income_m33)s, %(employment_status_m33)s, %(account_tenure_m33)s, %(product_holdings_m33)s, %(relationship_value_m33)s, %(risk_segment_m33)s, %(behavioral_score_m33)s, %(credit_score_m33)s, %(city_m33)s, %(last_activity_date_m33)s), (%(customer_id_m34)s, %(customer_age_m34)s, %(annual_income_m34)s, %(employment_status_m34)s, %(account_tenure_m34)s, %(product_holdings_m34)s, 
%(relationship_value_m34)s, %(risk_segment_m34)s, %(behavioral_score_m34)s, %(credit_score_m34)s, %(city_m34)s, %(last_activity_date_m34)s), (%(customer_id_m35)s, %(customer_age_m35)s, %(annual_income_m35)s, %(employment_status_m35)s, %(account_tenure_m35)s, %(product_holdings_m35)s, %(relationship_value_m35)s, %(risk_segment_m35)s, %(behavioral_score_m35)s, %(credit_score_m35)s, %(city_m35)s, %(last_activity_date_m35)s), (%(customer_id_m36)s, %(customer_age_m36)s, %(annual_income_m36)s, %(employment_status_m36)s, %(account_tenure_m36)s, %(product_holdings_m36)s, %(relationship_value_m36)s, %(risk_segment_m36)s, %(behavioral_score_m36)s, %(credit_score_m36)s, %(city_m36)s, %(last_activity_date_m36)s), (%(customer_id_m37)s, %(customer_age_m37)s, %(annual_income_m37)s, %(employment_status_m37)s, %(account_tenure_m37)s, %(product_holdings_m37)s, %(relationship_value_m37)s, %(risk_segment_m37)s, %(behavioral_score_m37)s, %(credit_score_m37)s, %(city_m37)s, %(last_activity_date_m37)s), (%(customer_id_m38)s, %(customer_age_m38)s, %(annual_income_m38)s, %(employment_status_m38)s, %(account_tenure_m38)s, %(product_holdings_m38)s, %(relationship_value_m38)s, %(risk_segment_m38)s, %(behavioral_score_m38)s, %(credit_score_m38)s, %(city_m38)s, %(last_activity_date_m38)s), (%(customer_id_m39)s, %(customer_age_m39)s, %(annual_income_m39)s, %(employment_status_m39)s, %(account_tenure_m39)s, %(product_holdings_m39)s, %(relationship_value_m39)s, %(risk_segment_m39)s, %(behavioral_score_m39)s, %(credit_score_m39)s, %(city_m39)s, %(last_activity_date_m39)s), (%(customer_id_m40)s, %(customer_age_m40)s, %(annual_income_m40)s, %(employment_status_m40)s, %(account_tenure_m40)s, %(product_holdings_m40)s, %(relationship_value_m40)s, %(risk_segment_m40)s, %(behavioral_score_m40)s, %(credit_score_m40)s, %(city_m40)s, %(last_activity_date_m40)s), (%(customer_id_m41)s, %(customer_age_m41)s, %(annual_income_m41)s, %(employment_status_m41)s, %(account_tenure_m41)s, %(product_holdings_m41)s, 
%(relationship_value_m41)s, %(risk_segment_m41)s, %(behavioral_score_m41)s, %(credit_score_m41)s, %(city_m41)s, %(last_activity_date_m41)s), (%(customer_id_m42)s, %(customer_age_m42)s, %(annual_income_m42)s, %(employment_status_m42)s, %(account_tenure_m42)s, %(product_holdings_m42)s, %(relationship_value_m42)s, %(risk_segment_m42)s, %(behavioral_score_m42)s, %(credit_score_m42)s, %(city_m42)s, %(last_activity_date_m42)s), (%(customer_id_m43)s, %(customer_age_m43)s, %(annual_income_m43)s, %(employment_status_m43)s, %(account_tenure_m43)s, %(product_holdings_m43)s, %(relationship_value_m43)s, %(risk_segment_m43)s, %(behavioral_score_m43)s, %(credit_score_m43)s, %(city_m43)s, %(last_activity_date_m43)s), (%(customer_id_m44)s, %(customer_age_m44)s, %(annual_income_m44)s, %(employment_status_m44)s, %(account_tenure_m44)s, %(product_holdings_m44)s, %(relationship_value_m44)s, %(risk_segment_m44)s, %(behavioral_score_m44)s, %(credit_score_m44)s, %(city_m44)s, %(last_activity_date_m44)s), (%(customer_id_m45)s, %(customer_age_m45)s, %(annual_income_m45)s, %(employment_status_m45)s, %(account_tenure_m45)s, %(product_holdings_m45)s, %(relationship_value_m45)s, %(risk_segment_m45)s, %(behavioral_score_m45)s, %(credit_score_m45)s, %(city_m45)s, %(last_activity_date_m45)s), (%(customer_id_m46)s, %(customer_age_m46)s, %(annual_income_m46)s, %(employment_status_m46)s, %(account_tenure_m46)s, %(product_holdings_m46)s, %(relationship_value_m46)s, %(risk_segment_m46)s, %(behavioral_score_m46)s, %(credit_score_m46)s, %(city_m46)s, %(last_activity_date_m46)s), (%(customer_id_m47)s, %(customer_age_m47)s, %(annual_income_m47)s, %(employment_status_m47)s, %(account_tenure_m47)s, %(product_holdings_m47)s, %(relationship_value_m47)s, %(risk_segment_m47)s, %(behavioral_score_m47)s, %(credit_score_m47)s, %(city_m47)s, %(last_activity_date_m47)s), (%(customer_id_m48)s, %(customer_age_m48)s, %(annual_income_m48)s, %(employment_status_m48)s, %(account_tenure_m48)s, %(product_holdings_m48)s, 
%(relationship_value_m48)s, %(risk_segment_m48)s, %(behavioral_score_m48)s, %(credit_score_m48)s, %(city_m48)s, %(last_activity_date_m48)s), (%(customer_id_m49)s, %(customer_age_m49)s, %(annual_income_m49)s, %(employment_status_m49)s, %(account_tenure_m49)s, %(product_holdings_m49)s, %(relationship_value_m49)s, %(risk_segment_m49)s, %(behavioral_score_m49)s, %(credit_score_m49)s, %(city_m49)s, %(last_activity_date_m49)s), (%(customer_id_m50)s, %(customer_age_m50)s, %(annual_income_m50)s, %(employment_status_m50)s, %(account_tenure_m50)s, %(product_holdings_m50)s, %(relationship_value_m50)s, %(risk_segment_m50)s, %(behavioral_score_m50)s, %(credit_score_m50)s, %(city_m50)s, %(last_activity_date_m50)s), (%(customer_id_m51)s, %(customer_age_m51)s, %(annual_income_m51)s, %(employment_status_m51)s, %(account_tenure_m51)s, %(product_holdings_m51)s, %(relationship_value_m51)s, %(risk_segment_m51)s, %(behavioral_score_m51)s, %(credit_score_m51)s, %(city_m51)s, %(last_activity_date_m51)s), (%(customer_id_m52)s, %(customer_age_m52)s, %(annual_income_m52)s, %(employment_status_m52)s, %(account_tenure_m52)s, %(product_holdings_m52)s, %(relationship_value_m52)s, %(risk_segment_m52)s, %(behavioral_score_m52)s, %(credit_score_m52)s, %(city_m52)s, %(last_activity_date_m52)s), (%(customer_id_m53)s, %(customer_age_m53)s, %(annual_income_m53)s, %(employment_status_m53)s, %(account_tenure_m53)s, %(product_holdings_m53)s, %(relationship_value_m53)s, %(risk_segment_m53)s, %(behavioral_score_m53)s, %(credit_score_m53)s, %(city_m53)s, %(last_activity_date_m53)s), (%(customer_id_m54)s, %(customer_age_m54)s, %(annual_income_m54)s, %(employment_status_m54)s, %(account_tenure_m54)s, %(product_holdings_m54)s, %(relationship_value_m54)s, %(risk_segment_m54)s, %(behavioral_score_m54)s, %(credit_score_m54)s, %(city_m54)s, %(last_activity_date_m54)s), (%(customer_id_m55)s, %(customer_age_m55)s, %(annual_income_m55)s, %(employment_status_m55)s, %(account_tenure_m55)s, %(product_holdings_m55)s, 
%(relationship_value_m55)s, %(risk_segment_m55)s, %(behavioral_score_m55)s, %(credit_score_m55)s, %(city_m55)s, %(last_activity_date_m55)s), (%(customer_id_m56)s, %(customer_age_m56)s, %(annual_income_m56)s, %(employment_status_m56)s, %(account_tenure_m56)s, %(product_holdings_m56)s, %(relationship_value_m56)s, %(risk_segment_m56)s, %(behavioral_score_m56)s, %(credit_score_m56)s, %(city_m56)s, %(last_activity_date_m56)s), (%(customer_id_m57)s, %(customer_age_m57)s, %(annual_income_m57)s, %(employment_status_m57)s, %(account_tenure_m57)s, %(product_holdings_m57)s, %(relationship_value_m57)s, %(risk_segment_m57)s, %(behavioral_score_m57)s, %(credit_score_m57)s, %(city_m57)s, %(last_activity_date_m57)s), (%(customer_id_m58)s, %(customer_age_m58)s, %(annual_income_m58)s, %(employment_status_m58)s, %(account_tenure_m58)s, %(product_holdings_m58)s, %(relationship_value_m58)s, %(risk_segment_m58)s, %(behavioral_score_m58)s, %(credit_score_m58)s, %(city_m58)s, %(last_activity_date_m58)s), (%(customer_id_m59)s, %(customer_age_m59)s, %(annual_income_m59)s, %(employment_status_m59)s, %(account_tenure_m59)s, %(product_holdings_m59)s, %(relationship_value_m59)s, %(risk_segment_m59)s, %(behavioral_score_m59)s, %(credit_score_m59)s, %(city_m59)s, %(last_activity_date_m59)s), (%(customer_id_m60)s, %(customer_age_m60)s, %(annual_income_m60)s, %(employment_status_m60)s, %(account_tenure_m60)s, %(product_holdings_m60)s, %(relationship_value_m60)s, %(risk_segment_m60)s, %(behavioral_score_m60)s, %(credit_score_m60)s, %(city_m60)s, %(last_activity_date_m60)s), (%(customer_id_m61)s, %(customer_age_m61)s, %(annual_income_m61)s, %(employment_status_m61)s, %(account_tenure_m61)s, %(product_holdings_m61)s, %(relationship_value_m61)s, %(risk_segment_m61)s, %(behavioral_score_m61)s, %(credit_score_m61)s, %(city_m61)s, %(last_activity_date_m61)s), (%(customer_id_m62)s, %(customer_age_m62)s, %(annual_income_m62)s, %(employment_status_m62)s, %(account_tenure_m62)s, %(product_holdings_m62)s, 
%(relationship_value_m62)s, %(risk_segment_m62)s, %(behavioral_score_m62)s, %(credit_score_m62)s, %(city_m62)s, %(last_activity_date_m62)s), (%(customer_id_m63)s, %(customer_age_m63)s, %(annual_income_m63)s, %(employment_status_m63)s, %(account_tenure_m63)s, %(product_holdings_m63)s, %(relationship_value_m63)s, %(risk_segment_m63)s, %(behavioral_score_m63)s, %(credit_score_m63)s, %(city_m63)s, %(last_activity_date_m63)s), (%(customer_id_m64)s, %(customer_age_m64)s, %(annual_income_m64)s, %(employment_status_m64)s, %(account_tenure_m64)s, %(product_holdings_m64)s, %(relationship_value_m64)s, %(risk_segment_m64)s, %(behavioral_score_m64)s, %(credit_score_m64)s, %(city_m64)s, %(last_activity_date_m64)s), (%(customer_id_m65)s, %(customer_age_m65)s, %(annual_income_m65)s, %(employment_status_m65)s, %(account_tenure_m65)s, %(product_holdings_m65)s, %(relationship_value_m65)s, %(risk_segment_m65)s, %(behavioral_score_m65)s, %(credit_score_m65)s, %(city_m65)s, %(last_activity_date_m65)s), (%(customer_id_m66)s, %(customer_age_m66)s, %(annual_income_m66)s, %(employment_status_m66)s, %(account_tenure_m66)s, %(product_holdings_m66)s, %(relationship_value_m66)s, %(risk_segment_m66)s, %(behavioral_score_m66)s, %(credit_score_m66)s, %(city_m66)s, %(last_activity_date_m66)s), (%(customer_id_m67)s, %(customer_age_m67)s, %(annual_income_m67)s, %(employment_status_m67)s, %(account_tenure_m67)s, %(product_holdings_m67)s, %(relationship_value_m67)s, %(risk_segment_m67)s, %(behavioral_score_m67)s, %(credit_score_m67)s, %(city_m67)s, %(last_activity_date_m67)s), (%(customer_id_m68)s, %(customer_age_m68)s, %(annual_income_m68)s, %(employment_status_m68)s, %(account_tenure_m68)s, %(product_holdings_m68)s, %(relationship_value_m68)s, %(risk_segment_m68)s, %(behavioral_score_m68)s, %(credit_score_m68)s, %(city_m68)s, %(last_activity_date_m68)s), (%(customer_id_m69)s, %(customer_age_m69)s, %(annual_income_m69)s, %(employment_status_m69)s, %(account_tenure_m69)s, %(product_holdings_m69)s, 
%(relationship_value_m69)s, %(risk_segment_m69)s, %(behavioral_score_m69)s, %(credit_score_m69)s, %(city_m69)s, %(last_activity_date_m69)s), (%(customer_id_m70)s, %(customer_age_m70)s, %(annual_income_m70)s, %(employment_status_m70)s, %(account_tenure_m70)s, %(product_holdings_m70)s, %(relationship_value_m70)s, %(risk_segment_m70)s, %(behavioral_score_m70)s, %(credit_score_m70)s, %(city_m70)s, %(last_activity_date_m70)s), (%(customer_id_m71)s, %(customer_age_m71)s, %(annual_income_m71)s, %(employment_status_m71)s, %(account_tenure_m71)s, %(product_holdings_m71)s, %(relationship_value_m71)s, %(risk_segment_m71)s, %(behavioral_score_m71)s, %(credit_score_m71)s, %(city_m71)s, %(last_activity_date_m71)s), (%(customer_id_m72)s, %(customer_age_m72)s, %(annual_income_m72)s, %(employment_status_m72)s, %(account_tenure_m72)s, %(product_holdings_m72)s, %(relationship_value_m72)s, %(risk_segment_m72)s, %(behavioral_score_m72)s, %(credit_score_m72)s, %(city_m72)s, %(last_activity_date_m72)s), (%(customer_id_m73)s, %(customer_age_m73)s, %(annual_income_m73)s, %(employment_status_m73)s, %(account_tenure_m73)s, %(product_holdings_m73)s, %(relationship_value_m73)s, %(risk_segment_m73)s, %(behavioral_score_m73)s, %(credit_score_m73)s, %(city_m73)s, %(last_activity_date_m73)s), (%(customer_id_m74)s, %(customer_age_m74)s, %(annual_income_m74)s, %(employment_status_m74)s, %(account_tenure_m74)s, %(product_holdings_m74)s, %(relationship_value_m74)s, %(risk_segment_m74)s, %(behavioral_score_m74)s, %(credit_score_m74)s, %(city_m74)s, %(last_activity_date_m74)s), (%(customer_id_m75)s, %(customer_age_m75)s, %(annual_income_m75)s, %(employment_status_m75)s, %(account_tenure_m75)s, %(product_holdings_m75)s, %(relationship_value_m75)s, %(risk_segment_m75)s, %(behavioral_score_m75)s, %(credit_score_m75)s, %(city_m75)s, %(last_activity_date_m75)s), (%(customer_id_m76)s, %(customer_age_m76)s, %(annual_income_m76)s, %(employment_status_m76)s, %(account_tenure_m76)s, %(product_holdings_m76)s, 
%(relationship_value_m76)s, %(risk_segment_m76)s, %(behavioral_score_m76)s, %(credit_score_m76)s, %(city_m76)s, %(last_activity_date_m76)s), (%(customer_id_m77)s, %(customer_age_m77)s, %(annual_income_m77)s, %(employment_status_m77)s, %(account_tenure_m77)s, %(product_holdings_m77)s, %(relationship_value_m77)s, %(risk_segment_m77)s, %(behavioral_score_m77)s, %(credit_score_m77)s, %(city_m77)s, %(last_activity_date_m77)s), (%(customer_id_m78)s, %(customer_age_m78)s, %(annual_income_m78)s, %(employment_status_m78)s, %(account_tenure_m78)s, %(product_holdings_m78)s, %(relationship_value_m78)s, %(risk_segment_m78)s, %(behavioral_score_m78)s, %(credit_score_m78)s, %(city_m78)s, %(last_activity_date_m78)s), (%(customer_id_m79)s, %(customer_age_m79)s, %(annual_income_m79)s, %(employment_status_m79)s, %(account_tenure_m79)s, %(product_holdings_m79)s, %(relationship_value_m79)s, %(risk_segment_m79)s, %(behavioral_score_m79)s, %(credit_score_m79)s, %(city_m79)s, %(last_activity_date_m79)s), (%(customer_id_m80)s, %(customer_age_m80)s, %(annual_income_m80)s, %(employment_status_m80)s, %(account_tenure_m80)s, %(product_holdings_m80)s, %(relationship_value_m80)s, %(risk_segment_m80)s, %(behavioral_score_m80)s, %(credit_score_m80)s, %(city_m80)s, %(last_activity_date_m80)s), (%(customer_id_m81)s, %(customer_age_m81)s, %(annual_income_m81)s, %(employment_status_m81)s, %(account_tenure_m81)s, %(product_holdings_m81)s, %(relationship_value_m81)s, %(risk_segment_m81)s, %(behavioral_score_m81)s, %(credit_score_m81)s, %(city_m81)s, %(last_activity_date_m81)s), (%(customer_id_m82)s, %(customer_age_m82)s, %(annual_income_m82)s, %(employment_status_m82)s, %(account_tenure_m82)s, %(product_holdings_m82)s, %(relationship_value_m82)s, %(risk_segment_m82)s, %(behavioral_score_m82)s, %(credit_score_m82)s, %(city_m82)s, %(last_activity_date_m82)s), (%(customer_id_m83)s, %(customer_age_m83)s, %(annual_income_m83)s, %(employment_status_m83)s, %(account_tenure_m83)s, %(product_holdings_m83)s, 
%(relationship_value_m83)s, %(risk_segment_m83)s, %(behavioral_score_m83)s, %(credit_score_m83)s, %(city_m83)s, %(last_activity_date_m83)s), (%(customer_id_m84)s, %(customer_age_m84)s, %(annual_income_m84)s, %(employment_status_m84)s, %(account_tenure_m84)s, %(product_holdings_m84)s, %(relationship_value_m84)s, %(risk_segment_m84)s, %(behavioral_score_m84)s, %(credit_score_m84)s, %(city_m84)s, %(last_activity_date_m84)s), (%(customer_id_m85)s, %(customer_age_m85)s, %(annual_income_m85)s, %(employment_status_m85)s, %(account_tenure_m85)s, %(product_holdings_m85)s, %(relationship_value_m85)s, %(risk_segment_m85)s, %(behavioral_score_m85)s, %(credit_score_m85)s, %(city_m85)s, %(last_activity_date_m85)s), (%(customer_id_m86)s, %(customer_age_m86)s, %(annual_income_m86)s, %(employment_status_m86)s, %(account_tenure_m86)s, %(product_holdings_m86)s, %(relationship_value_m86)s, %(risk_segment_m86)s, %(behavioral_score_m86)s, %(credit_score_m86)s, %(city_m86)s, %(last_activity_date_m86)s), (%(customer_id_m87)s, %(customer_age_m87)s, %(annual_income_m87)s, %(employment_status_m87)s, %(account_tenure_m87)s, %(product_holdings_m87)s, %(relationship_value_m87)s, %(risk_segment_m87)s, %(behavioral_score_m87)s, %(credit_score_m87)s, %(city_m87)s, %(last_activity_date_m87)s), (%(customer_id_m88)s, %(customer_age_m88)s, %(annual_income_m88)s, %(employment_status_m88)s, %(account_tenure_m88)s, %(product_holdings_m88)s, %(relationship_value_m88)s, %(risk_segment_m88)s, %(behavioral_score_m88)s, %(credit_score_m88)s, %(city_m88)s, %(last_activity_date_m88)s), (%(customer_id_m89)s, %(customer_age_m89)s, %(annual_income_m89)s, %(employment_status_m89)s, %(account_tenure_m89)s, %(product_holdings_m89)s, %(relationship_value_m89)s, %(risk_segment_m89)s, %(behavioral_score_m89)s, %(credit_score_m89)s, %(city_m89)s, %(last_activity_date_m89)s), (%(customer_id_m90)s, %(customer_age_m90)s, %(annual_income_m90)s, %(employment_status_m90)s, %(account_tenure_m90)s, %(product_holdings_m90)s, 
%(relationship_value_m90)s, %(risk_segment_m90)s, %(behavioral_score_m90)s, %(credit_score_m90)s, %(city_m90)s, %(last_activity_date_m90)s), (%(customer_id_m91)s, %(customer_age_m91)s, %(annual_income_m91)s, %(employment_status_m91)s, %(account_tenure_m91)s, %(product_holdings_m91)s, %(relationship_value_m91)s, %(risk_segment_m91)s, %(behavioral_score_m91)s, %(credit_score_m91)s, %(city_m91)s, %(last_activity_date_m91)s), (%(customer_id_m92)s, %(customer_age_m92)s, %(annual_income_m92)s, %(employment_status_m92)s, %(account_tenure_m92)s, %(product_holdings_m92)s, %(relationship_value_m92)s, %(risk_segment_m92)s, %(behavioral_score_m92)s, %(credit_score_m92)s, %(city_m92)s, %(last_activity_date_m92)s), (%(customer_id_m93)s, %(customer_age_m93)s, %(annual_income_m93)s, %(employment_status_m93)s, %(account_tenure_m93)s, %(product_holdings_m93)s, %(relationship_value_m93)s, %(risk_segment_m93)s, %(behavioral_score_m93)s, %(credit_score_m93)s, %(city_m93)s, %(last_activity_date_m93)s), (%(customer_id_m94)s, %(customer_age_m94)s, %(annual_income_m94)s, %(employment_status_m94)s, %(account_tenure_m94)s, %(product_holdings_m94)s, %(relationship_value_m94)s, %(risk_segment_m94)s, %(behavioral_score_m94)s, %(credit_score_m94)s, %(city_m94)s, %(last_activity_date_m94)s), (%(customer_id_m95)s, %(customer_age_m95)s, %(annual_income_m95)s, %(employment_status_m95)s, %(account_tenure_m95)s, %(product_holdings_m95)s, %(relationship_value_m95)s, %(risk_segment_m95)s, %(behavioral_score_m95)s, %(credit_score_m95)s, %(city_m95)s, %(last_activity_date_m95)s), (%(customer_id_m96)s, %(customer_age_m96)s, %(annual_income_m96)s, %(employment_status_m96)s, %(account_tenure_m96)s, %(product_holdings_m96)s, %(relationship_value_m96)s, %(risk_segment_m96)s, %(behavioral_score_m96)s, %(credit_score_m96)s, %(city_m96)s, %(last_activity_date_m96)s), (%(customer_id_m97)s, %(customer_age_m97)s, %(annual_income_m97)s, %(employment_status_m97)s, %(account_tenure_m97)s, %(product_holdings_m97)s, 
%(relationship_value_m97)s, %(risk_segment_m97)s, %(behavioral_score_m97)s, %(credit_score_m97)s, %(city_m97)s, %(last_activity_date_m97)s), (%(customer_id_m98)s, %(customer_age_m98)s, %(annual_income_m98)s, %(employment_status_m98)s, %(account_tenure_m98)s, %(product_holdings_m98)s, %(relationship_value_m98)s, %(risk_segment_m98)s, %(behavioral_score_m98)s, %(credit_score_m98)s, %(city_m98)s, %(last_activity_date_m98)s), (%(customer_id_m99)s, %(customer_age_m99)s, %(annual_income_m99)s, %(employment_status_m99)s, %(account_tenure_m99)s, %(product_holdings_m99)s, %(relationship_value_m99)s, %(risk_segment_m99)s, %(behavioral_score_m99)s, %(credit_score_m99)s, %(city_m99)s, %(last_activity_date_m99)s)]
[parameters: {'customer_id_m0': 'CUST_000001', 'customer_age_m0': 47, 'annual_income_m0': 57726.04, 'employment_status_m0': 'Full-time', 'account_tenure_m0': 2, 'product_holdings_m0': 2, 'relationship_value_m0': 13854.25, 'risk_segment_m0': 'Near-Prime', 'behavioral_score_m0': 221.73, 'credit_score_m0': 742, 'city_m0': 'Newcastle', 'last_activity_date_m0': '2025-07-14', 'customer_id_m1': 'CUST_000002', 'customer_age_m1': 22, 'annual_income_m1': 37938.4, 'employment_status_m1': 'Full-time', 'account_tenure_m1': 3, 'product_holdings_m1': 2, 'relationship_value_m1': 9863.98, 'risk_segment_m1': 'Subprime', 'behavioral_score_m1': 586.23, 'credit_score_m1': 565, 'city_m1': 'Birmingham', 'last_activity_date_m1': '2025-07-10', 'customer_id_m2': 'CUST_000003', 'customer_age_m2': 37, 'annual_income_m2': 68477.75, 'employment_status_m2': 'Self-employed', 'account_tenure_m2': 2, 'product_holdings_m2': 5, 'relationship_value_m2': 41086.65, 'risk_segment_m2': 'Prime', 'behavioral_score_m2': 23.43, 'credit_score_m2': 754, 'city_m2': 'Liverpool', 'last_activity_date_m2': '2025-07-22', 'customer_id_m3': 'CUST_000004', 'customer_age_m3': 71, 'annual_income_m3': 94196.78, 'employment_status_m3': 'Retired', 'account_tenure_m3': 5, 'product_holdings_m3': 5, 'relationship_value_m3': 70647.59, 'risk_segment_m3': 'Prime', 'behavioral_score_m3': 484.24, 'credit_score_m3': 792, 'city_m3': 'Manchester', 'last_activity_date_m3': '2025-07-20', 'customer_id_m4': 'CUST_000005', 'customer_age_m4': 54 ... 1100 parameters truncated ... 
'city_m95': 'Leeds', 'last_activity_date_m95': '2025-07-31', 'customer_id_m96': 'CUST_000097', 'customer_age_m96': 18, 'annual_income_m96': 29788.35, 'employment_status_m96': 'Full-time', 'account_tenure_m96': 5, 'product_holdings_m96': 1, 'relationship_value_m96': 4468.25, 'risk_segment_m96': 'Subprime', 'behavioral_score_m96': 171.94, 'credit_score_m96': 635, 'city_m96': 'Cardiff', 'last_activity_date_m96': '2025-07-15', 'customer_id_m97': 'CUST_000098', 'customer_age_m97': 40, 'annual_income_m97': 36680.32, 'employment_status_m97': 'Part-time', 'account_tenure_m97': 0, 'product_holdings_m97': 4, 'relationship_value_m97': 14672.13, 'risk_segment_m97': 'Prime', 'behavioral_score_m97': 406.51, 'credit_score_m97': 767, 'city_m97': 'Sheffield', 'last_activity_date_m97': '2025-07-21', 'customer_id_m98': 'CUST_000099', 'customer_age_m98': 45, 'annual_income_m98': 52496.79, 'employment_status_m98': 'Full-time', 'account_tenure_m98': 2, 'product_holdings_m98': 2, 'relationship_value_m98': 12599.23, 'risk_segment_m98': 'Near-Prime', 'behavioral_score_m98': 119.36, 'credit_score_m98': 742, 'city_m98': 'Leeds', 'last_activity_date_m98': '2025-07-15', 'customer_id_m99': 'CUST_000100', 'customer_age_m99': 37, 'annual_income_m99': 54588.02, 'employment_status_m99': 'Full-time', 'account_tenure_m99': 0, 'product_holdings_m99': 3, 'relationship_value_m99': 16376.41, 'risk_segment_m99': 'Prime', 'behavioral_score_m99': 562.18, 'credit_score_m99': 850, 'city_m99': 'Glasgow', 'last_activity_date_m99': '2025-07-20'}]
(Background on this error at: https://sqlalche.me/e/20/gkpj)

In [None]:
# Create the star-schema tables and load the datasets inside ONE transaction.
# `engine.begin()` commits when the block exits cleanly and rolls back if any
# statement raises, so a failed run does not leave a partially loaded schema.
with engine.begin() as conn:
    # Dimension: Customer Profiles
    # (parent table: every fact table's customer_id references it)
    conn.execute(text("""
    CREATE TABLE IF NOT EXISTS dim_customer_profiles (
        customer_id VARCHAR PRIMARY KEY,
        customer_age INT,
        annual_income NUMERIC(15,2),
        employment_status VARCHAR,
        account_tenure INT,
        product_holdings INT,
        relationship_value NUMERIC(15,2),
        risk_segment VARCHAR,
        behavioral_score NUMERIC(10,2),
        credit_score INT,
        city VARCHAR,
        last_activity_date DATE
    );
    """))

    # Dimension: Bureau Data
    conn.execute(text("""
    CREATE TABLE IF NOT EXISTS dim_bureau (
        customer_id VARCHAR PRIMARY KEY,
        credit_score INT,
        credit_history_length INT,
        number_of_accounts INT,
        total_credit_limit NUMERIC(15,2),
        credit_utilization NUMERIC(6,3),
        payment_history NUMERIC(6,3),
        public_records INT
    );
    """))

    # Fact: Applications
    conn.execute(text("""
    CREATE TABLE IF NOT EXISTS fact_applications (
        application_id VARCHAR PRIMARY KEY,
        customer_id VARCHAR REFERENCES dim_customer_profiles(customer_id),
        application_date DATE,
        loan_amount NUMERIC(15,2),
        loan_purpose VARCHAR,
        employment_status VARCHAR,
        annual_income NUMERIC(15,2),
        debt_to_income_ratio NUMERIC(6,3),
        credit_score INT,
        application_status VARCHAR,
        default_flag INT
    );
    """))

    # Fact: Predictions
    conn.execute(text("""
    CREATE TABLE IF NOT EXISTS fact_predictions (
        prediction_id VARCHAR PRIMARY KEY,
        model_version VARCHAR,
        customer_id VARCHAR REFERENCES dim_customer_profiles(customer_id),
        prediction_date DATE,
        prediction_type VARCHAR,
        risk_score NUMERIC(10,2),
        fraud_probability NUMERIC(6,3),
        model_features JSONB,
        prediction_explanation TEXT,
        business_decision VARCHAR,
        actual_outcome VARCHAR
    );
    """))

    # Fact: Transactions
    conn.execute(text("""
    CREATE TABLE IF NOT EXISTS fact_transactions (
        transaction_id VARCHAR PRIMARY KEY,
        customer_id VARCHAR REFERENCES dim_customer_profiles(customer_id),
        transaction_date TIMESTAMP,
        amount NUMERIC(15,2),
        merchant_category VARCHAR,
        transaction_type VARCHAR,
        location VARCHAR,
        device_info VARCHAR,
        fraud_flag INT,
        investigation_status VARCHAR
    );
    """))

    # --- UPLOAD DATA TO POSTGRES ---

    # Dimensions are loaded before facts: the fact tables declare a foreign
    # key on dim_customer_profiles(customer_id), so the parent rows must
    # exist before any child row is inserted.
    load_order = [
        ("dim_customer_profiles", profiles),
        ("dim_bureau", bureau),
        ("fact_applications", applications),
        ("fact_predictions", predictions),
        ("fact_transactions", transactions),
    ]

    # Insert through `conn`, not `engine`: a second connection checked out
    # from the engine would not see the tables created above until this
    # transaction commits, and each insert would commit independently.
    # NOTE(review): if_exists="append" means re-running this cell against
    # already-loaded tables will presumably hit the primary-key constraints;
    # the whole transaction is then rolled back.
    for table_name, frame in load_order:
        frame.to_sql(table_name, con=conn, if_exists="append", index=False)

# Printed only after the `with` block has exited, i.e. after the commit.
print("All datasets loaded from S3 and inserted into PostgreSQL successfully.")