In [1]:
from pyspark.sql import SparkSession
from pyspark.sql.functions import col, count, countDistinct

# 创建Spark会话
spark = SparkSession.builder \
    .appName("Gold Layer - Initial Analysis") \
    .config("spark.driver.memory", "2g") \
    .config("spark.sql.execution.arrow.pyspark.enabled", "true") \
    .config("spark.serializer", "org.apache.spark.serializer.KryoSerializer") \
    .getOrCreate()

# 定义Silver层表路径
silver_tables = {
    "loan": "datamart/silver/silver_loan",
    "attributes": "datamart/silver/silver_attributes",
    "financials": "datamart/silver/silver_financials",
    "clickstream_detailed": "datamart/silver/silver_clickstream_detailed",
    "clickstream_aggregated": "datamart/silver/silver_clickstream_aggregated"
}

# 1. 加载所有Silver层表并检查结构
silver_dfs = {}
print("=== Silver层表结构检查 ===")

for table_name, path in silver_tables.items():
    try:
        df = spark.read.parquet(path)
        silver_dfs[table_name] = df
        
        print(f"\n{table_name}表结构:")
        print(f"- 行数: {df.count()}")
        print(f"- 列数: {len(df.columns)}")
        print(f"- 包含Customer_ID列: {'Customer_ID' in df.columns}")
        
        # 如果有Customer_ID列，检查唯一值
        if 'Customer_ID' in df.columns:
            unique_customers = df.select("Customer_ID").distinct().count()
            print(f"- 唯一Customer_ID数量: {unique_customers}")
    except Exception as e:
        print(f"读取{table_name}表时出错: {e}")

# 2. 检查所有表的Customer_ID交集
print("\n=== 检查Customer_ID交集 ===")

# 收集每个表的唯一Customer_ID集合
customer_sets = {}
for table_name, df in silver_dfs.items():
    if 'Customer_ID' in df.columns:
        # 将DataFrame转换为Python集合
        customer_ids = set([row.Customer_ID for row in df.select("Customer_ID").distinct().collect()])
        customer_sets[table_name] = customer_ids
        print(f"{table_name}表中的唯一客户数: {len(customer_ids)}")

# 计算所有表的交集
if customer_sets:
    all_customers = set.intersection(*customer_sets.values())
    print(f"\n所有表共有的唯一客户数: {len(all_customers)}")
    
    # 检查每个表中缺失的客户
    for table_name, customers in customer_sets.items():
        missing = len(customers - all_customers)
        if missing > 0:
            print(f"{table_name}表中有{missing}个客户在其他表中不存在")
else:
    print("没有表包含Customer_ID列")

# 3. 检查Gold层的潜在行数
if 'loan' in silver_dfs and 'Customer_ID' in silver_dfs['loan'].columns:
    # 贷款表通常是主表，因为它包含标签
    loan_df = silver_dfs['loan']
    
    # 检查每个客户的贷款记录数
    print("\n=== 贷款记录分析 ===")
    loans_per_customer = loan_df.groupBy("Customer_ID").count()
    print("每个客户的贷款记录数:")
    loans_per_customer.describe().show()
    
    # 检查最新的贷款记录
    print("\n检查贷款记录的snapshot_date分布:")
    loan_df.groupBy("snapshot_date").count().orderBy("snapshot_date").show(10)

=== Silver层表结构检查 ===

loan表结构:
- 行数: 137500
- 列数: 27
- 包含Customer_ID列: True
- 唯一Customer_ID数量: 12500

attributes表结构:
- 行数: 12500
- 列数: 11
- 包含Customer_ID列: True
- 唯一Customer_ID数量: 12500

financials表结构:
- 行数: 12500
- 列数: 35
- 包含Customer_ID列: True
- 唯一Customer_ID数量: 12500

clickstream_detailed表结构:
- 行数: 215376
- 列数: 71
- 包含Customer_ID列: True
- 唯一Customer_ID数量: 8974

clickstream_aggregated表结构:
- 行数: 8974
- 列数: 106
- 包含Customer_ID列: True
- 唯一Customer_ID数量: 8974

=== 检查Customer_ID交集 ===
loan表中的唯一客户数: 12500
attributes表中的唯一客户数: 12500
financials表中的唯一客户数: 12500
clickstream_detailed表中的唯一客户数: 8974
clickstream_aggregated表中的唯一客户数: 8974

所有表共有的唯一客户数: 8974
loan表中有3526个客户在其他表中不存在
attributes表中有3526个客户在其他表中不存在
financials表中有3526个客户在其他表中不存在

=== 贷款记录分析 ===
每个客户的贷款记录数:
+-------+-----------+-----+
|summary|Customer_ID|count|
+-------+-----------+-----+
|  count|      12500|12500|
|   mean|       NULL| 11.0|
| stddev|       NULL|  0.0|
|    min| CUS_0x1000|   11|
|    max|  CUS_0xffd|   11|
+-------+--------

In [4]:
from pyspark.sql import SparkSession
from pyspark.sql.functions import min, max, count, countDistinct, year, month

# 创建Spark会话
spark = SparkSession.builder \
    .appName("Silver Layer Time Analysis") \
    .config("spark.driver.memory", "2g") \
    .config("spark.sql.execution.arrow.pyspark.enabled", "true") \
    .config("spark.serializer", "org.apache.spark.serializer.KryoSerializer") \
    .getOrCreate()

# 定义Silver层表路径
silver_tables = {
    "loan": "datamart/silver/silver_loan",
    "attributes": "datamart/silver/silver_attributes",
    "financials": "datamart/silver/silver_financials",
    "clickstream_detailed": "datamart/silver/silver_clickstream_detailed",
    "clickstream_aggregated": "datamart/silver/silver_clickstream_aggregated"
}

print("=== Silver层表时间分析 ===\n")

# 加载所有Silver层表并分析时间字段
for table_name, path in silver_tables.items():
    try:
        df = spark.read.parquet(path)
        
        print(f"\n表: {table_name}")
        print(f"行数: {df.count()}")
        
        # 检查是否包含snapshot_date列
        if "snapshot_date" in df.columns:
            # 分析snapshot_date范围
            date_stats = df.select(
                min("snapshot_date").alias("最早日期"),
                max("snapshot_date").alias("最晚日期"),
                countDistinct("snapshot_date").alias("不同日期数")
            ).collect()[0]
            
            print(f"快照日期范围: {date_stats['最早日期']} 到 {date_stats['最晚日期']}")
            print(f"不同快照日期数: {date_stats['不同日期数']}")
            
            # 显示按年月的数据分布
            print("\n按年月的数据分布:")
            df.groupBy(
                year("snapshot_date").alias("年份"),
                month("snapshot_date").alias("月份")
            ).count().orderBy("年份", "月份").show(40)
        
        # 对于贷款数据，还要检查loan_start_date
        if "loan_start_date" in df.columns:
            loan_date_stats = df.select(
                min("loan_start_date").alias("最早贷款日期"),
                max("loan_start_date").alias("最晚贷款日期"),
                countDistinct("loan_start_date").alias("不同贷款日期数")
            ).collect()[0]
            
            print(f"\n贷款开始日期范围: {loan_date_stats['最早贷款日期']} 到 {loan_date_stats['最晚贷款日期']}")
            print(f"不同贷款开始日期数: {loan_date_stats['不同贷款日期数']}")
            
            # 显示按年月的贷款开始日期分布
            print("\n按年月的贷款开始日期分布:")
            df.groupBy(
                year("loan_start_date").alias("年份"),
                month("loan_start_date").alias("月份")
            ).count().orderBy("年份", "月份").show(24)
            
        # 对于点击流聚合数据，检查latest_snapshot_date
        if "latest_snapshot_date" in df.columns:
            latest_date_stats = df.select(
                min("latest_snapshot_date").alias("最早最新日期"),
                max("latest_snapshot_date").alias("最晚最新日期")
            ).collect()[0]
            
            print(f"\n最新快照日期范围: {latest_date_stats['最早最新日期']} 到 {latest_date_stats['最晚最新日期']}")
            
        # 检查silver_process_timestamp
        if "silver_process_timestamp" in df.columns:
            process_stats = df.select(
                min("silver_process_timestamp").alias("最早处理时间"),
                max("silver_process_timestamp").alias("最晚处理时间")
            ).collect()[0]
            
            print(f"\n处理时间范围: {process_stats['最早处理时间']} 到 {process_stats['最晚处理时间']}")
            
    except Exception as e:
        print(f"处理{table_name}表时出错: {e}")

=== Silver层表时间分析 ===


表: loan
行数: 137500
快照日期范围: 2023-01-01 到 2025-11-01
不同快照日期数: 35

按年月的数据分布:
+----+----+-----+
|年份|月份|count|
+----+----+-----+
|2023|   1|  530|
|2023|   2| 1031|
|2023|   3| 1537|
|2023|   4| 2047|
|2023|   5| 2568|
|2023|   6| 3085|
|2023|   7| 3556|
|2023|   8| 4037|
|2023|   9| 4491|
|2023|  10| 4978|
|2023|  11| 5469|
|2023|  12| 5428|
|2024|   1| 5412|
|2024|   2| 5424|
|2024|   3| 5425|
|2024|   4| 5417|
|2024|   5| 5391|
|2024|   6| 5418|
|2024|   7| 5442|
|2024|   8| 5531|
|2024|   9| 5537|
|2024|  10| 5502|
|2024|  11| 5501|
|2024|  12| 5531|
|2025|   1| 5539|
|2025|   2| 5028|
|2025|   3| 4515|
|2025|   4| 4024|
|2025|   5| 3526|
|2025|   6| 3021|
|2025|   7| 2478|
|2025|   8| 1985|
|2025|   9| 1529|
|2025|  10| 1041|
|2025|  11|  526|
+----+----+-----+


贷款开始日期范围: 2023-01-01 到 2025-01-01
不同贷款开始日期数: 25

按年月的贷款开始日期分布:
+----+----+-----+
|年份|月份|count|
+----+----+-----+
|2023|   1| 5830|
|2023|   2| 5511|
|2023|   3| 5566|
|2023|   4| 5610|
|2023|   5| 5731|


In [2]:
from pyspark.sql.functions import col, countDistinct

# 加载Bronze层数据
bronze_loan = spark.read.parquet("datamart/bronze/bronze_loan_daily")
bronze_attributes = spark.read.parquet("datamart/bronze/bronze_attributes")
bronze_financials = spark.read.parquet("datamart/bronze/bronze_financials")
bronze_clickstream = spark.read.parquet("datamart/bronze/bronze_clickstream")

# 分析Bronze层的客户ID
print("=== Bronze层客户ID分析 ===")
bronze_tables = {
    "loan": bronze_loan,
    "attributes": bronze_attributes,
    "financials": bronze_financials,
    "clickstream": bronze_clickstream
}

for name, df in bronze_tables.items():
    if "Customer_ID" in df.columns:
        unique_count = df.select("Customer_ID").distinct().count()
        print(f"{name}表中的唯一客户数: {unique_count}")

# 比较Bronze和Silver层的客户ID
print("\n=== Bronze与Silver层客户ID比较 ===")
# 收集Bronze层的唯一客户ID
bronze_customers = {}
for name, df in bronze_tables.items():
    if "Customer_ID" in df.columns:
        customer_ids = set([row.Customer_ID for row in df.select("Customer_ID").distinct().collect()])
        bronze_customers[name] = customer_ids

# 收集Silver层的唯一客户ID (使用已有的customer_sets)
for name in ["loan", "attributes", "financials", "clickstream_detailed"]:
    if name in customer_sets and name.replace("_detailed", "") in bronze_customers:
        bronze_name = name.replace("_detailed", "")
        bronze_count = len(bronze_customers[bronze_name])
        silver_count = len(customer_sets[name])
        print(f"{name}表: Bronze有{bronze_count}个客户, Silver有{silver_count}个客户")
        
        # 检查是否有客户在处理过程中丢失
        if bronze_count != silver_count:
            bronze_set = bronze_customers[bronze_name]
            silver_set = customer_sets[name]
            lost_customers = bronze_set - silver_set
            new_customers = silver_set - bronze_set
            
            print(f"  - 处理中丢失的客户数: {len(lost_customers)}")
            print(f"  - 处理中新增的客户数: {len(new_customers)}")
            
            # 显示一些示例
            if lost_customers:
                print(f"  - 丢失客户示例: {list(lost_customers)[:5]}")
            if new_customers:
                print(f"  - 新增客户示例: {list(new_customers)[:5]}")

# 检查原始数据中各表的客户ID交集
print("\n=== Bronze层客户ID交集 ===")
all_bronze_customers = set.intersection(*bronze_customers.values())
print(f"所有Bronze表共有的唯一客户数: {len(all_bronze_customers)}")

# 分析clickstream表特殊情况
print("\n=== Clickstream表特殊分析 ===")
# 检查clickstream表是否包含所有贷款表的客户
loan_customers = bronze_customers["loan"]
clickstream_customers = bronze_customers["clickstream"]

# 在贷款表中但不在clickstream表中的客户
loan_only = loan_customers - clickstream_customers
print(f"仅在贷款表中存在的客户数: {len(loan_only)}")

# 检查clickstream数据是否有特定的模式
if "snapshot_date" in bronze_clickstream.columns:
    print("\nClickstream表的snapshot_date分布:")
    bronze_clickstream.groupBy("snapshot_date").count().orderBy("snapshot_date").show(10)

# 检查bronze_clickstream中的客户记录分布
print("\nClickstream表中每个客户的记录数分布:")
bronze_clickstream.groupBy("Customer_ID").count().describe().show()

=== Bronze层客户ID分析 ===
loan表中的唯一客户数: 12500
attributes表中的唯一客户数: 12500
financials表中的唯一客户数: 12500
clickstream表中的唯一客户数: 8974

=== Bronze与Silver层客户ID比较 ===
loan表: Bronze有12500个客户, Silver有12500个客户
attributes表: Bronze有12500个客户, Silver有12500个客户
financials表: Bronze有12500个客户, Silver有12500个客户
clickstream_detailed表: Bronze有8974个客户, Silver有8974个客户

=== Bronze层客户ID交集 ===
所有Bronze表共有的唯一客户数: 8974

=== Clickstream表特殊分析 ===
仅在贷款表中存在的客户数: 3526

Clickstream表的snapshot_date分布:
+-------------+-----+
|snapshot_date|count|
+-------------+-----+
|   2023-01-01| 8974|
|   2023-02-01| 8974|
|   2023-03-01| 8974|
|   2023-04-01| 8974|
|   2023-05-01| 8974|
|   2023-06-01| 8974|
|   2023-07-01| 8974|
|   2023-08-01| 8974|
|   2023-09-01| 8974|
|   2023-10-01| 8974|
+-------------+-----+
only showing top 10 rows


Clickstream表中每个客户的记录数分布:
+-------+-----------+-----+
|summary|Customer_ID|count|
+-------+-----------+-----+
|  count|       8974| 8974|
|   mean|       NULL| 24.0|
| stddev|       NULL|  0.0|
|    min| CUS

In [11]:
from pyspark.sql.functions import col, lit, coalesce

# 1. 准备基础表：贷款表（包含标签）
print("1. 准备标签数据")

# 加载最新的贷款数据（为每个客户选择最新的记录）
loan_df = silver_dfs["loan"]

# 为每个客户找到最新的贷款记录
from pyspark.sql.functions import max as sql_max
from pyspark.sql.window import Window

# 找出每个客户的最新快照日期
latest_snapshot_window = Window.partitionBy("Customer_ID")
latest_snapshots = loan_df.groupBy("Customer_ID") \
    .agg(sql_max("snapshot_date").alias("latest_snapshot"))

# 将最新快照日期与贷款数据合并
latest_loan_records = loan_df.join(
    latest_snapshots,
    (loan_df["Customer_ID"] == latest_snapshots["Customer_ID"]) & 
    (loan_df["snapshot_date"] == latest_snapshots["latest_snapshot"]),
    "inner"
).drop(latest_snapshots["Customer_ID"])

# 查看最新贷款记录的分布
print("最新贷款记录的违约分布:")
latest_loan_records.groupBy("is_default").count().show()

# 保留需要的列作为标签数据
label_columns = ["Customer_ID", "is_default", "payment_status", 
                "consecutive_missed_payments", "snapshot_date"]
label_data = latest_loan_records.select(label_columns)

# 查看标签数据
print("\n标签数据示例:")
label_data.show(5)
print(f"标签数据行数: {label_data.count()}")

# 2. 获取每个表的唯一客户记录
print("\n2. 准备特征数据")

# 属性数据 - 每个客户一条记录
attributes_features = silver_dfs["attributes"]
print(f"属性特征行数: {attributes_features.count()}")

# 财务数据 - 每个客户一条记录
financials_features = silver_dfs["financials"]
print(f"财务特征行数: {financials_features.count()}")

# 点击流聚合数据 - 每个客户一条记录
clickstream_features = silver_dfs["clickstream_aggregated"]
print(f"点击流特征行数: {clickstream_features.count()}")

# 3. 合并所有特征表
print("\n3. 合并特征表")

# 先合并三个有所有客户的表
base_features = label_data.join(
    attributes_features.drop("snapshot_date", "bronze_ingest_timestamp", "silver_process_timestamp"),
    "Customer_ID",
    "left"
).join(
    financials_features.drop("snapshot_date", "bronze_ingest_timestamp", "silver_process_timestamp"),
    "Customer_ID",
    "left"
)

# 查看基本特征
print(f"基本特征表行数: {base_features.count()}")
print(f"基本特征表列数: {len(base_features.columns)}")

# 添加点击流特征，使用左连接保留所有客户
full_features = base_features.join(
    clickstream_features.drop("silver_process_timestamp"),
    "Customer_ID",
    "left"
)

# 查看完整特征
print(f"完整特征表行数: {full_features.count()}")
print(f"完整特征表列数: {len(full_features.columns)}")

# 检查是否有客户缺少点击流数据
missing_clickstream = full_features.filter(col("fe_1_mean").isNull()).count()
print(f"缺少点击流数据的客户数: {missing_clickstream}")

# 4. 创建缺失数据标志
print("\n4. 处理缺失数据")

full_features = full_features.withColumn(
    "has_clickstream_data", 
    col("fe_1_mean").isNotNull()
)

# 统计有无点击流数据的客户分布
print("有无点击流数据的客户分布:")
full_features.groupBy("has_clickstream_data").count().show()

# 保存初步合并的特征表用于进一步处理
full_features.write.mode("overwrite").parquet("datamart/gold/gold_features_raw")
print("\n原始金特征表已保存到: datamart/gold/gold_features_raw")

1. 准备标签数据
最新贷款记录的违约分布:
+----------+-----+
|is_default|count|
+----------+-----+
|      true| 3602|
|     false| 8898|
+----------+-----+


标签数据示例:
+-----------+----------+----------------+---------------------------+-------------+
|Customer_ID|is_default|  payment_status|consecutive_missed_payments|snapshot_date|
+-----------+----------+----------------+---------------------------+-------------+
| CUS_0x1ed5|      true|SEVERELY_OVERDUE|                          9|   2025-02-01|
| CUS_0x33d2|     false|    PAID_ON_TIME|                          0|   2024-11-01|
| CUS_0x36ab|     false|    PAID_ON_TIME|                          0|   2025-11-01|
| CUS_0x3e17|      true|SEVERELY_OVERDUE|                          4|   2023-12-01|
| CUS_0x3f38|     false|    PAID_ON_TIME|                          0|   2024-11-01|
+-----------+----------+----------------+---------------------------+-------------+
only showing top 5 rows

标签数据行数: 12500

2. 准备特征数据
属性特征行数: 12500
财务特征行数: 12500
点击流特征行数: 8974

3. 合

In [37]:
# 继续之前的分析
# 分析客户ID差异
print("=== 客户ID差异分析 ===")

# 提取各表中的客户ID集合
loan_customers = set(customer_sets['loan'])
clickstream_customers = set(customer_sets['clickstream_aggregated'])

# 计算在贷款表中但不在点击流表中的客户
missing_in_clickstream = loan_customers - clickstream_customers
print(f"在贷款表中但不在点击流表中的客户数: {len(missing_in_clickstream)}")

# 显示部分缺失的客户ID
if missing_in_clickstream:
    print("缺失客户ID示例:")
    for cust_id in list(missing_in_clickstream)[:5]:
        print(f"- {cust_id}")

# 检查这些缺失的客户在贷款表中的特点
if missing_in_clickstream and 'loan' in silver_dfs:
    print("\n缺失客户在贷款表中的特点:")
    
    # 创建缺失客户ID的列表(转换为list才能在filter中使用)
    missing_customer_list = list(missing_in_clickstream)
    
    # 过滤出这些客户的贷款记录
    missing_customer_loans = silver_dfs['loan'].filter(col("Customer_ID").isin(missing_customer_list))
    
    # 统计这些贷款的特点 - 是否存在模式?
    print(f"缺失客户的贷款记录数: {missing_customer_loans.count()}")
    
    # 检查这些客户的贷款开始日期分布
    print("\n缺失客户的贷款开始日期分布:")
    missing_customer_loans.groupBy("loan_start_date") \
        .count() \
        .orderBy("loan_start_date") \
        .show(25)
    
    # 检查这些客户的违约情况
    print("\n缺失客户的违约情况:")
    missing_customer_loans.groupBy("is_default") \
        .count() \
        .show()

# 检查点击流表的时间范围
if 'clickstream_detailed' in silver_dfs:
    print("\n点击流表的快照日期范围:")
    silver_dfs['clickstream_detailed'].groupBy("snapshot_date") \
        .count() \
        .orderBy("snapshot_date") \
        .show(25)

=== 客户ID差异分析 ===
在贷款表中但不在点击流表中的客户数: 3526
缺失客户ID示例:
- CUS_0x6e81
- CUS_0x38df
- CUS_0x5ec2
- CUS_0x1600
- CUS_0xb9fd

缺失客户在贷款表中的特点:
缺失客户的贷款记录数: 38786

缺失客户的贷款开始日期分布:
+---------------+-----+
|loan_start_date|count|
+---------------+-----+
|     2024-07-01| 5555|
|     2024-08-01| 5973|
|     2024-09-01| 5423|
|     2024-10-01| 5016|
|     2024-11-01| 5368|
|     2024-12-01| 5665|
|     2025-01-01| 5786|
+---------------+-----+


缺失客户的违约情况:
+----------+-----+
|is_default|count|
+----------+-----+
|      true| 5542|
|     false|33244|
+----------+-----+


点击流表的快照日期范围:
+-------------+-----+
|snapshot_date|count|
+-------------+-----+
|   2023-01-01| 8974|
|   2023-02-01| 8974|
|   2023-03-01| 8974|
|   2023-04-01| 8974|
|   2023-05-01| 8974|
|   2023-06-01| 8974|
|   2023-07-01| 8974|
|   2023-08-01| 8974|
|   2023-09-01| 8974|
|   2023-10-01| 8974|
|   2023-11-01| 8974|
|   2023-12-01| 8974|
|   2024-01-01| 8974|
|   2024-02-01| 8974|
|   2024-03-01| 8974|
|   2024-04-01| 8974|
|   2024-05

In [22]:
from pyspark.sql.functions import lit, coalesce

# 步骤1: 创建基础客户表，包含所有12,500个客户
print("创建Gold层基础客户表...")

# 从属性表中获取所有客户ID（包含所有12,500个客户）
base_customers = silver_dfs['attributes'].select("Customer_ID")
print(f"基础客户表中的客户数: {base_customers.count()}")

# 标记哪些客户有完整数据
clickstream_customers_df = silver_dfs['clickstream_aggregated'].select("Customer_ID").distinct()

# 将客户表与点击流客户表左外连接，创建标志字段
base_customers_with_flag = base_customers.join(
    clickstream_customers_df,
    "Customer_ID",
    "left"
).withColumn(
    "has_clickstream_data",
    clickstream_customers_df.Customer_ID.isNotNull()
)

# 检查标志分布
print("\n点击流数据可用性分布:")
base_customers_with_flag.groupBy("has_clickstream_data").count().show()

# 这个表将作为我们Gold层合并的基础
print("\n基础客户表示例:")
base_customers_with_flag.show(10)

创建Gold层基础客户表...
基础客户表中的客户数: 12500

点击流数据可用性分布:
+--------------------+-----+
|has_clickstream_data|count|
+--------------------+-----+
|                true| 8974|
|               false| 3526|
+--------------------+-----+


基础客户表示例:
+-----------+--------------------+
|Customer_ID|has_clickstream_data|
+-----------+--------------------+
| CUS_0x1000|                true|
| CUS_0x1009|               false|
| CUS_0x100b|                true|
| CUS_0x1011|                true|
| CUS_0x1013|                true|
| CUS_0x1015|                true|
| CUS_0x1018|                true|
| CUS_0x1026|                true|
| CUS_0x102d|                true|
| CUS_0x102e|                true|
+-----------+--------------------+
only showing top 10 rows



In [24]:
from pyspark.sql.functions import col, when, coalesce

# 步骤2: 合并客户的属性和财务数据
print("合并客户属性和财务数据...")

# 获取客户属性数据（从silver_attributes表）
attributes_df = silver_dfs['attributes'].select(
    "Customer_ID",
    col("age"),
    col("occupation"),
    col("is_valid_age")
)

# 获取客户财务数据（从silver_financials表）
# 选择最重要的财务特征
financials_df = silver_dfs['financials'].select(
    "Customer_ID",
    col("annual_income"),
    col("Monthly_Inhand_Salary"),
    col("Num_Bank_Accounts"),
    col("Num_Credit_Card"),
    col("Interest_Rate"),
    col("num_of_loans"),
    col("Credit_Utilization_Ratio"),
    col("credit_history_total_months"),
    col("outstanding_debt"),
    col("num_delayed_payments"),
    col("credit_mix"),
    col("payment_behavior_spent_level"),
    col("payment_behavior_value_level")
)

# 合并属性和财务数据
customer_profile_df = base_customers_with_flag.join(
    attributes_df, 
    "Customer_ID", 
    "left"
).join(
    financials_df, 
    "Customer_ID", 
    "left"
)

# 查看合并结果
print("\n客户属性和财务数据合并结果:")
print(f"行数: {customer_profile_df.count()}")
print(f"列数: {len(customer_profile_df.columns)}")
customer_profile_df.printSchema()

# 更简单的方式检查缺失值
print("\n检查缺失值比例:")
for column in customer_profile_df.columns:
    null_count = customer_profile_df.filter(col(column).isNull()).count()
    total = customer_profile_df.count()
    if null_count > 0:
        print(f"{column}: {null_count} 缺失值 ({null_count/total*100:.2f}%)")

# 显示合并后的数据示例
print("\n合并后的客户资料示例:")
customer_profile_df.show(5)

合并客户属性和财务数据...

客户属性和财务数据合并结果:
行数: 12500
列数: 18
root
 |-- Customer_ID: string (nullable = true)
 |-- has_clickstream_data: boolean (nullable = false)
 |-- age: integer (nullable = true)
 |-- occupation: string (nullable = true)
 |-- is_valid_age: boolean (nullable = true)
 |-- annual_income: double (nullable = true)
 |-- Monthly_Inhand_Salary: double (nullable = true)
 |-- Num_Bank_Accounts: integer (nullable = true)
 |-- Num_Credit_Card: integer (nullable = true)
 |-- Interest_Rate: integer (nullable = true)
 |-- num_of_loans: integer (nullable = true)
 |-- Credit_Utilization_Ratio: double (nullable = true)
 |-- credit_history_total_months: integer (nullable = true)
 |-- outstanding_debt: double (nullable = true)
 |-- num_delayed_payments: integer (nullable = true)
 |-- credit_mix: string (nullable = true)
 |-- payment_behavior_spent_level: string (nullable = true)
 |-- payment_behavior_value_level: string (nullable = true)


检查缺失值比例:
age: 319 缺失值 (2.55%)

合并后的客户资料示例:
+-----------+---

In [25]:
# 步骤3: 添加聚合的点击流数据
print("添加点击流数据...")

# 获取点击流聚合数据
# 从众多点击流特征中选择最有代表性的几个
clickstream_agg_df = silver_dfs['clickstream_aggregated'].select(
    "Customer_ID",
    # 为每个特征选择均值、标准差和异常值计数
    # 这里只使用前5个特征作为示例
    *[col(f"fe_{i}_mean") for i in range(1, 6)],
    *[col(f"fe_{i}_stddev") for i in range(1, 6)],
    *[col(f"fe_{i}_outlier_count") for i in range(1, 6)],
    col("record_count")
)

# 将点击流数据左连接到客户资料表
customer_profile_with_clicks_df = customer_profile_df.join(
    clickstream_agg_df, 
    "Customer_ID", 
    "left"
)

# 处理缺失的点击流特征
# 对于没有点击流数据的客户，我们可以使用特殊值填充
processed_df = customer_profile_with_clicks_df

# 填充策略: 使用0填充缺失的点击流特征
for feature in clickstream_agg_df.columns:
    if feature != "Customer_ID":
        processed_df = processed_df.withColumn(
            feature,
            coalesce(col(feature), lit(0))  # 如果值为NULL，使用0填充
        )

# 添加表明特征来源的元数据
processed_df = processed_df.withColumn(
    "clickstream_features_imputed", 
    when(col("has_clickstream_data") == False, True).otherwise(False)
)

# 查看处理后的结果
print("\n添加点击流数据后的客户资料:")
print(f"行数: {processed_df.count()}")
print(f"列数: {len(processed_df.columns)}")

# 检查填充后的缺失值
print("\n检查填充后的缺失值比例:")
for column in processed_df.columns:
    null_count = processed_df.filter(col(column).isNull()).count()
    total = processed_df.count()
    if null_count > 0:
        print(f"{column}: {null_count} 缺失值 ({null_count/total*100:.2f}%)")

# 查看包含点击流数据的样本和不包含点击流数据的样本
print("\n有点击流数据的客户示例:")
processed_df.filter(col("has_clickstream_data") == True).show(3)

print("\n没有点击流数据的客户示例:")
processed_df.filter(col("has_clickstream_data") == False).show(3)

添加点击流数据...

添加点击流数据后的客户资料:
行数: 12500
列数: 35

检查填充后的缺失值比例:
age: 319 缺失值 (2.55%)

有点击流数据的客户示例:
+-----------+--------------------+---+-------------+------------+-------------+---------------------+-----------------+---------------+-------------+------------+------------------------+---------------------------+----------------+--------------------+----------+----------------------------+----------------------------+------------------+------------------+------------------+------------------+------------------+------------------+------------------+------------------+------------------+------------------+------------------+------------------+------------------+------------------+------------------+------------+----------------------------+
|Customer_ID|has_clickstream_data|age|   occupation|is_valid_age|annual_income|Monthly_Inhand_Salary|Num_Bank_Accounts|Num_Credit_Card|Interest_Rate|num_of_loans|Credit_Utilization_Ratio|credit_history_total_months|outstanding_debt|num_delayed_payments|cred

In [44]:
from pyspark.sql.functions import col, when, coalesce, avg, min, max, sum as sum_col, countDistinct, current_timestamp, lit

# 步骤4: 添加贷款特征和创建标签
print("添加贷款特征和创建标签...")

# 为每个客户计算贷款汇总统计
# 选择最新的快照日期进行分析
loan_summary = silver_dfs['loan'].groupBy("Customer_ID").agg(
    # 计算贷款总数
    countDistinct("loan_id").alias("total_loans"),
    # 计算违约贷款数量 - 使用PySpark的sum_col而不是Python的sum
    sum_col(when(col("is_default") == True, 1).otherwise(0)).alias("num_defaulted_loans"),
    # 计算平均逾期次数
    avg("consecutive_missed_payments").alias("avg_missed_payments"),
    # 计算最大连续逾期次数
    max("consecutive_missed_payments").alias("max_missed_payments"),
    # 计算平均已付款百分比
    avg("percent_paid").alias("avg_percent_paid"),
    # 获取最大逾期金额
    max("overdue_amt").alias("max_overdue_amt"),
    # 获取最小账户余额
    min("balance").alias("min_balance")
)

# 将贷款汇总统计添加到客户资料表
gold_features_df = processed_df.join(
    loan_summary, 
    "Customer_ID", 
    "left"
)

# 创建目标变量(标签) - 是否有违约贷款
gold_features_df = gold_features_df.withColumn(
    "has_defaulted", 
    col("num_defaulted_loans") > 0
)

# 创建更全面的违约风险评分 (0-100之间的值)
gold_features_df = gold_features_df.withColumn(
    "default_risk_score",
    (
        # 基础分数 - 根据违约状态设置基础分数
        when(col("num_defaulted_loans") > 0, 50).otherwise(0) +
        
        # 违约历史因素 (0-20分)
        (col("num_defaulted_loans") * 5).cast("int") + 
        
        # 逾期行为因素 (0-15分)
        (col("max_missed_payments") * 1.5).cast("int") + 
        
        # 债务负担因素 (0-10分)
        when(
            (col("outstanding_debt") / col("annual_income") * 100) > 50, 10
        ).when(
            (col("outstanding_debt") / col("annual_income") * 100) > 30, 6
        ).when(
            (col("outstanding_debt") / col("annual_income") * 100) > 10, 3
        ).otherwise(0) +
        
        # 信用评级因素 (0-10分)
        when(col("credit_mix") == "BAD", 10)
        .when(col("credit_mix") == "STANDARD", 5)
        .when(col("credit_mix") == "GOOD", 0)
        .otherwise(5) +
        
        # 信用历史因素 (0-5分) - 短历史加分，长历史减分
        when(col("credit_history_total_months") < 120, 5)  # 10年以下
        .when(col("credit_history_total_months") < 240, 3)  # 20年以下
        .otherwise(0) +
        
        # 支付行为因素 (0-5分)
        when(col("payment_behavior_spent_level") == "HIGH", 3).otherwise(0) +
        when(col("payment_behavior_value_level") == "LARGE", 2).otherwise(0) +
        
        # 收入因素 (0-5分) - 低收入加分
        when(col("annual_income") < 50000, 5)
        .when(col("annual_income") < 100000, 3)
        .otherwise(0) +
        
        # 贷款数量因素 (0-5分)
        when(col("total_loans") > 3, 5)
        .when(col("total_loans") > 1, 3)
        .otherwise(0) +
        
        # 逾期金额因素 (0-5分)
        when(col("max_overdue_amt") > 5000, 5)
        .when(col("max_overdue_amt") > 1000, 3)
        .when(col("max_overdue_amt") > 0, 1)
        .otherwise(0)
        
    ).cast("int")  # 转换为整数
)

# 确保评分在0-100范围内
gold_features_df = gold_features_df.withColumn(
    "default_risk_score",
    when(col("default_risk_score") > 200, 200)  # 如果超过100，设为100
    .when(col("default_risk_score") < 0, 0)     # 如果小于0，设为0
    .otherwise(col("default_risk_score"))
)
# 添加最终处理时间戳
gold_features_df = gold_features_df.withColumn(
    "gold_process_timestamp", 
    current_timestamp()
)

# 查看最终的Gold层特征表
print("\n最终Gold层特征表:")
print(f"行数: {gold_features_df.count()}")
print(f"列数: {len(gold_features_df.columns)}")

# 显示特征列和标签列
print("\nGold层特征和标签列:")
gold_features_df.select(
    "Customer_ID", 
    "has_clickstream_data",
    "age", 
    "annual_income", 
    "credit_history_total_months",
    "clickstream_features_imputed", 
    "num_defaulted_loans", 
    "max_missed_payments",
    "has_defaulted", 
    "default_risk_score"
).show(20)

# 分析标签分布
print("\n标签分布:")
gold_features_df.groupBy("has_defaulted").count().show()

# 风险评分分布
print("\n风险评分分布:")
gold_features_df.select("default_risk_score").summary("min", "25%", "50%", "75%", "max").show()

# 保存Gold层特征表
gold_features_df.write.mode("overwrite").parquet("datamart/gold/gold_features")
print("\nGold层特征表已保存到: datamart/gold/gold_features")

添加贷款特征和创建标签...

最终Gold层特征表:
行数: 12500
列数: 45

Gold层特征和标签列:
+-----------+--------------------+---+------------------+---------------------------+----------------------------+-------------------+-------------------+-------------+------------------+
|Customer_ID|has_clickstream_data|age|     annual_income|credit_history_total_months|clickstream_features_imputed|num_defaulted_loans|max_missed_payments|has_defaulted|default_risk_score|
+-----------+--------------------+---+------------------+---------------------------+----------------------------+-------------------+-------------------+-------------+------------------+
| CUS_0x1000|                true| 18|          30625.94|                        129|                       false|                  4|                  6|         true|               105|
| CUS_0x1009|               false| 26|          52312.68|                        372|                        true|                  0|                  0|        false|                11|
|

AnalysisException: [DATATYPE_MISMATCH.DATA_DIFF_TYPES] Cannot resolve "coalesce(latest_snapshot_date, 0)" due to data type mismatch: Input to `coalesce` should all be the same type, but it's ("DATE" or "INT").;
'Project [Customer_ID#27088, has_clickstream_data#43198, Name#43213, snapshot_date#43214, age#43215, is_valid_age#43216, SSN#43217, is_valid_ssn#43218, occupation#43219, data_age_months#43220, bronze_ingest_timestamp#43221, silver_process_timestamp#43222, snapshot_date#27135, annual_income#27136, Monthly_Inhand_Salary#27137, Num_Bank_Accounts#27138, Num_Credit_Card#27139, Interest_Rate#27140, num_of_loans#27141, loan_types_count#27142, has_personal_loan#27143, has_student_loan#27144, has_mortgage_loan#27145, has_auto_loan#27146, ... 127 more fields]
+- Project [Customer_ID#27088, has_clickstream_data#43198, Name#43213, snapshot_date#43214, age#43215, is_valid_age#43216, SSN#43217, is_valid_ssn#43218, occupation#43219, data_age_months#43220, bronze_ingest_timestamp#43221, silver_process_timestamp#43222, snapshot_date#27135, annual_income#27136, Monthly_Inhand_Salary#27137, Num_Bank_Accounts#27138, Num_Credit_Card#27139, Interest_Rate#27140, num_of_loans#27141, loan_types_count#27142, has_personal_loan#27143, has_student_loan#27144, has_mortgage_loan#27145, has_auto_loan#27146, ... 127 more fields]
   +- Project [Customer_ID#27088, has_clickstream_data#43198, Name#43213, snapshot_date#43214, age#43215, is_valid_age#43216, SSN#43217, is_valid_ssn#43218, occupation#43219, data_age_months#43220, bronze_ingest_timestamp#43221, silver_process_timestamp#43222, snapshot_date#27135, annual_income#27136, Monthly_Inhand_Salary#27137, Num_Bank_Accounts#27138, Num_Credit_Card#27139, Interest_Rate#27140, num_of_loans#27141, loan_types_count#27142, has_personal_loan#27143, has_student_loan#27144, has_mortgage_loan#27145, has_auto_loan#27146, ... 127 more fields]
      +- Project [Customer_ID#27088, has_clickstream_data#43198, Name#43213, snapshot_date#43214, age#43215, is_valid_age#43216, SSN#43217, is_valid_ssn#43218, occupation#43219, data_age_months#43220, bronze_ingest_timestamp#43221, silver_process_timestamp#43222, snapshot_date#27135, annual_income#27136, Monthly_Inhand_Salary#27137, Num_Bank_Accounts#27138, Num_Credit_Card#27139, Interest_Rate#27140, num_of_loans#27141, loan_types_count#27142, has_personal_loan#27143, has_student_loan#27144, has_mortgage_loan#27145, has_auto_loan#27146, ... 127 more fields]
         +- Project [Customer_ID#27088, has_clickstream_data#43198, Name#43213, snapshot_date#43214, age#43215, is_valid_age#43216, SSN#43217, is_valid_ssn#43218, occupation#43219, data_age_months#43220, bronze_ingest_timestamp#43221, silver_process_timestamp#43222, snapshot_date#27135, annual_income#27136, Monthly_Inhand_Salary#27137, Num_Bank_Accounts#27138, Num_Credit_Card#27139, Interest_Rate#27140, num_of_loans#27141, loan_types_count#27142, has_personal_loan#27143, has_student_loan#27144, has_mortgage_loan#27145, has_auto_loan#27146, ... 127 more fields]
            +- Project [Customer_ID#27088, has_clickstream_data#43198, Name#43213, snapshot_date#43214, age#43215, is_valid_age#43216, SSN#43217, is_valid_ssn#43218, occupation#43219, data_age_months#43220, bronze_ingest_timestamp#43221, silver_process_timestamp#43222, snapshot_date#27135, annual_income#27136, Monthly_Inhand_Salary#27137, Num_Bank_Accounts#27138, Num_Credit_Card#27139, Interest_Rate#27140, num_of_loans#27141, loan_types_count#27142, has_personal_loan#27143, has_student_loan#27144, has_mortgage_loan#27145, has_auto_loan#27146, ... 127 more fields]
               +- Project [Customer_ID#27088, has_clickstream_data#43198, Name#43213, snapshot_date#43214, age#43215, is_valid_age#43216, SSN#43217, is_valid_ssn#43218, occupation#43219, data_age_months#43220, bronze_ingest_timestamp#43221, silver_process_timestamp#43222, snapshot_date#27135, annual_income#27136, Monthly_Inhand_Salary#27137, Num_Bank_Accounts#27138, Num_Credit_Card#27139, Interest_Rate#27140, num_of_loans#27141, loan_types_count#27142, has_personal_loan#27143, has_student_loan#27144, has_mortgage_loan#27145, has_auto_loan#27146, ... 127 more fields]
                  +- Project [Customer_ID#27088, has_clickstream_data#43198, Name#43213, snapshot_date#43214, age#43215, is_valid_age#43216, SSN#43217, is_valid_ssn#43218, occupation#43219, data_age_months#43220, bronze_ingest_timestamp#43221, silver_process_timestamp#43222, snapshot_date#27135, annual_income#27136, Monthly_Inhand_Salary#27137, Num_Bank_Accounts#27138, Num_Credit_Card#27139, Interest_Rate#27140, num_of_loans#27141, loan_types_count#27142, has_personal_loan#27143, has_student_loan#27144, has_mortgage_loan#27145, has_auto_loan#27146, ... 127 more fields]
                     +- Project [Customer_ID#27088, has_clickstream_data#43198, Name#43213, snapshot_date#43214, age#43215, is_valid_age#43216, SSN#43217, is_valid_ssn#43218, occupation#43219, data_age_months#43220, bronze_ingest_timestamp#43221, silver_process_timestamp#43222, snapshot_date#27135, annual_income#27136, Monthly_Inhand_Salary#27137, Num_Bank_Accounts#27138, Num_Credit_Card#27139, Interest_Rate#27140, num_of_loans#27141, loan_types_count#27142, has_personal_loan#27143, has_student_loan#27144, has_mortgage_loan#27145, has_auto_loan#27146, ... 127 more fields]
                        +- Project [Customer_ID#27088, has_clickstream_data#43198, Name#43213, snapshot_date#43214, age#43215, is_valid_age#43216, SSN#43217, is_valid_ssn#43218, occupation#43219, data_age_months#43220, bronze_ingest_timestamp#43221, silver_process_timestamp#43222, snapshot_date#27135, annual_income#27136, Monthly_Inhand_Salary#27137, Num_Bank_Accounts#27138, Num_Credit_Card#27139, Interest_Rate#27140, num_of_loans#27141, loan_types_count#27142, has_personal_loan#27143, has_student_loan#27144, has_mortgage_loan#27145, has_auto_loan#27146, ... 127 more fields]
                           +- Project [Customer_ID#27088, has_clickstream_data#43198, Name#43213, snapshot_date#43214, age#43215, is_valid_age#43216, SSN#43217, is_valid_ssn#43218, occupation#43219, data_age_months#43220, bronze_ingest_timestamp#43221, silver_process_timestamp#43222, snapshot_date#27135, annual_income#27136, Monthly_Inhand_Salary#27137, Num_Bank_Accounts#27138, Num_Credit_Card#27139, Interest_Rate#27140, num_of_loans#27141, loan_types_count#27142, has_personal_loan#27143, has_student_loan#27144, has_mortgage_loan#27145, has_auto_loan#27146, ... 127 more fields]
                              +- Project [Customer_ID#27088, has_clickstream_data#43198, Name#43213, snapshot_date#43214, age#43215, is_valid_age#43216, SSN#43217, is_valid_ssn#43218, occupation#43219, data_age_months#43220, bronze_ingest_timestamp#43221, silver_process_timestamp#43222, snapshot_date#27135, annual_income#27136, Monthly_Inhand_Salary#27137, Num_Bank_Accounts#27138, Num_Credit_Card#27139, Interest_Rate#27140, num_of_loans#27141, loan_types_count#27142, has_personal_loan#27143, has_student_loan#27144, has_mortgage_loan#27145, has_auto_loan#27146, ... 127 more fields]
                                 +- Project [Customer_ID#27088, has_clickstream_data#43198, Name#43213, snapshot_date#43214, age#43215, is_valid_age#43216, SSN#43217, is_valid_ssn#43218, occupation#43219, data_age_months#43220, bronze_ingest_timestamp#43221, silver_process_timestamp#43222, snapshot_date#27135, annual_income#27136, Monthly_Inhand_Salary#27137, Num_Bank_Accounts#27138, Num_Credit_Card#27139, Interest_Rate#27140, num_of_loans#27141, loan_types_count#27142, has_personal_loan#27143, has_student_loan#27144, has_mortgage_loan#27145, has_auto_loan#27146, ... 127 more fields]
                                    +- Project [Customer_ID#27088, has_clickstream_data#43198, Name#43213, snapshot_date#43214, age#43215, is_valid_age#43216, SSN#43217, is_valid_ssn#43218, occupation#43219, data_age_months#43220, bronze_ingest_timestamp#43221, silver_process_timestamp#43222, snapshot_date#27135, annual_income#27136, Monthly_Inhand_Salary#27137, Num_Bank_Accounts#27138, Num_Credit_Card#27139, Interest_Rate#27140, num_of_loans#27141, loan_types_count#27142, has_personal_loan#27143, has_student_loan#27144, has_mortgage_loan#27145, has_auto_loan#27146, ... 127 more fields]
                                       +- Project [Customer_ID#27088, has_clickstream_data#43198, Name#43213, snapshot_date#43214, age#43215, is_valid_age#43216, SSN#43217, is_valid_ssn#43218, occupation#43219, data_age_months#43220, bronze_ingest_timestamp#43221, silver_process_timestamp#43222, snapshot_date#27135, annual_income#27136, Monthly_Inhand_Salary#27137, Num_Bank_Accounts#27138, Num_Credit_Card#27139, Interest_Rate#27140, num_of_loans#27141, loan_types_count#27142, has_personal_loan#27143, has_student_loan#27144, has_mortgage_loan#27145, has_auto_loan#27146, ... 127 more fields]
                                          +- Project [Customer_ID#27088, has_clickstream_data#43198, Name#43213, snapshot_date#43214, age#43215, is_valid_age#43216, SSN#43217, is_valid_ssn#43218, occupation#43219, data_age_months#43220, bronze_ingest_timestamp#43221, silver_process_timestamp#43222, snapshot_date#27135, annual_income#27136, Monthly_Inhand_Salary#27137, Num_Bank_Accounts#27138, Num_Credit_Card#27139, Interest_Rate#27140, num_of_loans#27141, loan_types_count#27142, has_personal_loan#27143, has_student_loan#27144, has_mortgage_loan#27145, has_auto_loan#27146, ... 127 more fields]
                                             +- Project [Customer_ID#27088, has_clickstream_data#43198, Name#43213, snapshot_date#43214, age#43215, is_valid_age#43216, SSN#43217, is_valid_ssn#43218, occupation#43219, data_age_months#43220, bronze_ingest_timestamp#43221, silver_process_timestamp#43222, snapshot_date#27135, annual_income#27136, Monthly_Inhand_Salary#27137, Num_Bank_Accounts#27138, Num_Credit_Card#27139, Interest_Rate#27140, num_of_loans#27141, loan_types_count#27142, has_personal_loan#27143, has_student_loan#27144, has_mortgage_loan#27145, has_auto_loan#27146, ... 127 more fields]
                                                +- Project [Customer_ID#27088, has_clickstream_data#43198, Name#43213, snapshot_date#43214, age#43215, is_valid_age#43216, SSN#43217, is_valid_ssn#43218, occupation#43219, data_age_months#43220, bronze_ingest_timestamp#43221, silver_process_timestamp#43222, snapshot_date#27135, annual_income#27136, Monthly_Inhand_Salary#27137, Num_Bank_Accounts#27138, Num_Credit_Card#27139, Interest_Rate#27140, num_of_loans#27141, loan_types_count#27142, has_personal_loan#27143, has_student_loan#27144, has_mortgage_loan#27145, has_auto_loan#27146, ... 127 more fields]
                                                   +- Project [Customer_ID#27088, has_clickstream_data#43198, Name#43213, snapshot_date#43214, age#43215, is_valid_age#43216, SSN#43217, is_valid_ssn#43218, occupation#43219, data_age_months#43220, bronze_ingest_timestamp#43221, silver_process_timestamp#43222, snapshot_date#27135, annual_income#27136, Monthly_Inhand_Salary#27137, Num_Bank_Accounts#27138, Num_Credit_Card#27139, Interest_Rate#27140, num_of_loans#27141, loan_types_count#27142, has_personal_loan#27143, has_student_loan#27144, has_mortgage_loan#27145, has_auto_loan#27146, ... 127 more fields]
                                                      +- Project [Customer_ID#27088, has_clickstream_data#43198, Name#43213, snapshot_date#43214, age#43215, is_valid_age#43216, SSN#43217, is_valid_ssn#43218, occupation#43219, data_age_months#43220, bronze_ingest_timestamp#43221, silver_process_timestamp#43222, snapshot_date#27135, annual_income#27136, Monthly_Inhand_Salary#27137, Num_Bank_Accounts#27138, Num_Credit_Card#27139, Interest_Rate#27140, num_of_loans#27141, loan_types_count#27142, has_personal_loan#27143, has_student_loan#27144, has_mortgage_loan#27145, has_auto_loan#27146, ... 127 more fields]
                                                         +- Project [Customer_ID#27088, has_clickstream_data#43198, Name#43213, snapshot_date#43214, age#43215, is_valid_age#43216, SSN#43217, is_valid_ssn#43218, occupation#43219, data_age_months#43220, bronze_ingest_timestamp#43221, silver_process_timestamp#43222, snapshot_date#27135, annual_income#27136, Monthly_Inhand_Salary#27137, Num_Bank_Accounts#27138, Num_Credit_Card#27139, Interest_Rate#27140, num_of_loans#27141, loan_types_count#27142, has_personal_loan#27143, has_student_loan#27144, has_mortgage_loan#27145, has_auto_loan#27146, ... 127 more fields]
                                                            +- Project [Customer_ID#27088, has_clickstream_data#43198, Name#43213, snapshot_date#43214, age#43215, is_valid_age#43216, SSN#43217, is_valid_ssn#43218, occupation#43219, data_age_months#43220, bronze_ingest_timestamp#43221, silver_process_timestamp#43222, snapshot_date#27135, annual_income#27136, Monthly_Inhand_Salary#27137, Num_Bank_Accounts#27138, Num_Credit_Card#27139, Interest_Rate#27140, num_of_loans#27141, loan_types_count#27142, has_personal_loan#27143, has_student_loan#27144, has_mortgage_loan#27145, has_auto_loan#27146, ... 127 more fields]
                                                               +- Project [Customer_ID#27088, has_clickstream_data#43198, Name#43213, snapshot_date#43214, age#43215, is_valid_age#43216, SSN#43217, is_valid_ssn#43218, occupation#43219, data_age_months#43220, bronze_ingest_timestamp#43221, silver_process_timestamp#43222, snapshot_date#27135, annual_income#27136, Monthly_Inhand_Salary#27137, Num_Bank_Accounts#27138, Num_Credit_Card#27139, Interest_Rate#27140, num_of_loans#27141, loan_types_count#27142, has_personal_loan#27143, has_student_loan#27144, has_mortgage_loan#27145, has_auto_loan#27146, ... 127 more fields]
                                                                  +- Project [Customer_ID#27088, has_clickstream_data#43198, Name#43213, snapshot_date#43214, age#43215, is_valid_age#43216, SSN#43217, is_valid_ssn#43218, occupation#43219, data_age_months#43220, bronze_ingest_timestamp#43221, silver_process_timestamp#43222, snapshot_date#27135, annual_income#27136, Monthly_Inhand_Salary#27137, Num_Bank_Accounts#27138, Num_Credit_Card#27139, Interest_Rate#27140, num_of_loans#27141, loan_types_count#27142, has_personal_loan#27143, has_student_loan#27144, has_mortgage_loan#27145, has_auto_loan#27146, ... 127 more fields]
                                                                     +- Project [Customer_ID#27088, has_clickstream_data#43198, Name#43213, snapshot_date#43214, age#43215, is_valid_age#43216, SSN#43217, is_valid_ssn#43218, occupation#43219, data_age_months#43220, bronze_ingest_timestamp#43221, silver_process_timestamp#43222, snapshot_date#27135, annual_income#27136, Monthly_Inhand_Salary#27137, Num_Bank_Accounts#27138, Num_Credit_Card#27139, Interest_Rate#27140, num_of_loans#27141, loan_types_count#27142, has_personal_loan#27143, has_student_loan#27144, has_mortgage_loan#27145, has_auto_loan#27146, ... 127 more fields]
                                                                        +- Project [Customer_ID#27088, has_clickstream_data#43198, Name#43213, snapshot_date#43214, age#43215, is_valid_age#43216, SSN#43217, is_valid_ssn#43218, occupation#43219, data_age_months#43220, bronze_ingest_timestamp#43221, silver_process_timestamp#43222, snapshot_date#27135, annual_income#27136, Monthly_Inhand_Salary#27137, Num_Bank_Accounts#27138, Num_Credit_Card#27139, Interest_Rate#27140, num_of_loans#27141, loan_types_count#27142, has_personal_loan#27143, has_student_loan#27144, has_mortgage_loan#27145, has_auto_loan#27146, ... 127 more fields]
                                                                           +- Project [Customer_ID#27088, has_clickstream_data#43198, Name#43213, snapshot_date#43214, age#43215, is_valid_age#43216, SSN#43217, is_valid_ssn#43218, occupation#43219, data_age_months#43220, bronze_ingest_timestamp#43221, silver_process_timestamp#43222, snapshot_date#27135, annual_income#27136, Monthly_Inhand_Salary#27137, Num_Bank_Accounts#27138, Num_Credit_Card#27139, Interest_Rate#27140, num_of_loans#27141, loan_types_count#27142, has_personal_loan#27143, has_student_loan#27144, has_mortgage_loan#27145, has_auto_loan#27146, ... 127 more fields]
                                                                              +- Project [Customer_ID#27088, has_clickstream_data#43198, Name#43213, snapshot_date#43214, age#43215, is_valid_age#43216, SSN#43217, is_valid_ssn#43218, occupation#43219, data_age_months#43220, bronze_ingest_timestamp#43221, silver_process_timestamp#43222, snapshot_date#27135, annual_income#27136, Monthly_Inhand_Salary#27137, Num_Bank_Accounts#27138, Num_Credit_Card#27139, Interest_Rate#27140, num_of_loans#27141, loan_types_count#27142, has_personal_loan#27143, has_student_loan#27144, has_mortgage_loan#27145, has_auto_loan#27146, ... 127 more fields]
                                                                                 +- Project [Customer_ID#27088, has_clickstream_data#43198, Name#43213, snapshot_date#43214, age#43215, is_valid_age#43216, SSN#43217, is_valid_ssn#43218, occupation#43219, data_age_months#43220, bronze_ingest_timestamp#43221, silver_process_timestamp#43222, snapshot_date#27135, annual_income#27136, Monthly_Inhand_Salary#27137, Num_Bank_Accounts#27138, Num_Credit_Card#27139, Interest_Rate#27140, num_of_loans#27141, loan_types_count#27142, has_personal_loan#27143, has_student_loan#27144, has_mortgage_loan#27145, has_auto_loan#27146, ... 127 more fields]
                                                                                    +- Project [Customer_ID#27088, has_clickstream_data#43198, Name#43213, snapshot_date#43214, age#43215, is_valid_age#43216, SSN#43217, is_valid_ssn#43218, occupation#43219, data_age_months#43220, bronze_ingest_timestamp#43221, silver_process_timestamp#43222, snapshot_date#27135, annual_income#27136, Monthly_Inhand_Salary#27137, Num_Bank_Accounts#27138, Num_Credit_Card#27139, Interest_Rate#27140, num_of_loans#27141, loan_types_count#27142, has_personal_loan#27143, has_student_loan#27144, has_mortgage_loan#27145, has_auto_loan#27146, ... 127 more fields]
                                                                                       +- Project [Customer_ID#27088, has_clickstream_data#43198, Name#43213, snapshot_date#43214, age#43215, is_valid_age#43216, SSN#43217, is_valid_ssn#43218, occupation#43219, data_age_months#43220, bronze_ingest_timestamp#43221, silver_process_timestamp#43222, snapshot_date#27135, annual_income#27136, Monthly_Inhand_Salary#27137, Num_Bank_Accounts#27138, Num_Credit_Card#27139, Interest_Rate#27140, num_of_loans#27141, loan_types_count#27142, has_personal_loan#27143, has_student_loan#27144, has_mortgage_loan#27145, has_auto_loan#27146, ... 127 more fields]
                                                                                          +- Project [Customer_ID#27088, has_clickstream_data#43198, Name#43213, snapshot_date#43214, age#43215, is_valid_age#43216, SSN#43217, is_valid_ssn#43218, occupation#43219, data_age_months#43220, bronze_ingest_timestamp#43221, silver_process_timestamp#43222, snapshot_date#27135, annual_income#27136, Monthly_Inhand_Salary#27137, Num_Bank_Accounts#27138, Num_Credit_Card#27139, Interest_Rate#27140, num_of_loans#27141, loan_types_count#27142, has_personal_loan#27143, has_student_loan#27144, has_mortgage_loan#27145, has_auto_loan#27146, ... 127 more fields]
                                                                                             +- Project [Customer_ID#27088, has_clickstream_data#43198, Name#43213, snapshot_date#43214, age#43215, is_valid_age#43216, SSN#43217, is_valid_ssn#43218, occupation#43219, data_age_months#43220, bronze_ingest_timestamp#43221, silver_process_timestamp#43222, snapshot_date#27135, annual_income#27136, Monthly_Inhand_Salary#27137, Num_Bank_Accounts#27138, Num_Credit_Card#27139, Interest_Rate#27140, num_of_loans#27141, loan_types_count#27142, has_personal_loan#27143, has_student_loan#27144, has_mortgage_loan#27145, has_auto_loan#27146, ... 127 more fields]
                                                                                                +- Project [Customer_ID#27088, has_clickstream_data#43198, Name#43213, snapshot_date#43214, age#43215, is_valid_age#43216, SSN#43217, is_valid_ssn#43218, occupation#43219, data_age_months#43220, bronze_ingest_timestamp#43221, silver_process_timestamp#43222, snapshot_date#27135, annual_income#27136, Monthly_Inhand_Salary#27137, Num_Bank_Accounts#27138, Num_Credit_Card#27139, Interest_Rate#27140, num_of_loans#27141, loan_types_count#27142, has_personal_loan#27143, has_student_loan#27144, has_mortgage_loan#27145, has_auto_loan#27146, ... 127 more fields]
                                                                                                   +- Project [Customer_ID#27088, has_clickstream_data#43198, Name#43213, snapshot_date#43214, age#43215, is_valid_age#43216, SSN#43217, is_valid_ssn#43218, occupation#43219, data_age_months#43220, bronze_ingest_timestamp#43221, silver_process_timestamp#43222, snapshot_date#27135, annual_income#27136, Monthly_Inhand_Salary#27137, Num_Bank_Accounts#27138, Num_Credit_Card#27139, Interest_Rate#27140, num_of_loans#27141, loan_types_count#27142, has_personal_loan#27143, has_student_loan#27144, has_mortgage_loan#27145, has_auto_loan#27146, ... 127 more fields]
                                                                                                      +- Project [Customer_ID#27088, has_clickstream_data#43198, Name#43213, snapshot_date#43214, age#43215, is_valid_age#43216, SSN#43217, is_valid_ssn#43218, occupation#43219, data_age_months#43220, bronze_ingest_timestamp#43221, silver_process_timestamp#43222, snapshot_date#27135, annual_income#27136, Monthly_Inhand_Salary#27137, Num_Bank_Accounts#27138, Num_Credit_Card#27139, Interest_Rate#27140, num_of_loans#27141, loan_types_count#27142, has_personal_loan#27143, has_student_loan#27144, has_mortgage_loan#27145, has_auto_loan#27146, ... 127 more fields]
                                                                                                         +- Project [Customer_ID#27088, has_clickstream_data#43198, Name#43213, snapshot_date#43214, age#43215, is_valid_age#43216, SSN#43217, is_valid_ssn#43218, occupation#43219, data_age_months#43220, bronze_ingest_timestamp#43221, silver_process_timestamp#43222, snapshot_date#27135, annual_income#27136, Monthly_Inhand_Salary#27137, Num_Bank_Accounts#27138, Num_Credit_Card#27139, Interest_Rate#27140, num_of_loans#27141, loan_types_count#27142, has_personal_loan#27143, has_student_loan#27144, has_mortgage_loan#27145, has_auto_loan#27146, ... 127 more fields]
                                                                                                            +- Project [Customer_ID#27088, has_clickstream_data#43198, Name#43213, snapshot_date#43214, age#43215, is_valid_age#43216, SSN#43217, is_valid_ssn#43218, occupation#43219, data_age_months#43220, bronze_ingest_timestamp#43221, silver_process_timestamp#43222, snapshot_date#27135, annual_income#27136, Monthly_Inhand_Salary#27137, Num_Bank_Accounts#27138, Num_Credit_Card#27139, Interest_Rate#27140, num_of_loans#27141, loan_types_count#27142, has_personal_loan#27143, has_student_loan#27144, has_mortgage_loan#27145, has_auto_loan#27146, ... 127 more fields]
                                                                                                               +- Project [Customer_ID#27088, has_clickstream_data#43198, Name#43213, snapshot_date#43214, age#43215, is_valid_age#43216, SSN#43217, is_valid_ssn#43218, occupation#43219, data_age_months#43220, bronze_ingest_timestamp#43221, silver_process_timestamp#43222, snapshot_date#27135, annual_income#27136, Monthly_Inhand_Salary#27137, Num_Bank_Accounts#27138, Num_Credit_Card#27139, Interest_Rate#27140, num_of_loans#27141, loan_types_count#27142, has_personal_loan#27143, has_student_loan#27144, has_mortgage_loan#27145, has_auto_loan#27146, ... 127 more fields]
                                                                                                                  +- Project [Customer_ID#27088, has_clickstream_data#43198, Name#43213, snapshot_date#43214, age#43215, is_valid_age#43216, SSN#43217, is_valid_ssn#43218, occupation#43219, data_age_months#43220, bronze_ingest_timestamp#43221, silver_process_timestamp#43222, snapshot_date#27135, annual_income#27136, Monthly_Inhand_Salary#27137, Num_Bank_Accounts#27138, Num_Credit_Card#27139, Interest_Rate#27140, num_of_loans#27141, loan_types_count#27142, has_personal_loan#27143, has_student_loan#27144, has_mortgage_loan#27145, has_auto_loan#27146, ... 127 more fields]
                                                                                                                     +- Project [Customer_ID#27088, has_clickstream_data#43198, Name#43213, snapshot_date#43214, age#43215, is_valid_age#43216, SSN#43217, is_valid_ssn#43218, occupation#43219, data_age_months#43220, bronze_ingest_timestamp#43221, silver_process_timestamp#43222, snapshot_date#27135, annual_income#27136, Monthly_Inhand_Salary#27137, Num_Bank_Accounts#27138, Num_Credit_Card#27139, Interest_Rate#27140, num_of_loans#27141, loan_types_count#27142, has_personal_loan#27143, has_student_loan#27144, has_mortgage_loan#27145, has_auto_loan#27146, ... 127 more fields]
                                                                                                                        +- Project [Customer_ID#27088, has_clickstream_data#43198, Name#43213, snapshot_date#43214, age#43215, is_valid_age#43216, SSN#43217, is_valid_ssn#43218, occupation#43219, data_age_months#43220, bronze_ingest_timestamp#43221, silver_process_timestamp#43222, snapshot_date#27135, annual_income#27136, Monthly_Inhand_Salary#27137, Num_Bank_Accounts#27138, Num_Credit_Card#27139, Interest_Rate#27140, num_of_loans#27141, loan_types_count#27142, has_personal_loan#27143, has_student_loan#27144, has_mortgage_loan#27145, has_auto_loan#27146, ... 127 more fields]
                                                                                                                           +- Project [Customer_ID#27088, has_clickstream_data#43198, Name#43213, snapshot_date#43214, age#43215, is_valid_age#43216, SSN#43217, is_valid_ssn#43218, occupation#43219, data_age_months#43220, bronze_ingest_timestamp#43221, silver_process_timestamp#43222, snapshot_date#27135, annual_income#27136, Monthly_Inhand_Salary#27137, Num_Bank_Accounts#27138, Num_Credit_Card#27139, Interest_Rate#27140, num_of_loans#27141, loan_types_count#27142, has_personal_loan#27143, has_student_loan#27144, has_mortgage_loan#27145, has_auto_loan#27146, ... 127 more fields]
                                                                                                                              +- Project [Customer_ID#27088, has_clickstream_data#43198, Name#43213, snapshot_date#43214, age#43215, is_valid_age#43216, SSN#43217, is_valid_ssn#43218, occupation#43219, data_age_months#43220, bronze_ingest_timestamp#43221, silver_process_timestamp#43222, snapshot_date#27135, annual_income#27136, Monthly_Inhand_Salary#27137, Num_Bank_Accounts#27138, Num_Credit_Card#27139, Interest_Rate#27140, num_of_loans#27141, loan_types_count#27142, has_personal_loan#27143, has_student_loan#27144, has_mortgage_loan#27145, has_auto_loan#27146, ... 127 more fields]
                                                                                                                                 +- Project [Customer_ID#27088, has_clickstream_data#43198, Name#43213, snapshot_date#43214, age#43215, is_valid_age#43216, SSN#43217, is_valid_ssn#43218, occupation#43219, data_age_months#43220, bronze_ingest_timestamp#43221, silver_process_timestamp#43222, snapshot_date#27135, annual_income#27136, Monthly_Inhand_Salary#27137, Num_Bank_Accounts#27138, Num_Credit_Card#27139, Interest_Rate#27140, num_of_loans#27141, loan_types_count#27142, has_personal_loan#27143, has_student_loan#27144, has_mortgage_loan#27145, has_auto_loan#27146, ... 127 more fields]
                                                                                                                                    +- Project [Customer_ID#27088, has_clickstream_data#43198, Name#43213, snapshot_date#43214, age#43215, is_valid_age#43216, SSN#43217, is_valid_ssn#43218, occupation#43219, data_age_months#43220, bronze_ingest_timestamp#43221, silver_process_timestamp#43222, snapshot_date#27135, annual_income#27136, Monthly_Inhand_Salary#27137, Num_Bank_Accounts#27138, Num_Credit_Card#27139, Interest_Rate#27140, num_of_loans#27141, loan_types_count#27142, has_personal_loan#27143, has_student_loan#27144, has_mortgage_loan#27145, has_auto_loan#27146, ... 127 more fields]
                                                                                                                                       +- Project [Customer_ID#27088, has_clickstream_data#43198, Name#43213, snapshot_date#43214, age#43215, is_valid_age#43216, SSN#43217, is_valid_ssn#43218, occupation#43219, data_age_months#43220, bronze_ingest_timestamp#43221, silver_process_timestamp#43222, snapshot_date#27135, annual_income#27136, Monthly_Inhand_Salary#27137, Num_Bank_Accounts#27138, Num_Credit_Card#27139, Interest_Rate#27140, num_of_loans#27141, loan_types_count#27142, has_personal_loan#27143, has_student_loan#27144, has_mortgage_loan#27145, has_auto_loan#27146, ... 127 more fields]
                                                                                                                                          +- Project [Customer_ID#27088, has_clickstream_data#43198, Name#43213, snapshot_date#43214, age#43215, is_valid_age#43216, SSN#43217, is_valid_ssn#43218, occupation#43219, data_age_months#43220, bronze_ingest_timestamp#43221, silver_process_timestamp#43222, snapshot_date#27135, annual_income#27136, Monthly_Inhand_Salary#27137, Num_Bank_Accounts#27138, Num_Credit_Card#27139, Interest_Rate#27140, num_of_loans#27141, loan_types_count#27142, has_personal_loan#27143, has_student_loan#27144, has_mortgage_loan#27145, has_auto_loan#27146, ... 127 more fields]
                                                                                                                                             +- Project [Customer_ID#27088, has_clickstream_data#43198, Name#43213, snapshot_date#43214, age#43215, is_valid_age#43216, SSN#43217, is_valid_ssn#43218, occupation#43219, data_age_months#43220, bronze_ingest_timestamp#43221, silver_process_timestamp#43222, snapshot_date#27135, annual_income#27136, Monthly_Inhand_Salary#27137, Num_Bank_Accounts#27138, Num_Credit_Card#27139, Interest_Rate#27140, num_of_loans#27141, loan_types_count#27142, has_personal_loan#27143, has_student_loan#27144, has_mortgage_loan#27145, has_auto_loan#27146, ... 127 more fields]
                                                                                                                                                +- Project [Customer_ID#27088, has_clickstream_data#43198, Name#43213, snapshot_date#43214, age#43215, is_valid_age#43216, SSN#43217, is_valid_ssn#43218, occupation#43219, data_age_months#43220, bronze_ingest_timestamp#43221, silver_process_timestamp#43222, snapshot_date#27135, annual_income#27136, Monthly_Inhand_Salary#27137, Num_Bank_Accounts#27138, Num_Credit_Card#27139, Interest_Rate#27140, num_of_loans#27141, loan_types_count#27142, has_personal_loan#27143, has_student_loan#27144, has_mortgage_loan#27145, has_auto_loan#27146, ... 127 more fields]
                                                                                                                                                   +- Project [Customer_ID#27088, has_clickstream_data#43198, Name#43213, snapshot_date#43214, age#43215, is_valid_age#43216, SSN#43217, is_valid_ssn#43218, occupation#43219, data_age_months#43220, bronze_ingest_timestamp#43221, silver_process_timestamp#43222, snapshot_date#27135, annual_income#27136, Monthly_Inhand_Salary#27137, Num_Bank_Accounts#27138, Num_Credit_Card#27139, Interest_Rate#27140, num_of_loans#27141, loan_types_count#27142, has_personal_loan#27143, has_student_loan#27144, has_mortgage_loan#27145, has_auto_loan#27146, ... 127 more fields]
                                                                                                                                                      +- Project [Customer_ID#27088, has_clickstream_data#43198, Name#43213, snapshot_date#43214, age#43215, is_valid_age#43216, SSN#43217, is_valid_ssn#43218, occupation#43219, data_age_months#43220, bronze_ingest_timestamp#43221, silver_process_timestamp#43222, snapshot_date#27135, annual_income#27136, Monthly_Inhand_Salary#27137, Num_Bank_Accounts#27138, Num_Credit_Card#27139, Interest_Rate#27140, num_of_loans#27141, loan_types_count#27142, has_personal_loan#27143, has_student_loan#27144, has_mortgage_loan#27145, has_auto_loan#27146, ... 127 more fields]
                                                                                                                                                         +- Project [Customer_ID#27088, has_clickstream_data#43198, Name#43213, snapshot_date#43214, age#43215, is_valid_age#43216, SSN#43217, is_valid_ssn#43218, occupation#43219, data_age_months#43220, bronze_ingest_timestamp#43221, silver_process_timestamp#43222, snapshot_date#27135, annual_income#27136, Monthly_Inhand_Salary#27137, Num_Bank_Accounts#27138, Num_Credit_Card#27139, Interest_Rate#27140, num_of_loans#27141, loan_types_count#27142, has_personal_loan#27143, has_student_loan#27144, has_mortgage_loan#27145, has_auto_loan#27146, ... 127 more fields]
                                                                                                                                                            +- Project [Customer_ID#27088, has_clickstream_data#43198, Name#43213, snapshot_date#43214, age#43215, is_valid_age#43216, SSN#43217, is_valid_ssn#43218, occupation#43219, data_age_months#43220, bronze_ingest_timestamp#43221, silver_process_timestamp#43222, snapshot_date#27135, annual_income#27136, Monthly_Inhand_Salary#27137, Num_Bank_Accounts#27138, Num_Credit_Card#27139, Interest_Rate#27140, num_of_loans#27141, loan_types_count#27142, has_personal_loan#27143, has_student_loan#27144, has_mortgage_loan#27145, has_auto_loan#27146, ... 127 more fields]
                                                                                                                                                               +- Project [Customer_ID#27088, has_clickstream_data#43198, Name#43213, snapshot_date#43214, age#43215, is_valid_age#43216, SSN#43217, is_valid_ssn#43218, occupation#43219, data_age_months#43220, bronze_ingest_timestamp#43221, silver_process_timestamp#43222, snapshot_date#27135, annual_income#27136, Monthly_Inhand_Salary#27137, Num_Bank_Accounts#27138, Num_Credit_Card#27139, Interest_Rate#27140, num_of_loans#27141, loan_types_count#27142, has_personal_loan#27143, has_student_loan#27144, has_mortgage_loan#27145, has_auto_loan#27146, ... 127 more fields]
                                                                                                                                                                  +- Project [Customer_ID#27088, has_clickstream_data#43198, Name#43213, snapshot_date#43214, age#43215, is_valid_age#43216, SSN#43217, is_valid_ssn#43218, occupation#43219, data_age_months#43220, bronze_ingest_timestamp#43221, silver_process_timestamp#43222, snapshot_date#27135, annual_income#27136, Monthly_Inhand_Salary#27137, Num_Bank_Accounts#27138, Num_Credit_Card#27139, Interest_Rate#27140, num_of_loans#27141, loan_types_count#27142, has_personal_loan#27143, has_student_loan#27144, has_mortgage_loan#27145, has_auto_loan#27146, ... 127 more fields]
                                                                                                                                                                     +- Project [Customer_ID#27088, has_clickstream_data#43198, Name#43213, snapshot_date#43214, age#43215, is_valid_age#43216, SSN#43217, is_valid_ssn#43218, occupation#43219, data_age_months#43220, bronze_ingest_timestamp#43221, silver_process_timestamp#43222, snapshot_date#27135, annual_income#27136, Monthly_Inhand_Salary#27137, Num_Bank_Accounts#27138, Num_Credit_Card#27139, Interest_Rate#27140, num_of_loans#27141, loan_types_count#27142, has_personal_loan#27143, has_student_loan#27144, has_mortgage_loan#27145, has_auto_loan#27146, ... 127 more fields]
                                                                                                                                                                        +- Project [Customer_ID#27088, has_clickstream_data#43198, Name#43213, snapshot_date#43214, age#43215, is_valid_age#43216, SSN#43217, is_valid_ssn#43218, occupation#43219, data_age_months#43220, bronze_ingest_timestamp#43221, silver_process_timestamp#43222, snapshot_date#27135, annual_income#27136, Monthly_Inhand_Salary#27137, Num_Bank_Accounts#27138, Num_Credit_Card#27139, Interest_Rate#27140, num_of_loans#27141, loan_types_count#27142, has_personal_loan#27143, has_student_loan#27144, has_mortgage_loan#27145, has_auto_loan#27146, ... 127 more fields]
                                                                                                                                                                           +- Project [Customer_ID#27088, has_clickstream_data#43198, Name#43213, snapshot_date#43214, age#43215, is_valid_age#43216, SSN#43217, is_valid_ssn#43218, occupation#43219, data_age_months#43220, bronze_ingest_timestamp#43221, silver_process_timestamp#43222, snapshot_date#27135, annual_income#27136, Monthly_Inhand_Salary#27137, Num_Bank_Accounts#27138, Num_Credit_Card#27139, Interest_Rate#27140, num_of_loans#27141, loan_types_count#27142, has_personal_loan#27143, has_student_loan#27144, has_mortgage_loan#27145, has_auto_loan#27146, ... 127 more fields]
                                                                                                                                                                              +- Project [Customer_ID#27088, has_clickstream_data#43198, Name#43213, snapshot_date#43214, age#43215, is_valid_age#43216, SSN#43217, is_valid_ssn#43218, occupation#43219, data_age_months#43220, bronze_ingest_timestamp#43221, silver_process_timestamp#43222, snapshot_date#27135, annual_income#27136, Monthly_Inhand_Salary#27137, Num_Bank_Accounts#27138, Num_Credit_Card#27139, Interest_Rate#27140, num_of_loans#27141, loan_types_count#27142, has_personal_loan#27143, has_student_loan#27144, has_mortgage_loan#27145, has_auto_loan#27146, ... 127 more fields]
                                                                                                                                                                                 +- Project [Customer_ID#27088, has_clickstream_data#43198, Name#43213, snapshot_date#43214, age#43215, is_valid_age#43216, SSN#43217, is_valid_ssn#43218, occupation#43219, data_age_months#43220, bronze_ingest_timestamp#43221, silver_process_timestamp#43222, snapshot_date#27135, annual_income#27136, Monthly_Inhand_Salary#27137, Num_Bank_Accounts#27138, Num_Credit_Card#27139, Interest_Rate#27140, num_of_loans#27141, loan_types_count#27142, has_personal_loan#27143, has_student_loan#27144, has_mortgage_loan#27145, has_auto_loan#27146, ... 127 more fields]
                                                                                                                                                                                    +- Project [Customer_ID#27088, has_clickstream_data#43198, Name#43213, snapshot_date#43214, age#43215, is_valid_age#43216, SSN#43217, is_valid_ssn#43218, occupation#43219, data_age_months#43220, bronze_ingest_timestamp#43221, silver_process_timestamp#43222, snapshot_date#27135, annual_income#27136, Monthly_Inhand_Salary#27137, Num_Bank_Accounts#27138, Num_Credit_Card#27139, Interest_Rate#27140, num_of_loans#27141, loan_types_count#27142, has_personal_loan#27143, has_student_loan#27144, has_mortgage_loan#27145, has_auto_loan#27146, ... 127 more fields]
                                                                                                                                                                                       +- Project [Customer_ID#27088, has_clickstream_data#43198, Name#43213, snapshot_date#43214, age#43215, is_valid_age#43216, SSN#43217, is_valid_ssn#43218, occupation#43219, data_age_months#43220, bronze_ingest_timestamp#43221, silver_process_timestamp#43222, snapshot_date#27135, annual_income#27136, Monthly_Inhand_Salary#27137, Num_Bank_Accounts#27138, Num_Credit_Card#27139, Interest_Rate#27140, num_of_loans#27141, loan_types_count#27142, has_personal_loan#27143, has_student_loan#27144, has_mortgage_loan#27145, has_auto_loan#27146, ... 127 more fields]
                                                                                                                                                                                          +- Project [Customer_ID#27088, has_clickstream_data#43198, Name#43213, snapshot_date#43214, age#43215, is_valid_age#43216, SSN#43217, is_valid_ssn#43218, occupation#43219, data_age_months#43220, bronze_ingest_timestamp#43221, silver_process_timestamp#43222, snapshot_date#27135, annual_income#27136, Monthly_Inhand_Salary#27137, Num_Bank_Accounts#27138, Num_Credit_Card#27139, Interest_Rate#27140, num_of_loans#27141, loan_types_count#27142, has_personal_loan#27143, has_student_loan#27144, has_mortgage_loan#27145, has_auto_loan#27146, ... 127 more fields]
                                                                                                                                                                                             +- Project [Customer_ID#27088, has_clickstream_data#43198, Name#43213, snapshot_date#43214, age#43215, is_valid_age#43216, SSN#43217, is_valid_ssn#43218, occupation#43219, data_age_months#43220, bronze_ingest_timestamp#43221, silver_process_timestamp#43222, snapshot_date#27135, annual_income#27136, Monthly_Inhand_Salary#27137, Num_Bank_Accounts#27138, Num_Credit_Card#27139, Interest_Rate#27140, num_of_loans#27141, loan_types_count#27142, has_personal_loan#27143, has_student_loan#27144, has_mortgage_loan#27145, has_auto_loan#27146, ... 127 more fields]
                                                                                                                                                                                                +- Project [Customer_ID#27088, has_clickstream_data#43198, Name#43213, snapshot_date#43214, age#43215, is_valid_age#43216, SSN#43217, is_valid_ssn#43218, occupation#43219, data_age_months#43220, bronze_ingest_timestamp#43221, silver_process_timestamp#43222, snapshot_date#27135, annual_income#27136, Monthly_Inhand_Salary#27137, Num_Bank_Accounts#27138, Num_Credit_Card#27139, Interest_Rate#27140, num_of_loans#27141, loan_types_count#27142, has_personal_loan#27143, has_student_loan#27144, has_mortgage_loan#27145, has_auto_loan#27146, ... 127 more fields]
                                                                                                                                                                                                   +- Project [Customer_ID#27088, has_clickstream_data#43198, Name#43213, snapshot_date#43214, age#43215, is_valid_age#43216, SSN#43217, is_valid_ssn#43218, occupation#43219, data_age_months#43220, bronze_ingest_timestamp#43221, silver_process_timestamp#43222, snapshot_date#27135, annual_income#27136, Monthly_Inhand_Salary#27137, Num_Bank_Accounts#27138, Num_Credit_Card#27139, Interest_Rate#27140, num_of_loans#27141, loan_types_count#27142, has_personal_loan#27143, has_student_loan#27144, has_mortgage_loan#27145, has_auto_loan#27146, ... 127 more fields]
                                                                                                                                                                                                      +- Project [Customer_ID#27088, has_clickstream_data#43198, Name#43213, snapshot_date#43214, age#43215, is_valid_age#43216, SSN#43217, is_valid_ssn#43218, occupation#43219, data_age_months#43220, bronze_ingest_timestamp#43221, silver_process_timestamp#43222, snapshot_date#27135, annual_income#27136, Monthly_Inhand_Salary#27137, Num_Bank_Accounts#27138, Num_Credit_Card#27139, Interest_Rate#27140, num_of_loans#27141, loan_types_count#27142, has_personal_loan#27143, has_student_loan#27144, has_mortgage_loan#27145, has_auto_loan#27146, ... 127 more fields]
                                                                                                                                                                                                         +- Project [Customer_ID#27088, has_clickstream_data#43198, Name#43213, snapshot_date#43214, age#43215, is_valid_age#43216, SSN#43217, is_valid_ssn#43218, occupation#43219, data_age_months#43220, bronze_ingest_timestamp#43221, silver_process_timestamp#43222, snapshot_date#27135, annual_income#27136, Monthly_Inhand_Salary#27137, Num_Bank_Accounts#27138, Num_Credit_Card#27139, Interest_Rate#27140, num_of_loans#27141, loan_types_count#27142, has_personal_loan#27143, has_student_loan#27144, has_mortgage_loan#27145, has_auto_loan#27146, ... 127 more fields]
                                                                                                                                                                                                            +- Project [Customer_ID#27088, has_clickstream_data#43198, Name#43213, snapshot_date#43214, age#43215, is_valid_age#43216, SSN#43217, is_valid_ssn#43218, occupation#43219, data_age_months#43220, bronze_ingest_timestamp#43221, silver_process_timestamp#43222, snapshot_date#27135, annual_income#27136, Monthly_Inhand_Salary#27137, Num_Bank_Accounts#27138, Num_Credit_Card#27139, Interest_Rate#27140, num_of_loans#27141, loan_types_count#27142, has_personal_loan#27143, has_student_loan#27144, has_mortgage_loan#27145, has_auto_loan#27146, ... 127 more fields]
                                                                                                                                                                                                               +- Project [Customer_ID#27088, has_clickstream_data#43198, Name#43213, snapshot_date#43214, age#43215, is_valid_age#43216, SSN#43217, is_valid_ssn#43218, occupation#43219, data_age_months#43220, bronze_ingest_timestamp#43221, silver_process_timestamp#43222, snapshot_date#27135, annual_income#27136, Monthly_Inhand_Salary#27137, Num_Bank_Accounts#27138, Num_Credit_Card#27139, Interest_Rate#27140, num_of_loans#27141, loan_types_count#27142, has_personal_loan#27143, has_student_loan#27144, has_mortgage_loan#27145, has_auto_loan#27146, ... 127 more fields]
                                                                                                                                                                                                                  +- Project [Customer_ID#27088, has_clickstream_data#43198, Name#43213, snapshot_date#43214, age#43215, is_valid_age#43216, SSN#43217, is_valid_ssn#43218, occupation#43219, data_age_months#43220, bronze_ingest_timestamp#43221, silver_process_timestamp#43222, snapshot_date#27135, annual_income#27136, Monthly_Inhand_Salary#27137, Num_Bank_Accounts#27138, Num_Credit_Card#27139, Interest_Rate#27140, num_of_loans#27141, loan_types_count#27142, has_personal_loan#27143, has_student_loan#27144, has_mortgage_loan#27145, has_auto_loan#27146, ... 127 more fields]
                                                                                                                                                                                                                     +- Project [Customer_ID#27088, has_clickstream_data#43198, Name#43213, snapshot_date#43214, age#43215, is_valid_age#43216, SSN#43217, is_valid_ssn#43218, occupation#43219, data_age_months#43220, bronze_ingest_timestamp#43221, silver_process_timestamp#43222, snapshot_date#27135, annual_income#27136, Monthly_Inhand_Salary#27137, Num_Bank_Accounts#27138, Num_Credit_Card#27139, Interest_Rate#27140, num_of_loans#27141, loan_types_count#27142, has_personal_loan#27143, has_student_loan#27144, has_mortgage_loan#27145, has_auto_loan#27146, ... 127 more fields]
                                                                                                                                                                                                                        +- Project [Customer_ID#27088, has_clickstream_data#43198, Name#43213, snapshot_date#43214, age#43215, is_valid_age#43216, SSN#43217, is_valid_ssn#43218, occupation#43219, data_age_months#43220, bronze_ingest_timestamp#43221, silver_process_timestamp#43222, snapshot_date#27135, annual_income#27136, Monthly_Inhand_Salary#27137, Num_Bank_Accounts#27138, Num_Credit_Card#27139, Interest_Rate#27140, num_of_loans#27141, loan_types_count#27142, has_personal_loan#27143, has_student_loan#27144, has_mortgage_loan#27145, has_auto_loan#27146, ... 127 more fields]
                                                                                                                                                                                                                           +- Project [Customer_ID#27088, has_clickstream_data#43198, Name#43213, snapshot_date#43214, age#43215, is_valid_age#43216, SSN#43217, is_valid_ssn#43218, occupation#43219, data_age_months#43220, bronze_ingest_timestamp#43221, silver_process_timestamp#43222, snapshot_date#27135, annual_income#27136, Monthly_Inhand_Salary#27137, Num_Bank_Accounts#27138, Num_Credit_Card#27139, Interest_Rate#27140, num_of_loans#27141, loan_types_count#27142, has_personal_loan#27143, has_student_loan#27144, has_mortgage_loan#27145, has_auto_loan#27146, ... 127 more fields]
                                                                                                                                                                                                                              +- Project [Customer_ID#27088, has_clickstream_data#43198, Name#43213, snapshot_date#43214, age#43215, is_valid_age#43216, SSN#43217, is_valid_ssn#43218, occupation#43219, data_age_months#43220, bronze_ingest_timestamp#43221, silver_process_timestamp#43222, snapshot_date#27135, annual_income#27136, Monthly_Inhand_Salary#27137, Num_Bank_Accounts#27138, Num_Credit_Card#27139, Interest_Rate#27140, num_of_loans#27141, loan_types_count#27142, has_personal_loan#27143, has_student_loan#27144, has_mortgage_loan#27145, has_auto_loan#27146, ... 127 more fields]
                                                                                                                                                                                                                                 +- Project [Customer_ID#27088, has_clickstream_data#43198, Name#43213, snapshot_date#43214, age#43215, is_valid_age#43216, SSN#43217, is_valid_ssn#43218, occupation#43219, data_age_months#43220, bronze_ingest_timestamp#43221, silver_process_timestamp#43222, snapshot_date#27135, annual_income#27136, Monthly_Inhand_Salary#27137, Num_Bank_Accounts#27138, Num_Credit_Card#27139, Interest_Rate#27140, num_of_loans#27141, loan_types_count#27142, has_personal_loan#27143, has_student_loan#27144, has_mortgage_loan#27145, has_auto_loan#27146, ... 127 more fields]
                                                                                                                                                                                                                                    +- Project [Customer_ID#27088, has_clickstream_data#43198, Name#43213, snapshot_date#43214, age#43215, is_valid_age#43216, SSN#43217, is_valid_ssn#43218, occupation#43219, data_age_months#43220, bronze_ingest_timestamp#43221, silver_process_timestamp#43222, snapshot_date#27135, annual_income#27136, Monthly_Inhand_Salary#27137, Num_Bank_Accounts#27138, Num_Credit_Card#27139, Interest_Rate#27140, num_of_loans#27141, loan_types_count#27142, has_personal_loan#27143, has_student_loan#27144, has_mortgage_loan#27145, has_auto_loan#27146, ... 127 more fields]
                                                                                                                                                                                                                                       +- Project [Customer_ID#27088, has_clickstream_data#43198, Name#43213, snapshot_date#43214, age#43215, is_valid_age#43216, SSN#43217, is_valid_ssn#43218, occupation#43219, data_age_months#43220, bronze_ingest_timestamp#43221, silver_process_timestamp#43222, snapshot_date#27135, annual_income#27136, Monthly_Inhand_Salary#27137, Num_Bank_Accounts#27138, Num_Credit_Card#27139, Interest_Rate#27140, num_of_loans#27141, loan_types_count#27142, has_personal_loan#27143, has_student_loan#27144, has_mortgage_loan#27145, has_auto_loan#27146, ... 127 more fields]
                                                                                                                                                                                                                                          +- Project [Customer_ID#27088, has_clickstream_data#43198, Name#43213, snapshot_date#43214, age#43215, is_valid_age#43216, SSN#43217, is_valid_ssn#43218, occupation#43219, data_age_months#43220, bronze_ingest_timestamp#43221, silver_process_timestamp#43222, snapshot_date#27135, annual_income#27136, Monthly_Inhand_Salary#27137, Num_Bank_Accounts#27138, Num_Credit_Card#27139, Interest_Rate#27140, num_of_loans#27141, loan_types_count#27142, has_personal_loan#27143, has_student_loan#27144, has_mortgage_loan#27145, has_auto_loan#27146, ... 127 more fields]
                                                                                                                                                                                                                                             +- Project [Customer_ID#27088, has_clickstream_data#43198, Name#43213, snapshot_date#43214, age#43215, is_valid_age#43216, SSN#43217, is_valid_ssn#43218, occupation#43219, data_age_months#43220, bronze_ingest_timestamp#43221, silver_process_timestamp#43222, snapshot_date#27135, annual_income#27136, Monthly_Inhand_Salary#27137, Num_Bank_Accounts#27138, Num_Credit_Card#27139, Interest_Rate#27140, num_of_loans#27141, loan_types_count#27142, has_personal_loan#27143, has_student_loan#27144, has_mortgage_loan#27145, has_auto_loan#27146, ... 127 more fields]
                                                                                                                                                                                                                                                +- Project [Customer_ID#27088, has_clickstream_data#43198, Name#43213, snapshot_date#43214, age#43215, is_valid_age#43216, SSN#43217, is_valid_ssn#43218, occupation#43219, data_age_months#43220, bronze_ingest_timestamp#43221, silver_process_timestamp#43222, snapshot_date#27135, annual_income#27136, Monthly_Inhand_Salary#27137, Num_Bank_Accounts#27138, Num_Credit_Card#27139, Interest_Rate#27140, num_of_loans#27141, loan_types_count#27142, has_personal_loan#27143, has_student_loan#27144, has_mortgage_loan#27145, has_auto_loan#27146, ... 127 more fields]
                                                                                                                                                                                                                                                   +- Project [Customer_ID#27088, has_clickstream_data#43198, Name#43213, snapshot_date#43214, age#43215, is_valid_age#43216, SSN#43217, is_valid_ssn#43218, occupation#43219, data_age_months#43220, bronze_ingest_timestamp#43221, silver_process_timestamp#43222, snapshot_date#27135, annual_income#27136, Monthly_Inhand_Salary#27137, Num_Bank_Accounts#27138, Num_Credit_Card#27139, Interest_Rate#27140, num_of_loans#27141, loan_types_count#27142, has_personal_loan#27143, has_student_loan#27144, has_mortgage_loan#27145, has_auto_loan#27146, ... 127 more fields]
                                                                                                                                                                                                                                                      +- Project [Customer_ID#27088, has_clickstream_data#43198, Name#43213, snapshot_date#43214, age#43215, is_valid_age#43216, SSN#43217, is_valid_ssn#43218, occupation#43219, data_age_months#43220, bronze_ingest_timestamp#43221, silver_process_timestamp#43222, snapshot_date#27135, annual_income#27136, Monthly_Inhand_Salary#27137, Num_Bank_Accounts#27138, Num_Credit_Card#27139, Interest_Rate#27140, num_of_loans#27141, loan_types_count#27142, has_personal_loan#27143, has_student_loan#27144, has_mortgage_loan#27145, has_auto_loan#27146, ... 127 more fields]
                                                                                                                                                                                                                                                         +- Project [Customer_ID#27088, has_clickstream_data#43198, Name#43213, snapshot_date#43214, age#43215, is_valid_age#43216, SSN#43217, is_valid_ssn#43218, occupation#43219, data_age_months#43220, bronze_ingest_timestamp#43221, silver_process_timestamp#43222, snapshot_date#27135, annual_income#27136, Monthly_Inhand_Salary#27137, Num_Bank_Accounts#27138, Num_Credit_Card#27139, Interest_Rate#27140, num_of_loans#27141, loan_types_count#27142, has_personal_loan#27143, has_student_loan#27144, has_mortgage_loan#27145, has_auto_loan#27146, ... 127 more fields]
                                                                                                                                                                                                                                                            +- Project [Customer_ID#27088, has_clickstream_data#43198, Name#43213, snapshot_date#43214, age#43215, is_valid_age#43216, SSN#43217, is_valid_ssn#43218, occupation#43219, data_age_months#43220, bronze_ingest_timestamp#43221, silver_process_timestamp#43222, snapshot_date#27135, annual_income#27136, Monthly_Inhand_Salary#27137, Num_Bank_Accounts#27138, Num_Credit_Card#27139, Interest_Rate#27140, num_of_loans#27141, loan_types_count#27142, has_personal_loan#27143, has_student_loan#27144, has_mortgage_loan#27145, has_auto_loan#27146, ... 127 more fields]
                                                                                                                                                                                                                                                               +- Project [Customer_ID#27088, has_clickstream_data#43198, Name#43213, snapshot_date#43214, age#43215, is_valid_age#43216, SSN#43217, is_valid_ssn#43218, occupation#43219, data_age_months#43220, bronze_ingest_timestamp#43221, silver_process_timestamp#43222, snapshot_date#27135, annual_income#27136, Monthly_Inhand_Salary#27137, Num_Bank_Accounts#27138, Num_Credit_Card#27139, Interest_Rate#27140, num_of_loans#27141, loan_types_count#27142, has_personal_loan#27143, has_student_loan#27144, has_mortgage_loan#27145, has_auto_loan#27146, ... 127 more fields]
                                                                                                                                                                                                                                                                  +- Project [Customer_ID#27088, has_clickstream_data#43198, Name#43213, snapshot_date#43214, age#43215, is_valid_age#43216, SSN#43217, is_valid_ssn#43218, occupation#43219, data_age_months#43220, bronze_ingest_timestamp#43221, silver_process_timestamp#43222, snapshot_date#27135, annual_income#27136, Monthly_Inhand_Salary#27137, Num_Bank_Accounts#27138, Num_Credit_Card#27139, Interest_Rate#27140, num_of_loans#27141, loan_types_count#27142, has_personal_loan#27143, has_student_loan#27144, has_mortgage_loan#27145, has_auto_loan#27146, ... 127 more fields]
                                                                                                                                                                                                                                                                     +- Project [Customer_ID#27088, has_clickstream_data#43198, Name#43213, snapshot_date#43214, age#43215, is_valid_age#43216, SSN#43217, is_valid_ssn#43218, occupation#43219, data_age_months#43220, bronze_ingest_timestamp#43221, silver_process_timestamp#43222, snapshot_date#27135, annual_income#27136, Monthly_Inhand_Salary#27137, Num_Bank_Accounts#27138, Num_Credit_Card#27139, Interest_Rate#27140, num_of_loans#27141, loan_types_count#27142, has_personal_loan#27143, has_student_loan#27144, has_mortgage_loan#27145, has_auto_loan#27146, ... 127 more fields]
                                                                                                                                                                                                                                                                        +- Project [Customer_ID#27088, has_clickstream_data#43198, Name#43213, snapshot_date#43214, age#43215, is_valid_age#43216, SSN#43217, is_valid_ssn#43218, occupation#43219, data_age_months#43220, bronze_ingest_timestamp#43221, silver_process_timestamp#43222, snapshot_date#27135, annual_income#27136, Monthly_Inhand_Salary#27137, Num_Bank_Accounts#27138, Num_Credit_Card#27139, Interest_Rate#27140, num_of_loans#27141, loan_types_count#27142, has_personal_loan#27143, has_student_loan#27144, has_mortgage_loan#27145, has_auto_loan#27146, ... 127 more fields]
                                                                                                                                                                                                                                                                           +- Project [Customer_ID#27088, has_clickstream_data#43198, Name#43213, snapshot_date#43214, age#43215, is_valid_age#43216, SSN#43217, is_valid_ssn#43218, occupation#43219, data_age_months#43220, bronze_ingest_timestamp#43221, silver_process_timestamp#43222, snapshot_date#27135, annual_income#27136, Monthly_Inhand_Salary#27137, Num_Bank_Accounts#27138, Num_Credit_Card#27139, Interest_Rate#27140, num_of_loans#27141, loan_types_count#27142, has_personal_loan#27143, has_student_loan#27144, has_mortgage_loan#27145, has_auto_loan#27146, ... 127 more fields]
                                                                                                                                                                                                                                                                              +- Project [Customer_ID#27088, has_clickstream_data#43198, Name#43213, snapshot_date#43214, age#43215, is_valid_age#43216, SSN#43217, is_valid_ssn#43218, occupation#43219, data_age_months#43220, bronze_ingest_timestamp#43221, silver_process_timestamp#43222, snapshot_date#27135, annual_income#27136, Monthly_Inhand_Salary#27137, Num_Bank_Accounts#27138, Num_Credit_Card#27139, Interest_Rate#27140, num_of_loans#27141, loan_types_count#27142, has_personal_loan#27143, has_student_loan#27144, has_mortgage_loan#27145, has_auto_loan#27146, ... 127 more fields]
                                                                                                                                                                                                                                                                                 +- Project [Customer_ID#27088, has_clickstream_data#43198, Name#43213, snapshot_date#43214, age#43215, is_valid_age#43216, SSN#43217, is_valid_ssn#43218, occupation#43219, data_age_months#43220, bronze_ingest_timestamp#43221, silver_process_timestamp#43222, snapshot_date#27135, annual_income#27136, Monthly_Inhand_Salary#27137, Num_Bank_Accounts#27138, Num_Credit_Card#27139, Interest_Rate#27140, num_of_loans#27141, loan_types_count#27142, has_personal_loan#27143, has_student_loan#27144, has_mortgage_loan#27145, has_auto_loan#27146, ... 127 more fields]
                                                                                                                                                                                                                                                                                    +- Project [Customer_ID#27088, has_clickstream_data#43198, Name#43213, snapshot_date#43214, age#43215, is_valid_age#43216, SSN#43217, is_valid_ssn#43218, occupation#43219, data_age_months#43220, bronze_ingest_timestamp#43221, silver_process_timestamp#43222, snapshot_date#27135, annual_income#27136, Monthly_Inhand_Salary#27137, Num_Bank_Accounts#27138, Num_Credit_Card#27139, Interest_Rate#27140, num_of_loans#27141, loan_types_count#27142, has_personal_loan#27143, has_student_loan#27144, has_mortgage_loan#27145, has_auto_loan#27146, ... 127 more fields]
                                                                                                                                                                                                                                                                                       +- Project [Customer_ID#27088, has_clickstream_data#43198, Name#43213, snapshot_date#43214, age#43215, is_valid_age#43216, SSN#43217, is_valid_ssn#43218, occupation#43219, data_age_months#43220, bronze_ingest_timestamp#43221, silver_process_timestamp#43222, snapshot_date#27135, annual_income#27136, Monthly_Inhand_Salary#27137, Num_Bank_Accounts#27138, Num_Credit_Card#27139, Interest_Rate#27140, num_of_loans#27141, loan_types_count#27142, has_personal_loan#27143, has_student_loan#27144, has_mortgage_loan#27145, has_auto_loan#27146, ... 127 more fields]
                                                                                                                                                                                                                                                                                          +- Project [Customer_ID#27088, has_clickstream_data#43198, Name#43213, snapshot_date#43214, age#43215, is_valid_age#43216, SSN#43217, is_valid_ssn#43218, occupation#43219, data_age_months#43220, bronze_ingest_timestamp#43221, silver_process_timestamp#43222, snapshot_date#27135, annual_income#27136, Monthly_Inhand_Salary#27137, Num_Bank_Accounts#27138, Num_Credit_Card#27139, Interest_Rate#27140, num_of_loans#27141, loan_types_count#27142, has_personal_loan#27143, has_student_loan#27144, has_mortgage_loan#27145, has_auto_loan#27146, ... 127 more fields]
                                                                                                                                                                                                                                                                                             +- Project [Customer_ID#27088, has_clickstream_data#43198, Name#43213, snapshot_date#43214, age#43215, is_valid_age#43216, SSN#43217, is_valid_ssn#43218, occupation#43219, data_age_months#43220, bronze_ingest_timestamp#43221, silver_process_timestamp#43222, snapshot_date#27135, annual_income#27136, Monthly_Inhand_Salary#27137, Num_Bank_Accounts#27138, Num_Credit_Card#27139, Interest_Rate#27140, num_of_loans#27141, loan_types_count#27142, has_personal_loan#27143, has_student_loan#27144, has_mortgage_loan#27145, has_auto_loan#27146, ... 127 more fields]
                                                                                                                                                                                                                                                                                                +- Project [Customer_ID#27088, has_clickstream_data#43198, Name#43213, snapshot_date#43214, age#43215, is_valid_age#43216, SSN#43217, is_valid_ssn#43218, occupation#43219, data_age_months#43220, bronze_ingest_timestamp#43221, silver_process_timestamp#43222, snapshot_date#27135, annual_income#27136, Monthly_Inhand_Salary#27137, Num_Bank_Accounts#27138, Num_Credit_Card#27139, Interest_Rate#27140, num_of_loans#27141, loan_types_count#27142, has_personal_loan#27143, has_student_loan#27144, has_mortgage_loan#27145, has_auto_loan#27146, ... 127 more fields]
                                                                                                                                                                                                                                                                                                   +- Project [Customer_ID#27088, has_clickstream_data#43198, Name#43213, snapshot_date#43214, age#43215, is_valid_age#43216, SSN#43217, is_valid_ssn#43218, occupation#43219, data_age_months#43220, bronze_ingest_timestamp#43221, silver_process_timestamp#43222, snapshot_date#27135, annual_income#27136, Monthly_Inhand_Salary#27137, Num_Bank_Accounts#27138, Num_Credit_Card#27139, Interest_Rate#27140, num_of_loans#27141, loan_types_count#27142, has_personal_loan#27143, has_student_loan#27144, has_mortgage_loan#27145, has_auto_loan#27146, ... 127 more fields]
                                                                                                                                                                                                                                                                                                      +- Project [Customer_ID#27088, has_clickstream_data#43198, Name#43213, snapshot_date#43214, age#43215, is_valid_age#43216, SSN#43217, is_valid_ssn#43218, occupation#43219, data_age_months#43220, bronze_ingest_timestamp#43221, silver_process_timestamp#43222, snapshot_date#27135, annual_income#27136, Monthly_Inhand_Salary#27137, Num_Bank_Accounts#27138, Num_Credit_Card#27139, Interest_Rate#27140, num_of_loans#27141, loan_types_count#27142, has_personal_loan#27143, has_student_loan#27144, has_mortgage_loan#27145, has_auto_loan#27146, ... 127 more fields]
                                                                                                                                                                                                                                                                                                         +- Project [Customer_ID#27088, has_clickstream_data#43198, Name#43213, snapshot_date#43214, age#43215, is_valid_age#43216, SSN#43217, is_valid_ssn#43218, occupation#43219, data_age_months#43220, bronze_ingest_timestamp#43221, silver_process_timestamp#43222, snapshot_date#27135, annual_income#27136, Monthly_Inhand_Salary#27137, Num_Bank_Accounts#27138, Num_Credit_Card#27139, Interest_Rate#27140, num_of_loans#27141, loan_types_count#27142, has_personal_loan#27143, has_student_loan#27144, has_mortgage_loan#27145, has_auto_loan#27146, ... 127 more fields]
                                                                                                                                                                                                                                                                                                            +- Project [Customer_ID#27088, has_clickstream_data#43198, Name#43213, snapshot_date#43214, age#43215, is_valid_age#43216, SSN#43217, is_valid_ssn#43218, occupation#43219, data_age_months#43220, bronze_ingest_timestamp#43221, silver_process_timestamp#43222, snapshot_date#27135, annual_income#27136, Monthly_Inhand_Salary#27137, Num_Bank_Accounts#27138, Num_Credit_Card#27139, Interest_Rate#27140, num_of_loans#27141, loan_types_count#27142, has_personal_loan#27143, has_student_loan#27144, has_mortgage_loan#27145, has_auto_loan#27146, ... 127 more fields]
                                                                                                                                                                                                                                                                                                               +- Join LeftOuter, (Customer_ID#27088 = Customer_ID#43423)
                                                                                                                                                                                                                                                                                                                  :- Project [Customer_ID#27088, has_clickstream_data#43198, Name#43213, snapshot_date#43214, age#43215, is_valid_age#43216, SSN#43217, is_valid_ssn#43218, occupation#43219, data_age_months#43220, bronze_ingest_timestamp#43221, silver_process_timestamp#43222, snapshot_date#27135, annual_income#27136, Monthly_Inhand_Salary#27137, Num_Bank_Accounts#27138, Num_Credit_Card#27139, Interest_Rate#27140, num_of_loans#27141, loan_types_count#27142, has_personal_loan#27143, has_student_loan#27144, has_mortgage_loan#27145, has_auto_loan#27146, ... 22 more fields]
                                                                                                                                                                                                                                                                                                                  :  +- Join LeftOuter, (Customer_ID#27088 = Customer_ID#27134)
                                                                                                                                                                                                                                                                                                                  :     :- Project [Customer_ID#27088, has_clickstream_data#43198, Name#43213, snapshot_date#43214, age#43215, is_valid_age#43216, SSN#43217, is_valid_ssn#43218, occupation#43219, data_age_months#43220, bronze_ingest_timestamp#43221, silver_process_timestamp#43222]
                                                                                                                                                                                                                                                                                                                  :     :  +- Join LeftOuter, (Customer_ID#27088 = Customer_ID#43212)
                                                                                                                                                                                                                                                                                                                  :     :     :- Project [Customer_ID#27088, isnotnull(Customer_ID#27478) AS has_clickstream_data#43198]
                                                                                                                                                                                                                                                                                                                  :     :     :  +- Project [Customer_ID#27088, Customer_ID#27478]
                                                                                                                                                                                                                                                                                                                  :     :     :     +- Join LeftOuter, (Customer_ID#27088 = Customer_ID#27478)
                                                                                                                                                                                                                                                                                                                  :     :     :        :- Project [Customer_ID#27088]
                                                                                                                                                                                                                                                                                                                  :     :     :        :  +- Relation [Customer_ID#27088,Name#27089,snapshot_date#27090,age#27091,is_valid_age#27092,SSN#27093,is_valid_ssn#27094,occupation#27095,data_age_months#27096,bronze_ingest_timestamp#27097,silver_process_timestamp#27098] parquet
                                                                                                                                                                                                                                                                                                                  :     :     :        +- Deduplicate [Customer_ID#27478]
                                                                                                                                                                                                                                                                                                                  :     :     :           +- Project [Customer_ID#27478]
                                                                                                                                                                                                                                                                                                                  :     :     :              +- Relation [Customer_ID#27478,fe_1_mean#27479,fe_1_max#27480,fe_1_min#27481,fe_1_stddev#27482,fe_1_outlier_count#27483L,fe_2_mean#27484,fe_2_max#27485,fe_2_min#27486,fe_2_stddev#27487,fe_2_outlier_count#27488L,fe_3_mean#27489,fe_3_max#27490,fe_3_min#27491,fe_3_stddev#27492,fe_3_outlier_count#27493L,fe_4_mean#27494,fe_4_max#27495,fe_4_min#27496,fe_4_stddev#27497,fe_4_outlier_count#27498L,fe_5_mean#27499,fe_5_max#27500,fe_5_min#27501,... 82 more fields] parquet
                                                                                                                                                                                                                                                                                                                  :     :     +- Project [Customer_ID#43212, Name#43213, snapshot_date#43214, age#43215, is_valid_age#43216, SSN#43217, is_valid_ssn#43218, occupation#43219, data_age_months#43220, bronze_ingest_timestamp#43221, silver_process_timestamp#43222]
                                                                                                                                                                                                                                                                                                                  :     :        +- Relation [Customer_ID#43212,Name#43213,snapshot_date#43214,age#43215,is_valid_age#43216,SSN#43217,is_valid_ssn#43218,occupation#43219,data_age_months#43220,bronze_ingest_timestamp#43221,silver_process_timestamp#43222] parquet
                                                                                                                                                                                                                                                                                                                  :     +- Project [Customer_ID#27134, snapshot_date#27135, annual_income#27136, Monthly_Inhand_Salary#27137, Num_Bank_Accounts#27138, Num_Credit_Card#27139, Interest_Rate#27140, num_of_loans#27141, loan_types_count#27142, has_personal_loan#27143, has_student_loan#27144, has_mortgage_loan#27145, has_auto_loan#27146, has_home_equity_loan#27147, has_debt_consolidation_loan#27148, has_credit_builder_loan#27149, has_payday_loan#27150, Delay_from_due_date#27151, num_delayed_payments#27152, changed_credit_limit#27153, Num_Credit_Inquiries#27154, credit_mix#27155, outstanding_debt#27156, Credit_Utilization_Ratio#27157, ... 11 more fields]
                                                                                                                                                                                                                                                                                                                  :        +- Relation [Customer_ID#27134,snapshot_date#27135,annual_income#27136,Monthly_Inhand_Salary#27137,Num_Bank_Accounts#27138,Num_Credit_Card#27139,Interest_Rate#27140,num_of_loans#27141,loan_types_count#27142,has_personal_loan#27143,has_student_loan#27144,has_mortgage_loan#27145,has_auto_loan#27146,has_home_equity_loan#27147,has_debt_consolidation_loan#27148,has_credit_builder_loan#27149,has_payday_loan#27150,Delay_from_due_date#27151,num_delayed_payments#27152,changed_credit_limit#27153,Num_Credit_Inquiries#27154,credit_mix#27155,outstanding_debt#27156,Credit_Utilization_Ratio#27157,... 11 more fields] parquet
                                                                                                                                                                                                                                                                                                                  +- Project [Customer_ID#43423, fe_1_mean#43424, fe_1_max#43425, fe_1_min#43426, fe_1_stddev#43427, fe_1_outlier_count#43428L, fe_2_mean#43429, fe_2_max#43430, fe_2_min#43431, fe_2_stddev#43432, fe_2_outlier_count#43433L, fe_3_mean#43434, fe_3_max#43435, fe_3_min#43436, fe_3_stddev#43437, fe_3_outlier_count#43438L, fe_4_mean#43439, fe_4_max#43440, fe_4_min#43441, fe_4_stddev#43442, fe_4_outlier_count#43443L, fe_5_mean#43444, fe_5_max#43445, fe_5_min#43446, ... 82 more fields]
                                                                                                                                                                                                                                                                                                                     +- Relation [Customer_ID#43423,fe_1_mean#43424,fe_1_max#43425,fe_1_min#43426,fe_1_stddev#43427,fe_1_outlier_count#43428L,fe_2_mean#43429,fe_2_max#43430,fe_2_min#43431,fe_2_stddev#43432,fe_2_outlier_count#43433L,fe_3_mean#43434,fe_3_max#43435,fe_3_min#43436,fe_3_stddev#43437,fe_3_outlier_count#43438L,fe_4_mean#43439,fe_4_max#43440,fe_4_min#43441,fe_4_stddev#43442,fe_4_outlier_count#43443L,fe_5_mean#43444,fe_5_max#43445,fe_5_min#43446,... 82 more fields] parquet
