# Simple EDA

In [None]:
import pandas as pd
import snowflake.snowpark as snowpark
from snowflake.snowpark.functions import count
from snowflake.snowpark.context import get_active_session

In [None]:
# Exploratory pass over the raw churn table: overall size and schema, a row
# sample, per-value frequencies for selected columns, and describe() summaries.
session = get_active_session()
df = session.table("RAW.CUSTOMER_CHURN_RAW")

# --- Overview: row count, schema, first rows (displayed only) ---
print("📊 Row count:", df.count())
df.print_schema()

print("\n🔍 Sample data:")
df.show(10)

# --- Frequency tables ---
# (heading, column) pairs in display order: categorical columns first,
# then the binary flags, and finally the EXITED target variable.
frequency_sections = (
    ("\n🧑‍🤝‍🧑 Gender distribution:", "GENDER"),
    ("\n🌍 Geography distribution:", "GEOGRAPHY"),
    ("\n💳 Card types:", "CARD_TYPE"),
    ("\n💳 Has credit card:", "HASCRCARD"),
    ("\n🔥 Active members:", "ISACTIVEMEMBER"),
    ("\n🚪 Exited distribution:", "EXITED"),
)
for heading, column_name in frequency_sections:
    print(heading)
    # One row per distinct value of the column, with its row count.
    df.group_by(column_name).agg(count("*").alias("count")).show()

# --- Numerical summaries ---
# Each entry pairs a heading with the columns passed to describe() together.
summary_sections = (
    (
        "\n📈 Summary statistics (AGE, BALANCE, CREDITSCORE):",
        ("AGE", "BALANCE", "CREDITSCORE"),
    ),
    (
        "\n💰 Salary vs. Satisfaction:",
        ("ESTIMATEDSALARY", "SATISFACTION_SCORE"),
    ),
)
for heading, column_names in summary_sections:
    print(heading)
    df.select(*column_names).describe().show()
