# Import Section

In [1]:
from snowflake.snowpark import Session
from snowflake.snowpark.functions import count
from dotenv import load_dotenv
import os
import toml
from pathlib import Path

  machar = _get_machar(dtype)
  import pkg_resources


# Set Session

In [5]:
# Get Secret
# The password is read from the environment (after loading a .env file) and is
# applied on top of the settings read from connections.toml below.
load_dotenv()
snowflake_password = os.getenv("PASSWORD")

# Get connections.toml path
# The file is looked up in the 'config' directory two levels above the
# current working directory.
current_notebook_dir = Path(os.getcwd())
config_file_path = current_notebook_dir.parent.parent / 'config' / 'connections.toml'

# Only reading and parsing the file sits inside the try, so each handler maps
# to exactly one failure mode. Any other error (e.g. PermissionError) now
# propagates with its original type instead of being re-wrapped as a bare
# Exception.
try:
    with open(config_file_path, 'r', encoding='utf-8') as f:
        config = toml.load(f)
except FileNotFoundError as e:
    raise FileNotFoundError(
        f"Configuration file '{config_file_path}' not found. Make sure the path is correct."
    ) from e
except toml.TomlDecodeError as e:
    raise ValueError(
        f"TOML parsing error in file '{config_file_path}': {e}. Check the file syntax."
    ) from e

# Validated outside the try: previously this ValueError was raised inside it
# and immediately swallowed by the blanket `except Exception`, which reported
# it as an "unexpected error" and discarded the specific exception type.
if "connections" not in config or "my_connection" not in config["connections"]:
    raise ValueError(f"Section 'my_connection' not found in file '{config_file_path}'.")

connection_parameters = config["connections"]["my_connection"]

# Override the password only when PASSWORD is actually set, so a missing
# environment variable does not replace a configured value with None.
if snowflake_password is not None:
    connection_parameters['password'] = snowflake_password
session = Session.builder.configs(connection_parameters).create()

print("Connected to Snowflake!")
print(f"Database: {session.get_current_database()}")
print(f"Schema: {session.get_current_schema()}")
print(f"Warehouse: {session.get_current_warehouse()}")

Connected to Snowflake!
Database: "CHURN_PROJECT"
Schema: None
Warehouse: "ROMAN_WH"


# Get Data

In [6]:
# Quick exploratory pass over the raw churn table: row count, schema, a data
# sample, per-category counts and summary statistics. The session is closed at
# the end of the cell whether or not every query succeeds.
try:
    df = session.table("RAW.CUSTOMER_CHURN_RAW")

    print("\n📊 Row count:", df.count())
    df.print_schema()

    print("\n🔍 Sample data:")
    df.show(10)

    # (heading, column) pairs for the per-category row counts, in display order.
    categorical_breakdowns = (
        ("\n🧑‍🤝_ Gender distribution:", "GENDER"),
        ("\n🌍 Geography distribution:", "GEOGRAPHY"),
        ("\n💳 Card types:", "CARD_TYPE"),
        ("\n💳 Has credit card:", "HASCRCARD"),
        ("\n🔥 Active members:", "ISACTIVEMEMBER"),
        ("\n🚪 Exited distribution:", "EXITED"),
    )
    for heading, column in categorical_breakdowns:
        print(heading)
        df.group_by(column).agg(count("*").alias("count")).show()

    print("\n📈 Summary statistics (AGE, BALANCE, CREDITSCORE):")
    df.select("AGE", "BALANCE", "CREDITSCORE").describe().show()

    print("\n💰 Salary vs. Satisfaction:")
    df.select("ESTIMATEDSALARY", "SATISFACTION_SCORE").describe().show()
finally:
    # Close in `finally` so a failing query does not leave the session open.
    session.close()
    print("\nSession closed!")


📊 Row count: 10000
root
 |-- "ROWNUMBER": LongType() (nullable = True)
 |-- "CUSTOMERID": StringType(16777216) (nullable = True)
 |-- "SURNAME": StringType(16777216) (nullable = True)
 |-- "CREDITSCORE": LongType() (nullable = True)
 |-- "GEOGRAPHY": StringType(16777216) (nullable = True)
 |-- "GENDER": StringType(16777216) (nullable = True)
 |-- "AGE": LongType() (nullable = True)
 |-- "TENURE": LongType() (nullable = True)
 |-- "BALANCE": DoubleType() (nullable = True)
 |-- "NUMOFPRODUCTS": LongType() (nullable = True)
 |-- "HASCRCARD": LongType() (nullable = True)
 |-- "ISACTIVEMEMBER": LongType() (nullable = True)
 |-- "ESTIMATEDSALARY": DoubleType() (nullable = True)
 |-- "EXITED": LongType() (nullable = True)
 |-- "COMPLAIN": LongType() (nullable = True)
 |-- "SATISFACTION_SCORE": LongType() (nullable = True)
 |-- "CARD_TYPE": StringType(16777216) (nullable = True)
 |-- "POINT_EARNED": LongType() (nullable = True)
 |-- "FILE_NAME": StringType(16777216) (nullable = True)
 |-- "LO