1. Clone the project repo (https://github.com/PacktPublishing/Data-Science-Solutions-with-Snowflake) and change your working directory to point to it.
2. Install virtualenv `pip install virtualenv`
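3. Create a virtual environment in the project root directory `virtualenv venv --python=python3.8` and activate it before installing the packages below (`source venv/bin/activate`, or `venv\Scripts\activate` on Windows). Snowpark for Python requires Python 3.8.x.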
4. Install Snowflake Connector for Python with Pandas `pip install snowflake-connector-python[pandas]`
5. Install Snowpark for Python with Pandas `pip install snowflake-snowpark-python[pandas]`
6. Install IPython `pip install ipython`
7. Install Jupyter Notebooks `pip install jupyter`
8. Install Sweetviz `pip install sweetviz`

In [None]:
import pandas as pd
import sweetviz as sv

from configparser import ConfigParser
from snowflake.snowpark import Session


In [None]:
# Credentials are kept in a local 'config.ini' file instead of being
# hard-coded in the notebook.
config = ConfigParser()
cfg_path = "../config.ini"

# ConfigParser.read() silently skips files it cannot open and returns the list
# of files it actually parsed. An empty result therefore means the file is
# missing or unreadable; fail here with a clear message rather than with a
# bare KeyError on the first lookup.
if not config.read(cfg_path):
    raise FileNotFoundError(
        f"Could not read '{cfg_path}'. Create it with a [CONNECTION] section "
        "before running the remaining cells."
    )


In [None]:
# Open a Snowpark session from the settings in the [CONNECTION] section
conn_cfg = config["CONNECTION"]

# Each setting is also kept as its own notebook-level name
account, user, password = conn_cfg["account"], conn_cfg["user"], conn_cfg["password"]
role, warehouse = conn_cfg["role"], conn_cfg["warehouse"]
database, schema = conn_cfg["database"], conn_cfg["schema"]

connection_parameters = dict(
    account=account,
    user=user,
    password=password,
    role=role,
    warehouse=warehouse,
    database=database,
    schema=schema,
)

session = Session.builder.configs(connection_parameters).create()

# Smoke-test the connection by fetching a handful of rows
sample_rows = session.sql("select * from global_superstore_orders limit 5").collect()
print(sample_rows)


In [None]:
# Reference the two source tables in Snowflake through the open session
orders_raw, returns_raw = (
    session.table(table_name)
    for table_name in ("GLOBAL_SUPERSTORE_ORDERS", "GLOBAL_SUPERSTORE_RETURNS")
)

# Preview the first five orders as a Pandas DataFrame
orders_preview = orders_raw.limit(5)
orders_preview.toPandas()


In [None]:
# Left-join returns onto orders so every order row is kept, matched on ORDER_ID
join_key = "ORDER_ID"
orders_join = orders_raw.join(returns_raw, join_key, how="left")

# Materialize the joined result as a Pandas DataFrame for the steps that follow
orders_pandas = orders_join.toPandas()


In [None]:
# Build the Sweetviz report for the merged orders, then render it as HTML
orders_report = sv.analyze(orders_pandas)
orders_report.show_html()


In [None]:
def write_to_snowflake(df: pd.DataFrame, table_name: str) -> None:
    """Write a Pandas DataFrame to a Snowflake table.

    Uses the notebook-level ``session`` and asks it to create the table
    automatically (``auto_create_table=True``).

    Args:
        df: The DataFrame to write.
        table_name: Name of the destination table.
    """
    session.write_pandas(
        df=df,
        table_name=table_name,
        auto_create_table=True,
    )


# Write the merged orders/returns data to its own table
write_to_snowflake(df=orders_pandas, table_name="GLOBAL_SUPERSTORE_MERGED")
