In [1]:
# Snowpark for Python
import snowflake.snowpark
from snowflake.snowpark.session import Session
from snowflake.snowpark.functions import udf, col, call_udf
from snowflake.snowpark.types import *

# Other
import sys, string, io, os, math
import zipfile
import pickle
import pandas as pd
import json
from cachetools import cached

# Show up to 50 columns before pandas falls back to the truncated view (default is 20).
pd.options.display.max_columns = 50
# Allow 150 characters per cell so more of the raw text is visible (default is 50).
pd.options.display.max_colwidth = 150

In [2]:
# Snowflake connection settings.
# Every value can be overridden with a SNOWFLAKE_* environment variable, so the
# notebook can be pointed at another account / warehouse / database / schema
# without editing this cell. The defaults reproduce the values that were
# previously hard-coded here.
# Alternatives that were used before: warehouse 'SQL_WH',
# database 'weld_north_test', schema 'zendesk'.
# (The settings could also be loaded from a JSON file with json.load.)
connection_parameters = {
    "account": os.environ.get("SNOWFLAKE_ACCOUNT", 'wne'),
    "user": os.environ.get("SNOWFLAKE_USER", 'an.jiang@imaginelearning.com'),
    # Browser-based login through the identity provider; no password is kept in the notebook.
    "authenticator": 'externalbrowser',
    "warehouse": os.environ.get("SNOWFLAKE_WAREHOUSE", 'DATA_SCIENCE_WH'),
    "database": os.environ.get("SNOWFLAKE_DATABASE", 'sandbox'),
    "schema": os.environ.get("SNOWFLAKE_SCHEMA", 'scratch'),
}
# Pass a role only when one is explicitly requested; otherwise no role is sent, as before.
if "SNOWFLAKE_ROLE" in os.environ:
    connection_parameters["role"] = os.environ["SNOWFLAKE_ROLE"]

In [3]:
# Open the Snowpark session from the connection settings defined above.
session_builder = Session.builder.configs(connection_parameters)
session = session_builder.create()

# Sanity check: print the warehouse / database / schema the session landed in.
context_rows = session.sql("select current_warehouse(), current_database(), current_schema()").collect()
print(context_rows)

Initiating login request with your identity provider. A browser window should have opened for you to complete the login. If you can't see it, check existing browser windows, or your OS settings. Press CTRL+C to abort and try again...
[Row(CURRENT_WAREHOUSE()='DATA_SCIENCE_WH', CURRENT_DATABASE()='SANDBOX', CURRENT_SCHEMA()='SCRATCH')]


## Setting up the Examples for this Section

In [4]:
# DDL for the demo table; CREATE OR REPLACE lets this cell be re-run safely.
# The "3rd" column is double-quoted in the SQL because it starts with a digit.
create_table_sql = (
    'CREATE OR REPLACE TABLE sample_product_data ('
    'id INT, parent_id INT, category_id INT, '
    'name VARCHAR, serial_number VARCHAR, '
    'key INT, "3rd" INT)'
)
session.sql(create_table_sql).collect()

[Row(status='Table SAMPLE_PRODUCT_DATA successfully created.')]

In [5]:
# Twelve demo rows: four top-level products (parent_id 0), each with an A and a B variant.
insert_rows_sql = """
INSERT INTO sample_product_data VALUES
(1, 0, 5, 'Product 1', 'prod-1', 1, 10),
(2, 1, 5, 'Product 1A', 'prod-1-A', 1, 20),
(3, 1, 5, 'Product 1B', 'prod-1-B', 1, 30),
(4, 0, 10, 'Product 2', 'prod-2', 2, 40),
(5, 4, 10, 'Product 2A', 'prod-2-A', 2, 50),
(6, 4, 10, 'Product 2B', 'prod-2-B', 2, 60),
(7, 0, 20, 'Product 3', 'prod-3', 3, 70),
(8, 7, 20, 'Product 3A', 'prod-3-A', 3, 80),
(9, 7, 20, 'Product 3B', 'prod-3-B', 3, 90),
(10, 0, 50, 'Product 4', 'prod-4', 4, 100),
(11, 10, 50, 'Product 4A', 'prod-4-A', 4, 100),
(12, 10, 50, 'Product 4B', 'prod-4-B', 4, 100)
"""
session.sql(insert_rows_sql).collect()

[Row(number of rows inserted=12)]

In [6]:
# Confirm that all inserted rows are present in the table.
row_count_sql = "SELECT count(*) FROM sample_product_data"
session.sql(row_count_sql).collect()

[Row(COUNT(*)=12)]

## Constructing a DataFrame

In [7]:
# Build a DataFrame that is backed by the "sample_product_data" table.
df_table = session.table("sample_product_data")

# show() prints the first 10 rows by default; the row limit is spelled out here.
df_table.show(10)

-------------------------------------------------------------------------------------
|"ID"  |"PARENT_ID"  |"CATEGORY_ID"  |"NAME"      |"SERIAL_NUMBER"  |"KEY"  |"3rd"  |
-------------------------------------------------------------------------------------
|1     |0            |5              |Product 1   |prod-1           |1      |10     |
|2     |1            |5              |Product 1A  |prod-1-A         |1      |20     |
|3     |1            |5              |Product 1B  |prod-1-B         |1      |30     |
|4     |0            |10             |Product 2   |prod-2           |2      |40     |
|5     |4            |10             |Product 2A  |prod-2-A         |2      |50     |
|6     |4            |10             |Product 2B  |prod-2-B         |2      |60     |
|7     |0            |20             |Product 3   |prod-3           |3      |70     |
|8     |7            |20             |Product 3A  |prod-3-A         |3      |80     |
|9     |7            |20             |Product 3B  |pro

In [8]:
# Inspect the concrete class of the object returned by session.table().
type(df_table)

snowflake.snowpark.table.Table

In [20]:
# Keep only the row whose id equals 1.
# `col` is already imported in the notebook's first cell, so it is not re-imported here.
df = session.table("sample_product_data").filter(col("id") == 1)
df.show()

------------------------------------------------------------------------------------
|"ID"  |"PARENT_ID"  |"CATEGORY_ID"  |"NAME"     |"SERIAL_NUMBER"  |"KEY"  |"3rd"  |
------------------------------------------------------------------------------------
|1     |0            |5              |Product 1  |prod-1           |1      |10     |
------------------------------------------------------------------------------------



In [21]:
# Project the table down to three columns (this rebinds `df`).
df = (
    session.table("sample_product_data")
    .select(col("id"), col("name"), col("serial_number"))
)
df.show()

---------------------------------------
|"ID"  |"NAME"      |"SERIAL_NUMBER"  |
---------------------------------------
|1     |Product 1   |prod-1           |
|2     |Product 1A  |prod-1-A         |
|3     |Product 1B  |prod-1-B         |
|4     |Product 2   |prod-2           |
|5     |Product 2A  |prod-2-A         |
|6     |Product 2B  |prod-2-B         |
|7     |Product 3   |prod-3           |
|8     |Product 3A  |prod-3-A         |
|9     |Product 3B  |prod-3-B         |
|10    |Product 4   |prod-4           |
---------------------------------------

