In [1]:
# Add module folder to notebook
import os
import sys

from dotenv import find_dotenv
# Put the folder that contains the project's .env file on the import path so
# that the local `app` package imported in the next cell can be found.
# find_dotenv() yields an empty string when no .env file is located, in which
# case the entry added is "" — check the .env location if `app` fails to import.
project_root = os.path.dirname(find_dotenv())

# Append only once: re-running this cell must not keep growing sys.path with
# duplicate entries.
if project_root not in sys.path:
    sys.path.append(project_root)

In [23]:
from snowflake.snowpark import functions
import datetime

from app.snowpark_session.session import snowpark_session

In [3]:
# Create the session object that the following cells run their queries through,
# using the project's snowpark_session() helper from app.snowpark_session.session.
# NOTE(review): how the helper obtains its connection settings is not visible
# in this notebook — presumably from the .env file located in the first cell;
# confirm against the helper's source.
sp_session = snowpark_session()

### Run SQL Commands

In [20]:
# Query text: all columns, at most 1000 rows, from
# snowflake_sample_data.tpch_sf10.lineitem.
# NOTE(review): there is no ORDER BY, so which 1000 rows are returned is not
# pinned down — the show() and collect() outputs further down list different
# order keys. Add an ORDER BY if repeatable output matters.
sql = """
select *
from snowflake_sample_data.tpch_sf10.lineitem
limit 1000
"""

# Turn the query text into a DataFrame bound to the session.
# NOTE(review): presumably nothing is executed until an action such as
# show()/collect() is called on `df` — confirm against the Snowpark docs.
df = sp_session.sql(sql)

### Show DataFrame

In [11]:
# Print a text-table preview of the DataFrame, with one column per field of the
# query result (see the table rendered under this cell).
df.show()

------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
|"L_ORDERKEY"  |"L_PARTKEY"  |"L_SUPPKEY"  |"L_LINENUMBER"  |"L_QUANTITY"  |"L_EXTENDEDPRICE"  |"L_DISCOUNT"  |"L_TAX"  |"L_RETURNFLAG"  |"L_LINESTATUS"  |"L_SHIPDATE"  |"L_COMMITDATE"  |"L_RECEIPTDATE"  |"L_SHIPINSTRUCT"   |"L_SHIPMODE"  |"L_COMMENT"                              |
------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
|52007138      |1431204      |56219        |1               |7.00          |7945.91            |0.00          |0.06     |A               |F            

In [12]:
# Fetch the query result into the notebook as a list of Row objects; as the
# last expression of the cell, the whole list is echoed as output.
# NOTE(review): this displays every row of `df` (up to 1000 with the current
# query). Consider limiting the DataFrame first if only a sample is wanted.
df.collect()

[Row(L_ORDERKEY=58624485, L_PARTKEY=982512, L_SUPPKEY=7522, L_LINENUMBER=7, L_QUANTITY=Decimal('14.00'), L_EXTENDEDPRICE=Decimal('22322.58'), L_DISCOUNT=Decimal('0.09'), L_TAX=Decimal('0.08'), L_RETURNFLAG='N', L_LINESTATUS='O', L_SHIPDATE=datetime.date(1997, 5, 11), L_COMMITDATE=datetime.date(1997, 5, 4), L_RECEIPTDATE=datetime.date(1997, 5, 31), L_SHIPINSTRUCT='DELIVER IN PERSON', L_SHIPMODE='REG AIR', L_COMMENT='uffily. blithely pending dep'),
 Row(L_ORDERKEY=58624486, L_PARTKEY=1070302, L_SUPPKEY=20323, L_LINENUMBER=1, L_QUANTITY=Decimal('43.00'), L_EXTENDEDPRICE=Decimal('54706.75'), L_DISCOUNT=Decimal('0.06'), L_TAX=Decimal('0.02'), L_RETURNFLAG='N', L_LINESTATUS='O', L_SHIPDATE=datetime.date(1998, 5, 6), L_COMMITDATE=datetime.date(1998, 5, 16), L_RECEIPTDATE=datetime.date(1998, 5, 20), L_SHIPINSTRUCT='NONE', L_SHIPMODE='SHIP', L_COMMENT=' above the sheaves haggle afte'),
 Row(L_ORDERKEY=58624486, L_PARTKEY=552563, L_SUPPKEY=77569, L_LINENUMBER=2, L_QUANTITY=Decimal('24.00'), L_EX

In [13]:
# Pull the result rows into a local list first, then walk that list and print
# one "<order key>: <extended price>" line per row, reading the fields as
# attributes of each Row.
fetched_rows = df.collect()

for row in fetched_rows:
    print(f"{row.L_ORDERKEY}: {row.L_EXTENDEDPRICE}")

58624485: 22322.58
58624486: 54706.75
58624486: 38772.96
58624486: 3036.90
58624486: 55745.04
58624486: 45517.23
58624487: 83193.30
58624512: 53415.20
58624513: 47999.16
58624513: 42964.37
58624513: 22180.80
58624513: 50627.70
58624514: 13072.50
58624514: 2752.64
58624514: 81955.44
58624514: 28902.93
58624514: 86332.42
58624515: 11829.42
58624515: 43164.80
58624515: 11126.22
58624515: 7907.70
58624515: 87794.28
58624515: 62112.14
58624516: 55572.86
58624516: 5911.25
58624516: 41841.58
58624516: 52944.12
58624517: 40425.18
58624517: 36349.02
58624517: 40647.60
58624517: 74366.67
58624517: 17643.00
58624517: 73529.24
58624517: 24565.60
58624518: 63172.72
58624518: 62524.58
58624518: 44205.20
58624518: 34814.30
58624518: 51492.40
58624519: 24411.14
58624519: 19104.14
58624519: 15998.95
58624519: 10981.62
58624519: 1809.69
58624519: 6888.49
58624544: 21125.12
58624544: 21624.47
58624544: 39451.88
58624544: 29759.40
58624544: 4828.32
58624544: 23315.55
58624545: 37200.86
58624545: 18906.38


### Set Database and Schema for Session

In [15]:
# Select snowflake_sample_data as the database for this session.
sp_session.use_database("snowflake_sample_data")

# Select tpch_sf10 as the schema for this session. The database is chosen
# first, then the schema inside it.
# NOTE(review): presumably this is what allows the unqualified name in the
# later sp_session.table("lineitem") call to resolve — confirm.
sp_session.use_schema("tpch_sf10")

### Get a Table from the Session

In [17]:
# Look up "lineitem" by its unqualified name; the cell output shows that the
# call returns a snowflake.snowpark.table.Table object.
# NOTE(review): the returned object is not assigned to a variable, so it is
# only displayed here — the cells that follow keep working with `df` from the
# SQL query. Assign it to a name if the table handle is meant to be reused.
sp_session.table("lineitem")

<snowflake.snowpark.table.Table at 0x2088e839e20>

### Select from Dataframe and Filter

In [24]:
# Bounds of the ship-date window: 1 January 1997 to 31 December 1997.
from_date = datetime.date(year=1997, month=1, day=1)
to_date = datetime.date(year=1997, month=12, day=31)

# Row predicate on the ship date, built up front so the chain below stays short.
shipped_in_window = functions.col("L_SHIPDATE").between(from_date, to_date)

# Columns to keep, in display order.
kept_columns = (
    "L_SHIPDATE",
    "L_EXTENDEDPRICE",
    "L_SHIPMODE",
    "L_QUANTITY",
    "L_DISCOUNT",
)

# Project to the kept columns, then apply the date predicate. `df` is rebound
# because the cells that follow refer to the result under this name.
df = df.select(*kept_columns).filter(shipped_in_window)

In [25]:
# Preview the narrowed DataFrame: only the five selected columns remain, and
# the ship dates visible in the output all fall within 1997.
df.show()

---------------------------------------------------------------------------------
|"L_SHIPDATE"  |"L_EXTENDEDPRICE"  |"L_SHIPMODE"  |"L_QUANTITY"  |"L_DISCOUNT"  |
---------------------------------------------------------------------------------
|1997-01-05    |35034.44           |TRUCK         |28.00         |0.09          |
|1997-03-03    |53808.81           |TRUCK         |33.00         |0.06          |
|1997-03-24    |53854.40           |SHIP          |40.00         |0.07          |
|1997-04-04    |51264.00           |SHIP          |45.00         |0.05          |
|1997-02-18    |66305.28           |REG AIR       |48.00         |0.04          |
|1997-03-27    |19925.46           |TRUCK         |18.00         |0.07          |
|1997-11-16    |2200.36            |RAIL          |2.00          |0.04          |
|1997-09-14    |18963.90           |TRUCK         |15.00         |0.10          |
|1997-08-29    |1266.35            |FOB           |1.00          |0.06          |
|1997-09-27    |

### Create New Columns

In [None]:
df = df.with_column("REVENUE", 