In [23]:
from configparser import *

In [24]:
from snowflake.snowpark import Session
import snowflake.snowpark.functions as F
from datetime import date

In [25]:
import sys, json
with open("connection_parameters_true.json") as jsonfile:
    credentials_dict = json.load(jsonfile)

In [26]:
def snowpark_session_create():
    session = Session.builder.configs(credentials_dict).create()
    return session

In [27]:
demo_session = snowpark_session_create()

In [28]:
df = demo_session.sql('select * from snowflake_sample_data.tpcds_sf10tcl.item limit 1000')

In [29]:
#df.show()

In [30]:
#for row in df.collect():
#    print(row.I_ITEM_ID)

In [31]:
demo_session.use_database("snowflake_sample_data")
demo_session.use_schema("tpcds_sf10tcl")

In [32]:
df = demo_session.table("item")

df = df.select(
    "I_ITEM_ID",
    "I_REC_START_DATE",
    "I_REC_END_DATE",
    "I_ITEM_DESC",
    "I_CURRENT_PRICE",
    "I_WHOLESALE_COST",
    "I_CATEGORY",
    "I_BRAND",  
).filter((F.col("I_REC_START_DATE").between(date(1999,1,1),date(2010,12,31)))&(F.col("I_REC_START_DATE").is_not_null())&(F.col("I_REC_END_DATE").is_not_null()))

In [33]:
df.show()

------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
|"I_ITEM_ID"       |"I_REC_START_DATE"  |"I_REC_END_DATE"  |"I_ITEM_DESC"                                       |"I_CURRENT_PRICE"  |"I_WHOLESALE_COST"  |"I_CATEGORY"  |"I_BRAND"             |
------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
|AAAAAAAAOBEPDAAA  |1999-10-28          |2001-10-26        |As only schemes must smoke away for a women. St...  |2.63               |2.34                |Jewelry       |univbrand #8          |
|AAAAAAAAECEPDAAA  |1999-10-28          |2001-10-26        |Thoroughly written boxes favour by a provisions...  |2.80               |4.92                |Women         |edu packamalg #2      |
|AAAAAAAAKCEPDAAA  |1999-10-28     

In [34]:
df = df.with_column("Margin",(F.col("I_WHOLESALE_COST") - (F.col("I_CURRENT_PRICE"))))

In [35]:
df.sort(F.col("Margin").desc()).show()

--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
|"I_ITEM_ID"       |"I_REC_START_DATE"  |"I_REC_END_DATE"  |"I_ITEM_DESC"                                       |"I_CURRENT_PRICE"  |"I_WHOLESALE_COST"  |"I_CATEGORY"  |"I_BRAND"          |"MARGIN"  |
--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
|AAAAAAAAICMFBAAA  |1999-10-28          |2001-10-26        |Then strong rates may ask ago local principles....  |3.09               |87.82               |Music         |amalgscholar #2    |84.73     |
|AAAAAAAAEDMBFAAA  |1999-10-28          |2001-10-26        |Relations choke so noble, academic sons. Other ...  |1.22               |85.44               |Home          |univnameless #2    |84.22  

In [36]:
total1999 = df.group_by("I_CATEGORY").agg(F.sum("Margin").alias("Margin_1999"))

In [37]:
total1999.show()

--------------------------------
|"I_CATEGORY"  |"MARGIN_1999"  |
--------------------------------
|Home          |-24468.85      |
|Electronics   |-23135.00      |
|Sports        |-23916.55      |
|Children      |-22216.48      |
|Women         |-21974.54      |
|Jewelry       |-24591.25      |
|Music         |-24850.03      |
|Shoes         |-24385.82      |
|NULL          |-149.31        |
|Books         |-23189.11      |
--------------------------------



In [38]:
df2 = demo_session.table("item")

df2 = df.select(
    "I_ITEM_ID",
    "I_REC_START_DATE",
    "I_REC_END_DATE",
    "I_ITEM_DESC",
    "I_CURRENT_PRICE",
    "I_WHOLESALE_COST",
    "I_CATEGORY",
    "I_BRAND",  
).filter((F.col("I_REC_START_DATE").between(date(2000,1,1),date(2001,12,31))))

df2 = df2.with_column("Margin",(F.col("I_WHOLESALE_COST") - (F.col("I_CURRENT_PRICE"))))

total2001 = df.group_by("I_CATEGORY").agg(F.sum("Margin").alias("Margin_2001"))

In [39]:
total1999 = total1999.with_column_renamed(F.col("Margin_1999"),"Margin_1999")

In [40]:
joinnedDFs = total1999.join(total2001, total1999.I_CATEGORY == total2001.I_CATEGORY).select(
    total1999.col("I_CATEGORY").alias("I_CATEGORY"),
    total1999.col("Margin_1999").alias("Margin_1999"),
    total2001.col("Margin_2001").alias("Margin_2001"),
)

In [41]:
joinnedDFs = (
    joinnedDFs.with_column(
        "Totals",
        F.array_construct(F.col("Margin_1999"),F.col("Margin_2001"))
    )
    .with_column(
        "Comment",
        F.concat(F.lit("Logic was created by "), F.current_user())
    )
    .with_column(
        "TRUST_LEVEL",
        F.when(F.col("I_CATEGORY") == "Sports", 0.4)
        .otherwise(1)
    )
)

In [42]:
joinnedDFs = joinnedDFs.drop(F.col("Margin_1999")).drop(F.col("Margin_2001")).show()

------------------------------------------------------------------------------
|"I_CATEGORY"  |"TOTALS"      |"COMMENT"                     |"TRUST_LEVEL"  |
------------------------------------------------------------------------------
|Sports        |[             |Logic was created by VALDRAZ  |0.4            |
|              |  -23916.55,  |                              |               |
|              |  -23916.55   |                              |               |
|              |]             |                              |               |
|Children      |[             |Logic was created by VALDRAZ  |1.0            |
|              |  -22216.48,  |                              |               |
|              |  -22216.48   |                              |               |
|              |]             |                              |               |
|Women         |[             |Logic was created by VALDRAZ  |1.0            |
|              |  -21974.54,  |                     

In [44]:
demo_session.close()