# Install Required Library

In [None]:
pip install snowflake-snowpark-python



# Create Snowpark Session

In [None]:
from snowflake.snowpark.session import Session

import os
from getpass import getpass

# Connection settings are read from environment variables so that credentials
# never have to be written into the notebook. The non-secret settings fall back
# to this demo's original values; the password deliberately has no fallback.
username = os.environ.get('SNOWFLAKE_USER', 'MAGICDASH91')
password = os.environ.get('SNOWFLAKE_PASSWORD')
account = os.environ.get('SNOWFLAKE_ACCOUNT', 'tk11073.europe-west4.gcp')
warehouse = os.environ.get('SNOWFLAKE_WAREHOUSE', 'COMPUTE_WH')
database = os.environ.get('SNOWFLAKE_DATABASE', 'DATABASE')
schema = os.environ.get('SNOWFLAKE_SCHEMA', 'PUBLIC')

def snowpark_session_create():
  """Build and return a Snowpark session from the module-level settings.

  When no password is configured (SNOWFLAKE_PASSWORD is unset or empty), it is
  requested interactively with getpass at call time, so it is never stored in
  the notebook source.

  Returns:
    The session produced by ``Session.builder.configs(...).create()``.
  """
  connection_params = {
      "user": username,
      # Prompt only when needed; the module-level `password` is not modified.
      "password": password or getpass('Snowflake password: '),
      "account": account,
      "warehouse": warehouse,
      "database": database,
      "schema": schema
  }

  # Create the session
  return Session.builder.configs(connection_params).create()

# Open the session once; the query cell further down runs SQL through `demo_session`.
demo_session = snowpark_session_create()

# Start Querying your data

In [None]:
# Select every column of the CROSS_SELL table and print a preview
cross_sell_query = 'SELECT * FROM CROSS_SELL'
df = demo_session.sql(cross_sell_query)
df.show()

----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
|"ID"  |"GENDER"  |"AGE"  |"DRIVING_LICENSE"  |"REGION_CODE"  |"PREVIOUSLY_INSURED"  |"VEHICLE_AGE"  |"VEHICLE_DAMAGE"  |"ANNUAL_PREMIUM"  |"POLICY_SALES_CHANNEL"  |"VINTAGE"  |"RESPONSE"  |
----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
|1     |Male      |44     |1                  |28.0           |0                     |> 2 Years      |True              |40454.0           |26.0                    |217        |1           |
|2     |Male      |76     |1                  |3.0            |0                     |1-2 Year       |False             |33536.0           |26.0                    |183        |0           |
|3     |Male      |47     |1                 

# Snowflake Transformation

In [None]:
import snowflake.snowpark.functions as F

In [None]:
# Keep only the rows whose AGE is between 30 and 44, then preview them
age_30_to_44 = F.col('AGE').between(30,44)
df_age = df.filter(age_30_to_44)
df_age.show()

----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
|"ID"  |"GENDER"  |"AGE"  |"DRIVING_LICENSE"  |"REGION_CODE"  |"PREVIOUSLY_INSURED"  |"VEHICLE_AGE"  |"VEHICLE_DAMAGE"  |"ANNUAL_PREMIUM"  |"POLICY_SALES_CHANNEL"  |"VINTAGE"  |"RESPONSE"  |
----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
|1     |Male      |44     |1                  |28.0           |0                     |> 2 Years      |True              |40454.0           |26.0                    |217        |1           |
|10    |Female    |32     |1                  |6.0            |1                     |< 1 Year       |False             |28771.0           |152.0                   |80         |0           |
|13    |Female    |41     |1                 

In [None]:
# Aggregate: average ANNUAL_PREMIUM for each VEHICLE_AGE group
avg_premium_all = F.avg('ANNUAL_PREMIUM').alias('AVERAGE_ANNUAL_PREMIUM')
avg_ann = df.group_by('VEHICLE_AGE').agg(avg_premium_all)
avg_ann.show()

--------------------------------------------
|"VEHICLE_AGE"  |"AVERAGE_ANNUAL_PREMIUM"  |
--------------------------------------------
|> 2 Years      |35654.4994690             |
|1-2 Year       |30523.5821203             |
|< 1 Year       |30119.5520251             |
--------------------------------------------



In [None]:
# Add a column holding the product AGE * VINTAGE (the column is labelled "AGE & VINTAGE")
age_times_vintage = F.col('AGE') * F.col('VINTAGE')
mul_col = df.with_column("AGE & VINTAGE", age_times_vintage)
mul_col.show()

----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
|"ID"  |"GENDER"  |"AGE"  |"DRIVING_LICENSE"  |"REGION_CODE"  |"PREVIOUSLY_INSURED"  |"VEHICLE_AGE"  |"VEHICLE_DAMAGE"  |"ANNUAL_PREMIUM"  |"POLICY_SALES_CHANNEL"  |"VINTAGE"  |"RESPONSE"  |"AGE & VINTAGE"  |
----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
|1     |Male      |44     |1                  |28.0           |0                     |> 2 Years      |True              |40454.0           |26.0                    |217        |1           |9548             |
|2     |Male      |76     |1                  |3.0            |0                     |1-2 Year       |False             |33536.0           |26.0                    

# For other Snowpark functions, see the API reference:

https://docs.snowflake.com/en/developer-guide/snowpark/reference/python/latest/api/snowflake.snowpark.functions.function

# Alter (Edit) an Existing DataFrame

In [None]:
# Rename "AVERAGE_ANNUAL_PREMIUM" to be "AVERAGE_ANNUAL_PREMIUM_ALL_AGE".
# This cell rebinds `avg_ann`, so the rename is guarded: on a second run the old
# column name no longer exists and the rename is skipped instead of failing.
if 'AVERAGE_ANNUAL_PREMIUM' in avg_ann.columns:
    avg_ann = avg_ann.with_column_renamed(F.col('AVERAGE_ANNUAL_PREMIUM'), 'AVERAGE_ANNUAL_PREMIUM_ALL_AGE')
avg_ann.show()

----------------------------------------------------
|"VEHICLE_AGE"  |"AVERAGE_ANNUAL_PREMIUM_ALL_AGE"  |
----------------------------------------------------
|> 2 Years      |35654.4994690                     |
|1-2 Year       |30523.5821203                     |
|< 1 Year       |30119.5520251                     |
----------------------------------------------------



# Drop a Column

In [None]:
# Preview the data without the ID column; the result is not assigned back to `df`
df_without_id = df.drop("ID")
df_without_id.show()

---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
|"GENDER"  |"AGE"  |"DRIVING_LICENSE"  |"REGION_CODE"  |"PREVIOUSLY_INSURED"  |"VEHICLE_AGE"  |"VEHICLE_DAMAGE"  |"ANNUAL_PREMIUM"  |"POLICY_SALES_CHANNEL"  |"VINTAGE"  |"RESPONSE"  |
---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
|Male      |44     |1                  |28.0           |0                     |> 2 Years      |True              |40454.0           |26.0                    |217        |1           |
|Male      |76     |1                  |3.0            |0                     |1-2 Year       |False             |33536.0           |26.0                    |183        |0           |
|Male      |47     |1                  |28.0           |0                     |>

# Join the table

In [None]:
# Second aggregate for the join: average ANNUAL_PREMIUM per VEHICLE_AGE,
# computed from the age-filtered dataframe `df_age`
avg_premium_30_to_44 = F.avg('ANNUAL_PREMIUM').alias('AVERAGE_ANNUAL_PREMIUM_30_TO_44')
avg_ann2 = df_age.group_by('VEHICLE_AGE').agg(avg_premium_30_to_44)
avg_ann2.show()

-----------------------------------------------------
|"VEHICLE_AGE"  |"AVERAGE_ANNUAL_PREMIUM_30_TO_44"  |
-----------------------------------------------------
|> 2 Years      |33157.8273078                      |
|< 1 Year       |27853.8153776                      |
|1-2 Year       |28789.0972791                      |
-----------------------------------------------------



In [None]:
# Join the two aggregates on VEHICLE_AGE, then keep the key and one average from each side
joined = avg_ann.join(avg_ann2, "VEHICLE_AGE")
join_df = joined.select(
    avg_ann.VEHICLE_AGE.alias("VEHICLE_AGE"),
    avg_ann.AVERAGE_ANNUAL_PREMIUM_ALL_AGE,
    avg_ann2.AVERAGE_ANNUAL_PREMIUM_30_TO_44,
)

join_df.show()

----------------------------------------------------------------------------------------
|"VEHICLE_AGE"  |"AVERAGE_ANNUAL_PREMIUM_ALL_AGE"  |"AVERAGE_ANNUAL_PREMIUM_30_TO_44"  |
----------------------------------------------------------------------------------------
|> 2 Years      |35654.4994690                     |33157.8273078                      |
|1-2 Year       |30523.5821203                     |28789.0972791                      |
|< 1 Year       |30119.5520251                     |27853.8153776                      |
----------------------------------------------------------------------------------------

