# Window Functions in Snowpark

In this section we will perform aggregate and set-based operations on DataFrames using window functions.

Further reading:

1. [How QUALIFY Works — With In-Depth Explanation and Examples](https://medium.com/snowflake/how-qualify-works-with-in-depth-explanation-and-examples-bbde9fc742db)

2. [Window Class](https://docs.snowflake.com/en/developer-guide/snowpark/reference/python/latest/snowpark/window)

3. [Window Functions in Snowflake Snowpark](https://thinketl.com/window-functions-in-snowflake-snowpark/)


In [None]:
-- Point the session at the database and schema that hold the sample data.
use database snowpark_db;
use schema sourced;

-- Preview the employees table that the queries below read.
select *
from snowpark_db.sourced.employees;

In [None]:
from snowflake.snowpark.context import get_active_session

# Session handle returned by get_active_session(); later cells reuse it.
session = get_active_session()

# DataFrame over the employees table; the Snowpark cells below all start from it.
employees_table = "snowpark_db.sourced.employees"
dft = session.table(employees_table)

# Left as the final expression so the notebook shows the table contents.
dft

In [None]:
-- Number each department's employees from the highest salary to the lowest.
-- row_number() hands out consecutive positions, so tied salaries still get
-- distinct numbers.
select
    department,
    employee_name,
    salary,
    row_number() over (partition by department order by salary desc) as rank
from snowpark_db.sourced.employees;

In [None]:
from snowflake.snowpark import Window
from snowflake.snowpark.functions import *

# Window spec shared with the cells below: one partition per department,
# highest salary first.
winSpec = Window.partition_by("department").order_by(col("salary").desc())

# Three plain columns plus the row number of each employee inside the window.
df = dft.select(
    "department",
    "employee_name",
    "salary",
    row_number().over(winSpec).alias("rank"),
)

# A bare `df.queries` in the middle of a cell is evaluated and thrown away
# (only the last expression is rendered), so print the generated SQL instead.
print(df.queries)
df

In [None]:
-- Highest-paid employee of every department.
-- QUALIFY filters on the window function directly, so no subquery is needed
-- to keep only the row numbered 1.
select
    department,
    employee_name,
    salary
from snowpark_db.sourced.employees
qualify row_number() over (partition by department order by salary desc) = 1
order by department;

In [None]:
# DataFrame counterpart of the QUALIFY query.
# filter() is emitted as a WHERE clause, and Snowflake rejects window functions
# there (they are only valid in SELECT, QUALIFY and ORDER BY). So the row
# number is first materialised as a helper column, then filtered on, then
# projected away so the result keeps the same three columns as the SQL version.
df = (
    dft.select(
        "department",
        "employee_name",
        "salary",
        row_number().over(winSpec).alias("row_num"),
    )
    .filter(col("row_num") == 1)
    .select("department", "employee_name", "salary")
    .sort("department")
)

# A bare `df.queries` mid-cell is discarded; print it to see the generated SQL.
print(df.queries)
df

In [None]:
# Add the row number within winSpec as a "rank" column, keep only the rows
# ranked first, and order the result by department.
df = (
    dft.with_column("rank", row_number().over(winSpec))
    .filter(col("rank") == 1)
    .sort("department")
)

# A bare `df.queries` mid-cell is discarded; print it to see the generated SQL.
print(df.queries)
df

In [None]:
-- Each employee next to the salary total of their whole department.
-- The window has no ORDER BY, so the sum covers the entire partition.
select
    employee_name,
    sum(salary) over (partition by department) as total_salaries
from snowpark_db.sourced.employees
-- Sort by name rather than by position so the query survives column reordering.
order by employee_name;

In [None]:
# Each employee next to the salary total of their department.
# NOTE(review): `sum` is expected to be the Snowpark column function pulled in
# by the earlier star import, not the Python builtin - confirm if imports change.
df = dft.select(
    "employee_name",
    sum("salary").over(Window.partition_by("department")).alias("total_salaries"),
).sort("employee_name")

# A bare `df.queries` mid-cell is discarded; print it to see the generated SQL.
print(df.queries)
df

In [None]:
-- Running total of salaries, walking the employees in alphabetical order.
-- No frame is given, so Snowflake's default for an ordered SUM applies
-- (RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW): employees that share
-- a name receive the same running total.
select
    employee_name,
    salary,
    sum(salary) over (order by employee_name) as cum_salary
from snowpark_db.sourced.employees
order by employee_name;

In [None]:
# Running total of salaries over the employees ordered by name.
# NOTE(review): `sum` is expected to be the Snowpark column function pulled in
# by the earlier star import, not the Python builtin - confirm if imports change.
df = dft.select(
    "employee_name",
    "salary",
    sum("salary").over(Window.order_by("employee_name")).alias("cum_salaries"),
).sort("employee_name")

# A bare `df.queries` mid-cell is discarded; print it to see the generated SQL.
print(df.queries)
df

In [None]:
-- For each employee, in alphabetical order: the larger of their own salary
-- and the salary of the employee that follows them.
select
    employee_name,
    salary,
    max(salary) over (
        order by employee_name
        rows between current row and 1 following
    ) as max_salary
from snowpark_db.sourced.employees
order by employee_name;

In [None]:
# For each employee, ordered by name: the maximum salary over a two-row frame
# made of the current row and the one that follows it.
# NOTE(review): `max` is expected to be the Snowpark column function pulled in
# by the earlier star import, not the Python builtin - confirm if imports change.
df = dft.select(
    "employee_name",
    "salary",
    max("salary")
    .over(Window.order_by("employee_name").rows_between(Window.CURRENT_ROW, 1))
    .alias("max_salaries"),
).sort("employee_name")

# A bare `df.queries` mid-cell is discarded; print it to see the generated SQL.
print(df.queries)
df