# Functions

> Snowpark helper functions, including JSON-driven aggregation of Snowflake tables

In [1]:
#| default_exp functions

In [6]:
#| hide
from nbdev.showdoc import *

In [7]:
#| export

from __future__ import annotations
from customfunctions.common import print_hello
from snowflake.snowpark import Session
from snowflake.snowpark.functions import col, sum, avg, count, to_char, current_timestamp

import json

In [None]:
#| export


def hello_function(name: str) -> str:
    """
    Delegate to ``print_hello`` from ``customfunctions.common``.

    Parameters:
    -----------
    name : str
        Value forwarded unchanged to ``print_hello``.

    Returns:
    --------
    str
        Whatever ``print_hello`` returns for ``name``.
    """
    result = print_hello(name)
    return result

In [None]:
#| export


def perform_aggregation(session: Session, aggregation_request: str) -> str:
    """
    Performs data aggregation based on the provided aggregation request.

    This function takes a Snowflake session and an aggregation request in JSON format,
    validates the request, applies the specified filters, groups the data, calculates
    the requested metrics, appends a ``created`` timestamp column, and saves the
    results to a target table in Snowflake (overwriting it if it already exists).

    Parameters:
    -----------
    session : snowflake.snowpark.Session
        An active Snowflake session object.
    aggregation_request : str
        A JSON string containing the aggregation specifications. It should include:
        - source_table: Name of the source table in Snowflake.
        - target_table: Name of the table where results will be saved.
        - group_by: List of columns to group by.
        - metrics: List of metrics to calculate, each with a ``name`` (output column),
          a ``function`` ('sum', 'avg' or 'count') and a ``column`` (input column).
        - filters: Optional list of filters to apply to the data, each with a
          ``column``, an ``operator`` ('between' or 'in') and a ``value``. For
          'between', ``value`` is ``[lower, upper]`` and both bounds are inclusive;
          for 'in', ``value`` is the list of accepted values.
        Any other keys in the request are ignored.

    The aggregation_request should have the following structure:
        - TODO: Think about fine grain control, but for now you can have
                <database>.<schema>.<table> and if the caller has access
                then it will work correctly.
    {
        "source_table": str,
        "target_table": str,
        "group_by": List[str],
        "metrics": List[Dict[str, str]],
        "filters": List[Dict[str, Union[str, List[str]]]]
    }

    Returns:
    --------
    str
        A confirmation message naming the target table the results were saved to.

    Raises:
    -------
    ValueError
        If a filter uses an operator other than 'between' or 'in', or a metric uses
        a function other than 'sum', 'avg' or 'count'. Raised before anything is
        read from or written to Snowflake.
    """

    request = json.loads(aggregation_request)

    # A missing or null "filters" entry simply means "no filtering".
    filter_specs = request.get('filters') or []
    metric_specs = request['metrics']

    # Validate the whole request up front. Silently skipping an unknown operator
    # or function would overwrite the target table with unfiltered rows or with
    # missing metric columns, so fail loudly before any Snowflake work happens.
    supported_operators = ('between', 'in')
    for spec in filter_specs:
        if spec['operator'] not in supported_operators:
            raise ValueError(
                f"Unsupported filter operator {spec['operator']!r}; "
                f"expected one of {supported_operators}"
            )

    supported_functions = ('sum', 'avg', 'count')
    for metric in metric_specs:
        if metric['function'] not in supported_functions:
            raise ValueError(
                f"Unsupported metric function {metric['function']!r}; "
                f"expected one of {supported_functions}"
            )

    df = session.table(request['source_table'])
    for spec in filter_specs:
        column = col(spec['column'])
        if spec['operator'] == 'between':
            lower, upper = spec['value'][0], spec['value'][1]
            df = df.filter((column >= lower) & (column <= upper))
        else:  # 'in' -- the only other operator that passes validation
            df = df.filter(column.isin(spec['value']))

    df = df.group_by(request['group_by'])

    # NOTE: `sum` here is the Snowpark aggregate imported at the top of the
    # file (it shadows the builtin), alongside `avg` and `count`.
    aggregators = {'sum': sum, 'avg': avg, 'count': count}
    aggs = [
        aggregators[metric['function']](col(metric['column'])).alias(metric['name'])
        for metric in metric_specs
    ]
    df = df.agg(*aggs).with_column('created', to_char(current_timestamp(), 'YYYY-MM-DD HH24:MI:SS'))

    df.write.mode("overwrite").save_as_table(request['target_table'])

    return f"Aggregation completed. Results saved to {request['target_table']}"

In [10]:
from customfunctions.connection import SnowflakeConnection
from snowflake.snowpark.version import VERSION

import os

In [11]:
try:

    # Prefer the session Snowpark already reports as active, if there is one.
    from snowflake.snowpark.context import get_active_session
    session = get_active_session()

except Exception as e:
    # No active session could be obtained: report why, then open a new one
    # using credentials from the environment and fixed database/warehouse/schema/role.
    print(f"No active session with get_active_session() moving to create_snowflake_session. Error Seen:\n{e}")
    config = dict(
        user=os.getenv('SNOWFLAKE_USER', ''),
        password=os.getenv('SNOWFLAKE_PASSWORD', ''),
        account=os.getenv('SNOWFLAKE_ACCOUNT', ''),
        database='DATASCIENCE',
        warehouse='DS_WH_XS',
        schema='CUSTOM_FUNCTIONS',
        role='DATA_SCIENTIST',
    )
    sfc = SnowflakeConnection(**config)
    session = sfc.get_session()

    # The first row holds (current_user, current_version); only the version is printed below.
    snowflake_environment = session.sql('SELECT current_user(), current_version()').collect()
    snowpark_version = VERSION

    # Summarise the freshly created connection.
    print('\nConnection Established with the following parameters:')
    print('Role                        : {}'.format(session.get_current_role()))
    print('Database                    : {}'.format(session.get_current_database()))
    print('Schema                      : {}'.format(session.get_current_schema()))
    print('Warehouse                   : {}'.format(session.get_current_warehouse()))
    print('Snowflake version           : {}'.format(snowflake_environment[0][1]))
    print('Snowpark for Python version : {}.{}.{}'.format(*snowpark_version[:3]))


No active session with get_active_session() moving to create_snowflake_session. Error Seen:
(1403): No default Session is found. Please create a session before you call function 'udf' or use decorator '@udf'.

Connection Established with the following parameters:
Role                        : "DATA_SCIENTIST"
Database                    : "DATASCIENCE"
Schema                      : "CUSTOM_FUNCTIONS"
Warehouse                   : "DS_WH_XS"
Snowflake version           : 8.46.1
Snowpark for Python version : 1.26.0


In [14]:
# Example request: total sales, average order value and order count per
# product category and region, for completed/shipped orders dated in 2023.
# "request_id" and "version" are carried in the payload but are not read by
# perform_aggregation.
aggregation_request = json.dumps({
    "request_id": "AGG_001",
    "source_table": "orders",
    "target_table": "orders_aggregates",
    "group_by": ["PRODUCT_CATEGORY", "REGION"],
    "metrics": [
        {
            "name": "TOTAL_SALES",
            "function": "sum",
            "column": "SALES_AMOUNT",
        },
        {
            "name": "AVERAGE_ORDER_VALUE",
            "function": "avg",
            "column": "ORDER_VALUE",
        },
        {
            "name": "ORDER_COUNT",
            "function": "count",
            "column": "ORDER_ID",
        },
    ],
    "filters": [
        {
            "column": "DATE",
            "operator": "between",
            "value": ["2023-01-01", "2023-12-31"],
        },
        {
            "column": "STATUS",
            "operator": "in",
            "value": ["completed", "shipped"],
        },
    ],
    "version": "1.0.0",
})

# Run the aggregation; this overwrites the target table named in the request.
perform_aggregation(session, aggregation_request)

# Preview the first rows of the table that was just written.
session.sql('SELECT * FROM orders_aggregates').show(10)

---------------------------------------------------------------------------------------------------------------
|"PRODUCT_CATEGORY"  |"REGION"  |"TOTAL_SALES"  |"AVERAGE_ORDER_VALUE"  |"ORDER_COUNT"  |"CREATED"            |
---------------------------------------------------------------------------------------------------------------
|Electronics         |North     |121611.75      |502.2308764940239      |251            |2024-12-17 11:47:04  |
|Sports              |South     |127182.67      |511.0550199203187      |251            |2024-12-17 11:47:04  |
|Clothing            |South     |136309.18      |548.9966798418973      |253            |2024-12-17 11:47:04  |
|Home                |South     |151382.34      |480.46474637681155     |276            |2024-12-17 11:47:04  |
|Electronics         |East      |137977.52      |494.1362548262548      |259            |2024-12-17 11:47:04  |
|Books               |West      |135392.85      |489.70191570881224     |261            |2024-12-17 11:4

In [15]:
#| hide
# Run nbdev's export step so cells marked `#| export` are written to the library module.
import nbdev; nbdev.nbdev_export()