In [None]:
# Snowpark Pandas API
import modin.pandas as spd
# Import the Snowpark pandas plugin for modin
import snowflake.snowpark.modin.plugin

import snowflake.snowpark.functions as F
from snowflake.snowpark.context import get_active_session

In [None]:
# Pick up the Snowpark session that is already active in this environment
session = get_active_session()

# Tag every query issued through this session so the workload can be
# identified later when troubleshooting or monitoring
query_tag_payload = {
    "origin": "sf_devrel",
    "name": "de_100_vhol",
    "version": {"major": 1, "minor": 0},
    "attributes": {
        "is_quickstart": 1,
        "source": "notebook",
        "vignette": "snowpark_pandas",
    },
}
session.query_tag = query_tag_payload


In [None]:
# Load the shipping log CSV, parsing `shipping_date` as a date column
SHIPPING_LOGS_CSV = 'shipping-logs.csv'
shipping_logs_mdf = spd.read_csv(SHIPPING_LOGS_CSV, parse_dates=['shipping_date'])

# Show the loaded frame as the cell output
shipping_logs_mdf

In [None]:
# Load the order history CSV, parsing `Date` as a date column
ORDER_HISTORY_CSV = 'order-history.csv'
order_history_mdf = spd.read_csv(ORDER_HISTORY_CSV, parse_dates=['Date'])

# Show the loaded frame as the cell output
order_history_mdf

In [None]:
# Map the CSV's human-readable headers onto snake_case column names
SNAKE_CASE_COLUMNS = {
    'Order ID': 'order_id',
    'Customer ID': 'customer_id',
    'Product ID': 'product_id',
    'Product Name': 'product_name',
    'Quantity Ordered': 'quantity_ordered',
    'Price': 'price',
    'Total Price': 'total_price',
    'Date': 'date',
}
order_history_mdf = order_history_mdf.rename(columns=SNAKE_CASE_COLUMNS)

# Confirm the new column labels
order_history_mdf.columns

In [None]:
def clean_price(price_str) -> float:
    """Convert a raw price such as ``'$1,234.50'`` into a float.

    Args:
        price_str: The raw price. Normally a string that may carry a ``$``
            sign, thousands separators and surrounding whitespace. Values
            that are already numeric are accepted as well, so applying the
            function to an already-cleaned column does not fail.

    Returns:
        The numeric price, or ``nan`` when ``price_str`` is ``None``.

    Raises:
        ValueError: If the text left after stripping is not a valid number.
    """
    # A missing value carries through as NaN instead of failing on `.replace`
    if price_str is None:
        return float('nan')
    # Already numeric (e.g. the cleaning step was run twice): nothing to strip
    if isinstance(price_str, (int, float)):
        return float(price_str)
    # Drop the currency sign, thousands separators and surrounding whitespace
    cleaned = str(price_str).replace('$', '').replace(',', '').strip()
    # Convert to float
    return float(cleaned)

In [None]:
# Apply cleaning to price columns
order_history_mdf['price'] = order_history_mdf['price'].apply(clean_price)
order_history_mdf['total_price'] = order_history_mdf['total_price'].apply(clean_price)

# Verify the cleaning
print("\nPrice column data type:", order_history_mdf['price'].dtype)
print("Total price column data type:", order_history_mdf['total_price'].dtype)

In [None]:
# Inner-join orders to their shipping records on the shared order_id key
order_shipping_mdf = order_history_mdf.merge(
    shipping_logs_mdf,
    how='inner',
    on='order_id',
)

# Preview the first five joined rows
order_shipping_mdf.head(5)

In [None]:
# Count the joined rows per product
orders_per_product = order_shipping_mdf.groupby('product_name').size()

# Turn the counts into a frame and rank products from most to least ordered
product_counts_mdf = (
    orders_per_product
    .reset_index(name='order_count')
    .sort_values('order_count', ascending=False)
)

# Display the results
print("\nProduct Order Counts:")
print(product_counts_mdf)

In [None]:
# Count order_id values for every product/status pair; absent pairs are filled with 0
status_counts = order_shipping_mdf.pivot_table(
    values='order_id',
    index='product_name',
    columns='status',
    aggfunc='count',
    fill_value=0,
)

# Append a row-wise total across all status columns
status_counts['Total_Orders'] = status_counts.sum(axis=1)

# Rank products by their total order count, largest first
product_status_pivot_mdf = status_counts.sort_values('Total_Orders', ascending=False)

# Display the results
print("\nProduct Orders by Status:")
print(product_status_pivot_mdf)

In [None]:
-- (Re)create the avalanche test database and its schema
CREATE OR REPLACE DATABASE avalanche_test_db;
CREATE OR REPLACE SCHEMA avalanche_test_schema;

-- Point the session at the new database and schema
USE DATABASE avalanche_test_db;
USE SCHEMA avalanche_test_schema;

-- Stage over the S3 prefix that holds the CSV files, with a directory table enabled
CREATE OR REPLACE STAGE avalanche_stage
    URL = 's3://sfquickstarts/misc/avalanche/csv/'
    DIRECTORY = (
        ENABLE = TRUE
        AUTO_REFRESH = TRUE
    );

-- List the files visible through the stage
LIST @avalanche_stage;


In [None]:
-- Target table for the customer review data (replaced each time this cell runs)
CREATE OR REPLACE TABLE customer_reviews (
    product         VARCHAR,
    date            DATE,
    summary         TEXT,
    sentiment_score FLOAT
);

-- Bulk-load customer_reviews.csv from the stage into the table
COPY INTO customer_reviews
    FROM @avalanche_stage/customer_reviews.csv
    FILE_FORMAT = (
        TYPE = CSV
        FIELD_DELIMITER = ','
        SKIP_HEADER = 1
        FIELD_OPTIONALLY_ENCLOSED_BY = '"'
        TRIM_SPACE = TRUE
        NULL_IF = ('NULL', 'null')
        EMPTY_FIELD_AS_NULL = TRUE
    );

In [None]:
# Reference the customer_reviews table as a Snowpark DataFrame
REVIEWS_TABLE = 'customer_reviews'
customer_reviews_sdf = session.table(REVIEWS_TABLE)

# Show the DataFrame as the cell output
customer_reviews_sdf

In [None]:
# Mean sentiment score, rounded to two decimals
avg_sentiment = F.round(F.avg('SENTIMENT_SCORE'), 2).alias('AVG_SENTIMENT_SCORE')

# Aggregate per product and list the highest average score first
product_sentiment_sdf = (
    customer_reviews_sdf
    .group_by('PRODUCT')
    .agg(avg_sentiment)
    .sort(F.col('AVG_SENTIMENT_SCORE').desc())
)

# Display the results
print("\nAverage Sentiment Scores by Product:")
product_sentiment_sdf.show()

In [None]:
# Persist the per-product sentiment scores, overwriting any existing table
sentiment_writer = product_sentiment_sdf.write.mode('overwrite')
sentiment_writer.save_as_table('PRODUCT_SENTIMENT_ANALYSIS')

In [None]:
-- Create the scheduled task that rebuilds the review sentiment pipeline end to end:
-- it ensures the database and schema exist, recreates the stage and the
-- customer_reviews table, reloads customer_reviews.csv, and rewrites
-- product_sentiment_analysis from the freshly loaded data.
-- NOTE(review): the body issues USE DATABASE / USE SCHEMA; confirm these statements
-- are permitted inside a task body, or fully qualify the object names instead.
-- NOTE(review): the warehouse name COMPUTE_WH is hard-coded; confirm it exists
-- in the account where this task is deployed.
CREATE OR REPLACE TASK avalanche_analysis_task
    WAREHOUSE = COMPUTE_WH
    SCHEDULE = 'USING CRON 0 0 * * * America/New_York'  -- Runs daily at 00:00, America/New_York time
    AS
BEGIN
    -- Create database and schema if they don't exist
    CREATE DATABASE IF NOT EXISTS avalanche_test_db;
    CREATE SCHEMA IF NOT EXISTS avalanche_test_db.avalanche_test_schema;
    
    -- Use the database and schema so the unqualified names below resolve there
    USE DATABASE avalanche_test_db;
    USE SCHEMA avalanche_test_schema;
    
    -- Create the stage for storing files (replaced on every run)
    CREATE OR REPLACE STAGE avalanche_stage
        URL = 's3://sfquickstarts/misc/avalanche/csv/'
        DIRECTORY = (ENABLE = TRUE AUTO_REFRESH = TRUE);
    
    -- Create and load customer reviews table (replaced on every run, so each
    -- run starts from an empty table before the COPY below)
    CREATE OR REPLACE TABLE customer_reviews (
        product VARCHAR,
        date DATE,
        summary TEXT,
        sentiment_score FLOAT
    );
    
    -- Load customer reviews from customer_reviews.csv on the stage
    COPY INTO customer_reviews
    FROM @avalanche_stage/customer_reviews.csv
    FILE_FORMAT = (
        TYPE = CSV
        FIELD_DELIMITER = ','
        SKIP_HEADER = 1
        FIELD_OPTIONALLY_ENCLOSED_BY = '"'
        TRIM_SPACE = TRUE
        NULL_IF = ('NULL', 'null')
        EMPTY_FIELD_AS_NULL = TRUE
    );
    
    -- Calculate and save product sentiment scores: average sentiment_score per
    -- product, rounded to two decimals, highest average first
    CREATE OR REPLACE TABLE product_sentiment_analysis AS
    SELECT 
        product,
        ROUND(AVG(sentiment_score), 2) as avg_sentiment_score
    FROM customer_reviews
    GROUP BY product
    ORDER BY avg_sentiment_score DESC;
END;

In [None]:
-- Enable the task: resume avalanche_analysis_task so its schedule takes effect
ALTER TASK avalanche_analysis_task RESUME;