# Package Installation for Snowflake Connection and Data Processing

In [1]:
!pip install  dask[complete]  snowflake  snowflake-connector-python snowflake-snowpark-python snowflake-snowpark-python[pandas] seaborn matplotlib numpy pandas scikit-learn  fosforml plotly

You should consider upgrading via the 'pip install --upgrade pip' command.[0m


# Import necessary libraries

In [2]:
# Import necessary libraries
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
from plotly import express as px
from plotly.subplots import make_subplots
import plotly.graph_objects as go

# Set display options for better output visibility
pd.set_option('display.max_columns', 100)

# This section connects to Snowflake using **fosforml's Snowflake session manager**, retrieves data from a specified Snowflake table, and loads the data into a Pandas DataFrame for further processing and modeling.


In [3]:
# Import the get_session function from fosforml's Snowflake session manager
# This function helps in establishing a Snowflake session for executing queries
from fosforml.model_manager.snowflakesession import get_session

# Establish a Snowflake session using the configured session manager
# The session will allow you to execute SQL queries and retrieve data from Snowflake
my_session = get_session()

#  Define the name of the table you want to query from Snowflake
# This table should contain the data for order statuses, including DELIVERED, IN PROCESS, CANCELLED, and RETURNED orders
table_name = 'ORDER_DATA_FINAL'

#  Execute a SQL query to select all records from the specified Snowflake table
# The data is retrieved from Snowflake and loaded into a Pandas DataFrame for further processing
df = my_session.sql(f"SELECT * FROM {table_name}").to_pandas()

# Display the first few rows of the retrieved DataFrame to verify the data load
df.head()


Unnamed: 0,DIVISION_CODE,DIVISION_NAME,BRAND_CODE,BRAND_NAME,CLASS_CODE,CLASS_NAME,SELLING_CHANNEL,CHAIN,WEB_ORDER_NUMBER,OMS_ORDER_NUMBER,OMS_LINE_ITEM_ID,OMS_TICKET_ID,SKU_ID,QUANTITY,UNIT_PRICE,CURRENT_STATUS,CURRENT_STATUS_DESCRIPTION,TRANSACTION_DATE,SHIP_FROM_WAREHOUSE_CODE,SHIP_FROM_WAREHOUSE_DESCRIPTION,ORDER_DATE,READY_TO_PRINT_DATE,PRINT_TICKET_DATE,VERIFIED_SHIPPED_DATE,BACK_ORDERED_DATE,ORDER_AGE,GIFT_ARTICLE_FLAG,CARRIER_NAME,CARRIER_TRACKING_NUMBER,DROPSHIP_FLAG,ORDER_STATUS,ORDER_CREATION_DATE,ORDER_CONFIRMATION_DATE,WM_ORDER_ID,WM_ORDER_LINE_ID,WM_ORDER_STATUS,WM_PICKING_START_TIME,WM_PICKING_END_TIME,WM_PICKING_AGE,WM_PACKING_START_TIME,WM_PACKING_END_TIME,WM_PACKING_AGE,WM_CREATED_DATE,WM_UPDATED_DATE,WM_SHIPPED_DATE,WM_ORDER_AGE,STORE_ID,STORE_NAME,SHIP_METHOD_CODE,SHIP_METHOD_NAME,SHIP_METHOD_SERVICE,SHIPMENT_SLA,NEW_ORDER_DATE,RETURN_REASON,RETURN_FLAG,RECORD_DATE,RECORD_TIME,GROSS_SALES,RETURNED_STATUS,SHIPPING_DELAY,PREDICTION_RESULT
0,18,Intimate Apparel,11630,Falke,117,Socks,Online,Chain2,WC200001283027,51944671,51944671*1,51944671-1,301224807145,2,20.0,VS,Shipped,07-03-2024,5,Oakbrook,2024-03-29,2024-03-06 13:00:00,2024-03-06 13:00:00,2024-03-07 11:01:23,,1,False,FedEx,******597111,True,,NaT,NaT,,,,NaT,NaT,,NaT,NaT,,NaT,NaT,2024-03-29,,,,FEDXH,FedEx Home Delivery,GROUND,,2022-12-27,,,,,40,DELIVERED,False,1
1,11,Women's Apparel,15332,TSE,2,Sweaters,Online,Chain1,WC100004199018,51986531,51986531*2,51986531-2,301231641008,1,130.0,CX,Cancelled,11-03-2024,5,Oakbrook,2024-02-17,2024-03-10 20:00:52,2024-03-09 13:00:00,NaT,10-03-2024,2,False,,,False,,NaT,NaT,,,,NaT,NaT,,NaT,NaT,,NaT,NaT,2024-02-17,,6247.0,,,,GROUND,,2023-05-07,,,,,130,CANCELLED,False,1
2,11,Women's Apparel,11685,Elie Tahari,4,Pants,Online,Chain1,WC100004151518,51905876,51905876*3,51905876-3,301237540220,1,295.0,VS,Shipped,04-03-2024,5,Oakbrook,2024-03-23,2024-03-02 13:00:00,2024-03-02 13:00:00,2024-03-04 15:02:27,,2,False,FedEx,******422423,True,,NaT,NaT,,,,NaT,NaT,,NaT,NaT,,NaT,NaT,2024-03-23,,,,ON,FedEx - Standard Overnight,OVERNIGHT,,2022-10-19,,,,,295,DELIVERED,False,0
3,11,Women's Apparel,45644,LK Bennett,124,Dress,Online,Chain1,WC100004179746,51953152,51953152*3,51953152-3,301238523284,1,495.0,VS,Shipped,08-03-2024,5,Oakbrook,2024-08-15,2024-03-06 13:00:00,2024-03-06 13:00:00,2024-03-08 05:01:39,,2,False,FedEx,******853960,True,,NaT,NaT,,,,NaT,NaT,,NaT,NaT,,NaT,NaT,2024-08-15,,,,FEDXH,FedEx Home Delivery,GROUND,,2022-10-09,,,,,495,DELIVERED,False,1
4,56,Jewelry,13823,NEST Jewelry,40,Rings,Online,Chain1,WC100004190179,51970366,51970366*1,51970366-1,301208806164,1,95.0,VS,Shipped,08-03-2024,5,Oakbrook,2023-03-20,2024-03-08 13:00:00,2024-03-08 13:00:00,2024-03-08 13:01:33,,0,False,FedEx,******885942,True,,NaT,NaT,,,,NaT,NaT,,NaT,NaT,,NaT,NaT,2023-03-20,,,,FEDXH,FedEx Home Delivery,GROUND,,2022-09-16,,,,,95,DELIVERED,False,0


In [4]:
df['RETURNED_STATUS'].value_counts()

RETURNED_STATUS
DELIVERED     759943
RETURNED       94089
CANCELLED      62752
IN PROCESS      9860
Name: count, dtype: int64

In [5]:
df_sample = df[df['RETURNED_STATUS'].isin(['CANCELLED', 'RETURNED', 'DELIVERED', 'IN PROCESS'])]
df_sample = df.groupby('RETURNED_STATUS').sample(n=10000, replace=True)
df_sample


Unnamed: 0,DIVISION_CODE,DIVISION_NAME,BRAND_CODE,BRAND_NAME,CLASS_CODE,CLASS_NAME,SELLING_CHANNEL,CHAIN,WEB_ORDER_NUMBER,OMS_ORDER_NUMBER,OMS_LINE_ITEM_ID,OMS_TICKET_ID,SKU_ID,QUANTITY,UNIT_PRICE,CURRENT_STATUS,CURRENT_STATUS_DESCRIPTION,TRANSACTION_DATE,SHIP_FROM_WAREHOUSE_CODE,SHIP_FROM_WAREHOUSE_DESCRIPTION,ORDER_DATE,READY_TO_PRINT_DATE,PRINT_TICKET_DATE,VERIFIED_SHIPPED_DATE,BACK_ORDERED_DATE,ORDER_AGE,GIFT_ARTICLE_FLAG,CARRIER_NAME,CARRIER_TRACKING_NUMBER,DROPSHIP_FLAG,ORDER_STATUS,ORDER_CREATION_DATE,ORDER_CONFIRMATION_DATE,WM_ORDER_ID,WM_ORDER_LINE_ID,WM_ORDER_STATUS,WM_PICKING_START_TIME,WM_PICKING_END_TIME,WM_PICKING_AGE,WM_PACKING_START_TIME,WM_PACKING_END_TIME,WM_PACKING_AGE,WM_CREATED_DATE,WM_UPDATED_DATE,WM_SHIPPED_DATE,WM_ORDER_AGE,STORE_ID,STORE_NAME,SHIP_METHOD_CODE,SHIP_METHOD_NAME,SHIP_METHOD_SERVICE,SHIPMENT_SLA,NEW_ORDER_DATE,RETURN_REASON,RETURN_FLAG,RECORD_DATE,RECORD_TIME,GROSS_SALES,RETURNED_STATUS,SHIPPING_DELAY,PREDICTION_RESULT
692563,18,Intimate Apparel,10332,Natori,42,Bras,Others,Chain1,CC62214473,52214473,52214473*2,,301077653920,1,72.0,CX,Cancelled,26-03-2024,6,STORES,2024-03-29,NaT,NaT,NaT,,0,False,FedEx,,false,,NaT,NaT,,,,NaT,NaT,,NaT,NaT,,NaT,NaT,2024-03-29,,,,SDD,FedEx - 2 Day,2ND DAY,,2023-09-29,,,,,72,CANCELLED,False,1
330822,44,Men's,11697,Robert Graham,1,Shirts/Tops,Online,Chain1,WC100004273290,52100687,52100687*6,52100687-5,301124167264,1,171.0,CX,Cancelled,20-03-2024,5,Oakbrook,2024-09-08,2024-03-18 13:00:00,2024-03-18 13:00:00,NaT,,2,False,FedEx,,true,,NaT,NaT,,,,NaT,NaT,,NaT,NaT,,NaT,NaT,2024-09-08,,,,FEDXH,FedEx Home Delivery,GROUND,,2023-01-18,,,,,171,CANCELLED,False,1
810572,53,Beauty,11530,Dior,995,GWP's,Online,Chain1,WC100004282119,52113042,52113042*6,,301227320795,1,0.0,CX,Cancelled,19-03-2024,5,Oakbrook,2023-10-12,NaT,NaT,NaT,,0,True,,,false,,NaT,NaT,,,,NaT,NaT,,NaT,NaT,,NaT,NaT,2023-10-12,,,,,,GROUND,,2022-10-21,,,,,0,CANCELLED,False,1
658745,81,Women's Designer RTW,12587,JW Anderson,1,Shirts/Tops,Online,Chain2,WC200001330323,52301706,52301706*1,52301706-1,301235168679,1,435.0,CX,Cancelled,04-04-2024,3,Tampa Bay,2024-07-17,2024-04-03 13:00:00,NaT,NaT,,1,False,Need it Now,,false,,1900-01-01,1900-01-01,,******,Not in MAO,NaT,NaT,,NaT,NaT,,1900-01-01,1900-01-01,1900-01-01,-9999.0,,,NDTN3,Need it Now,GROUND,,2024-01-17,,,,,435,CANCELLED,False,1
567439,53,Beauty,11303,CREED,312,Fragrance,Online,Chain2,WC200001278989,51922351,51922351*1,51922351-1,301235149364,1,81.0,CX,Cancelled,04-03-2024,3,Tampa Bay,2024-04-25,2024-03-04 13:00:00,NaT,NaT,,0,False,FedEx,,false,,1900-01-01,1900-01-01,,******,Not in MAO,NaT,NaT,,NaT,NaT,,1900-01-01,1900-01-01,1900-01-01,-9999.0,,,FEDXH,FedEx Home Delivery,GROUND,,2023-10-25,,,,,81,CANCELLED,False,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
659119,34,Ladies Shoes,10538,Badgley Mischka,60,Ballerinas/Flats,Online,Chain1,WC100004315568,52162486,52162486*5,52162486-1,301236874869,-1,198.0,RX,Temp Return Credit,01-04-2024,4,Denver,2024-07-25,NaT,NaT,NaT,,9,False,FedEx,******095650,true,,NaT,NaT,,,,NaT,NaT,,NaT,NaT,,NaT,NaT,2024-07-25,,6088.0,,FEDXH,FedEx Home Delivery,GROUND,,2023-11-18,Changed Mind,Partially Returned,2024-10-01,04:23:00.597000,-198,RETURNED,False,1
462406,36,Fashion Accessories,11552,Dolce&Gabbana,89,Sunglasses,Others,Chain1,STA000000000543227,51926106,51926106*2,51926106-1,301234595612,-1,345.0,RT,Returned,26-03-2024,5,Oakbrook,2023-12-19,2024-03-04 14:03:37,2024-03-05 09:07:27,NaT,,22,False,FedEx,******102371,false,,NaT,NaT,,,,NaT,NaT,,NaT,NaT,,NaT,NaT,2023-12-19,,6247.0,,FEDXH,FedEx Home Delivery,GROUND,,2023-06-19,Size Issues,,,,-345,RETURNED,False,1
460914,34,Ladies Shoes,20266,Carrie Forbes,54,Sandals,Online,Chain1,WC100004148691,51900443,51900443*10,51900443-2,301125196638,-1,242.0,RT,Returned,27-03-2024,5,Oakbrook,2024-06-12,2024-03-02 12:43:24,2024-03-02 12:46:52,NaT,,25,False,FedEx,******677317,false,,NaT,NaT,,,,NaT,NaT,,NaT,NaT,,NaT,NaT,2024-06-12,,6247.0,,FEDXH,FedEx Home Delivery,GROUND,,2023-05-07,Size Issues,,,,-242,RETURNED,False,1
526514,35,Designer Handbags,10864,Brunello Cucinelli,20,Tote,Online,Chain1,WC100004370191,52245250,52245250*4,52245250-1,301234950992,-1,3100.0,RT,Returned,05-04-2024,4,Denver,2023-12-20,2024-03-29 15:33:31,2024-03-29 16:17:00,NaT,,7,False,FedEx,******296390,false,,NaT,NaT,,,,NaT,NaT,,NaT,NaT,,NaT,NaT,2023-12-20,,6088.0,,FEDXH,FedEx Home Delivery,GROUND,,2022-08-13,Changed Mind,,,,-3100,RETURNED,False,1


# Dictionary to map status codes to full forms

In [6]:
status_map = {
    'VS': 'Verified Shipped', 
    'RT': 'Ready to Ship',
    'CX': 'Cancelled', #cancelled #training columns
    'RX': 'Return to Sender',
    'BO': 'Backordered', #cancelled #training columns
    'PT': 'Print Ticket', #cancelled #training columns
    'DS': 'Drop Shipped', 
    'RN': 'Returned',
    'RP': 'Replacement',
    'CB': 'Chargeback', #cancelled #training columns
    'RD': 'Ready to Drop' #cancelled #training columns
}

# Replace the values in the CURRENT_STATUS column
df_sample['CURRENT_STATUS'] = df_sample['CURRENT_STATUS'].replace(status_map)

df_sample['CURRENT_STATUS'].value_counts()

CURRENT_STATUS
Cancelled           10000
Verified Shipped    10000
Ready to Ship        8647
Backordered          5424
Print Ticket         2420
Drop Shipped         1296
Return to Sender     1276
Replacement           744
Returned               77
Chargeback             74
Ready to Drop          42
Name: count, dtype: int64

# Converting Date Columns to Datetime Format with Error Handling

In [7]:
# List of columns that contain date-related data
date_columns = ['ORDER_DATE', 'TRANSACTION_DATE', 'VERIFIED_SHIPPED_DATE', 'READY_TO_PRINT_DATE', 
                'PRINT_TICKET_DATE', 'WM_PICKING_START_TIME', 'WM_PICKING_END_TIME', 'WM_SHIPPED_DATE']

#  Loop through each column in the list and convert it to a datetime object
for col in date_columns:
    # Convert each column to datetime, using 'coerce' to handle errors in formatting
    df_sample[col] = pd.to_datetime(df_sample[col], errors='coerce')

#  Converting Quantity and Unit Price to Numeric Format

In [8]:
#  Convert the 'QUANTITY' column to a numeric integer type
df_sample['QUANTITY'] = pd.to_numeric(df_sample['QUANTITY'], downcast='integer')

#  Convert the 'UNIT_PRICE' column to a numeric float type
df_sample['UNIT_PRICE'] = pd.to_numeric(df_sample['UNIT_PRICE'], downcast='float')


# Converting SHIPMENT_SLA to Numeric and Calculating Shipping Delay

In [9]:
# Check if SHIPMENT_SLA is a string and convert it to numeric (assuming it's a number)
df_sample['SHIPMENT_SLA'] = pd.to_numeric(df_sample['SHIPMENT_SLA'], errors='coerce')

# Now calculate the shipping delay
df_sample['SHIPPING_DELAY'] = (df_sample['WM_SHIPPED_DATE'] - df_sample['ORDER_DATE']).dt.days > df_sample['SHIPMENT_SLA']

# Handling Missing Values in Shipping Delay and Date Columns

In [10]:
# Fill NaN values in SHIPPING_DELAY if necessary
df_sample['SHIPPING_DELAY'].fillna(False, inplace=True)  # Assuming False for missing data

# Ensure that date columns are in datetime format
df_sample['ORDER_DATE'] = pd.to_datetime(df_sample['ORDER_DATE'], errors='coerce')
df_sample['WM_SHIPPED_DATE'] = pd.to_datetime(df_sample['WM_SHIPPED_DATE'], errors='coerce')

# Fill missing date columns with a default value if necessary (e.g., using the current date or another business logic)
df_sample['WM_SHIPPED_DATE'].fillna(df_sample['ORDER_DATE'], inplace=True)  # Assuming shipped same as order date if missing


# Converting SHIPMENT_SLA and Handling Shipping Delay Calculation

In [11]:
# Ensure 'SHIPMENT_SLA' values are converted to numeric, coercing errors for non-numeric values
df_sample['SHIPMENT_SLA'] = pd.to_numeric(df_sample['SHIPMENT_SLA'], errors='coerce')

# Convert 'ORDER_DATE' and 'WM_SHIPPED_DATE' to datetime, coercing errors for invalid formats
df_sample['ORDER_DATE'] = pd.to_datetime(df_sample['ORDER_DATE'], errors='coerce')
df_sample['WM_SHIPPED_DATE'] = pd.to_datetime(df_sample['WM_SHIPPED_DATE'], errors='coerce')

# Assuming that if 'WM_SHIPPED_DATE' is missing, the order was shipped on the same day as the order was placed
df_sample['WM_SHIPPED_DATE'].fillna(df_sample['ORDER_DATE'], inplace=True)


# Calculate whether the actual shipping time (in days) exceeded the SLA, resulting in a shipping delay
df_sample['SHIPPING_DELAY'] = (df_sample['WM_SHIPPED_DATE'] - df_sample['ORDER_DATE']).dt.days > df_sample['SHIPMENT_SLA']

# Fill missing values in 'SHIPPING_DELAY' with False, assuming no delay if data is missing
df_sample['SHIPPING_DELAY'].fillna(False, inplace=True)


# Writing Pandas DataFrame to Snowflake Table

In [12]:
# Create a Snowflake DataFrame from the existing Pandas DataFrame (df) using the Snowflake session
training_datadf = my_session.createDataFrame(df_sample)

# Save the Snowflake DataFrame as a table named 'ORDER_DATA_TRAINING' in Snowflake
# The 'overwrite' mode ensures that the table is replaced if it already exists
training_datadf.write.mode("overwrite").save_as_table("ORDER_DATA_TRAINING")
