In [1]:
import pandas as pd
import numpy as np

# Read the raw DataCo supply-chain CSV into `df`.
# The file is decoded as latin-1 rather than pandas' default UTF-8.
df = pd.read_csv(
    filepath_or_buffer='DataCoSupplyChainDataset.csv',
    encoding='latin-1',
)

In [2]:
# Build the monthly product-performance fact table:
# one row per (month, product, department) with summed sales metrics.

# Parse the order timestamp and derive the calendar year/month used as grouping keys.
# NOTE: these assignments modify `df` itself (converted date column plus new
# 'Year'/'Month' columns); kept so any cell that reads them keeps working.
df['order date (DateOrders)'] = pd.to_datetime(df['order date (DateOrders)'])
df['Year'] = df['order date (DateOrders)'].dt.year
df['Month'] = df['order date (DateOrders)'].dt.month

# The whole transformation is one chain, so the cell produces the table in a
# single expression instead of mutating it step by step with inplace=True.
fact_product_monthly = (
    df.groupby(['Year', 'Month', 'Product Card Id', 'Department Id'])
    .agg({
        # Descriptive attributes: take the first value seen in each group
        # (presumably constant per product -- verify against the raw data).
        'Product Image': 'first',
        'Product Name': 'first',
        'Product Price': 'first',
        'Category Id': 'first',
        # Additive measures: summed over all rows of the group.
        'Sales': 'sum',
        'Order Item Quantity': 'sum',
        'Order Item Discount': 'sum',
        'Order Item Total': 'sum',
        'Order Profit Per Order': 'sum',
        # Number of distinct orders that contain the product in that month.
        'Order Id': 'nunique',
    })
    .reset_index()
    # Month key as a datetime set to the first day of the month;
    # pd.to_datetime assembles it from the Year/Month/day columns.
    .assign(Date=lambda monthly: pd.to_datetime(monthly[['Year', 'Month']].assign(day=1)))
    # Year and Month are now redundant with Date.
    .drop(columns=['Year', 'Month'])
    # Only columns whose name actually changes are listed; the rest keep their names.
    .rename(columns={
        'Product Card Id': 'Product Id',
        'Sales': 'Total Sales (before discount)',
        'Order Item Quantity': 'Units Sold',
        'Order Item Discount': 'Amount Discounted',
        'Order Item Total': 'Total Sales (After Discount)',
        'Order Profit Per Order': 'Total Profit',
        'Order Id': 'Order Count',
    })
    # Prefix the department id with 'D', turning it into a string key (e.g. 2 -> 'D2').
    .assign(**{'Department Id': lambda monthly: 'D' + monthly['Department Id'].astype(str)})
)

# Display the first few rows of the fact_product_monthly DataFrame
print(fact_product_monthly.head())

   Product Id Department Id  \
0          37            D2   
1          44            D2   
2          93            D2   
3         116            D2   
4         134            D2   

                                       Product Image  \
0  http://images.acmesports.sports/adidas+Kids%27...   
1  http://images.acmesports.sports/adidas+Men%27s...   
2  http://images.acmesports.sports/Under+Armour+M...   
3  http://images.acmesports.sports/Nike+Men%27s+C...   
4  http://images.acmesports.sports/Nike+Women%27s...   

                                 Product Name  Product Price  Category Id  \
0       adidas Kids' F5 Messi FG Soccer Cleat      34.990002            3   
1  adidas Men's F10 Messi TRX FG Soccer Cleat      59.990002            3   
2          Under Armour Men's Tech II T-Shirt      24.990000            5   
3                  Nike Men's Comfort 2 Slide      44.990002            6   
4          Nike Women's Legend V-Neck T-Shirt      25.000000            7   

   Total Sale

In [3]:
# Show summary statistics of the monthly fact table's columns.
fact_product_monthly.describe()

Unnamed: 0,Product Id,Product Price,Category Id,Total Sales (before discount),Units Sold,Amount Discounted,Total Sales (After Discount),Total Profit,Order Count,Date
count,1818.0,1818.0,1818.0,1818.0,1818.0,1818.0,1818.0,1818.0,1818.0,1818
mean,634.755226,73.85158,29.331133,20233.62762,211.264576,2051.913313,18181.739483,2182.014837,87.878438,2016-05-12 07:19:36.237623552
min,19.0,9.99,2.0,95.939999,1.0,4.25,90.66,-2519.929992,1.0,2015-01-01 00:00:00
25%,295.0,24.99,16.0,699.800023,23.0,66.5575,624.217497,54.085001,9.0,2015-09-01 00:00:00
50%,771.0,34.990002,35.0,1119.70002,33.0,114.225,1004.515005,156.395002,12.0,2016-05-01 00:00:00
75%,885.0,65.0,40.0,2650.11757,48.0,280.4775,2377.365053,417.467501,16.0,2017-02-01 00:00:00
max,1363.0,1999.98999,76.0,525000.0,2432.0,53497.5,471502.5,52209.79022,657.0,2018-01-01 00:00:00
std,318.878744,120.413351,14.292846,46267.886693,479.381976,4693.188111,41575.162295,5078.271862,172.276183,


In [4]:
# Number of rows in the monthly fact table.
len(fact_product_monthly)

1818

In [5]:
# Write the fact table to an Excel workbook, omitting the DataFrame index.
# NOTE(review): the file name spells "Peformance"; it is left unchanged because
# other consumers may open the file by this exact name -- confirm before renaming.
fact_product_monthly.to_excel(
    excel_writer='ProcessedMonthlyProductPeformanceFact.xlsx',
    index=False,
)

# Reached only if the export above did not raise.
print("Monthly Product Performance Fact table saved successfully")

Monthly Product Performance Fact table saved successfully
