## 0. Imports & Display Options


In [116]:
import pandas as pd
import numpy as np
from pathlib import Path

# Notebook-wide display settings: never elide columns when rendering a frame,
# and let plain-text reprs use up to 120 characters per line.
pd.options.display.max_columns = None
pd.options.display.width = 120

## 1. Data Ingestion


In [117]:
# Remote location of each raw JSON extract, keyed by a short dataset name.
data_urls = dict(
    pricing="https://storage.googleapis.com/karmicseed-assessments-data/pricing_data.json",
    competitor="https://storage.googleapis.com/karmicseed-assessments-data/competitor_data.json",
    sales="https://storage.googleapis.com/karmicseed-assessments-data/historical_sales.json",
    returns="https://storage.googleapis.com/karmicseed-assessments-data/returns_data.json",
    inventory="https://storage.googleapis.com/karmicseed-assessments-data/inventory_health.json",
    ads="https://storage.googleapis.com/karmicseed-assessments-data/ads_performance.json",
)

# Download the extracts in a fixed order and bind one DataFrame per dataset.
(pricing_df, competitor_df, sales_df,
 returns_df, inventory_df, ads_df) = (
    pd.read_json(data_urls[key])
    for key in ("pricing", "competitor", "sales", "returns", "inventory", "ads")
)

# Human-readable labels -> frames, used for the summary printout below.
dataframes = dict(zip(
    ("Pricing", "Competitor", "Sales", "Returns", "Inventory", "Ads"),
    (pricing_df, competitor_df, sales_df, returns_df, inventory_df, ads_df),
))

# Quick shape check: (rows, columns) for every dataset that was loaded.
for label, frame in dataframes.items():
    print(f"{label} Data Shape:", frame.shape)

Pricing Data Shape: (50, 11)
Competitor Data Shape: (50, 5)
Sales Data Shape: (4243, 7)
Returns Data Shape: (50, 5)
Inventory Data Shape: (50, 28)
Ads Data Shape: (12777, 25)


In [118]:
pricing_df.head(3)

Unnamed: 0,SKU,Product_description,Product Role,Country_of_Origin,FBA Fee,Storage Fee,Handling_Cost,Cost,Current_Price,Minimum_Acceptable_Margin_%,Target_Gross_Margin_%
0,MN-01,Rectangle Tray -14 x 10 Inch - Pk of 25,Core,India,$15.11,$0.44,$0.75,$16.00,$38.90,20%,35%
1,MN-02,Rectangle Tray -12x10 - Pk of 25,Core,India,$13.55,$0.30,$0.75,$12.00,$33.90,20%,35%
2,MN-03,Oval Tray -15x10 Inch - Pk of 25,Core,India,$13.94,$0.34,$0.75,$15.00,$34.90,20%,35%


In [119]:
competitor_df.head(3)

Unnamed: 0,SKU,Avg_Competitor_Price,Lowest_Competitor_Price,Highest_Competitor_Price,Competitor_Count
0,MN-01,$36.90,$31.95,$44.90,9
1,MN-02,$32.50,$27.90,$39.95,8
2,MN-03,$35.20,$29.99,$42.50,7


In [120]:
sales_df.head(3)

Unnamed: 0,Date,SKU,Sessions - Total,Page Views - Total,Units Ordered,Ordered Product Sales,Total Order Items
0,2025-09-01,MN-01,22,32,6,228.1,5
1,2025-09-01,MN-02,28,33,5,158.96,5
2,2025-09-01,MN-03,9,12,2,81.35,2


In [121]:
returns_df.head(3)

Unnamed: 0,SKU,Return Quantity \n(Last 7 days),Return Quantity \n(Last 30 days),Return Quantity \n(Last 60 days),Return Quantity \n(Last 90 days)
0,MN-01,0,9,18,26
1,MN-02,0,4,14,24
2,MN-03,1,4,10,13


In [122]:
inventory_df.head(3)

Unnamed: 0,SKU,condition,total-inventory,available,inbound-shipped,inbound-received,reserved-quantity,unfulfillable-quantity,inv-age-0-to-30-days,inv-age-31-to-60-days,inv-age-61-to-90-days,inv-age-181-to-330-days,inv-age-331-to-365-days,inv-age-365-plus-days,currency,units-shipped-t7,units-shipped-t30,units-shipped-t60,units-shipped-t90,sell-through,item-volume,volume-unit-measurement,storage-type,storage-volume,marketplace,days-of-supply,weeks-of-cover-t30,weeks-of-cover-t90
0,MN-01,New,382,264,40,9,68,1,12,261,56,0,0,0,USD,59,203,386,574,1.33,0.546781,cubic feet,Standard,144.350184,US,47,6,7
1,MN-02,New,306,206,80,2,18,0,127,91,17,0,0,0,USD,29,155,261,417,1.56,0.373971,cubic feet,Standard,77.038026,US,85,7,8
2,MN-03,New,139,95,40,0,3,1,42,8,62,0,0,0,USD,13,67,116,179,1.16,0.425846,cubic feet,Standard,40.45537,US,83,8,9


In [123]:
ads_df.head(3)

Unnamed: 0,date,SKU,impressions,clicks,costPerClick,clickThroughRate,cost,spend,currency-code,purchases1d,purchases7d,purchases14d,purchases30d,unitsSoldClicks1d,unitsSoldClicks7d,unitsSoldClicks14d,unitsSoldClicks30d,sales1d,sales7d,sales14d,sales30d,acosClicks7d,acosClicks14d,roasClicks7d,roasClicks14d
0,2025-09-01,MN-16,2016,14,1.998,0.694,27.97,27.97,USD,1,1,1,1,1,1,1,1,18.89,18.89,18.89,18.89,148.0677607,148.0677607,0.6753664641,0.6753664641
1,2025-09-01,MN-15,1810,33,2.062,1.823,68.06,68.06,USD,4,6,6,6,4,6,6,6,107.45,161.31,161.31,161.31,42.19205257,42.19205257,2.370114605,2.370114605
2,2025-09-01,MN-14,613,9,2.85,1.468,25.65,25.65,USD,0,1,1,1,0,1,1,1,0.0,90.98,90.98,90.98,28.19300945,28.19300945,3.546978558,3.546978558


## 2. Standardize Column Names


In [124]:
def standardize_columns(df):
    """Return a copy of ``df`` whose column labels are normalised.

    Each label is stripped of surrounding whitespace, lower-cased, and has
    every space and hyphen replaced by an underscore (so ``"Sessions - Total"``
    becomes ``"sessions___total"``). Other characters are left untouched.

    The input frame is not modified: the renamed labels are applied to a copy,
    so the raw frame stays available under its original column names and the
    cell can be re-run safely.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame whose column labels are strings.

    Returns
    -------
    pandas.DataFrame
        A new frame with the same data and the normalised column labels.
    """
    normalised = (
        df.columns
        .str.strip()
        .str.lower()
        # regex=False: both patterns are literal characters, not expressions.
        .str.replace(" ", "_", regex=False)
        .str.replace("-", "_", regex=False)
    )
    # Assign the labels on a copy rather than on the caller's object.
    result = df.copy()
    result.columns = normalised
    return result

# Normalise the column labels of every loaded dataset, rebinding each name
# to the frame returned by standardize_columns (same order as ingestion).
(pricing_df, competitor_df, sales_df,
 returns_df, inventory_df, ads_df) = map(
    standardize_columns,
    (pricing_df, competitor_df, sales_df, returns_df, inventory_df, ads_df),
)


In [125]:
pricing_df.head(3)

Unnamed: 0,sku,product_description,product_role,country_of_origin,fba_fee,storage_fee,handling_cost,cost,current_price,minimum_acceptable_margin_%,target_gross_margin_%
0,MN-01,Rectangle Tray -14 x 10 Inch - Pk of 25,Core,India,$15.11,$0.44,$0.75,$16.00,$38.90,20%,35%
1,MN-02,Rectangle Tray -12x10 - Pk of 25,Core,India,$13.55,$0.30,$0.75,$12.00,$33.90,20%,35%
2,MN-03,Oval Tray -15x10 Inch - Pk of 25,Core,India,$13.94,$0.34,$0.75,$15.00,$34.90,20%,35%


In [126]:
competitor_df.head(3)

Unnamed: 0,sku,avg_competitor_price,lowest_competitor_price,highest_competitor_price,competitor_count
0,MN-01,$36.90,$31.95,$44.90,9
1,MN-02,$32.50,$27.90,$39.95,8
2,MN-03,$35.20,$29.99,$42.50,7


In [127]:
sales_df.head(3)

Unnamed: 0,date,sku,sessions___total,page_views___total,units_ordered,ordered_product_sales,total_order_items
0,2025-09-01,MN-01,22,32,6,228.1,5
1,2025-09-01,MN-02,28,33,5,158.96,5
2,2025-09-01,MN-03,9,12,2,81.35,2


In [128]:
inventory_df.head(3)

Unnamed: 0,sku,condition,total_inventory,available,inbound_shipped,inbound_received,reserved_quantity,unfulfillable_quantity,inv_age_0_to_30_days,inv_age_31_to_60_days,inv_age_61_to_90_days,inv_age_181_to_330_days,inv_age_331_to_365_days,inv_age_365_plus_days,currency,units_shipped_t7,units_shipped_t30,units_shipped_t60,units_shipped_t90,sell_through,item_volume,volume_unit_measurement,storage_type,storage_volume,marketplace,days_of_supply,weeks_of_cover_t30,weeks_of_cover_t90
0,MN-01,New,382,264,40,9,68,1,12,261,56,0,0,0,USD,59,203,386,574,1.33,0.546781,cubic feet,Standard,144.350184,US,47,6,7
1,MN-02,New,306,206,80,2,18,0,127,91,17,0,0,0,USD,29,155,261,417,1.56,0.373971,cubic feet,Standard,77.038026,US,85,7,8
2,MN-03,New,139,95,40,0,3,1,42,8,62,0,0,0,USD,13,67,116,179,1.16,0.425846,cubic feet,Standard,40.45537,US,83,8,9


In [129]:
ads_df.head(3)

Unnamed: 0,date,sku,impressions,clicks,costperclick,clickthroughrate,cost,spend,currency_code,purchases1d,purchases7d,purchases14d,purchases30d,unitssoldclicks1d,unitssoldclicks7d,unitssoldclicks14d,unitssoldclicks30d,sales1d,sales7d,sales14d,sales30d,acosclicks7d,acosclicks14d,roasclicks7d,roasclicks14d
0,2025-09-01,MN-16,2016,14,1.998,0.694,27.97,27.97,USD,1,1,1,1,1,1,1,1,18.89,18.89,18.89,18.89,148.0677607,148.0677607,0.6753664641,0.6753664641
1,2025-09-01,MN-15,1810,33,2.062,1.823,68.06,68.06,USD,4,6,6,6,4,6,6,6,107.45,161.31,161.31,161.31,42.19205257,42.19205257,2.370114605,2.370114605
2,2025-09-01,MN-14,613,9,2.85,1.468,25.65,25.65,USD,0,1,1,1,0,1,1,1,0.0,90.98,90.98,90.98,28.19300945,28.19300945,3.546978558,3.546978558


In [183]:
returns_df.head(3)

Unnamed: 0,sku,return_quantity_\n(last_7_days),return_quantity_\n(last_30_days),return_quantity_\n(last_60_days),return_quantity_\n(last_90_days)
0,MN-01,0,9,18,26
1,MN-02,0,4,14,24
2,MN-03,1,4,10,13


## 3. Cleaning & Type Conversion

### Cleaning and Type Conversions for pricing_df table

In [131]:
def _currency_to_number(values):
    """Parse money strings such as "$1,234.50" into numbers.

    The dollar sign, thousands separators and surrounding whitespace are
    removed before parsing. Anything that still cannot be parsed (including
    textual markers such as 'NA', 'nan', 'None' and '') becomes NaN via
    ``errors='coerce'``.
    """
    text = (
        values
        .astype(str)                        # ensure string for the .str methods
        .str.replace('$', '', regex=False)  # remove dollar signs
        .str.replace(',', '', regex=False)  # "1,234.50" would otherwise be coerced to NaN
        .str.strip()
    )
    return pd.to_numeric(text, errors='coerce')


def _percent_to_fraction(values):
    """Parse percentage strings such as "35%" into fractions (0.35).

    Values that cannot be parsed are treated as 0 before dividing by 100.
    """
    text = values.astype(str).str.replace('%', '', regex=False).str.strip()
    return pd.to_numeric(text, errors='coerce').fillna(0) / 100


# Work on a copy so pricing_df keeps the values exactly as loaded.
pricing_clean = pricing_df.copy()

# Fee, cost and price columns arrive as "$"-prefixed strings.
currency_cols = ['fba_fee', 'storage_fee', 'handling_cost', 'cost', 'current_price']
for col in currency_cols:
    pricing_clean[col] = _currency_to_number(pricing_clean[col])

# Missing cost components are filled with 0 so total_unit_cost is always defined;
# current_price is deliberately left as NaN when it is missing.
# NOTE(review): a zero-filled cost understates total_unit_cost - confirm this is acceptable.
for col in ['fba_fee', 'storage_fee', 'handling_cost', 'cost']:
    pricing_clean[col] = pricing_clean[col].fillna(0)

# Margin columns arrive as "20%"-style strings; store them as decimals (0.20).
margin_cols = ['minimum_acceptable_margin_%', 'target_gross_margin_%']
for col in margin_cols:
    pricing_clean[col] = _percent_to_fraction(pricing_clean[col])

# Total unit cost = product cost + FBA fee + storage fee + handling cost.
pricing_clean['total_unit_cost'] = (
    pricing_clean['cost'] +
    pricing_clean['fba_fee'] +
    pricing_clean['storage_fee'] +
    pricing_clean['handling_cost']
)

pricing_clean.head(3)

Unnamed: 0,sku,product_description,product_role,country_of_origin,fba_fee,storage_fee,handling_cost,cost,current_price,minimum_acceptable_margin_%,target_gross_margin_%,total_unit_cost
0,MN-01,Rectangle Tray -14 x 10 Inch - Pk of 25,Core,India,15.11,0.44,0.75,16.0,38.9,0.2,0.35,32.3
1,MN-02,Rectangle Tray -12x10 - Pk of 25,Core,India,13.55,0.3,0.75,12.0,33.9,0.2,0.35,26.6
2,MN-03,Oval Tray -15x10 Inch - Pk of 25,Core,India,13.94,0.34,0.75,15.0,34.9,0.2,0.35,30.03


### Cleaning and Type Conversions for competitor_df table

In [177]:
# Work on a copy so competitor_df keeps the values exactly as loaded.
competitor_clean = competitor_df.copy()

# Price columns arrive as "$"-prefixed strings.
currency_cols = ['avg_competitor_price', 'lowest_competitor_price', 'highest_competitor_price']

for price_col in currency_cols:
    as_text = competitor_clean[price_col].astype(str).str.replace('$', '', regex=False)
    # Textual missing-value markers become real NaN before numeric parsing.
    as_text = as_text.replace(['NA','nan','None',''], np.nan)
    parsed = pd.to_numeric(as_text, errors='coerce')
    # Missing prices are stored as 0.
    # NOTE(review): a 0 price is indistinguishable from "no data" downstream - confirm intent.
    competitor_clean[price_col] = parsed.fillna(0)

# competitor_count: coerce to a number, treat missing as 0, store as integer.
parsed_count = pd.to_numeric(competitor_clean['competitor_count'], errors='coerce')
competitor_clean['competitor_count'] = parsed_count.fillna(0).astype(int)

competitor_clean.head(3)

Unnamed: 0,sku,avg_competitor_price,lowest_competitor_price,highest_competitor_price,competitor_count
0,MN-01,36.9,31.95,44.9,9
1,MN-02,32.5,27.9,39.95,8
2,MN-03,35.2,29.99,42.5,7


### Cleaning and Type Conversions for sales_df table

In [179]:
# Work on a copy of sales_df so the loaded frame is left untouched.
sales_clean = sales_df.copy()

# Parse the date column; values that cannot be parsed become NaT.
sales_clean["date"] = pd.to_datetime(sales_clean["date"], errors="coerce")

# Traffic and order metrics that must be numeric.
numeric_cols = [
    "sessions___total",
    "page_views___total",
    "units_ordered",
    "ordered_product_sales",
    "total_order_items"
]

# Coerce each metric column; values that cannot be parsed become NaN.
for metric in numeric_cols:
    sales_clean[metric] = pd.to_numeric(sales_clean[metric], errors="coerce")

# Replacement values for missing metrics, and the dtype each column ends up with.
missing_defaults = {
    "sessions___total": 0,
    "page_views___total": 0,
    "units_ordered": 0,
    "total_order_items": 0,
    "ordered_product_sales": 0.0
}
final_dtypes = {
    "sessions___total": "int64",
    "page_views___total": "int64",
    "units_ordered": "int64",
    "total_order_items": "int64",
    "ordered_product_sales": "float64"
}

# Fill first (integer dtypes cannot hold NaN), then cast.
sales_clean = sales_clean.fillna(missing_defaults).astype(final_dtypes)

sales_clean.head(3)

Unnamed: 0,date,sku,sessions___total,page_views___total,units_ordered,ordered_product_sales,total_order_items
0,2025-09-01,MN-01,22,32,6,228.1,5
1,2025-09-01,MN-02,28,33,5,158.96,5
2,2025-09-01,MN-03,9,12,2,81.35,2


### Cleaning and Type Conversions for inventory_df table

In [180]:
# Build the cleaned inventory frame as a new object: total_inventory is coerced
# to numeric (unparseable values become NaN); every other column is carried over.
inventory_clean = inventory_df.assign(
    total_inventory=pd.to_numeric(inventory_df["total_inventory"], errors="coerce")
)

inventory_clean.head(3)

Unnamed: 0,sku,condition,total_inventory,available,inbound_shipped,inbound_received,reserved_quantity,unfulfillable_quantity,inv_age_0_to_30_days,inv_age_31_to_60_days,inv_age_61_to_90_days,inv_age_181_to_330_days,inv_age_331_to_365_days,inv_age_365_plus_days,currency,units_shipped_t7,units_shipped_t30,units_shipped_t60,units_shipped_t90,sell_through,item_volume,volume_unit_measurement,storage_type,storage_volume,marketplace,days_of_supply,weeks_of_cover_t30,weeks_of_cover_t90
0,MN-01,New,382.0,264,40,9,68,1,12,261,56,0,0,0,USD,59,203,386,574,1.33,0.546781,cubic feet,Standard,144.350184,US,47,6,7
1,MN-02,New,306.0,206,80,2,18,0,127,91,17,0,0,0,USD,29,155,261,417,1.56,0.373971,cubic feet,Standard,77.038026,US,85,7,8
2,MN-03,New,139.0,95,40,0,3,1,42,8,62,0,0,0,USD,13,67,116,179,1.16,0.425846,cubic feet,Standard,40.45537,US,83,8,9


### Cleaning and Type Conversions for ads_df table

### Cleaning and Type Conversions for returns_df table

In [187]:
# Work on a copy so returns_df keeps the values exactly as loaded.
returns_clean = returns_df.copy()

# The 7-day return column is coerced to a number, missing values are treated
# as 0, and the result is stored as int64. (The label contains a line break.)
seven_day_col = "return_quantity_\n(last_7_days)"
seven_day_values = pd.to_numeric(returns_clean[seven_day_col], errors="coerce")
returns_clean[seven_day_col] = seven_day_values.fillna(0).astype("int64")

# Verify
returns_clean.head(3)


Unnamed: 0,sku,return_quantity_\n(last_7_days),return_quantity_\n(last_30_days),return_quantity_\n(last_60_days),return_quantity_\n(last_90_days)
0,MN-01,0,9,18,26
1,MN-02,0,4,14,24
2,MN-03,1,4,10,13


In [181]:
# Work on a copy so ads_df keeps the values exactly as loaded.
ads_clean = ads_df.copy()

# Rate and ratio metrics that must be numeric.
numeric_fix_cols = [
    "costperclick",
    "clickthroughrate",
    "acosclicks7d",
    "acosclicks14d",
    "roasclicks7d",
    "roasclicks14d"
]

# Coerce each metric; values that cannot be parsed become NaN and are then set to 0.0.
for metric in numeric_fix_cols:
    ads_clean[metric] = pd.to_numeric(ads_clean[metric], errors="coerce").fillna(0.0)

ads_clean.head(3)


Unnamed: 0,date,sku,impressions,clicks,costperclick,clickthroughrate,cost,spend,currency_code,purchases1d,purchases7d,purchases14d,purchases30d,unitssoldclicks1d,unitssoldclicks7d,unitssoldclicks14d,unitssoldclicks30d,sales1d,sales7d,sales14d,sales30d,acosclicks7d,acosclicks14d,roasclicks7d,roasclicks14d
0,2025-09-01,MN-16,2016,14,1.998,0.694,27.97,27.97,USD,1,1,1,1,1,1,1,1,18.89,18.89,18.89,18.89,148.067761,148.067761,0.675366,0.675366
1,2025-09-01,MN-15,1810,33,2.062,1.823,68.06,68.06,USD,4,6,6,6,4,6,6,6,107.45,161.31,161.31,161.31,42.192053,42.192053,2.370115,2.370115
2,2025-09-01,MN-14,613,9,2.85,1.468,25.65,25.65,USD,0,1,1,1,0,1,1,1,0.0,90.98,90.98,90.98,28.193009,28.193009,3.546979,3.546979


## 4. Aggregate Data to SKU Level

###  4.1 Historical Sales

In [170]:
# Collapse the daily sales rows to one row per SKU by summing each metric.
sales_sku_df = sales_clean.groupby("sku", as_index=False).agg(
    sessions=pd.NamedAgg(column="sessions___total", aggfunc="sum"),
    page_views=pd.NamedAgg(column="page_views___total", aggfunc="sum"),
    units_ordered=pd.NamedAgg(column="units_ordered", aggfunc="sum"),
    ordered_product_sales=pd.NamedAgg(column="ordered_product_sales", aggfunc="sum"),
)

# Conversion rate = units ordered / sessions.
# SKUs with zero sessions get a NaN divisor, and the resulting NaN is reported as 0.
session_totals = sales_sku_df["sessions"]
sales_sku_df["conversion_rate"] = (
    sales_sku_df["units_ordered"]
    .div(session_totals.mask(session_totals == 0))
    .fillna(0)
)

sales_sku_df.head()

Unnamed: 0,sku,sessions,page_views,units_ordered,ordered_product_sales,conversion_rate
0,MN-01,1921,2484,587,24582.01,0.30557
1,MN-02,1842,2440,431,15678.89,0.233985
2,MN-03,859,1135,196,7340.65,0.228172
3,MN-04,604,783,191,6002.72,0.316225
4,MN-05,1917,3011,341,8040.18,0.177882


### 4.2 Ads Performance


In [174]:
# Collapse the daily ads rows to one row per SKU by summing each metric.
ads_sku_df = ads_clean.groupby("sku", as_index=False).agg(
    impressions=pd.NamedAgg(column="impressions", aggfunc="sum"),
    clicks=pd.NamedAgg(column="clicks", aggfunc="sum"),
    cost=pd.NamedAgg(column="cost", aggfunc="sum"),
    sales_7d=pd.NamedAgg(column="sales7d", aggfunc="sum"),
    sales_14d=pd.NamedAgg(column="sales14d", aggfunc="sum"),
)

# ACoS = total ad cost / attributed sales, for the 7-day and 14-day sales columns.
# SKUs with zero attributed sales get a NaN divisor, and the resulting NaN is reported as 0.
# NOTE(review): an ACoS of 0 for "spend but no sales" reads as a perfect score - confirm intent.
acos_sources = {"acos_7d": "sales_7d", "acos_14d": "sales_14d"}
for acos_col, sales_col in acos_sources.items():
    attributed_sales = ads_sku_df[sales_col]
    ads_sku_df[acos_col] = (
        ads_sku_df["cost"]
        .div(attributed_sales.mask(attributed_sales == 0))
        .fillna(0)
    )

ads_sku_df.head()


Unnamed: 0,sku,impressions,clicks,cost,sales_7d,sales_14d,acos_7d,acos_14d
0,MN-01,131593,985,2762.06,14235.45,14258.43,0.194027,0.193714
1,MN-02,226158,1298,2286.92,11626.53,11664.02,0.196698,0.196066
2,MN-03,74652,394,670.2,3647.85,3675.34,0.183725,0.18235
3,MN-04,52363,231,903.79,2058.4,2090.28,0.439074,0.432377
4,MN-05,39306,390,637.85,1673.7,1779.54,0.381102,0.358435


## 5. Master Table Creation

In [189]:
# Start the master table from the cleaned pricing data.
master_df = pricing_clean.copy()

# SKU-level datasets to attach, in merge order.
merge_dfs = [
    competitor_clean,
    inventory_clean,
    returns_clean,
    sales_sku_df,
    ads_sku_df
]

# Sequential left joins on SKU: every pricing row is kept, matched or not.
# validate="many_to_one" makes pandas raise MergeError if a right-hand table
# holds the same SKU more than once, instead of silently multiplying rows.
# Both the pricing and the ads tables carry a `cost` column, so the last merge
# yields `cost_x` (from pricing) and `cost_y` (from ads); the suffixed names
# are kept as-is so the saved table's columns do not change.
for df in merge_dfs:
    master_df = master_df.merge(df, on="sku", how="left", validate="many_to_one")

# Fill missing numeric values with 0 (covers SKUs with no match in a joined table).
# NOTE(review): this also zero-fills prices and ratios, where 0 may be mistaken
# for a real value - confirm downstream consumers expect that.
numeric_cols = master_df.select_dtypes(include="number").columns
master_df[numeric_cols] = master_df[numeric_cols].fillna(0)

# Final view
master_df.dtypes


sku                                  object
product_description                  object
product_role                         object
country_of_origin                    object
fba_fee                             float64
storage_fee                         float64
handling_cost                       float64
cost_x                              float64
current_price                       float64
minimum_acceptable_margin_%         float64
target_gross_margin_%               float64
total_unit_cost                     float64
avg_competitor_price                float64
lowest_competitor_price             float64
highest_competitor_price            float64
competitor_count                      int64
condition                            object
total_inventory                     float64
available                             int64
inbound_shipped                       int64
inbound_received                      int64
reserved_quantity                     int64
unfulfillable_quantity          

In [191]:
master_df.head()

Unnamed: 0,sku,product_description,product_role,country_of_origin,fba_fee,storage_fee,handling_cost,cost_x,current_price,minimum_acceptable_margin_%,target_gross_margin_%,total_unit_cost,avg_competitor_price,lowest_competitor_price,highest_competitor_price,competitor_count,condition,total_inventory,available,inbound_shipped,inbound_received,reserved_quantity,unfulfillable_quantity,inv_age_0_to_30_days,inv_age_31_to_60_days,inv_age_61_to_90_days,inv_age_181_to_330_days,inv_age_331_to_365_days,inv_age_365_plus_days,currency,units_shipped_t7,units_shipped_t30,units_shipped_t60,units_shipped_t90,sell_through,item_volume,volume_unit_measurement,storage_type,storage_volume,marketplace,days_of_supply,weeks_of_cover_t30,weeks_of_cover_t90,return_quantity_\n(last_7_days),return_quantity_\n(last_30_days),return_quantity_\n(last_60_days),return_quantity_\n(last_90_days),sessions,page_views,units_ordered,ordered_product_sales,conversion_rate,impressions,clicks,cost_y,sales_7d,sales_14d,acos_7d,acos_14d
0,MN-01,Rectangle Tray -14 x 10 Inch - Pk of 25,Core,India,15.11,0.44,0.75,16.0,38.9,0.2,0.35,32.3,36.9,31.95,44.9,9,New,382.0,264,40,9,68,1,12,261,56,0,0,0,USD,59,203,386,574,1.33,0.546781,cubic feet,Standard,144.350184,US,47,6,7,0,9,18,26,1921,2484,587,24582.01,0.30557,131593,985,2762.06,14235.45,14258.43,0.194027,0.193714
1,MN-02,Rectangle Tray -12x10 - Pk of 25,Core,India,13.55,0.3,0.75,12.0,33.9,0.2,0.35,26.6,32.5,27.9,39.95,8,New,306.0,206,80,2,18,0,127,91,17,0,0,0,USD,29,155,261,417,1.56,0.373971,cubic feet,Standard,77.038026,US,85,7,8,0,4,14,24,1842,2440,431,15678.89,0.233985,226158,1298,2286.92,11626.53,11664.02,0.196698,0.196066
2,MN-03,Oval Tray -15x10 Inch - Pk of 25,Core,India,13.94,0.34,0.75,15.0,34.9,0.2,0.35,30.03,35.2,29.99,42.5,7,New,139.0,95,40,0,3,1,42,8,62,0,0,0,USD,13,67,116,179,1.16,0.425846,cubic feet,Standard,40.45537,US,83,8,9,1,4,10,13,859,1135,196,7340.65,0.228172,74652,394,670.2,3647.85,3675.34,0.183725,0.18235
3,MN-04,Oval Tray - 13x9 Inch - pk of 25,Core,India,12.89,0.3,0.75,12.0,29.9,0.2,0.35,25.94,28.4,24.5,35.9,6,New,171.0,70,80,8,13,0,3,84,0,0,0,0,USD,18,71,128,187,1.65,0.375575,cubic feet,Standard,26.29025,US,99,8,10,1,4,6,16,604,783,191,6002.72,0.316225,52363,231,903.79,2058.4,2090.28,0.439074,0.432377
4,MN-05,Christmas Tray - 17x12 Inch - Pk of 5,Seasonal,India,11.8,0.22,0.75,13.0,21.9,0.25,0.45,25.77,23.9,18.95,0.0,3,New,283.0,0,0,49,234,0,53,141,0,0,0,0,USD,249,491,543,557,1.47,0.270417,cubic feet,Standard,0.0,US,145,0,1,2,4,4,6,1917,3011,341,8040.18,0.177882,39306,390,637.85,1673.7,1779.54,0.381102,0.358435


## 6. Save Processed Outputs

In [None]:

# Output folder for processed tables; created (with parents) when absent.
processed_path = Path("../data/processed")
processed_path.mkdir(parents=True, exist_ok=True)

# Target file name -> frame to write. Row indexes are not written to the CSVs.
csv_outputs = {
    "sales_sku_aggregated.csv": sales_sku_df,
    "ads_sku_aggregated.csv": ads_sku_df,
    "master_pricing_table_v1.csv": master_df,
}
for file_name, frame in csv_outputs.items():
    frame.to_csv(processed_path / file_name, index=False)

print("Processed datasets saved successfully.")