# interpretation and insights

## imports

In [2]:
import sys
sys.path.append("../src")

import pandas as pd
from pathlib import Path

from data.loader import load_excel_sheets
from analysis.supply_demand import (
    compute_subcategory_supply,
    compute_subcategory_demand,
    merge_supply_demand
)
from analysis.gap_score import (
    compute_gap_score,
    normalize_gap_score,
    classify_gap_level
)
from insights.chart_insights import generate_interpretation_and_insights

# Notebook-wide pandas display settings: show every column of wide frames and
# render floats with thousands separators and two decimal places.
pd.set_option('display.max_columns', None)
pd.set_option('display.float_format', '{:,.2f}'.format)

## load and prepare data

In [3]:
# Load the cleaned workbook and build one sales table in which every sales row
# carries the category and subcategory name of the product sold.
file_path = "../data/processed/Cleaned_data.xlsx"

# Passing a list of sheet names lets pandas parse the workbook in a single
# read_excel call (returning a dict keyed by sheet name) instead of re-reading
# the same file once per sheet.
# NOTE(review): ' Product Lookup' has a leading space; it is kept exactly as
# written - presumably it matches the sheet name in the workbook, confirm.
sheets = pd.read_excel(
    file_path,
    sheet_name=[' Product Lookup', 'Product Subcategory', 'Product Category', 'Sales Data'],
)
product_lookup = sheets[' Product Lookup']
product_subcategory = sheets['Product Subcategory']
product_category = sheets['Product Category']
sales_data = sheets['Sales Data']

# product -> subcategory -> category. Left joins keep every product even when
# a lookup key has no match. validate='many_to_one' makes pandas raise a
# MergeError if a lookup table contains duplicate keys, which would otherwise
# silently duplicate rows and inflate the downstream supply/demand figures.
products_with_subcat = product_lookup.merge(
    product_subcategory,
    on='ProductSubcategoryKey',
    how='left',
    validate='many_to_one',
)
products_full = products_with_subcat.merge(
    product_category,
    on='ProductCategoryKey',
    how='left',
    validate='many_to_one',
)

# Attach the category / subcategory names to each sales row.
sales_full = sales_data.merge(
    products_full[['ProductKey', 'CategoryName', 'SubcategoryName']],
    on='ProductKey',
    how='left',
    validate='many_to_one',
)

print("data loaded:")
print(f"  categories: {sales_full['CategoryName'].nunique()}")
print(f"  subcategories: {sales_full['SubcategoryName'].nunique()}")

data loaded:
  categories: 3
  subcategories: 17


## compute gap analysis

calculate supply (unique products), demand (total quantity sold), and gap scores at the subcategory level.

In [None]:
# Supply and demand per subcategory, joined into a single frame.
supply_df = compute_subcategory_supply(sales_data, products_full)
demand_df = compute_subcategory_demand(sales_full)
supply_demand_df = merge_supply_demand(supply_df, demand_df)

# Score, normalise and label the gap; each helper receives the previous result.
gap_df = compute_gap_score(
    supply_demand_df,
    demand_col='TotalQuantitySold',
    supply_col='UniqueProducts',
)
gap_df = normalize_gap_score(gap_df, gap_col='GapScore')
gap_df = classify_gap_level(gap_df, gap_col='GapScore')

# Reporting view: keep the six columns of interest under the short names the
# rest of the notebook uses, add a percentage gap, and rank by gap score.
summary_names = {
    'CategoryName': 'parent_category',
    'SubcategoryName': 'category',
    'UniqueProducts': 'supply',
    'TotalQuantitySold': 'demand',
    'GapScore': 'gap_score',
    'GapLevel': 'gap_status',
}
gap_summary = (
    gap_df[list(summary_names)]
    .rename(columns=summary_names)
    # the +1 in the denominator keeps the ratio finite when supply is 0
    .assign(
        gap_pct=lambda frame: (
            (frame['demand'] - frame['supply']) / (frame['supply'] + 1) * 100
        ).round(2)
    )
    .sort_values('gap_score', ascending=False)
    .reset_index(drop=True)
)

print(f"\ngap analysis computed for {len(gap_summary)} subcategories")
display(gap_summary.head(10))


gap analysis computed for 37 subcategories


Unnamed: 0,parent_category,category,supply,demand,gap_score,gap_status,gap_pct
0,Accessories,Bottles and Cages,3,15106,3776.75,Critical Gap,377575.0
1,Accessories,Tires and Tubes,11,29772,2481.08,Critical Gap,248008.33
2,Clothing,Caps,1,4151,2076.0,Critical Gap,207500.0
3,Accessories,Fenders,1,3960,1980.5,Critical Gap,197950.0
4,Accessories,Helmets,3,6034,1508.75,Critical Gap,150775.0
5,Accessories,Cleaners,1,1706,853.5,Critical Gap,85250.0
6,Clothing,Gloves,6,2644,377.86,Critical Gap,37685.71
7,Accessories,Hydration Packs,1,695,348.0,Critical Gap,34700.0
8,Clothing,Jerseys,8,3113,346.0,Critical Gap,34500.0
9,Clothing,Socks,4,1063,212.8,Critical Gap,21180.0


## generate interpretation and insights

In [5]:
# Build the narrative report from the subcategory gap summary table.
# `insights_text` is kept in a variable because the export cell below writes
# the same text to disk.
insights_text = generate_interpretation_and_insights(gap_summary)

# Echo the report in the notebook so it can be read alongside the tables.
print(insights_text)


gap analysis interpretation and insights

executive summary
--------------------------------------------------------------------------------

the analysis examined 37 product categories to identify supply-demand imbalances.
16 categories (43.2%) show critical or high gaps requiring immediate action.

most critical shortage:
- category: Bottles and Cages
- gap score: 3776.75
- supply: 3 products
- demand: 15106 orders
- gap: 377575.0%

best balanced category:
- category: Mountain Frames
- gap score: 0.03
- supply: 28 products
- demand: 0 orders


interpretation
--------------------------------------------------------------------------------

the gap score represents the ratio of demand to supply. higher scores indicate
categories where customer orders significantly exceed available product listings.

for Bottles and Cages, the gap score of 3776.75 means demand is
3776.8x higher than supply. this represents potential revenue loss
and customer dissatisfaction due to limited product avail

## export insights to file

In [6]:
# Persist the generated report as a plain-text file under ../results/insights,
# creating the directory tree first if it does not exist yet.
output_dir = Path("../results/insights")
output_dir.mkdir(parents=True, exist_ok=True)

insights_path = output_dir / "interpretation_and_insights.txt"

# Write with an explicit UTF-8 encoding. Without it the platform default
# encoding is used, which can raise UnicodeEncodeError on non-ASCII text (for
# example in category names) and makes the file differ between machines.
insights_path.write_text(insights_text, encoding='utf-8')

print(f"insights saved to: {insights_path}")

insights saved to: ../results/insights/interpretation_and_insights.txt


## summary statistics

In [7]:
# Headline statistics for the gap analysis: overall gap-score distribution,
# share of subcategories per gap level, and a roll-up by parent category.
total_subcategories = len(gap_summary)
gap_scores = gap_summary['gap_score']

print("gap analysis summary")
print("=" * 60)
print(f"total subcategories analyzed: {total_subcategories}")
print(f"average gap score: {gap_scores.mean():.2f}")
print(f"median gap score: {gap_scores.median():.2f}")
print(f"highest gap score: {gap_scores.max():.2f}")
print(f"lowest gap score: {gap_scores.min():.2f}")

print("\ngap level distribution:")
# value_counts() orders the levels from most to least frequent.
for status, count in gap_summary['gap_status'].value_counts().items():
    pct = (count / total_subcategories) * 100
    print(f"  {status}: {count} ({pct:.1f}%)")

# The heading used to read "top 5 categories by parent", but the table below
# is the mean gap score and subcategory count for every parent category (no
# top-5 selection takes place), so the heading now says what is shown.
print("\naverage gap score by parent category:")
# Named aggregation ties each output column to its source column explicitly,
# instead of renaming the result columns by position afterwards.
parent_gaps = gap_summary.groupby('parent_category').agg(
    avg_gap_score=('gap_score', 'mean'),
    subcategory_count=('category', 'count'),
).round(2)
display(parent_gaps.sort_values('avg_gap_score', ascending=False))

gap analysis summary
total subcategories analyzed: 37
average gap score: 402.16
median gap score: 0.50
highest gap score: 3776.75
lowest gap score: 0.03

gap level distribution:
  Low Gap: 20 (54.1%)
  Critical Gap: 10 (27.0%)
  High Gap: 6 (16.2%)
  Moderate Gap: 1 (2.7%)

top 5 categories by parent:


Unnamed: 0_level_0,avg_gap_score,subcategory_count
parent_category,Unnamed: 1_level_1,Unnamed: 2_level_1
Accessories,934.93,12
Clothing,407.72,8
Bikes,132.13,3
Components,0.19,14
