In [None]:
import pandas as pd
import matplotlib.pyplot as plt
from pathlib import Path

# Locate the project root: one level above the directory holding this notebook.
# `__file__` is not defined inside a Jupyter kernel, so fall back to the current
# working directory (assumes the kernel was started in the notebook's own
# folder -- TODO confirm for other launch setups). The `__file__` branch keeps
# the cell working when the notebook is exported and run as a .py script.
try:
    notebook_dir = Path(__file__).resolve().parent
except NameError:
    notebook_dir = Path.cwd().resolve()
root = notebook_dir.parent
sales_path = root / 'data_crm' / 'processed_sales_latest.csv'

df = pd.read_csv(sales_path)
# Normalise headers (trim whitespace, lower-case) so later cells can use fixed names.
df.columns = [c.strip().lower() for c in df.columns]

# Coerce the numeric inputs: values that cannot be parsed become NaN and are
# then replaced by 0. A column that is absent from the file is created as all
# zeros; passing a scalar default through pd.to_numeric would return a scalar,
# which has no .fillna and would crash the cell.
for col in ('qty', 'sell_price'):
    if col in df.columns:
        df[col] = pd.to_numeric(df[col], errors='coerce').fillna(0)
    else:
        df[col] = 0.0

# Line amount = quantity * unit sell price.
df['amount'] = df['qty'] * df['sell_price']
print('Rows:', len(df))


In [None]:
# Sum qty per sku_id (rows with a NaN sku_id are kept as their own group),
# order by total qty descending and keep the first ten rows.
top_sku = (
    df.groupby('sku_id', dropna=False)['qty']
    .sum()
    .reset_index()
    .sort_values(by='qty', ascending=False)
    .head(10)
)

# Bar chart of the ten totals, one bar per sku_id.
ax = top_sku.plot.bar(
    x='sku_id',
    y='qty',
    figsize=(10, 4),
    title='Top SKUs by Qty',
)
plt.tight_layout()
plt.show()


In [None]:
# Sum qty per store_name (rows with a NaN store_name are kept as their own
# group), order by total qty descending and keep the first ten rows.
top_store = (
    df.groupby('store_name', dropna=False)['qty']
    .sum()
    .reset_index()
    .sort_values(by='qty', ascending=False)
    .head(10)
)

# Bar chart of the ten totals, one bar per store_name.
ax = top_store.plot.bar(
    x='store_name',
    y='qty',
    figsize=(10, 4),
    title='Top Stores by Qty',
)
plt.tight_layout()
plt.show()


In [None]:
# Flag rows that have no usable SKU: either a null value or a string that is
# empty after trimming whitespace. The null check is needed because
# `astype(str)` turns NaN into the text 'nan', which would never equal ''.
# Indexing with df['sku_id'] (rather than df.get) gives a clear KeyError if the
# column is absent instead of an AttributeError on None.
sku = df['sku_id']
missing = sku.isna() | (sku.astype(str).str.strip() == '')
print('Missing SKU rows:', int(missing.sum()))

# Show up to 20 of the affected rows, limited to the identifying columns.
df_missing = df[missing].head(20)
df_missing[['orderid','store_name','sku_key','my_size']]
