In [10]:
import pandas as pd
import numpy as np

# Build a synthetic sales dataset with one row per simulated order line.
# The legacy global NumPy seed is fixed so every re-run draws the same values.
np.random.seed(42)

N_ROWS = 100  # number of rows drawn for every column

regions = ['East', 'West', 'Central', 'South']
categories = ['Furniture', 'Office Supplies', 'Technology']

# NOTE: the draws below share the global random stream, so they must stay in
# this order (Region, Category, Sales, Profit, Discount); reordering them
# would change the values in every column.
data = {
    'Region': np.random.choice(regions, size=N_ROWS),
    'Category': np.random.choice(categories, size=N_ROWS),
    # Continuous columns are drawn uniformly and rounded to 2 decimals.
    'Sales': np.random.uniform(100, 1000, size=N_ROWS).round(2),
    'Profit': np.random.uniform(-100, 300, size=N_ROWS).round(2),
    'Discount': np.random.uniform(0.0, 0.5, size=N_ROWS).round(2),
}

df = pd.DataFrame(data)


In [14]:
# Preview the first five rows of the generated dataset.
df.head(5)

Unnamed: 0,Region,Category,Sales,Profit,Discount
0,Central,Technology,510.88,97.56,0.34
1,South,Office Supplies,296.6,-28.47,0.03
2,East,Office Supplies,474.86,46.59,0.46
3,Central,Office Supplies,894.95,197.67,0.22
4,Central,Office Supplies,391.91,188.38,0.12


In [17]:
# Summary statistics (count, mean, std, min, quartiles, max) for the
# numeric columns; the percentiles listed are pandas' defaults, spelled out.
df.describe(percentiles=[0.25, 0.5, 0.75])

Unnamed: 0,Sales,Profit,Discount
count,100.0,100.0,100.0
mean,562.4163,89.0807,0.2487
std,256.522597,112.95542,0.142614
min,100.47,-97.93,0.01
25%,341.8775,1.3375,0.13
50%,536.7,73.585,0.255
75%,772.97,180.74,0.3425
max,997.97,297.82,0.49


In [20]:
# Quick look at the first rows.
print("🔹 Sample Data:")
print(df.head())

# Count the null entries in each column.
print("\n🔹 Missing Values:")
print(df.isna().sum())

# For every (Region, Category) pair: total Sales, total Profit and the
# mean Discount. reset_index() turns the group keys back into plain columns.
agg_spec = {'Sales': 'sum', 'Profit': 'sum', 'Discount': 'mean'}
grouped = (
    df.groupby(['Region', 'Category'])
      .agg(agg_spec)
      .reset_index()
)

🔹 Sample Data:
    Region         Category   Sales  Profit  Discount
0  Central       Technology  510.88   97.56      0.34
1    South  Office Supplies  296.60  -28.47      0.03
2     East  Office Supplies  474.86   46.59      0.46
3  Central  Office Supplies  894.95  197.67      0.22
4  Central  Office Supplies  391.91  188.38      0.12

🔹 Missing Values:
Region      0
Category    0
Sales       0
Profit      0
Discount    0
dtype: int64


In [23]:
# Give the aggregated columns readable names.
# Renaming by label (rather than assigning a positional list) keeps every
# name attached to the right column even if the aggregation's column order
# changes, and makes re-running this cell a no-op: labels that are no longer
# present are simply ignored by rename().
grouped = grouped.rename(columns={
    'Sales': 'Total Sales',
    'Profit': 'Total Profit',
    'Discount': 'Average Discount',
})

# Report the groups from highest to lowest total sales.
print("\n🔹 Grouped Sales Insight:")
print(grouped.sort_values(by='Total Sales', ascending=False))

# Cross-tab of total Sales: one row per Region, one column per Category.
pivot = pd.pivot_table(df, values='Sales', index='Region', columns='Category', aggfunc='sum')
print("\n🔹 Sales Pivot Table:")
print(pivot)


🔹 Grouped Sales Insight:
     Region         Category  Total Sales  Total Profit  Average Discount
6     South        Furniture      8211.76       1059.12          0.231538
8     South       Technology      7350.28        824.97          0.118333
11     West       Technology      7080.44        421.98          0.333000
2   Central       Technology      5376.23        448.05          0.278000
5      East       Technology      4941.95        704.26          0.397143
3      East        Furniture      4303.26        719.86          0.303333
10     West  Office Supplies      4046.42        314.48          0.324286
1   Central  Office Supplies      3888.08       1182.22          0.284286
9      West        Furniture      3472.72       1269.82          0.232222
4      East  Office Supplies      3153.62        528.49          0.191429
0   Central        Furniture      2779.25       1114.89          0.207143
7     South  Office Supplies      1637.62        319.93          0.118000

🔹 Sales Piv

<a style='text-decoration:none;line-height:16px;display:flex;color:#5B5B62;padding:10px;justify-content:end;' href='https://deepnote.com?utm_source=created-in-deepnote-cell&projectId=e109efa1-a912-492b-a17b-c99a8507c7bd' target="_blank">
Created in <span style='font-weight:600;margin-left:4px;'>Deepnote</span></a>