In [2]:
import pandas as pd
import numpy as np 

In [16]:
# Sample sales ledger: eight transactions spread over four consecutive days.
# Each key becomes a column; the lists are aligned row by row.
data = dict(
    Date=pd.to_datetime([
        '2025-01-05', '2025-01-05',
        '2025-01-06', '2025-01-06',
        '2025-01-07', '2025-01-07',
        '2025-01-08', '2025-01-08',
    ]),
    Region=['North', 'South', 'North', 'East', 'South', 'North', 'East', 'South'],
    Product=['Laptop', 'Mouse', 'Keyboard', 'Laptop', 'Mouse', 'Keyboard', 'Laptop', 'Mouse'],
    Sales_Rep=['Alice', 'Bob', 'Alice', 'Charlie', 'Bob', 'David', 'Charlie', 'Alice'],
    Units_Sold=[2, 5, 3, 1, 4, 2, 3, 6],
    Price_Per_Unit=[1200, 25, 75, 1100, 30, 80, 1150, 28],
)
df = pd.DataFrame(data)

In [18]:
# Show the whole sample frame as the cell's output (a bare last expression
# uses the notebook's rich display rather than print).
df

Unnamed: 0,Date,Region,Product,Sales_Rep,Units_Sold,Price_Per_Unit
0,2025-01-05,North,Laptop,Alice,2,1200
1,2025-01-05,South,Mouse,Bob,5,25
2,2025-01-06,North,Keyboard,Alice,3,75
3,2025-01-06,East,Laptop,Charlie,1,1100
4,2025-01-07,South,Mouse,Bob,4,30
5,2025-01-07,North,Keyboard,David,2,80
6,2025-01-08,East,Laptop,Charlie,3,1150
7,2025-01-08,South,Mouse,Alice,6,28


#### <font color="blue">Basic Aggregations on a Sales Dataset</font>

##### Calculate Total Sales for each row

In [21]:
# Revenue per transaction: element-wise units x unit price, stored on the frame
# so the later aggregation cells can read df['Total_Sales'].
df['Total_Sales'] = df['Units_Sold'].mul(df['Price_Per_Unit'])

# Heading, the frame (now including Total_Sales), then a 40-character rule.
print(
    "--- Original Sales DataFrame ---",
    df,
    "\n" + "="*40 + "\n",
    sep="\n",
)

--- Original Sales DataFrame ---
        Date Region   Product Sales_Rep  Units_Sold  Price_Per_Unit  \
0 2025-01-05  North    Laptop     Alice           2            1200   
1 2025-01-05  South     Mouse       Bob           5              25   
2 2025-01-06  North  Keyboard     Alice           3              75   
3 2025-01-06   East    Laptop   Charlie           1            1100   
4 2025-01-07  South     Mouse       Bob           4              30   
5 2025-01-07  North  Keyboard     David           2              80   
6 2025-01-08   East    Laptop   Charlie           3            1150   
7 2025-01-08  South     Mouse     Alice           6              28   

   Total_Sales  
0         2400  
1          125  
2          225  
3         1100  
4          120  
5          160  
6         3450  
7          168  




##### Total Units Sold across all sales

In [24]:
# Sum the Units_Sold column over every transaction in the frame.
total_units_sold = df.loc[:, 'Units_Sold'].sum()
print("Total Units Sold:", total_units_sold)

Total Units Sold: 26


##### Maximum Total Sales recorded

In [33]:
# Largest single-row Total_Sales value.
max_total_sales = df.loc[:, 'Total_Sales'].max()
print("Maximum Single Sale Amount: ${:.2f}".format(max_total_sales))

Maximum Single Sale Amount: $3450.00


##### Average Price Per Unit

In [36]:
# Plain (unweighted) mean of the Price_Per_Unit column across all rows.
avg_price_per_unit = df.loc[:, 'Price_Per_Unit'].mean()
print("Average Price Per Unit: ${:.2f}".format(avg_price_per_unit))

Average Price Per Unit: $461.00


##### Number of unique products sold

In [41]:
# Count of distinct values in the Product column.
num_uniq_products = df.loc[:, 'Product'].nunique()
print("Number of Unique Products Sold:", num_uniq_products)

Number of Unique Products Sold: 3


##### Descriptive statistics for numerical columns

In [44]:
# Summary statistics (count, mean, std, min, quartiles, max) for the three
# numeric columns, printed under a heading.
print(
    "\nDescriptive Statistics for Sales Data:",
    df.loc[:, ['Units_Sold', 'Price_Per_Unit', 'Total_Sales']].describe(),
    sep="\n",
)


Descriptive Statistics for Sales Data:
       Units_Sold  Price_Per_Unit  Total_Sales
count    8.000000        8.000000     8.000000
mean     3.250000      461.000000   968.500000
std      1.669046      571.547523  1281.909067
min      1.000000       25.000000   120.000000
25%      2.000000       29.500000   151.250000
50%      3.000000       77.500000   196.500000
75%      4.250000     1112.500000  1425.000000
max      6.000000     1200.000000  3450.000000


#### <font color="blue">GroupBy Aggregations</font>

##### 1. Total Sales by Region

In [58]:
# Sum Total_Sales within each Region; the result is a Series indexed by Region.
sales_by_region = df['Total_Sales'].groupby(df['Region']).sum()
print("\nTotal Sales by Region:", sales_by_region, sep="\n")


Total Sales by Region:
Region
East     4550
North    2785
South     413
Name: Total_Sales, dtype: int64


##### 2. Average Units Sold by Product

In [62]:
# Mean Units_Sold per Product; the result is a Series indexed by Product.
avg_units_by_prod = (
    df
    .groupby('Product')
    ['Units_Sold']
    .mean()
)
print("\nAverage Units Sold by Product:", avg_units_by_prod, sep="\n")


Average Units Sold by Product:
Product
Keyboard    2.5
Laptop      2.0
Mouse       5.0
Name: Units_Sold, dtype: float64


##### 3. Sales performance per Sales Representative (Total Sales, Total Units, and Number of Sales)

In [65]:
# Per-representative scorecard built with named aggregation: each keyword is an
# output column, each tuple is (source column, aggregation).
# The heading now names its subject, matching the labels used by the other cells.
print("\nSales Rep Performance (Total Sales, Total Units, Number of Sales):")
sales_rep_performance = df.groupby('Sales_Rep').agg(
    Total_Sales=('Total_Sales', 'sum'),
    Total_Units_Sold=('Units_Sold', 'sum'),
    # 'size' counts rows per group, i.e. transactions. 'count' would count only
    # non-null Total_Sales values and under-report if any value were missing.
    Num_Sales=('Total_Sales', 'size'),
)
print(sales_rep_performance)


 (Total Sales, Total Units, Number of Sales):
           Total_Sales  Total_Units_Sold  Num_Sales
Sales_Rep                                          
Alice             2793                11          3
Bob                245                 9          2
Charlie           4550                 4          2
David              160                 2          1


##### 4. Multiple aggregations for a single column, grouped by multiple columns

In [70]:
# Group on the (Region, Product) pair and report min / max / mean of Units_Sold;
# the result is a DataFrame with a two-level row index.
region_product_units = (
    df
    .groupby(['Region', 'Product'])
    ['Units_Sold']
    .agg(['min', 'max', 'mean'])
)
print(
    "\nUnits Sold Summary by Region and Product (Min, Max, Mean):",
    region_product_units,
    sep="\n",
)



Units Sold Summary by Region and Product (Min, Max, Mean):
                 min  max  mean
Region Product                 
East   Laptop      1    3   2.0
North  Keyboard    2    3   2.5
       Laptop      2    2   2.0
South  Mouse       4    6   5.0
