In [2]:
import pandas as pd
import numpy as np

#### Load the dataset using Pandas ---

In [27]:
df = pd.read_csv('datset - Sheet1.csv')

In [29]:
df.head()

Unnamed: 0,OrderID,Date,Region,CustomerName,Product,Quantity,UnitPrice,TotalSales,PaymentMethod
0,1001,2023-01-15,North,Alice Johnson,Laptop,2.0,700.0,1400.0,Credit Card
1,1002,2023-01-16,South,Rahul Mehta,Mobile Phone,5.0,300.0,1500.0,UPI
2,1003,2023-01-17,East,Fatima Noor,Headphones,10.0,50.0,500.0,Debit Card
3,1004,2023-01-18,West,,Laptop,1.0,720.0,720.0,Credit Card
4,1005,2023-01-19,North,Zoe Carter,Mobile Phone,3.0,,,UPI


#### Clean missing/null values ---

In [32]:
# Check null values
print(df.isnull().sum())

OrderID          0
Date             0
Region           0
CustomerName     2
Product          0
Quantity         1
UnitPrice        2
TotalSales       2
PaymentMethod    0
dtype: int64


In [34]:
# Fill missing CustomerName with 'Unknown'

df['CustomerName'].fillna('Unknown', inplace=True)

The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  df['CustomerName'].fillna('Unknown', inplace=True)


In [38]:
# Fill missing Quantity using median of that Product

df['Quantity'] = df.groupby('Product')['Quantity'].transform(lambda x: x.fillna(x.median()))

In [40]:
# Fill missing UnitPrice using mean of that Product

df['UnitPrice'] = df.groupby('Product')['UnitPrice'].transform(lambda x: x.fillna(x.mean()))

In [42]:
# Recalculate TotalSales = Quantity × UnitPrice

df['TotalSales'] = df['Quantity'] * df['UnitPrice']

In [48]:
df.head()

Unnamed: 0,OrderID,Date,Region,CustomerName,Product,Quantity,UnitPrice,TotalSales,PaymentMethod
0,1001,2023-01-15,North,Alice Johnson,Laptop,2.0,700.0,1400.0,Credit Card
1,1002,2023-01-16,South,Rahul Mehta,Mobile Phone,5.0,300.0,1500.0,UPI
2,1003,2023-01-17,East,Fatima Noor,Headphones,10.0,50.0,500.0,Debit Card
3,1004,2023-01-18,West,Unknown,Laptop,1.0,720.0,720.0,Credit Card
4,1005,2023-01-19,North,Zoe Carter,Mobile Phone,3.0,298.75,896.25,UPI


#### Sum of sales by region ---

In [54]:
sales_by_region = df.groupby('Region')['TotalSales'].sum()

print("Sales by Region:\n", sales_by_region)

Sales by Region:
 Region
East     2520.00
North    4661.25
South    3830.00
West     3107.00
Name: TotalSales, dtype: float64


#### Average sales per product ---

In [58]:
avg_sales_product = df.groupby('Product')['TotalSales'].mean()

print("Average Sales per Product:\n", avg_sales_product)

Average Sales per Product:
 Product
Headphones      415.000000
Keyboard        342.500000
Laptop          935.333333
Mobile Phone    897.250000
Monitor         468.333333
Smart Watch     200.000000
Tablet          900.000000
Name: TotalSales, dtype: float64


#### Highest & lowest selling products ---

In [62]:
product_sales = df.groupby('Product')['TotalSales'].sum()
highest_selling = product_sales.idxmax()
lowest_selling = product_sales.idxmin()

print(f"Highest selling product: {highest_selling}")
print(f"Lowest selling product: {lowest_selling}")

Highest selling product: Laptop
Lowest selling product: Smart Watch


#### Use NumPy to calculate mean, median, standard deviation of numerical fields ---

In [71]:
# NumPy statistics on numerical fields
quantities = df['Quantity'].to_numpy()
unit_prices = df['UnitPrice'].to_numpy()
total_sales = df['TotalSales'].to_numpy()

print("NumPy Stats:")
print(f"Quantity → Mean: {np.mean(quantities):.2f}, Median: {np.median(quantities):.2f}, Std Dev: {np.std(quantities):.2f}")
print(f"UnitPrice → Mean: {np.mean(unit_prices):.2f}, Median: {np.median(unit_prices):.2f}, Std Dev: {np.std(unit_prices):.2f}")
print(f"TotalSales → Mean: {np.mean(total_sales):.2f}, Median: {np.median(total_sales):.2f}, Std Dev: {np.std(total_sales):.2f}")


NumPy Stats:
Quantity → Mean: 2.80, Median: 2.00, Std Dev: 2.16
UnitPrice → Mean: 363.04, Median: 296.88, Std Dev: 240.57
TotalSales → Mean: 705.91, Median: 650.00, Std Dev: 385.64
