In [1]:
# ✅ Step 1: Install & Import Required Libraries
import pandas as pd
import numpy as np
from google.colab import files
import io

# ✅ Step 2: Upload CSV File
uploaded = files.upload()

# ✅ Step 3: Load Dataset
# Automatically detect and read the uploaded file
filename = list(uploaded.keys())[0]
df = pd.read_csv(io.BytesIO(uploaded[filename]))

# ✅ Step 4: Preview Original Dataset
print("🔹 Original Dataset:")
display(df.head())

# ✅ Step 5: Clean Missing/Null Values
df_clean = df.dropna()
print(f"\n✅ Cleaned Dataset: {df_clean.shape[0]} rows remaining after dropping missing values.\n")

# ✅ Step 6: Sum of Sales by Region
sales_by_region = df_clean.groupby("Region")["TotalSales"].sum()
print("📊 Sum of Sales by Region:")
print(sales_by_region)

# ✅ Step 7: Average Sales per Product
avg_sales_per_product = df_clean.groupby("Product")["TotalSales"].mean()
print("\n📊 Average Sales per Product:")
print(avg_sales_per_product)

# ✅ Step 8: Highest & Lowest Selling Products (by Total Sales)
product_sales = df_clean.groupby("Product")["TotalSales"].sum()
highest = product_sales.idxmax(), product_sales.max()
lowest = product_sales.idxmin(), product_sales.min()
print(f"\n🏆 Highest Selling Product: {highest[0]} (Total Sales: {highest[1]})")
print(f"📉 Lowest Selling Product: {lowest[0]} (Total Sales: {lowest[1]})")

# ✅ Step 9: NumPy Stats on Numerical Columns
numerical_fields = df_clean[["Quantity", "UnitPrice", "TotalSales"]]
print("\n📈 NumPy Statistics:")
print("Mean:\n", numerical_fields.mean())
print("\nMedian:\n", numerical_fields.median())
print("\nStandard Deviation:\n", numerical_fields.std())



Saving data sheet.csv to data sheet (1).csv
🔹 Original Dataset:


Unnamed: 0,OrderID,Date,Region,CustomerName,Product,Quantity,UnitPrice,TotalSales,PaymentMethod
0,1001,2023-01-15,North,Alice Johnson,Laptop,2.0,700.0,1400.0,Credit Card
1,1002,2023-01-16,South,Rahul Mehta,Mobile Phone,5.0,300.0,1500.0,UPI
2,1003,2023-01-17,East,Fatima Noor,Headphones,10.0,50.0,500.0,Debit Card
3,1004,2023-01-18,West,,Laptop,1.0,720.0,720.0,Credit Card
4,1005,2023-01-19,North,Zoe Carter,Mobile Phone,3.0,,,UPI



✅ Cleaned Dataset: 15 rows remaining after dropping missing values.

📊 Sum of Sales by Region:
Region
East     2520.0
North    3765.0
South    3830.0
West      585.0
Name: TotalSales, dtype: float64

📊 Average Sales per Product:
Product
Headphones       415.0
Keyboard         342.5
Laptop          1170.0
Mobile Phone     897.5
Monitor          492.5
Smart Watch      200.0
Tablet           900.0
Name: TotalSales, dtype: float64

🏆 Highest Selling Product: Mobile Phone (Total Sales: 3590.0)
📉 Lowest Selling Product: Smart Watch (Total Sales: 200.0)

📈 NumPy Statistics:
Mean:
 Quantity        3.200000
UnitPrice     310.000000
TotalSales    713.333333
dtype: float64

Median:
 Quantity        2.0
UnitPrice     290.0
TotalSales    585.0
dtype: float64

Standard Deviation:
 Quantity        2.396426
UnitPrice     230.666735
TotalSales    451.559467
dtype: float64
