**Importing Libraries**

In [1]:
import pandas as pd
import numpy as np

**Uploading files**

In [2]:
from google.colab import files
uploaded = files.upload()

Saving employees.csv to employees.csv
Saving products.csv to products.csv
Saving sales.csv to sales.csv
Saving stores.csv to stores.csv


**Loading files**

In [3]:
dfEmp = pd.read_csv("employees.csv")
dfPro = pd.read_csv("products.csv")
dfSales = pd.read_csv("sales.csv")
dfStores = pd.read_csv("stores.csv")

**Cleaning null values or missing entries**

In [5]:
print(f"Null values: {dfEmp.isna().sum().any()}")
print(f"Null values: {dfPro.isna().sum().any()}")
print(f"Null values: {dfSales.isna().sum().any()}")
print(f"Null values: {dfStores.isna().sum().any()}")

Null values: False
Null values: False
Null values: False
Null values: False


**Printing DataFrames**

In [7]:
dfEmp.head()

Unnamed: 0,employeeID,name,storeID,role,hireDate
0,1,John Smith,1,Cashier,2025-07-23 19:53:58
1,2,Alice Johnson,2,Manager,2025-07-23 19:53:58
2,3,Bob Lee,3,Sales Associate,2025-07-23 19:53:58
3,4,Eva Martinez,4,Supervisor,2025-07-23 19:53:58
4,5,David Chen,5,Stock Clerk,2025-07-23 19:53:58


In [8]:
dfPro.head()

Unnamed: 0,productID,name,category,price,cost,discountPercentage,createdAt
0,1,Laptop Pro 14,Electronics,1200.0,900.0,10.0,2025-07-23 19:53:53
1,2,Organic Apples,Grocery,3.5,2.0,5.0,2025-07-23 19:53:53
2,3,Cotton T-Shirt,Apparel,25.0,10.0,15.0,2025-07-23 19:53:53
3,4,Bluetooth Speaker,Electronics,60.0,40.0,20.0,2025-07-23 19:53:53
4,5,LED Bulb Pack,Home Goods,15.0,8.0,0.0,2025-07-23 19:53:53


In [9]:
dfSales.head()

Unnamed: 0,saleID,productID,storeID,employeeID,quantity,saleDate
0,1,1,1,1,2,2025-07-23 19:54:07
1,2,2,2,2,100,2025-07-23 19:54:07
2,3,3,3,3,30,2025-07-23 19:54:07
3,4,4,4,4,5,2025-07-23 19:54:07
4,5,5,5,5,20,2025-07-23 19:54:07


In [10]:
dfStores.head()

Unnamed: 0,storeID,name,region,address,createdAt
0,1,Urban Mart - NY,East Coast,"101 Main St, New York, NY",2025-07-23 19:52:23
1,2,SuperSave - LA,West Coast,"202 Ocean Ave, LA, CA",2025-07-23 19:52:23
2,3,FreshStore - TX,South,"303 Sunset Blvd, Austin, TX",2025-07-23 19:52:23
3,4,MegaMart - IL,Midwest,"404 Windy Rd, Chicago, IL",2025-07-23 19:52:23
4,5,BudgetBazaar - FL,Southeast,"505 Palm Dr, Miami, FL",2025-07-23 19:52:23


**Calculate revenue, discount percentage & profit margins**

In [12]:
df = dfPro.merge(dfSales, on="productID", how="inner")

In [15]:
df["revenue"] = df["quantity"] * df["price"]
df["profit"] = df["revenue"] - (df["quantity"] * df["cost"])
df["discountPercentage"] = round(100 - (df["cost"] / df["price"]) * 100, 2)
df["totalCost"] = df["quantity"] * df["cost"]
df["profitMargins"] = round((df["profit"] / df["revenue"]), 2)

In [16]:
df

Unnamed: 0,productID,name,category,price,cost,discountPercentage,createdAt,saleID,storeID,employeeID,quantity,saleDate,revenue,profit,totalCost,profitMargins
0,1,Laptop Pro 14,Electronics,1200.0,900.0,25.0,2025-07-23 19:53:53,1,1,1,2,2025-07-23 19:54:07,2400.0,600.0,1800.0,0.25
1,2,Organic Apples,Grocery,3.5,2.0,42.86,2025-07-23 19:53:53,2,2,2,100,2025-07-23 19:54:07,350.0,150.0,200.0,0.43
2,3,Cotton T-Shirt,Apparel,25.0,10.0,60.0,2025-07-23 19:53:53,3,3,3,30,2025-07-23 19:54:07,750.0,450.0,300.0,0.6
3,4,Bluetooth Speaker,Electronics,60.0,40.0,33.33,2025-07-23 19:53:53,4,4,4,5,2025-07-23 19:54:07,300.0,100.0,200.0,0.33
4,5,LED Bulb Pack,Home Goods,15.0,8.0,46.67,2025-07-23 19:53:53,5,5,5,20,2025-07-23 19:54:07,300.0,140.0,160.0,0.47


In [18]:
productSummary = df.groupby("productID").agg(
    productRevenue=pd.NamedAgg(column="revenue", aggfunc="sum")
).merge(dfPro[["productID", "name"]], on="productID", how="inner") \
.sort_values("productRevenue", ascending=False)

storeSummary = df.groupby("storeID").agg(
    storeRevenue=pd.NamedAgg(column="revenue", aggfunc="sum")
).merge(dfStores[["storeID", "name"]], on="storeID", how="inner") \
.sort_values("storeRevenue", ascending=False)

In [19]:
print("------------------Product Summary--------------------------")
print(productSummary[["name", "productRevenue"]].iloc[:5, :])
print("-----------------------------------------------------------\n")

print("------------------Store Summary----------------------------")
print(storeSummary[["name", "storeRevenue"]].iloc[:5, :])
print("-----------------------------------------------------------\n")

------------------Product Summary--------------------------
                name  productRevenue
0      Laptop Pro 14          2400.0
2     Cotton T-Shirt           750.0
1     Organic Apples           350.0
3  Bluetooth Speaker           300.0
4      LED Bulb Pack           300.0
-----------------------------------------------------------

------------------Store Summary----------------------------
                name  storeRevenue
0    Urban Mart - NY        2400.0
2    FreshStore - TX         750.0
1     SuperSave - LA         350.0
3      MegaMart - IL         300.0
4  BudgetBazaar - FL         300.0
-----------------------------------------------------------



**Delieverables:-**

In [20]:
#1. cleaned datasets

df.to_csv("summary.csv")

In [21]:
storeSummary.iloc[0, :]["name"]

'Urban Mart - NY'

In [22]:
#2. summarizing key metrics

totalRevenue = df["revenue"].sum()
totalProfit = df["profit"].sum()

topProduct = productSummary.iloc[0, :]["name"]
bottomProduct = productSummary.iloc[-1, :]["name"]

topStore = storeSummary.iloc[0, :]["name"]
bottomStore = storeSummary.iloc[-1, :]["name"]

report = pd.DataFrame(
    {
        "totalRevenue": totalRevenue,
        "totalProfit": totalProfit,
        "topProduct": topProduct,
        "bottomProduct": bottomProduct,
        "topStore": topStore,
        "bottomStore": bottomStore
    },
    index=[0]
    )

In [23]:
print("------------------------------------Key metrics------------------------------------------------")
report

------------------------------------Key metrics------------------------------------------------


Unnamed: 0,totalRevenue,totalProfit,topProduct,bottomProduct,topStore,bottomStore
0,4100.0,1440.0,Laptop Pro 14,LED Bulb Pack,Urban Mart - NY,BudgetBazaar - FL


In [24]:
report.to_csv("key_metrics_report.csv")