In [1]:
# Step 1: Import required libraries
import pandas as pd
import numpy as np

In [2]:
# Step 2: Week 1 product table (from SQL), one list per column
products_data = dict(
    ProductID=[1, 2, 3, 4, 5, 6],
    ProductName=['Laptop', 'Mobile Phone', 'Shoes', 'T-Shirt', 'Smart Watch', 'Bluetooth Speaker'],
    Category=['Electronics', 'Electronics', 'Footwear', 'Clothing', 'Electronics', 'Electronics'],
    Price=[55000.0, 25000.0, 2000.0, 800.0, 5000.0, 1500.0],
    Stock=[30, 50, 100, 70, 40, 25],
)
products_df = pd.DataFrame(data=products_data)

# Step 3: Week 1 sales table (from SQL); each sale references a ProductID above.
# SaleDate is parsed up front so the column holds datetimes rather than strings.
sales_data = dict(
    SaleID=[301, 302, 303, 304, 305, 306, 307],
    ProductID=[1, 2, 3, 5, 4, 2, 6],
    StoreID=[101, 102, 103, 102, 101, 104, 105],
    QuantitySold=[3, 5, 2, 6, 4, 7, 5],
    SaleDate=pd.to_datetime([
        '2024-07-01',
        '2024-07-02',
        '2024-07-02',
        '2024-07-03',
        '2024-07-03',
        '2024-07-04',
        '2024-07-04',
    ]),
)
sales_df = pd.DataFrame(data=sales_data)

In [3]:
# Step 4: Merge sales and product data
# The left join keeps every row of sales_df and attaches the matching product
# columns by ProductID. validate='many_to_one' makes pandas raise MergeError
# if ProductID is not unique in products_df; without the check a duplicated
# product row would silently duplicate sale rows in the merged frame.
merged_df = pd.merge(
    sales_df,
    products_df,
    on='ProductID',
    how='left',
    validate='many_to_one',
)

In [4]:
# Step 5: Data cleanup check - count the missing values in each column.
# This cell only reports the counts; merged_df itself is not modified.
null_counts = merged_df.isnull().sum()
print("🔍 Missing Values:\n", null_counts)

🔍 Missing Values:
 SaleID          0
ProductID       0
StoreID         0
QuantitySold    0
SaleDate        0
ProductName     0
Category        0
Price           0
Stock           0
dtype: int64


In [5]:
# Step 6: Calculate Revenue, Cost, and Profit
# Assumed fraction of the selling price that is cost (cost = 80% of price).
COST_RATIO = 0.80

# Revenue is a line total: units sold times unit price.
merged_df['Revenue'] = merged_df['QuantitySold'] * merged_df['Price']
# Cost is a per-unit figure derived from the unit price (not a line total).
merged_df['Cost'] = merged_df['Price'] * COST_RATIO
# Profit is a line total: revenue minus (unit cost times units sold).
merged_df['Profit'] = merged_df['Revenue'] - (merged_df['Cost'] * merged_df['QuantitySold'])

In [6]:
# Step 7: Summary metrics - total Revenue and Profit for each StoreID
metric_columns = ['Revenue', 'Profit']
store_summary = merged_df.groupby('StoreID')[metric_columns].agg('sum')
print("\n Revenue & Profit by Store:\n", store_summary)


 Revenue & Profit by Store:
           Revenue   Profit
StoreID                   
101      168200.0  33640.0
102      155000.0  31000.0
103        4000.0    800.0
104      175000.0  35000.0
105        7500.0   1500.0


In [7]:
# Step 8: Write the merged dataset to a CSV file, leaving out the row index
merged_df.to_csv(path_or_buf='cleaned_sales_data.csv', index=False)

In [8]:
# Offer the exported CSV as a browser download via google.colab.files.
# The import is guarded so that a kernel without the google.colab package
# skips the download with a message instead of failing on the import.
try:
    from google.colab import files
except ImportError:
    files = None

if files is not None:
    files.download('cleaned_sales_data.csv')
else:
    print("google.colab is not available; skipping browser download of 'cleaned_sales_data.csv'.")


<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>