In [2]:
# Step 1: Review Old Code (Mental Check)

# Look at your Day 15–17 code and ask:

# Are variable names clear?

# Is logic repeated?

# Can someone else understand this?

In [4]:
import pandas as pd
# Load the retail sales CSV into a DataFrame. The path is relative, so it is
# resolved against the current working directory of the kernel.
dataset = pd.read_csv('Datasets/retail_sales_dataset.csv')

In [12]:
# Step 2: Bad vs Good Code (Example)

# Before (Messy): the single-letter name `a` gives no hint of what the result holds.
a = dataset.groupby('Product Category')['Total Amount'].sum()

# After (Professional): the same aggregation (sum of 'Total Amount' per
# 'Product Category') bound to a descriptive name, with the chain split
# across lines so each step is easy to read.
revenue_by_category = (
    dataset.groupby('Product Category')['Total Amount']
      .sum()
)

In [18]:
# Step 3: Create Reusable Functions (PRO MOVE)
# Example: Revenue by Any Column

def revenue_by(dataset, column_name, value_column='Total Amount'):
    """Sum a numeric column per group and rank the groups from highest to lowest.

    Parameters
    ----------
    dataset : pandas.DataFrame
        Data containing both ``column_name`` and ``value_column``.
    column_name : str or list of str
        Column(s) to group by, e.g. ``'Product Category'`` or ``'Gender'``.
    value_column : str, default 'Total Amount'
        Column whose values are summed within each group. The default keeps
        the original behaviour (revenue totals), while other callers can
        aggregate a different column without copying this function.

    Returns
    -------
    pandas.Series
        Group totals indexed by the grouping key(s), sorted in descending
        order, so ``.head(1)`` yields the top group.

    Raises
    ------
    KeyError
        If ``column_name`` or ``value_column`` is not a column of ``dataset``.
    """
    totals = dataset.groupby(column_name)[value_column].sum()
    return totals.sort_values(ascending=False)

In [22]:
# Usage: one call per grouping column. Each result is a Series of summed
# 'Total Amount' keyed by that column's values.
revenue_by_gender = revenue_by(dataset, 'Gender')
revenue_by_product_category = revenue_by(dataset, 'Product Category')

# Show both results with a single print call, category totals first,
# separated by a blank-line string.
print(
    revenue_by_product_category,
    '\n\n',
    revenue_by_gender,
)

Product Category
Electronics    156905
Clothing       155580
Beauty         143515
Name: Total Amount, dtype: int64 

 Gender
Female    232840
Male      223160
Name: Total Amount, dtype: int64


In [24]:
# Step 4: Refactor Validation Logic
# Instead of scattered checks, centralize them.

def validate_sales_data(df):
    """Check basic integrity rules on a sales DataFrame.

    Rules checked:
      * 'Quantity', 'Price per Unit' and 'Total Amount' each have a minimum
        value strictly greater than zero.
      * 'Transaction ID' contains no duplicate values.

    Parameters
    ----------
    df : pandas.DataFrame
        The frame to validate. It must contain the four columns named above.

    Returns
    -------
    None
        Returns nothing when every check passes.

    Raises
    ------
    AssertionError
        On the first rule that fails, with a message naming the column.
    KeyError
        If one of the required columns is missing from ``df``.

    Notes
    -----
    The checks use ``assert`` statements, which Python skips when run with
    the ``-O`` flag.
    """
    # Validate the frame that was passed in rather than a notebook-level
    # global, so the function works for any DataFrame (and outside the notebook).
    for column in ('Quantity', 'Price per Unit', 'Total Amount'):
        assert df[column].min() > 0, f"'{column}' must be strictly positive"
    assert df['Transaction ID'].is_unique, "'Transaction ID' values must be unique"

In [26]:
# Usage: run the integrity checks on the loaded data. The call produces no
# output when every check passes; a failed check raises AssertionError.
validate_sales_data(dataset)

In [28]:
# Step 5: Organize Code into Sections

# 1. Imports
# 2. Load data
# 3. Cleaning
# 4. Feature engineering
# 5. Validation
# 6. Analysis
# 7. Insights

In [32]:
# Step 6: Create an Insight Summary Function

def print_key_insights(df):
    """Print headline revenue figures for a sales DataFrame.

    Prints the sum of the 'Total Amount' column, followed by the single
    highest-revenue 'Product Category' as returned by ``revenue_by``.

    Parameters
    ----------
    df : pandas.DataFrame
        Sales data containing 'Total Amount' and 'Product Category' columns.

    Returns
    -------
    None
        Output goes to stdout only.
    """
    # Summarise the frame that was passed in rather than a notebook-level
    # global, so the function can report on any DataFrame (e.g. a filtered subset).
    print("Total Revenue:", df['Total Amount'].sum())
    print("Top Category:")
    print(revenue_by(df, 'Product Category').head(1))

# Print the summary for the loaded sales data.
print_key_insights(dataset)

Total Revenue: 456000
Top Category:
Product Category
Electronics    156905
Name: Total Amount, dtype: int64


In [34]:
# Step 7: Remove Unused Code

# Delete:

# commented junk

# unused variables

# repeated blocks

# Clean notebooks = higher credibility.