In [1]:
import pandas as pd
import plotly.express as px
import plotly.graph_objects as go

In [2]:
df = pd.read_csv(r"/kaggle/input/retail-sales-dataset/retail_sales_dataset.csv")
df.head()

Unnamed: 0,Transaction ID,Date,Customer ID,Gender,Age,Product Category,Quantity,Price per Unit,Total Amount
0,1,2023-11-24,CUST001,Male,34,Beauty,3,50,150
1,2,2023-02-27,CUST002,Female,26,Clothing,2,500,1000
2,3,2023-01-13,CUST003,Male,50,Electronics,1,30,30
3,4,2023-05-21,CUST004,Male,37,Clothing,1,500,500
4,5,2023-05-06,CUST005,Male,30,Beauty,2,50,100


*How does purchasing behavior differ between genders?*

**Most of the Sales**

In [3]:
most_sales = df.groupby('Gender')['Total Amount'].sum().reset_index()
most_sales.columns = ['Gender','Purchase']
most_sales

Unnamed: 0,Gender,Purchase
0,Female,232840
1,Male,223160


**Stat of Purchase Behavior**

In [4]:
gender_stat = df.groupby('Gender').agg(
    total_revenue =('Total Amount','sum'),
    avg_revenue=('Total Amount', 'mean'),
    transaction_count=('Total Amount', 'count')
).reset_index()
gender_stat

Unnamed: 0,Gender,total_revenue,avg_revenue,transaction_count
0,Female,232840,456.54902,510
1,Male,223160,455.428571,490


In [5]:
fig = go.Figure()

fig.add_trace(go.Bar(
    x=gender_stat['Gender'],
    y=gender_stat['total_revenue'],
    name='Total Revenue',
    visible=True
))

fig.add_trace(go.Bar(
    x=gender_stat['Gender'],
    y=gender_stat['avg_revenue'],
    name = 'Average Revenue',
    visible = False
))

fig.add_trace(go.Bar(
    x=gender_stat['Gender'],
    y=gender_stat['transaction_count'],
    name = 'Transactions',
    visible = False
))

fig.update_layout(
    updatemenus=[
        dict(
            active=0,
            buttons=list([
                dict(label="Total Revenue",
                        method = "update",
                        args=[{"visible":[True,False,True]},
                                 {"title":"Total Revenue by Gender",
                                     "yaxis":{"title": "Revenue"},
                                     "xaxis":{"title":"Gender"}}]),
                dict(label="Average Revenue",
                        method="update",
                        args=[{"visible":[False,True,False]},
                                 {"title":"Average Revenue by Gender",
                                     "yaxis":{"title": "Revenue"},
                                     "xaxis":{"title":"Gender"}}]),
                dict(label="Total No.of.Transactions",
                        method="update",
                        args=[{"visible":[False,False,True]},
                                {"title":"Transaction Count by Gender",
                                    "yaxis":{"title":"Revenue"},
                                    "xaxis":{"title":"Gender"}}])
            ])
        )
    ]
)

fig.update_layout(title="Purchasing Behaviour by Gender", yaxis_title="Revenue",xaxis_title="Gender",xaxis=dict(tickangle=-45))
fig.show()    

*Which age group contributes the most to total sales?*

In [14]:
#CREATING AGE GROUP COLUMN BASED WITH AGE COLUMN
bins = [0,18,25,40,60,100]
labels = ['<18','18-25','25-40','40-60','60-100']
df['Age Group']=pd.cut(df['Age'],bins=bins, labels=labels, right=True)
df.head()

Unnamed: 0,Transaction ID,Date,Customer ID,Gender,Age,Product Category,Quantity,Price per Unit,Total Amount,Age Group
0,1,2023-11-24,CUST001,Male,34,Beauty,3,50,150,25-40
1,2,2023-02-27,CUST002,Female,26,Clothing,2,500,1000,25-40
2,3,2023-01-13,CUST003,Male,50,Electronics,1,30,30,40-60
3,4,2023-05-21,CUST004,Male,37,Clothing,1,500,500,25-40
4,5,2023-05-06,CUST005,Male,30,Beauty,2,50,100,25-40


In [8]:
age_sales = df.groupby('Age Group')['Total Amount'].sum().reset_index()
age_sales = age_sales.sort_values(by='Total Amount', ascending=False)

fig = px.bar(
    age_sales,
    x='Age Group',
    y='Total Amount',
    title='Total Sales by Age Group',
    labels={'Total Amount':'Revenue','Age Group':'Age Groups'}
)
fig.show()

top_group = age_sales.iloc[0]
print(f"Top Age Group: {top_group['Age Group']} with total revenue of {top_group['Total Amount']}")





Top Age Group: 40-60 with total revenue of 193880


*What is the average age of customers, and how does it vary by product category?*

In [9]:
avg_age_per_category = df.groupby('Product Category').agg(avg_age=('Age','mean')).reset_index()

fig = px.bar(
    avg_age_per_category,
    x='Product Category',
    y='avg_age',
    title='Average Age of Customers per Category',
    labels={'Product Category':'Product','avg_age':'Age Group'}
)

fig.show()

print("AVERAGE AGE PER CATEGORY",avg_age_per_category)

AVERAGE AGE PER CATEGORY   Product Category    avg_age
0           Beauty  40.371336
1         Clothing  41.948718
2      Electronics  41.736842


*Are there any trends in purchase frequency by gender or age?*

In [10]:
#GENDER TRENDS
purchase_frequency_by_gender=df.groupby('Gender').agg(
    transaction_count=('Transaction ID','count'),
    unique_customers=('Customer ID', 'nunique')
).reset_index()

purchase_frequency_by_gender['avg_transaction_per_customer']= (
    purchase_frequency_by_gender['transaction_count'] / purchase_frequency_by_gender['unique_customers']
)

purchase_frequency_by_gender

Unnamed: 0,Gender,transaction_count,unique_customers,avg_transaction_per_customer
0,Female,510,510,1.0
1,Male,490,490,1.0


In [11]:
# AGE TRENDS
purchase_frequency_per_age=df.groupby('Age Group').agg(
    transaction_count=('Transaction ID','count'),
    unique_customers=('Customer ID', 'nunique')
).reset_index()

purchase_frequency_per_age['avg_transaction_per_customer']= (
    purchase_frequency_per_age['transaction_count'] / purchase_frequency_by_gender['unique_customers']
)

purchase_frequency_per_age




invalid value encountered in greater


invalid value encountered in less


invalid value encountered in greater



Unnamed: 0,Age Group,transaction_count,unique_customers,avg_transaction_per_customer
0,<18,21,21,0.041176
1,18-25,148,148,0.302041
2,25-40,297,297,
3,40-60,441,441,
4,60-100,93,93,


In [12]:
fig = go.Figure()

# Gender Group traces
fig.add_trace(go.Bar(
    x=purchase_frequency_by_gender['Gender'],
    y=purchase_frequency_by_gender['transaction_count'],
    name='Total Transactions (Gender)',
    visible=True
))
fig.add_trace(go.Bar(
    x=purchase_frequency_by_gender['Gender'],
    y=purchase_frequency_by_gender['unique_customers'],
    name='Unique Customers (Gender)',
    visible=False
))
fig.add_trace(go.Bar(
    x=purchase_frequency_by_gender['Gender'],
    y=purchase_frequency_by_gender['avg_transaction_per_customer'],
    name='Avg Transactions per Customer (Gender)',
    visible=False
))

# Age Group traces
fig.add_trace(go.Bar(
    x=purchase_frequency_per_age['Age Group'],
    y=purchase_frequency_per_age['transaction_count'],
    name='Total Transactions (Age Group)',
    visible=False
))
fig.add_trace(go.Bar(
    x=purchase_frequency_per_age['Age Group'],
    y=purchase_frequency_per_age['unique_customers'],
    name='Unique Customers (Age Group)',
    visible=False
))
fig.add_trace(go.Bar(
    x=purchase_frequency_per_age['Age Group'],
    y=purchase_frequency_per_age['avg_transaction_per_customer'],
    name='Avg Transactions per Customer (Age Group)',
    visible=False
))

# Helper to generate visibility masks for all 6 traces
def get_visibility(dimension, metric):

    vis = [False] * 6
    if dimension == 'Gender':
        base = 0
    else:  # Age
        base = 3

    if metric == 'Total Transactions':
        vis[base + 0] = True
    elif metric == 'Unique Customers':
        vis[base + 1] = True
    else:  # Avg Transactions per Customer
        vis[base + 2] = True
    return vis

# Update menus
fig.update_layout(
    updatemenus=[
        dict(
            buttons=[
                dict(label="Gender", method="update",
                     args=[{"visible": get_visibility("Gender", "Total Transactions")},
                           {"title": "Total Transactions by Gender",
                            "xaxis": {"title": "Gender"},
                            "yaxis": {"title": "Number of Transactions"}}]),
                dict(label="Age Group", method="update",
                     args=[{"visible": get_visibility("Age Group", "Total Transactions")},
                           {"title": "Total Transactions by Age Group",
                            "xaxis": {"title": "Age Group"},
                            "yaxis": {"title": "Number of Transactions"}}]),
            ],
            direction="down",
            showactive=True,
            x=0,
            y=1.15,
            xanchor='left',
            yanchor='top',
            pad={"r": 10, "t": 10},
            name='Dimension'
        ),
        dict(
            buttons=[
                dict(label="Total Transactions", method="update",
                     args=[{"visible": get_visibility("Gender", "Total Transactions")},
                           {"title": "Total Transactions by Gender",
                            "xaxis": {"title": "Gender"},
                            "yaxis": {"title": "Number of Transactions"}}]),
                dict(label="Unique Customers", method="update",
                     args=[{"visible": get_visibility("Gender", "Unique Customers")},
                           {"title": "Unique Customers by Gender",
                            "xaxis": {"title": "Gender"},
                            "yaxis": {"title": "Number of Customers"}}]),
                dict(label="Avg Transactions per Customer", method="update",
                     args=[{"visible": get_visibility("Gender", "Avg Transactions per Customer")},
                           {"title": "Average Transactions per Customer by Gender",
                            "xaxis": {"title": "Gender"},
                            "yaxis": {"title": "Average Transactions"}}]),
            ],
            direction="down",
            showactive=True,
            x=0.3,
            y=1.15,
            xanchor='left',
            yanchor='top',
            pad={"r": 10, "t": 10},
            name='Metric'
        ),
    ]
)

# Initial layout
fig.update_layout(
    title="Total Transactions by Gender",
    xaxis=dict(tickangle=-45),
    margin=dict(t=100)  # space for dropdowns
)

fig.show()
