In [1]:
import pandas as pd
import plotly.express as px

In [2]:
# Read the retail sales CSV from the Kaggle input directory and preview
# the first rows to check that the columns parsed as expected.
csv_path = r'/kaggle/input/retail-sales-dataset/retail_sales_dataset.csv'
df = pd.read_csv(csv_path)
df.head()

Unnamed: 0,Transaction ID,Date,Customer ID,Gender,Age,Product Category,Quantity,Price per Unit,Total Amount
0,1,2023-11-24,CUST001,Male,34,Beauty,3,50,150
1,2,2023-02-27,CUST002,Female,26,Clothing,2,500,1000
2,3,2023-01-13,CUST003,Male,50,Electronics,1,30,30
3,4,2023-05-21,CUST004,Male,37,Clothing,1,500,500
4,5,2023-05-06,CUST005,Male,30,Beauty,2,50,100


In [3]:
# Age bin edges and their display labels. With right=True every bin is
# right-closed, e.g. '18-25' covers ages in (18, 25], so an 18-year-old
# falls into '0-18'.
bins = [0, 18, 25, 40, 60, 100]
labels = ['0-18', '18-25', '25-40', '40-60', '60-100']

# Append the categorical 'Age Group' column and shorten 'Price per Unit'
# to 'Price' in a single chained expression.
df = (
    df.assign(**{'Age Group': pd.cut(df['Age'], bins=bins, labels=labels, right=True)})
      .rename(columns={'Price per Unit': 'Price'})
)
df.head()

Unnamed: 0,Transaction ID,Date,Customer ID,Gender,Age,Product Category,Quantity,Price,Total Amount,Age Group
0,1,2023-11-24,CUST001,Male,34,Beauty,3,50,150,25-40
1,2,2023-02-27,CUST002,Female,26,Clothing,2,500,1000,25-40
2,3,2023-01-13,CUST003,Male,50,Electronics,1,30,30,40-60
3,4,2023-05-21,CUST004,Male,37,Clothing,1,500,500,25-40
4,5,2023-05-06,CUST005,Male,30,Beauty,2,50,100,25-40


*Which combination of gender and age group buys the most expensive products?*

In [7]:
# Question: which gender x age-group segment buys the most expensive products?
#
# Segment by Gender and Age Group (the two dimensions the question names) and
# take the highest unit price ('Price') paid within each segment. 'Total Amount'
# also scales with Quantity, so it does not measure how expensive a product is.
# observed=True keeps only the age bins that actually occur, so unused
# categories of the categorical 'Age Group' column do not produce NaN rows.
grouped = (
    df.groupby(['Gender', 'Age Group'], observed=True)['Price']
      .max()
      .reset_index()
)

# drop=True discards the pre-sort index instead of adding it as an 'index' column.
top_combination = grouped.sort_values(by='Price', ascending=False).reset_index(drop=True)

# Sorting by price reorders the age groups, so pin the x axis to the bin order
# defined by the categorical column.
age_group_order = list(df['Age Group'].cat.categories)

# NOTE(review): if several segments tie on the maximum price, the mean unit
# price per segment may separate them better -- confirm which is wanted.
fig = px.bar(
    top_combination,
    x="Age Group",
    y="Price",
    color="Gender",
    barmode="group",
    category_orders={"Age Group": age_group_order},
    title="Most Expensive Product Price by Gender and Age Group",
    labels={"Price": "Max Price per Unit"}
)

fig.show()






*Are there specific product categories preferred by certain demographic segments?*

In [5]:
# Total amount spent for every (gender, product category) pair, returned
# as a flat frame with the sum stored in a 'total_spent' column.
category_pref = (
    df.groupby(["Gender", "Product Category"])["Total Amount"]
    .sum()
    .rename("total_spent")
    .reset_index()
)

# Grouped bars: one cluster per product category, one bar per gender.
fig = px.bar(
    data_frame=category_pref,
    x="Product Category",
    y="total_spent",
    color="Gender",
    barmode="group",
    labels={"total_spent": "Total Purchase Amount"},
    title="Product Category Preferences by Gender",
)
fig.show()

# Show the underlying numbers beneath the chart.
category_pref

Unnamed: 0,Gender,Product Category,total_spent
0,Female,Beauty,74830
1,Female,Clothing,81275
2,Female,Electronics,76735
3,Male,Beauty,68685
4,Male,Clothing,74305
5,Male,Electronics,80170


*What is the average transaction value per customer segment (e.g., by gender or age range)?*

In [6]:
# Average transaction value per gender = total revenue / number of transactions.
#
# The divisor is the number of DISTINCT transaction IDs (nunique), not the row
# count. Dividing by a row count would just reproduce .mean() of 'Total Amount',
# i.e. an average per row; counting distinct IDs keeps the figure per-transaction
# even if one transaction were to span several rows.
avg_trans_by_gender = df.groupby('Gender').agg(
    total_revenue=('Total Amount', 'sum'),
    transaction_count=('Transaction ID', 'nunique')
).reset_index()

avg_trans_by_gender['avg_transaction_value'] = (
    avg_trans_by_gender['total_revenue'] / avg_trans_by_gender['transaction_count']
)

# Visualization. 'Gender' is a categorical colour, so a continuous colour scale
# would have no effect and is not passed.
fig_gender = px.bar(
    avg_trans_by_gender,
    x='Gender',
    y='avg_transaction_value',
    color='Gender',
    title='Average Transaction Value by Gender',
    labels={'avg_transaction_value': 'Avg Transaction Value'},
    text='avg_transaction_value'
)
# Round the bar labels to two decimals instead of printing the raw float.
fig_gender.update_traces(texttemplate='%{text:.2f}')
fig_gender.show()