## Import libraries

In [None]:
import numpy as np
import pandas as pd
import plotly.express as px
import plotly.graph_objects as go
from sklearn.cluster import KMeans
from sklearn.preprocessing import StandardScaler
import statsmodels.api as sm
import warnings

In [None]:
# Load the shopping-trends CSV into a DataFrame.
# NOTE(review): the absolute '/content/...' path looks like a hosted-notebook
# upload location — adjust it when running elsewhere.
df = pd.read_csv('/content/shopping_trends.csv')

## Exploring the dataset

In [None]:
# Preview the first rows of the dataset (DataFrame.head defaults to 5 rows).
df.head()

Unnamed: 0,Customer ID,Age,Gender,Item Purchased,Category,Purchase Amount (USD),Location,Size,Color,Season,Review Rating,Subscription Status,Payment Method,Shipping Type,Discount Applied,Promo Code Used,Previous Purchases,Preferred Payment Method,Frequency of Purchases
0,1,55,Male,Blouse,Clothing,53,Kentucky,L,Gray,Winter,3.1,Yes,Credit Card,Express,Yes,Yes,14,Venmo,Fortnightly
1,2,19,Male,Sweater,Clothing,64,Maine,L,Maroon,Winter,3.1,Yes,Bank Transfer,Express,Yes,Yes,2,Cash,Fortnightly
2,3,50,Male,Jeans,Clothing,73,Massachusetts,S,Maroon,Spring,3.1,Yes,Cash,Free Shipping,Yes,Yes,23,Credit Card,Weekly
3,4,21,Male,Sandals,Footwear,90,Rhode Island,M,Maroon,Spring,3.5,Yes,PayPal,Next Day Air,Yes,Yes,49,PayPal,Weekly
4,5,45,Male,Blouse,Clothing,49,Oregon,M,Turquoise,Spring,2.7,Yes,Cash,Free Shipping,Yes,Yes,31,PayPal,Annually


In [None]:
# List the column labels available for the analysis below.
df.columns

Index(['Customer ID', 'Age', 'Gender', 'Item Purchased', 'Category',
       'Purchase Amount (USD)', 'Location', 'Size', 'Color', 'Season',
       'Review Rating', 'Subscription Status', 'Payment Method',
       'Shipping Type', 'Discount Applied', 'Promo Code Used',
       'Previous Purchases', 'Preferred Payment Method',
       'Frequency of Purchases'],
      dtype='object')

## Exploratory Data Analysis

In [None]:
# Histogram of customer ages (50 bins) on a black, dark-themed canvas.
# update_layout returns the figure itself, so the styling is chained directly.
fig_age = px.histogram(
    data_frame=df,
    x='Age',
    nbins=50,
    color_discrete_sequence=['cyan'],
    title='Age Distribution of Customers',
).update_layout(
    template='plotly_dark',
    paper_bgcolor='black',
    plot_bgcolor='black',
    font={'color': 'white'},
)
fig_age.show()

In [None]:
# Tally customers per gender into a two-column frame: 'Gender' / 'Count'.
gender_counts = (
    df['Gender']
    .value_counts()
    .rename_axis('Gender')
    .reset_index(name='Count')
)

# Pie chart of the gender split, styled with the black dark theme.
fig_gender = px.pie(
    data_frame=gender_counts,
    names='Gender',
    values='Count',
    color_discrete_sequence=px.colors.sequential.RdBu,
    title='Gender Proportions of Customers',
).update_layout(
    template='plotly_dark',
    paper_bgcolor='black',
    plot_bgcolor='black',
    font={'color': 'white'},
)
fig_gender.show()

In [None]:
# Number of customers recorded for each location ('Location' / 'Count').
location_counts = (
    df['Location']
    .value_counts()
    .rename_axis('Location')
    .reset_index(name='Count')
)

# Bar chart with the count printed on every bar.
fig_location = px.bar(
    data_frame=location_counts,
    x='Location',
    y='Count',
    text='Count',
    color_discrete_sequence=['lime'],
    title='Customer Count by Location',
).update_layout(
    template='plotly_dark',
    paper_bgcolor='black',
    plot_bgcolor='black',
    font={'color': 'white'},
    xaxis_title='Location',
    yaxis_title='Number of Customers',
)
fig_location.show()

In [None]:
# Purchases per item, relabelled to 'Item Purchased' / 'Count'.
item_counts = (
    df['Item Purchased']
    .value_counts()
    .reset_index()
    .set_axis(['Item Purchased', 'Count'], axis=1)
)

# Bar chart of item popularity with the count shown on each bar.
fig_items = px.bar(
    data_frame=item_counts,
    x='Item Purchased',
    y='Count',
    text='Count',
    color_discrete_sequence=['orange'],
    title='Most Purchased Items',
).update_layout(
    template='plotly_dark',
    paper_bgcolor='black',
    plot_bgcolor='black',
    font={'color': 'white'},
    xaxis_title='Items',
    yaxis_title='Count of Purchases',
)
fig_items.show()


In [None]:
# Box plot summarising the spread of 'Purchase Amount (USD)' over all rows.
fig_amount = px.box(
    data_frame=df,
    y='Purchase Amount (USD)',
    color_discrete_sequence=['magenta'],
    title='Purchase Amount Distribution',
).update_layout(
    template='plotly_dark',
    paper_bgcolor='black',
    plot_bgcolor='black',
    font={'color': 'white'},
    yaxis_title='Purchase Amount (USD)',
)
fig_amount.show()

In [None]:
# Purchases per size label ('Size' / 'Count').
size_counts = (
    df['Size']
    .value_counts()
    .rename_axis('Size')
    .reset_index(name='Count')
)

# Bar chart of size preference with the count shown on each bar.
fig_sizes = px.bar(
    data_frame=size_counts,
    x='Size',
    y='Count',
    text='Count',
    color_discrete_sequence=['green'],
    title='Preferred Sizes',
).update_layout(
    template='plotly_dark',
    paper_bgcolor='black',
    plot_bgcolor='black',
    font={'color': 'white'},
    xaxis_title='Size',
    yaxis_title='Count of Purchases',
)
fig_sizes.show()

In [None]:
# Purchases per colour, relabelled to 'Color' / 'Count'.
color_counts = (
    df['Color']
    .value_counts()
    .reset_index()
    .set_axis(['Color', 'Count'], axis=1)
)

# Bar chart of colour preference with the count shown on each bar.
fig_colors = px.bar(
    data_frame=color_counts,
    x='Color',
    y='Count',
    text='Count',
    color_discrete_sequence=['teal'],
    title='Preferred Colors',
).update_layout(
    template='plotly_dark',
    paper_bgcolor='black',
    plot_bgcolor='black',
    font={'color': 'white'},
    xaxis_title='Color',
    yaxis_title='Count of Purchases',
)
fig_colors.show()

In [None]:
# Purchases per season ('Season' / 'Count').
season_counts = (
    df['Season']
    .value_counts()
    .rename_axis('Season')
    .reset_index(name='Count')
)

# Bar chart comparing purchase volume across seasons.
fig_season = px.bar(
    data_frame=season_counts,
    x='Season',
    y='Count',
    text='Count',
    color_discrete_sequence=['blue'],
    title='Seasonal Trends in Purchases',
).update_layout(
    template='plotly_dark',
    paper_bgcolor='black',
    plot_bgcolor='black',
    font={'color': 'white'},
    xaxis_title='Season',
    yaxis_title='Count of Purchases',
)
fig_season.show()

In [None]:
# Customers per purchase-frequency label ('Frequency' / 'Count').
frequency_counts = (
    df['Frequency of Purchases']
    .value_counts()
    .rename_axis('Frequency')
    .reset_index(name='Count')
)

# Bar chart of how often customers report purchasing.
fig_frequency = px.bar(
    data_frame=frequency_counts,
    x='Frequency',
    y='Count',
    text='Count',
    color_discrete_sequence=['red'],
    title='Frequency of Purchases',
).update_layout(
    template='plotly_dark',
    paper_bgcolor='black',
    plot_bgcolor='black',
    font={'color': 'white'},
    xaxis_title='Frequency',
    yaxis_title='Count of Purchases',
)
fig_frequency.show()


## Payment and Discounts

In [None]:
# Purchases per payment method, relabelled to 'Payment Method' / 'Count'.
payment_counts = (
    df['Payment Method']
    .value_counts()
    .reset_index()
    .set_axis(['Payment Method', 'Count'], axis=1)
)

# Pie chart of the payment-method split, styled with the black dark theme.
fig_payment = px.pie(
    data_frame=payment_counts,
    names='Payment Method',
    values='Count',
    color_discrete_sequence=px.colors.sequential.Plasma,
    title='Popular Payment Methods',
).update_layout(
    template='plotly_dark',
    paper_bgcolor='black',
    plot_bgcolor='black',
    font={'color': 'white'},
)
fig_payment.show()

In [None]:
# Tabulate how many rows carry each purchase-frequency label.
# NOTE(review): the labels appear to include synonyms ('Quarterly' vs
# 'Every 3 Months', 'Bi-Weekly' vs 'Fortnightly') — confirm whether they
# should be merged before drawing conclusions from these counts.
df["Frequency of Purchases"].value_counts()

Unnamed: 0_level_0,count
Frequency of Purchases,Unnamed: 1_level_1
Every 3 Months,584
Annually,572
Quarterly,563
Monthly,553
Bi-Weekly,547
Fortnightly,542
Weekly,539


#### What is the average age of customers in the dataset?

In [None]:
# Arithmetic mean of the 'Age' column over every row in the dataset.
average_age = df.loc[:, 'Age'].mean()
print("Average Age:", average_age)



Average Age: 44.06846153846154


## What is the most common item purchased?

In [None]:
# mode() returns every value tied for the highest count, in sorted order;
# the entry labelled 0 is the first of those.
most_common_item = df['Item Purchased'].mode().loc[0]
print("Most Common Item Purchased:", most_common_item)

Most Common Item Purchased: Blouse


## What is the total purchase amount for each category?

In [None]:
# Sum the purchase amounts within each product category.
total_purchase_by_category = (
    df['Purchase Amount (USD)']
    .groupby(df['Category'])
    .sum()
)
# Heading and table are emitted on separate lines by a single print call.
print("Total Purchase Amount by Category:", total_purchase_by_category, sep="\n")

Total Purchase Amount by Category:
Category
Accessories     74200
Clothing       104264
Footwear        36093
Outerwear       18524
Name: Purchase Amount (USD), dtype: int64


## What is the average review rating for male customers and female customers separately?


In [None]:
# Mean review rating computed separately for rows whose 'Gender' is 'Male'
# and rows whose 'Gender' is 'Female' (row mask + column selected in one .loc).
average_rating_male = df.loc[df['Gender'] == 'Male', 'Review Rating'].mean()
average_rating_female = df.loc[df['Gender'] == 'Female', 'Review Rating'].mean()
print("Average Review Rating for Male Customers:", average_rating_male)
print("Average Review Rating for Female Customers:", average_rating_female)

Average Review Rating for Male Customers: 3.7539592760180995
Average Review Rating for Female Customers: 3.741426282051282
