In [None]:
import seaborn as sns
import pandas as pd
import matplotlib.pyplot as plt
import plotly.express as px

In [None]:
# Load the shopping-trends CSV into the DataFrame that the cells below query.
shop = pd.read_csv(filepath_or_buffer='shopping_trends_updated(in).csv')

In [None]:
# (number of rows, number of columns) of the loaded frame.
shop.shape

In [None]:
# Preview the first five rows to get a feel for the columns and values.
shop.head(n=5)

In [None]:
# Storage dtype pandas inferred for each column.
shop.dtypes

In [None]:
# Column labels, used as the string keys in the cells below.
shop.columns 

In [None]:
# Printed summary: index range, per-column non-null counts, dtypes, memory use.
shop.info()

In [None]:
# Number of missing values in each column.
shop.isna().sum()

In [None]:
# List the distinct values of each low-cardinality column, one summary per
# column with a blank line between summaries.
columns_to_list = ['Gender', 'Category', 'Size', 'Shipping Type', 'Payment Method']
summaries = [
    f"The unique values of the {col} column {shop[col].unique()}"
    for col in columns_to_list
]
print("\n\n".join(summaries))

1. What is the overall distribution of customer ages in the dataset?

In [None]:
# How many rows share each distinct age, most frequent age first.
age_counts = shop['Age'].value_counts()
age_counts

In [None]:
# Arithmetic mean of the Age column.
shop['Age'].mean()

In [None]:
# Distinct values present in the Gender column.
# NOTE(review): this sits under the age-distribution question but does not
# touch Age — confirm whether it is still needed here.
shop['Gender'].unique()

In [None]:
# Bucket ages into labelled groups. pd.cut uses right-closed intervals, so the
# bins are (0, 15], (15, 18], (18, 30], (30, 50], (50, 70]; any age outside
# 0-70 becomes NaN.
# NOTE(review): assumes no customer is older than 70 — confirm against the data.
shop['Age_Category'] = pd.cut(
    shop['Age'],
    bins=[0, 15, 18, 30, 50, 70],
    labels=['child', 'teen', 'Young Adult', 'Adult', 'Old'],
)

# Distribution of ages, coloured by age group. The group goes on `color`, not
# `y`: passing a column as `y` makes the histogram aggregate (sum) it per bin,
# which is meaningless for a categorical label.
fig = px.histogram(
    shop,
    x='Age',
    color='Age_Category',
    title='Distribution of customer ages',
)
fig.show()

2. How does the average purchase amount vary across different product categories?

In [None]:
# Distinct product categories present in the data.
shop['Category'].unique()

In [None]:
# Average purchase amount within each product category.
purchase_by_category = shop.groupby('Category')['Purchase Amount (USD)']
purchase_by_category.agg('mean')

3. Which gender has the highest number of purchases?

In [None]:
# The question is about the NUMBER of purchases per gender, so count rows
# (one row per purchase record) instead of plotting the mean purchase amount.
ax = sns.countplot(data=shop, x='Gender')
ax.set(title='Number of purchases by gender', ylabel='Number of purchases');

4. What are the most commonly purchased items in each category?

In [None]:
# For each category, how often each item appears; value_counts orders items
# by descending count within every category group.
shop.groupby('Category')['Item Purchased'].value_counts()

In [None]:
# Row count per purchased item, with bars coloured by the item's category.
fig = px.histogram(
    data_frame=shop,
    x='Item Purchased',
    color='Category',
)
fig.show()

5. Are there any seasons or months where customer spending is significantly higher?

In [None]:
# Distinct values present in the Season column.
shop['Season'].unique()

In [None]:
# Number of rows recorded for each season, largest first.
shop['Season'].value_counts()

In [None]:
# The question is about SPENDING per season, so sum the purchase amount per
# season rather than counting rows. The y-axis starts at zero: a truncated
# range visually exaggerates small differences between seasons.
fig = px.histogram(
    shop,
    x='Season',
    y='Purchase Amount (USD)',
    histfunc='sum',
    title='Total purchase amount by season',
)
fig.show()

6. What is the average rating given by the customers for each product category?

In [None]:
# Average review rating per category, kept as a flat two-column frame
# (Category, Review Rating) and printed as plain text.
shop_groupby = shop.groupby('Category', as_index=False)['Review Rating'].mean()
print(shop_groupby)

In [None]:
# Plot the AVERAGE rating per category. Passing the raw frame to px.bar draws
# one stacked segment per row, so bar height would be the sum of ratings
# (driven by row count), not the average.
rating_by_category = shop.groupby('Category', as_index=False)['Review Rating'].mean()
fig = px.bar(
    rating_by_category,
    x='Category',
    y='Review Rating',
    title='Average review rating by category',
)
fig.show()

7. Are there notable differences in purchase behaviour between subscribed and non-subscribed customers?

In [None]:
# Number of rows for each subscription status.
shop['Subscription Status'].value_counts()

In [None]:
# seaborn's barplot aggregates y per x group (mean by default), so this
# compares the typical purchase amount of each subscription status.
sns.barplot(
    data=shop,
    x='Subscription Status',
    y='Purchase Amount (USD)',
)

In [None]:
# Total purchase amount summed over every row in the frame.
shop['Purchase Amount (USD)'].sum()

In [None]:
# Average purchase amount for each subscription status.
purchase_by_subscription = shop.groupby('Subscription Status')['Purchase Amount (USD)']
purchase_by_subscription.agg('mean')

8. Which payment system is most popular among customers?

In [None]:
# Popularity means how OFTEN a payment method is used, so count rows per
# method (value_counts sorts most-used first) instead of averaging the
# purchase amount.
shop['Payment Method'].value_counts()

In [None]:
# Average purchase amount per payment method as a flat two-column frame.
shop_groupby = shop.groupby('Payment Method', as_index=False)['Purchase Amount (USD)'].mean()

In [None]:
# Answer "most popular" with usage counts, not mean spend: count the rows per
# payment method and show the most-used method first.
payment_counts = (
    shop.groupby('Payment Method')
    .size()
    .reset_index(name='Number of Purchases')
    .sort_values('Number of Purchases', ascending=False)
)
fig = px.bar(
    payment_counts,
    x='Payment Method',
    y='Number of Purchases',
    title='Number of purchases by payment method',
)
fig.show()

9. Do customers who use a promo code tend to spend more than those who don't?

In [None]:
# "Tend to spend more" is a per-customer comparison, so use the mean purchase
# amount. A sum mostly reflects how many rows fall in each promo-code group.
shop_groupby = shop.groupby('Promo Code Used')['Purchase Amount (USD)'].mean().reset_index()

In [None]:
# Two-level sunburst: gender in the inner ring, promo-code usage in the outer
# ring, each sector sized by the purchase amount it accounts for.
promo_hierarchy = ['Gender', 'Promo Code Used']
fig = px.sunburst(
    data_frame=shop,
    path=promo_hierarchy,
    values='Purchase Amount (USD)',
)
fig.show()

In [None]:
# Bar chart of the per-group purchase figure held in shop_groupby.
fig = px.bar(
    data_frame=shop_groupby,
    x='Promo Code Used',
    y='Purchase Amount (USD)',
)
fig.show()

10. How does the frequency of purchases vary across different age groups?

In [None]:
# Show each raw age next to the age group it was bucketed into.
shop.loc[:, ['Age', 'Age_Category']]

In [None]:
# Age groups that actually occur in the data.
shop['Age_Category'].unique()

In [None]:
# Number of customers in every (purchase frequency, age group) combination.
# This replaces a sum of ages, which has no meaning for this question.
shop_groupby = pd.crosstab(shop['Frequency of Purchases'], shop['Age_Category'])

# With no `values` column, px.sunburst sizes each sector by its row count,
# which is what "how does frequency vary across age groups" asks for.
px.sunburst(shop, path=['Frequency of Purchases', 'Age_Category'])

11. Are there any correlations between size of product and purchase amount?

In [None]:
# Compare the AVERAGE purchase amount per size: a sum is dominated by how many
# rows each size has, which hides any relationship between size and spend.
shop_groupby = shop.groupby('Size')['Purchase Amount (USD)'].mean().reset_index()

In [None]:
# Bar chart of the per-size purchase figure held in shop_groupby.
fig = px.bar(
    data_frame=shop_groupby,
    x='Size',
    y='Purchase Amount (USD)',
)
fig.show()

12. Which shipping type is preferred by customers for different product categories?

In [None]:
# Count every (category, shipping type) pair, then rank all pairs together
# from most to least frequent.
shipping_counts = shop.groupby('Category')['Shipping Type'].value_counts()
shipping_counts.sort_values(ascending=False)

In [None]:
# Distinct product categories, shown for reference next to the shipping table.
shop['Category'].unique()

13. How does the presence of a discount affect the purchase decision of customers?

In [None]:
# Total purchase amount with and without a discount, as a flat two-column
# frame, then drawn with one bar per discount flag.
shop_group = shop.groupby('Discount Applied', as_index=False)['Purchase Amount (USD)'].sum()
px.histogram(
    data_frame=shop_group,
    x='Discount Applied',
    y='Purchase Amount (USD)',
)

In [None]:
# Sunburst of purchase amount split by gender (inner ring) and discount flag
# (outer ring), with sectors coloured by gender.
discount_hierarchy = ['Gender', 'Discount Applied']
fig = px.sunburst(
    data_frame=shop,
    path=discount_hierarchy,
    values='Purchase Amount (USD)',
    color='Gender',
)
fig.show()

14. Are there specific colors that are more popular among customers?

In [None]:
# Row count for each product colour.
px.histogram(data_frame=shop, x='Color')

In [None]:
# Same colour popularity as a table, most frequent colour first.
shop['Color'].value_counts()

15. What is the average number of previous purchases made by a customer?

In [None]:
# Mean of the Previous Purchases column across all rows.
shop['Previous Purchases'].mean()

16. Are there noticeable differences in purchase behaviors at different locations?

In [None]:
# Average purchase amount per location, highest-spending location first.
location_means = shop.groupby('Location')['Purchase Amount (USD)'].mean()
location_means.sort_values(ascending=False)

In [None]:
# Average purchase amount per location as a flat frame, then one bar per
# location.
shop_group = shop.groupby('Location', as_index=False)['Purchase Amount (USD)'].mean()
fig = px.bar(
    data_frame=shop_group,
    x='Location',
    y='Purchase Amount (USD)',
)
fig.show()

17. Is there a relationship between customer age and the category of product they purchase?

In [None]:
# Average customer age for each product category, as a flat two-column frame.
shop_group = shop.groupby('Category', as_index=False)['Age'].mean()

In [None]:
# One bar per category showing the Age value held in shop_group.
fig = px.bar(
    data_frame=shop_group,
    x='Category',
    y='Age',
)
fig.show()


18. How does average purchase amount differ between men and women?

In [None]:
# The question asks for the AVERAGE purchase amount per gender, so aggregate
# with mean. A sum would mostly reflect how many rows each gender has.
shop_group = shop.groupby('Gender')['Purchase Amount (USD)'].mean().reset_index()
fig = px.bar(
    shop_group,
    x='Gender',
    y='Purchase Amount (USD)',
    title='Average purchase amount by gender',
)
fig.show()

In [None]:
# Purchase amount broken down by gender (inner ring) and age group (outer ring).
px.sunburst(
    shop,
    path=['Gender', 'Age_Category'],
    values='Purchase Amount (USD)',
)

19. What is the distribution of products across different categories?

In [None]:
# Horizontal bars of the row count per category, drawn through the Axes that
# pandas returns instead of the implicit pyplot "current axes".
category_sizes = shop.groupby('Category').size()
ax = category_sizes.plot.barh(color=sns.color_palette('Blues'))
# Hide the top/right frame lines for a cleaner look.
ax.spines[['top', 'right']].set_visible(False)
ax.set_title('Product Distribution by Category')
plt.tight_layout()
plt.show()

20. What is the distribution of different shipping types used for orders?

In [None]:
# Horizontal bars of how often each shipping type occurs, most common first
# in the underlying counts; styling goes through the returned Axes.
shipping_type_counts = shop['Shipping Type'].value_counts()
ax = shipping_type_counts.plot.barh(color=sns.light_palette("skyblue", reverse=True))
# Hide the top/right frame lines for a cleaner look.
ax.spines[['top', 'right']].set_visible(False)
ax.set_title('Distribution of Shipping Types')
plt.tight_layout()
plt.show()