In [1]:
import numpy as np 
import pandas as pd 
import matplotlib.pyplot as plt
%matplotlib inline
import seaborn as sns

In [2]:
# Load the Diwali sales data from a CSV expected in the working directory.
# NOTE(review): the 'unicode_escape' codec is presumably a workaround for a file
# that is not valid UTF-8 — confirm the file's real encoding.
df = pd.read_csv('Diwali_Sales.csv', encoding= 'unicode_escape')

In [None]:
# Display the frame's dimensions as (rows, columns).
df.shape

In [None]:
# Preview the first rows of the frame (head() defaults to 5 rows).
df.head()

In [None]:
# Print a per-column summary: column names, non-null counts and dtypes.
df.info()

In [6]:
# Removing empty columns
# Rebind to the result instead of mutating in place, and ignore labels that are
# already gone, so re-running this cell does not raise KeyError.
df = df.drop(columns=['Status', 'unnamed1'], errors='ignore')

In [None]:
# Count the missing values in every column
df.isna().sum()

In [8]:
# Removing null values
# With only inplace given, dropna() uses its defaults: every row that has a null
# in any column is discarded, and df itself is modified.
df.dropna(inplace=True)

In [None]:
# Re-count missing values per column after the null rows were dropped
df.isna().sum()

In [9]:
# Convert the 'Amount' column to an integer dtype; all other columns keep theirs
df = df.astype({'Amount': 'int'})

In [None]:
# Show the dtype of the 'Amount' column to verify the integer conversion
df['Amount'].dtype

In [None]:
# Listing all columns
# (displays the column labels currently present on df)
df.columns

In [14]:
# Summary statistics (count, mean, std, min, quartiles, max) for three numeric columns
summary_cols = ['Age', 'Orders', 'Amount']
df[summary_cols].describe()

Unnamed: 0,Age,Orders,Amount
count,11239.0,11239.0,11239.0
mean,35.410357,2.489634,9453.610553
std,12.753866,1.114967,5222.355168
min,12.0,1.0,188.0
25%,27.0,2.0,5443.0
50%,33.0,2.0,8109.0
75%,43.0,3.0,12675.0
max,92.0,4.0,23952.0


# Graphical Data Analysis

### Gender

In [None]:
# Number of purchase records per gender.
# countplot tallies the rows for each 'Gender' value; the 'Orders' column is not summed here.
ax = sns.countplot(data=df, x='Gender')

# Write the count on top of every bar.
for container in ax.containers:
    ax.bar_label(container)

In [None]:
# Total amount spent per gender, largest total first.
sales_gen = (
    df.groupby(['Gender'], as_index=False)['Amount']
    .sum()
    .sort_values(by='Amount', ascending=False)
)

sns.barplot(data=sales_gen, x='Gender', y='Amount')

*From the above graphs we can see that most of the buyers are females, and the purchasing power of females is also greater than that of males.*

### Age Group

In [None]:
# Number of purchase records per age group, split by gender.
# countplot tallies rows for each ('Age Group', 'Gender') combination.
ax = sns.countplot(x='Age Group', hue='Gender', data=df)

# One container per hue level; label each bar with its count.
for container in ax.containers:
    ax.bar_label(container)

In [None]:
# Total amount spent per age group, largest total first.
sales_age = (
    df.groupby(['Age Group'], as_index=False)['Amount']
    .sum()
    .sort_values(by='Amount', ascending=False)
)

sns.barplot(data=sales_age, x='Age Group', y='Amount')

*From the above graphs we can see that most of the buyers are females in the 26-35 yrs age group.*

### Marital Status

In [None]:
# Marital Status vs Number of Orders
# (countplot tallies the rows for each 'Marital_Status' value)

# Set the figure size BEFORE drawing: rc settings only apply to figures created
# afterwards, so calling sns.set() after countplot left this chart at the
# previous size and only affected later cells.
sns.set(rc={'figure.figsize':(7,5)})
ax = sns.countplot(data = df, x = 'Marital_Status')

# Write the count on top of every bar.
for bars in ax.containers:
    ax.bar_label(bars)

In [None]:
# Total amount spent for every (marital status, gender) pair, largest total first.
# NOTE: the variable is called 'sales_state' although it holds marital-status totals here.
sns.set(rc={'figure.figsize':(6,5)})

sales_state = (
    df.groupby(['Marital_Status', 'Gender'], as_index=False)['Amount']
    .sum()
    .sort_values(by='Amount', ascending=False)
)

sns.barplot(x='Marital_Status', y='Amount', hue='Gender', data=sales_state)

*From the above graphs we can see that most of the buyers are married women, and they spend more money too. Also, even unmarried women purchase more than married men.*

### State

In [None]:
# Ten states with the highest summed 'Orders', largest first.
sns.set(rc={'figure.figsize':(15,5)})

sales_state = (
    df.groupby(['State'], as_index=False)['Orders']
    .sum()
    .sort_values(by='Orders', ascending=False)
    .head(10)
)

sns.barplot(x='State', y='Orders', data=sales_state)

In [None]:
# Ten states with the highest total 'Amount', largest first.
sns.set(rc={'figure.figsize':(15,5)})

sales_state = (
    df.groupby(['State'], as_index=False)['Amount']
    .sum()
    .sort_values(by='Amount', ascending=False)
    .head(10)
)

sns.barplot(x='State', y='Amount', data=sales_state)

*From the above graphs we can see that most of the orders and the highest sales amounts come from Uttar Pradesh, Maharashtra and Karnataka.*


### Occupation

In [None]:
# Number of purchase records per occupation.
# countplot tallies the rows for each 'Occupation' value.
sns.set(rc={'figure.figsize':(20,5)})
ax = sns.countplot(x='Occupation', data=df)

# Write the count on top of every bar.
for container in ax.containers:
    ax.bar_label(container)

In [None]:
# Total amount spent per occupation, largest total first.
# NOTE: the variable is called 'sales_state' although it holds occupation totals here.
sns.set(rc={'figure.figsize':(20,5)})

sales_state = (
    df.groupby(['Occupation'], as_index=False)['Amount']
    .sum()
    .sort_values(by='Amount', ascending=False)
)

sns.barplot(x='Occupation', y='Amount', data=sales_state)

*From the above graphs we can see that most of the buyers work in the IT, Healthcare and Aviation sectors.*

### Product Category

In [None]:
# Number of purchase records per product category.
# countplot tallies the rows for each 'Product_Category' value.
sns.set(rc={'figure.figsize':(20,5)})
ax = sns.countplot(x='Product_Category', data=df)

# Write the count on top of every bar.
for container in ax.containers:
    ax.bar_label(container)

In [None]:
# Ten product categories with the highest total 'Amount', largest first.
# NOTE: the variable is called 'sales_state' although it holds category totals here.
sns.set(rc={'figure.figsize':(20,5)})

sales_state = (
    df.groupby(['Product_Category'], as_index=False)['Amount']
    .sum()
    .sort_values(by='Amount', ascending=False)
    .head(10)
)

sns.barplot(x='Product_Category', y='Amount', data=sales_state)

*From the above graphs we can see that most of the sold products are from the Food, Clothing and Electronics categories.*

### Top 10 Products

In [None]:
# Ten product IDs with the highest summed 'Orders', largest first.
# NOTE: the variable is called 'sales_state' although it holds per-product totals here.
sns.set(rc={'figure.figsize':(20,5)})

sales_state = (
    df.groupby(['Product_ID'], as_index=False)['Orders']
    .sum()
    .sort_values(by='Orders', ascending=False)
    .head(10)
)

sns.barplot(x='Product_ID', y='Orders', data=sales_state)

## Conclusion:

### 

*Married women in the age group of 26-35 years from UP, Maharashtra and Karnataka, working in the IT, Healthcare and Aviation sectors, are more likely to buy products from the Food, Clothing and Electronics categories.*