# Sales Analysis of E-Commerce Platform

## Importing necessary libraries

In [1]:
import os

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
print("Imported")

Imported


## Data Cleaning

In [2]:
# Location of the raw sales CSV. The default is the original author's local
# Windows path; set the ECOM_SALES_CSV environment variable to load the file
# from another location without editing the notebook.
DATA_PATH = os.environ.get(
    "ECOM_SALES_CSV",
    r"C:\Users\pramo\Datasets\datasets\EcomerceSalesData.csv",
)
data = pd.read_csv(DATA_PATH)
data

Unnamed: 0,User_ID,Cust_name,Product_ID,Gender,Age Group,Age,Marital_Status,State,Zone,Occupation,Product_Category,Orders,Amount,Status,unnamed1
0,1002903,Sanskriti,P00125942,F,26-35,28,0,Maharashtra,Western,Healthcare,Auto,1,23952.0,,
1,1000732,Kartik,P00110942,F,26-35,35,1,Andhra Pradesh,Southern,Govt,Auto,3,23934.0,,
2,1001990,Bindu,P00118542,F,26-35,35,1,Uttar Pradesh,Central,Automobile,Auto,3,23924.0,,
3,1001425,Sudevi,P00237842,M,0-17,16,0,Karnataka,Southern,Construction,Auto,2,23912.0,,
4,1000588,Joni,P00057942,M,26-35,28,1,Gujarat,Western,Food Processing,Auto,2,23877.0,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
11246,1000695,Manning,P00296942,M,18-25,19,1,Maharashtra,Western,Chemical,Office,4,370.0,,
11247,1004089,Reichenbach,P00171342,M,26-35,33,0,Haryana,Northern,Healthcare,Veterinary,3,367.0,,
11248,1001209,Oshin,P00201342,F,36-45,40,0,Madhya Pradesh,Central,Textile,Office,4,213.0,,
11249,1004023,Noonan,P00059442,M,36-45,37,0,Karnataka,Southern,Agriculture,Office,3,206.0,,


In [None]:
# (rows, columns) of the loaded frame.
data.shape

In [None]:
# Preview the first rows (head() shows five by default) to sanity-check the load.
data.head()
# data.tail() would show the last rows instead.

In [None]:
# Column names, dtypes and non-null counts, before any cleaning.
data.info()

### Dropping Unnecessary Columns

In [None]:
# 'Status' and 'unnamed1' appear empty in the preview of the loaded data, so
# they are removed from the frame in place.
# errors='ignore' keeps this cell re-runnable: once the columns are gone, a
# second execution is a no-op instead of raising KeyError.
data.drop(columns=['Status', 'unnamed1'], errors='ignore', inplace=True)
data

In [None]:
# Re-check the schema after dropping the unused columns.
data.info()

In [None]:
# Number of missing values in each column.
data.isnull().sum()

### Dropping Null values

In [None]:
# Remove, in place, every row that has at least one missing value, then show
# the per-column null counts again (expected to be all zero).
data.dropna(axis=0, how='any', inplace=True)
data.isna().sum()

### Changing Datatypes

In [None]:
# Dtype clean-up and human-readable labels.
# (Several columns can also be cast in one call, e.g.
#  df.astype({'A': 'int', 'B': 'float', 'C': 'int'}).)
marital_labels = {0: "Unmarried", 1: "Married"}
gender_labels = {'M': "Male", "F": "Female"}

data['Amount'] = data['Amount'].astype(int)
# Cast to object first, then swap the integer codes for text labels.
data['Marital_Status'] = data['Marital_Status'].astype(object).replace(marital_labels)
data['Gender'] = data['Gender'].replace(gender_labels)

In [None]:
# Confirm the column dtypes after the conversions.
data.dtypes

### Renaming columns for readability

In [None]:
#renaming columns
data.rename(columns={'Orders':'Order_count','Amount':'Total_Amount','Age Group':'Age_Group'},inplace=True)
data.columns

In [None]:
# Summary statistics for the three numeric measures of interest.
# A bare `data.describe()` used to precede this line; only the last expression
# of a cell is displayed, so that result was computed and silently discarded.
data[['Age', 'Order_count', 'Total_Amount']].describe()

## Exploratory Data Analysis

In [None]:
# Global look for every chart below: dark-grid background and the viridis
# palette as the default colour cycle.
# Other built-in styles: 'whitegrid', 'dark', 'white', 'ticks'.
sns.set_style(style='darkgrid')
sns.set_palette(palette=sns.color_palette(palette='viridis'))

### Sales by Gender

In [None]:
# Number of order rows for each gender, with the count written on every bar.
# Palettes worth trying: Set1, Set2, Set3, deep, muted, bright, dark, pastel,
# Blues, BuGn, OrRd.
Count_plot = sns.countplot(data=data, x='Gender')
Count_plot.set_xlabel("Gender")
Count_plot.set_ylabel("OrderCount")
Count_plot.set_title("OrderCount by Gender")
# Each container holds one series of bars; bar_label annotates them with their heights.
for bars in Count_plot.containers:
    Count_plot.bar_label(bars)

In [None]:
# Revenue per gender: total Total_Amount within each Gender, drawn as labelled bars.
grouped_df = data.groupby('Gender', as_index=False)['Total_Amount'].agg('sum')
plot2 = sns.barplot(data=grouped_df, x='Gender', y='Total_Amount')
plot2.set_xlabel("Gender")
plot2.set_ylabel("Total_Amount")
plot2.set_title("Total_Revenue by Gender")
for bars in plot2.containers:
    plot2.bar_label(bars)

##### From the above plot we can conclude that the total amount spent by women is greater than the amount spent by men, i.e. almost double.

### Sales by Age_Group

In [None]:
# Count of age groups
plot3=sns.countplot(data,x='Age_Group',hue='Gender',palette='viridis',order=['0-17','18-25','26-35','36-45','46-50','51-55','55+'])
plot3.set(xlabel="Age_Group", ylabel="Order_Count", title="OrderCount by Age_Group")
for bars in plot3.containers:
    plot3.bar_label(bars)

In [None]:
#AmountSpent by different age groups
grouped_df=data.groupby('Age_Group',as_index=False)['Total_Amount'].sum().sort_values(by='Total_Amount',ascending=True)
plot2=sns.barplot(grouped_df,x='Age_Group',y='Total_Amount',palette='viridis')
plot2.set(xlabel="Gender", ylabel="Total_Amount", title="Total Revenue by Age_Group")
plt.show()

##### From the above figure we can conclude that customers in the 26-35 age group have spent comparatively more than the other age groups.


In [None]:
# Number of rows for each Marital_Status value.
data['Marital_Status'].value_counts()

### Sales by Marital_Status

In [None]:
#to get count labels on bars
maritial_status_count=sns.countplot(data,x='Marital_Status',palette='viridis',hue='Gender')
plt.figure(figsize=(8,5))
maritial_status_count.set(xlabel="Marital_status", ylabel="OrderCount", title="Orders Count by Marital_status")
for bars in maritial_status_count.containers:
    maritial_status_count.bar_label(bars)

In [None]:
# Revenue per (marital status, gender) pair, largest total first.
marital_status_amount = (
    data.groupby(['Marital_Status', 'Gender'], as_index=False)['Total_Amount']
    .sum()
    .sort_values(by='Total_Amount', ascending=False)
)
# sns.set(rc={'figure.figsize': (26, 7)}) would change the default size for all figures instead.
plt.figure(figsize=(8, 5))
plot = sns.barplot(
    data=marital_status_amount,
    x='Marital_Status',
    y='Total_Amount',
    palette='BuGn',
    hue='Gender',
)
plot.set_xlabel("Marital_status")
plot.set_ylabel("Total_Amount")
plot.set_title("Total Revenue by Marital_status")

# Annotate each bar with its total.
for bars in plot.containers:
    plot.bar_label(bars)

##### Revenue generated by unmarried customers is higher than that generated by married customers, and most of it comes from women.

### Sales by Occupation

In [None]:
# Order rows per occupation, with the count written on each bar.
plt.figure(figsize=(25,9))
occupation_sales_count = sns.countplot(data=data, x='Occupation', palette='viridis')
# Title and axis labels added so the chart stands alone, matching the other plots.
occupation_sales_count.set(xlabel="Occupation", ylabel="OrderCount", title="Orders Count by Occupation")
for bars in occupation_sales_count.containers:
    occupation_sales_count.bar_label(bars)

In [None]:
# Revenue per occupation: sum Total_Amount within each Occupation, largest first.
occupation_sales_amount = (
    data.groupby('Occupation', as_index=False)['Total_Amount']
    .sum()
    .sort_values(by='Total_Amount', ascending=False)
)
# sns.set(rc={'figure.figsize': (26, 7)}) would change the default size for all figures instead.
plt.figure(figsize=(26,7))
plot = sns.barplot(data=occupation_sales_amount, x='Occupation', y='Total_Amount', palette='viridis')
# Title and axis labels added so the chart stands alone, matching the other plots.
plot.set(xlabel="Occupation", ylabel="Total_Amount", title="Total Revenue by Occupation")
for bars in plot.containers:
    plot.bar_label(bars)

### Sales by State

In [None]:
# Total ordered quantity per state (sum of Order_count), highest first.
State_sales_count = (
    data.groupby('State', as_index=False)['Order_count']
    .sum()
    .sort_values(by='Order_count', ascending=False)
)
# A wide figure keeps the state names readable;
# sns.set(rc={'figure.figsize': (26, 7)}) would change the default size instead.
plt.figure(figsize=(26, 7))
plot = sns.barplot(data=State_sales_count, x='State', y='Order_count', palette='viridis')
plot.set_xlabel("State")
plot.set_ylabel("order_quantity")
plot.set_title("Order Quantity by State")
for bars in plot.containers:
    plot.bar_label(bars)

In [None]:
# Revenue per state (sum of Total_Amount), highest first. The sorted frame is
# kept in state_sales_amount.
state_sales_amount = (
    data.groupby('State', as_index=False)['Total_Amount']
    .sum()
    .sort_values(by='Total_Amount', ascending=False)
)
# sns.set(rc={'figure.figsize': (26, 7)}) would change the default size for all figures instead.
plt.figure(figsize=(26, 7))
plot = sns.barplot(data=state_sales_amount, x='State', y='Total_Amount', palette='viridis')
plot.set_xlabel("State")
plot.set_ylabel("Total_Amount")
plot.set_title("Total revenue by State")
for bars in plot.containers:
    plot.bar_label(bars)
plt.show()

##### Most of the company's revenue comes from Uttar Pradesh, which has the highest total of all states, followed by Maharashtra in second place and Karnataka in third.

### Sales by Product_Category

In [None]:
# Order rows per product category, with the count written on each bar.
plt.figure(figsize=(25, 7))
product_category_sales_count = sns.countplot(
    data=data,
    x='Product_Category',
    palette='viridis',
)
product_category_sales_count.set_xlabel("Product_Category")
product_category_sales_count.set_ylabel("Order_Count")
product_category_sales_count.set_title("Orders Count by Product_category")
for bars in product_category_sales_count.containers:
    product_category_sales_count.bar_label(bars)

In [None]:
# Revenue per product category (sum of Total_Amount), highest first. The
# sorted frame is kept in product_category_sales_amount.
product_category_sales_amount = (
    data.groupby('Product_Category', as_index=False)['Total_Amount']
    .sum()
    .sort_values(by='Total_Amount', ascending=False)
)
# sns.set(rc={'figure.figsize': (26, 7)}) would change the default size for all figures instead.
plt.figure(figsize=(30, 7))
plot = sns.barplot(
    data=product_category_sales_amount,
    x='Product_Category',
    y='Total_Amount',
    palette='viridis',
)
plot.set_xlabel("Product_Category")
plot.set_ylabel("Total_Amount")
plot.set_title("Total Revenue by Product_category")

# Annotate each bar with its total.
for bars in plot.containers:
    plot.bar_label(bars)

## Interpretation

### Top Five performing States and Product_Categories

In [None]:
# Five highest-revenue states: the first five rows of the revenue-sorted
# state_sales_amount frame.
plt.figure(figsize=(20, 7))
plot = sns.barplot(
    data=state_sales_amount.iloc[:5],
    x='State',
    y='Total_Amount',
    palette='viridis',
)
plot.set_xlabel("State")
plot.set_ylabel("Total_Amount")
plot.set_title("Top Five States by Revenue")
plt.show()

In [None]:
# Five highest-revenue product categories: the first five rows of the
# revenue-sorted product_category_sales_amount frame.
plt.figure(figsize=(20, 7))
plot = sns.barplot(
    data=product_category_sales_amount.iloc[:5],
    x='Product_Category',
    y='Total_Amount',
    palette='viridis',
)
plot.set_xlabel("Product_Category")
plot.set_ylabel("Total_Amount")
plot.set_title("Top Five Product Categories")
plt.show()

### Overall, we can conclude that the company generated most of its revenue from the product categories Food, Clothing & Apparel, and Electronics & Gadgets, with Uttar Pradesh contributing significantly more revenue than the rest of the states, and that its products attract mostly women customers.