`zepto_v2.csv`

In [69]:
import pandas as pd, numpy as np, plotly.express as px
from plotly.subplots import make_subplots as mks
import plotly.graph_objs as go

In [70]:
df = pd.read_csv('zepto_v2.csv', encoding='cp1252')
colors = ["#4E79A7", "#F28E2B", "#E15759", "#76B7B2", "#59A14F", "#EDC949", "#AF7AA1", "#FF9DA7", "#9C755F", "#BAB0AC", "#1F77B4", "#FF7F0E", "#2CA02C", "#D62728", "#9467BD"]

In [71]:
print(df.max())
print(df.idxmax())
print(df.min())
print(df.idxmin())


Category                            Personal Care
name                      iD Wheat Lachha Paratha
mrp                                        260000
discountPercent                                51
availableQuantity                               6
discountedSellingPrice                     139900
weightInGms                                 10000
outOfStock                                   True
quantity                                     1500
dtype: object
Category                  2753
name                      1382
mrp                        517
discountPercent           2608
availableQuantity           53
discountedSellingPrice     517
weightInGms                108
outOfStock                  87
quantity                  2817
dtype: int64
Category                                                          Beverages
name                      "Godrej Aer Power Pocket - Long Lasting Bathro...
mrp                                                                       0
discountPercent 

In [72]:
dfN = df.drop_duplicates(subset='name').copy()
dfN.info()

<class 'pandas.core.frame.DataFrame'>
Index: 1681 entries, 0 to 3728
Data columns (total 9 columns):
 #   Column                  Non-Null Count  Dtype 
---  ------                  --------------  ----- 
 0   Category                1681 non-null   object
 1   name                    1681 non-null   object
 2   mrp                     1681 non-null   int64 
 3   discountPercent         1681 non-null   int64 
 4   availableQuantity       1681 non-null   int64 
 5   discountedSellingPrice  1681 non-null   int64 
 6   weightInGms             1681 non-null   int64 
 7   outOfStock              1681 non-null   bool  
 8   quantity                1681 non-null   int64 
dtypes: bool(1), int64(6), object(2)
memory usage: 119.8+ KB


In [73]:
labels = ['Category', 'discountPercent', 'availableQuantity']

In [74]:
for value in labels:
    print(f'Unique in {value}: ', df[f'{value}'].sort_values(ascending=True).unique(), 'with length of : ', len(df[f'{value}'].unique()))
    print()

Unique in Category:  ['Beverages' 'Biscuits' 'Chocolates & Candies' 'Cooking Essentials'
 'Dairy, Bread & Batter' 'Fruits & Vegetables' 'Health & Hygiene'
 'Home & Cleaning' 'Ice Cream & Desserts' 'Meats, Fish & Eggs' 'Munchies'
 'Paan Corner' 'Packaged Food' 'Personal Care'] with length of :  14

Unique in discountPercent:  [ 0  1  2  3  4  5  6  7  8  9 10 11 12 13 14 15 16 17 18 19 20 21 22 23
 24 25 26 27 28 29 30 31 32 33 35 40 43 45 46 49 50 51] with length of :  42

Unique in availableQuantity:  [0 1 2 3 4 5 6] with length of :  7



In [75]:
dfN

Unnamed: 0,Category,name,mrp,discountPercent,availableQuantity,discountedSellingPrice,weightInGms,outOfStock,quantity
0,Fruits & Vegetables,Onion,2500,16,3,2100,1000,False,1
1,Fruits & Vegetables,Tomato Hybrid,4200,16,3,3500,1000,False,1
2,Fruits & Vegetables,Tender Coconut,5100,15,3,4300,58,False,1
3,Fruits & Vegetables,Coriander Leaves,2000,15,3,1700,100,False,100
4,Fruits & Vegetables,Ladies Finger,1400,14,3,1200,250,False,250
...,...,...,...,...,...,...,...,...,...
3717,Health & Hygiene,Kapiva Wild Tulsi Giloy Juice |,40000,15,1,34000,1000,False,1
3718,Health & Hygiene,Dabur Amla Juice,23500,15,6,19900,1000,False,1
3719,Health & Hygiene,Epigamia Fruit Yogurt Strawberry,4000,50,0,2000,75,True,75
3722,Health & Hygiene,"Eno Cooling Sachets - Cool Mint, 6 Pieces Carton",5400,11,0,4800,5,True,5


In [76]:
fig = px.histogram(dfN, x='Category', color = 'Category', color_discrete_sequence=colors, template='plotly_dark',height=750)
fig.show()

In [77]:
uniqueCategories = dfN['Category'].unique()
n, m = 1, 1
colorPie = ['#ff6361','#ffa600']
subFig1 = mks(rows=1, cols=5, specs=[[{"type": "pie"}, {"type": "pie"}, {"type": "pie"}, {"type": "pie"}, {"type": "pie"}]])
subFig2 = mks(rows=1, cols=4, specs=[[{"type": "pie"}, {"type": "pie"}, {"type": "pie"}, {"type": "pie"}]])

# for value in uniqueCategories:
for value in uniqueCategories:
    if n<=5:
        dfTemp = dfN[dfN['Category'] == f'{value}']
        # figPie = px.pie(dfTemp, names='outOfStock', color ='outOfStock', color_discrete_sequence=colors[n:n+2], template='plotly_dark', width=500, title=f'{value}')
        countBool = dfTemp['outOfStock'].value_counts().tolist()
        subFig1.add_trace(
            go.Pie(values=countBool, labels=['True', 'False'], title=f'{value}'),
            row=1, col=n).update_traces(hoverinfo='label+value', textinfo='percent', marker=dict(colors=colorPie))
        n+=1
        print(f'The Probability of being out of stock for {value} is :\t\t', round(((countBool[1]/countBool[0])*100), 5), '%')
    else:
        dfTemp = dfN[dfN['Category'] == f'{value}']
        countBool = dfTemp['outOfStock'].value_counts().tolist()
        print(f'The Probability of being out of stock for {value} is :\t\t\t\t', round(((countBool[1]/countBool[0])*100), 5), '%')
        subFig2.add_trace(
            go.Pie(values=countBool, labels=['True', 'False'], title=f'{value}'),
            row=1, col=m).update_traces(hoverinfo='label+value', textinfo='percent', marker=dict(colors=colorPie))
        m+=1
subFig1.show()
subFig2.show()

# f.show()

The Probability of being out of stock for Fruits & Vegetables is :		 4.70588 %
The Probability of being out of stock for Cooking Essentials is :		 13.80952 %
The Probability of being out of stock for Dairy, Bread & Batter is :		 30.12048 %
The Probability of being out of stock for Packaged Food is :		 9.96169 %
The Probability of being out of stock for Meats, Fish & Eggs is :		 28.0 %
The Probability of being out of stock for Biscuits is :				 35.71429 %
The Probability of being out of stock for Personal Care is :				 6.16438 %
The Probability of being out of stock for Home & Cleaning is :				 9.46746 %
The Probability of being out of stock for Health & Hygiene is :				 5.35714 %


In [78]:
# fig = px.pie(dfN, names='outOfStock', color ='outOfStock', color_discrete_sequence=colors, template='plotly_dark')
# countBool = dfN['outOfStock'].value_counts().tolist()
# print(countBool)
# print(f'The Probability of being out of stock for overall is : ', round(((countBool[1]/countBool[0])*100), 5), '%')
# fig.show()

In [79]:
fig = px.pie(dfN, names='availableQuantity', color = 'availableQuantity', color_discrete_sequence=colors, template='plotly_dark').update_traces(hoverinfo='label+value', textinfo='percent', marker=dict(colors=colorPie))
fig.show()

In [80]:
fig = px.pie(dfN,names='discountPercent', color = 'discountPercent', color_discrete_sequence=colors, template='plotly_dark').update_traces(textposition='inside').update_layout(uniformtext_minsize=12, uniformtext_mode='hide')
fig.show()

In [89]:

subFig = mks(rows=1, cols=2)
for value in uniqueCategories:
    dfT1 = df[df['Category'] == f'{value}']
    dfT2 = dfT1.sort_values(by='quantity', ascending = False)
    fig1 = go.Line(y = dfT2['mrp'],line=dict(color='#bc5090', width=4),opacity=0.8)
    fig2 = go.Line(y = dfT2['discountedSellingPrice'] ,line=dict(color='#ffa600', width=4), opacity=0.8)
    print(f'{value} with count - ', dfT2['quantity'].count())
    subFig.add_trace(fig1)
    subFig.add_trace(fig2)
    subFig.update_traces(mode="markers+lines").update_layout(width=2800,autosize=True, hovermode='x').show()
    subFig.update_traces(visible=False)




Fruits & Vegetables with count -  93


Cooking Essentials with count -  514


Dairy, Bread & Batter with count -  129


Packaged Food with count -  388


Meats, Fish & Eggs with count -  63


Biscuits with count -  147


Personal Care with count -  344


Home & Cleaning with count -  194


Health & Hygiene with count -  97


In [82]:
for value in uniqueCategories:
    dfT = dfN[dfN['Category'] == f'{value}']
    fig3dScatter = px.scatter_3d(dfT, x='quantity',y='discountedSellingPrice',z='mrp', color='availableQuantity', title=f'{value}', color_continuous_scale='oryel')
    fig3dScatter.update_layout(height=750).update_traces(marker=dict(size=8, symbol="diamond", line=dict(width=2, color="DarkSlateGrey")),selector=dict(mode="markers")).show()