Required Libraries

In [1]:
import pandas as pd
import seaborn as sns
import plotly.express as px
import matplotlib.pyplot as plt
# Never truncate wide frames: show every column when a DataFrame is displayed.
pd.options.display.max_columns = None

In [2]:
# Location of the cleaned dataset, relative to the notebook's working directory.
CLEANED_DATA_PATH = './../data/cleaned_data.json'

# Read the JSON file into a DataFrame and preview its first rows.
data = pd.read_json(CLEANED_DATA_PATH)
data.head()

Unnamed: 0,id,rank,name,weight,flavour,supplimentName,categoryName,secondaryCategory,productPageCreatedAt,brandName,mrp,offerPrice,mrpOfferPriceDiff,discount,comboCount,freebieCount,packaging,pricePerKg,proteinPercentPerServing,numOfServings,servingSize,isReviewEnabled,rating,totalRating,totalReviews,offerAllowed,isEmiAvailable,emiStartsWith,isBestPrice,lastUpdatedAt,reviews
0,164773,1,"MuscleBlaze Biozyme Performance Whey, 4.4 lb ...",2.0,rich chocolate,Biozyme Performance Whey,Whey Proteins,Proteins,2021-03-17T05:30:00,MuscleBlaze,5499,4299,1200,21,19,0,Jar,2149.5,69.0,56.0,36.0,1,4.487719,570,570,1,1,208.44,0,2023-11-20T05:30:00,"[{'id': 399636, 'date': '2022-06-23T14:13:03',..."
1,232197,2,"MuscleBlaze Biozyme Performance Whey PR, 4.4 ...",2.0,chocolate fudge,Biozyme Performance Whey PR,Whey Proteins,Proteins,2023-08-09T05:30:00,MuscleBlaze,5999,4699,1300,21,3,0,Jar,2349.5,67.0,44.0,45.0,1,4.537313,67,67,1,1,221.2,0,2023-11-20T05:30:00,"[{'id': 566519, 'date': '2023-10-22T23:32:30',..."
2,167163,3,"MuscleBlaze Biozyme Performance Whey, 4.4 lb ...",2.0,chocolate hazelnut,Biozyme Performance Whey,Whey Proteins,Proteins,2021-05-17T05:30:00,MuscleBlaze,5499,4299,1200,21,14,0,Jar,2149.5,69.0,56.0,36.0,1,4.492248,258,258,1,1,208.44,0,2023-11-20T05:30:00,"[{'id': 284547, 'date': '2021-06-30T15:19:48',..."
3,77192,4,MuscleBlaze 80% Raw Whey Protein Supplement Po...,1.0,unflavoured,80% Raw Whey Protein Supplement Powder,Raw Whey Proteins,Raw Whey,2017-03-21T05:30:00,MuscleBlaze,2299,1799,500,21,13,0,Packet,1799.0,80.0,33.0,30.0,1,4.39039,333,333,0,0,0.0,1,2023-11-20T05:30:00,"[{'id': 417996, 'date': '2022-12-05T00:00:00',..."
4,153561,5,"MB Fuel One Whey Protein, 4.4 lb Chocolate",2.0,chocolate,Whey Protein,Whey Proteins,Proteins,2020-10-14T05:30:00,MB Fuel One,5099,3699,1400,27,8,0,Jar,1849.5,62.0,51.0,39.0,1,4.460432,139,139,0,0,0.0,1,2023-11-20T05:30:00,"[{'id': 261661, 'date': '2021-03-28T16:47:40',..."


In [3]:
# Parse both timestamp columns into pandas datetimes in one column-wise pass.
timestamp_columns = ['productPageCreatedAt', 'lastUpdatedAt']
data[timestamp_columns] = data[timestamp_columns].apply(pd.to_datetime)

In [4]:
# Print a schema summary of the frame: index range, per-column non-null
# counts and dtypes (used here to confirm the datetime conversion took effect).
data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 113 entries, 0 to 112
Data columns (total 31 columns):
 #   Column                    Non-Null Count  Dtype         
---  ------                    --------------  -----         
 0   id                        113 non-null    int64         
 1   rank                      113 non-null    int64         
 2   name                      113 non-null    object        
 3   weight                    113 non-null    float64       
 4   flavour                   113 non-null    object        
 5   supplimentName            113 non-null    object        
 6   categoryName              113 non-null    object        
 7   secondaryCategory         113 non-null    object        
 8   productPageCreatedAt      113 non-null    datetime64[ns]
 9   brandName                 113 non-null    object        
 10  mrp                       113 non-null    int64         
 11  offerPrice                113 non-null    int64         
 12  mrpOfferPriceDiff     

In [5]:
# Derive launch-date features from the product page creation timestamp.
data['productLaunchYear'] = data.productPageCreatedAt.dt.year
data['productLaunchMonth'] = data.productPageCreatedAt.dt.month_name()

# Product age in whole calendar months between page creation and the last
# update: (year difference * 12) + month difference. Day-of-month is ignored.
# Computed with vectorised `.dt` accessors instead of a row-wise `apply`;
# the cast keeps the column as int64, matching what the row-wise version
# produced.
data['productAgeInMonths'] = (
    (data.lastUpdatedAt.dt.year - data.productPageCreatedAt.dt.year) * 12
    + (data.lastUpdatedAt.dt.month - data.productPageCreatedAt.dt.month)
).astype('int64')

In [7]:
# One row per (launch year, flavour) with the product count, the total number
# of ratings and the mean rating. Named aggregation produces the display
# column names directly, so no separate rename step is required.
flavour_by_year = (
    data
    .groupby(['productLaunchYear', 'flavour'])
    .agg(**{
        'No. Of Products': ('name', 'count'),
        'No. Of Ratings': ('totalRating', 'sum'),
        'Average Rating': ('rating', 'mean'),
    })
    .reset_index()
)

# Columns exposed to the hover template, in `customdata[i]` index order.
# Declaring all three explicitly means the template below does not depend on
# plotly express implicitly adding the colour column to the hover data.
HOVER_COLUMNS = ['No. Of Ratings', 'No. Of Products', 'Average Rating']

# Sunburst: inner ring = launch year, outer ring = flavour. Sector size is
# the number of products; sector colour is the average rating.
fig = px.sunburst(
    data_frame= flavour_by_year,
    path= ['productLaunchYear', 'flavour'],
    values= 'No. Of Products',
    color= 'Average Rating',
    color_continuous_scale= px.colors.sequential.Aggrnyl_r,
    title= 'Number of Products Within Each Flavour and Average Rating by Year',
    custom_data= HOVER_COLUMNS,
    template= 'plotly_dark',
    width= 1000,
    height= 600,
)

fig.update_layout(
    margin= dict(l= 0, r= 0, b= 0, t= 50),
    title= dict(x= 0.5),  # centre the title horizontally
    coloraxis_colorbar= dict(title= 'Average Rating'),
)

# Custom hover text; indices refer to positions in HOVER_COLUMNS.
# NOTE(review): for the year-level (parent) sectors plotly express appears to
# aggregate non-value, non-colour columns as discrete values, so
# 'No. Of Ratings' may render as '(?)' there rather than a sum — confirm
# against the rendered figure.
fig.update_traces(
    hovertemplate= (
        '<b>No. Of Products:</b> %{customdata[1]:,}<br>'
        '<b>No. Of Ratings:</b> %{customdata[0]:,}<br>'
        '<b>Average Rating:</b> %{customdata[2]:.2f}'
    )
)

fig.show()