In [1]:
# Libraries for working with data
import numpy as np 
import pandas as pd

# libraries for visualizing data
# NOTE(review): the alias `py` for the top-level plotly package is unusual and
# easy to confuse with other tools; it is kept because later cells may use it.
import plotly as py
import plotly.graph_objs as go
import plotly.express as px
from plotly.offline import init_notebook_mode
# NOTE(review): connected=True presumably makes the notebook load plotly.js from
# a CDN instead of embedding it, so figures need network access to render —
# confirm against the plotly offline documentation.
init_notebook_mode(connected = True)
import seaborn as sns
import matplotlib.pyplot as plt
# Render matplotlib figures inline in the notebook output
%matplotlib inline


#########################################################
# Global notebook configuration.
# NOTE(review): this silences *every* warning for the rest of the session,
# including deprecation and dtype warnings from pandas; consider narrowing the
# filter to specific categories once the notebook is stable.
import warnings
warnings.filterwarnings("ignore")

# Never truncate columns when a DataFrame is displayed
pd.set_option('display.max_columns', None)

In [2]:
# Read the competition files (train, test, sample submission) from one input directory
INPUT_DIR = '../input/tabular-playground-series-jan-2022'

train = pd.read_csv(INPUT_DIR + '/train.csv')
test = pd.read_csv(INPUT_DIR + '/test.csv')
submission = pd.read_csv(INPUT_DIR + '/sample_submission.csv')

In [3]:
# Print a summary of the training frame: columns, non-null counts, dtypes and memory usage
train.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 26298 entries, 0 to 26297
Data columns (total 6 columns):
 #   Column    Non-Null Count  Dtype 
---  ------    --------------  ----- 
 0   row_id    26298 non-null  int64 
 1   date      26298 non-null  object
 2   country   26298 non-null  object
 3   store     26298 non-null  object
 4   product   26298 non-null  object
 5   num_sold  26298 non-null  int64 
dtypes: int64(2), object(4)
memory usage: 1.2+ MB


In [4]:
# Turn the string `date` column into real datetimes in both the train and test frames
for _frame in (train, test):
    _frame['date'] = pd.to_datetime(_frame['date'])

In [5]:
# Distinct product names present in the training data
product_column = train['product']
product_column.unique()

array(['Kaggle Mug', 'Kaggle Hat', 'Kaggle Sticker'], dtype=object)

In [6]:
# Print the distinct values of each categorical column, one column per line
categorical_columns = ('country', 'store', 'product')
for col in categorical_columns:
    distinct_values = train[col].unique()
    print(f'{col}: {distinct_values}')


country: ['Finland' 'Norway' 'Sweden']
store: ['KaggleMart' 'KaggleRama']
product: ['Kaggle Mug' 'Kaggle Hat' 'Kaggle Sticker']


In [7]:
# Number of training rows for every (country, store, product) combination.
# The single value column is labelled 'country' (it holds the row count, not a
# country name); the label and the variable name `a` are kept as they were.
group_keys = ['country', 'store', 'product']
a = train.groupby(group_keys).size().to_frame(name = 'country')
a

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,country
country,store,product,Unnamed: 3_level_1
Finland,KaggleMart,Kaggle Hat,1461
Finland,KaggleMart,Kaggle Mug,1461
Finland,KaggleMart,Kaggle Sticker,1461
Finland,KaggleRama,Kaggle Hat,1461
Finland,KaggleRama,Kaggle Mug,1461
Finland,KaggleRama,Kaggle Sticker,1461
Norway,KaggleMart,Kaggle Hat,1461
Norway,KaggleMart,Kaggle Mug,1461
Norway,KaggleMart,Kaggle Sticker,1461
Norway,KaggleRama,Kaggle Hat,1461


In [8]:
# Number of training rows per product, as a flat two-column frame: product, count
products = (
    train
    .groupby('product')
    .size()
    .reset_index(name = 'count')
)
products

Unnamed: 0,product,count
0,Kaggle Hat,8766
1,Kaggle Mug,8766
2,Kaggle Sticker,8766


In [9]:
# Donut chart showing each product's share of the training rows.
#
# `products` has one row per product (Hat, Mug, Sticker), so the palette needs
# three entries. The original list had only two, which left the third slice to
# be coloured from plotly's default colour sequence instead of this palette.
product_colors = ['#2A3132', '#336B87', '#90AFC5']

# Columns are referenced by name; plotly express looks them up in `products`.
fig = px.pie(products, values = 'count', names = 'product')
fig.update_traces(textposition = 'inside',
                  # flaglist in its canonical form: flags joined by '+' with no spaces
                  textinfo = 'percent+label',
                  # a large hole turns the pie into a thin ring
                  hole = 0.75,
                  marker = dict(colors = product_colors,
                                line = dict(color = 'white', width = 2)))

# title_x / title_y position the title inside the ring's hole; the legend is
# hidden because every slice already carries its own label.
fig.update_layout(title_text = 'Mug, Hat & Sticker',
                  title_x = 0.5,
                  title_y = 0.53,
                  title_font_size = 16,
                  title_font_family = 'Calibri',
                  title_font_color = 'black',
                  showlegend = False)

fig.show()