# ***project: sales managers dataset***

# Import libraries

In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import plotly.express as px

# Import Dataset

In [3]:
# Read the sales CSV into the working frame and preview its first two rows.
# NOTE(review): absolute path (looks like a Colab Drive mount) — it only resolves
# where that location exists; confirm before running elsewhere.
sales_csv_path = '/content/drive/MyDrive/Learning/data analysis/projects/profile project/dataset/sales_data.csv'
df = pd.read_csv(sales_csv_path)
df.head(2)

Unnamed: 0,OrderID,Quantity,UnitPrice(USD),Status,OrderDate,Product_Category,Sales_Manager,Shipping_Cost(USD),Delivery_Time(Days),Shipping_Address,Product_Code,OrderCode
0,2.95111e+18,92,238,Not Delivered,8/8/2021,Healthcare,Pablo,21,25.0,Singapore,HC-188,444116
1,2.18191e+18,61,136,Not Delivered,10/3/2021,Office,Pablo,34,14.0,UK,O-555,444772


# Data Cleaning Step

# ***check of data type***

In [4]:
# Show the dtype pandas inferred for each column right after loading.
df.dtypes

OrderID                float64
Quantity                 int64
UnitPrice(USD)           int64
Status                  object
OrderDate               object
Product_Category        object
Sales_Manager           object
Shipping_Cost(USD)       int64
Delivery_Time(Days)    float64
Shipping_Address        object
Product_Code            object
OrderCode                int64
dtype: object

In [5]:
# Convert OrderDate from text to datetime.
# The raw values are month/day/year (e.g. '10/3/2021' is 3 October 2021), so the
# format is pinned explicitly instead of leaving the day/month order to inference.

df['OrderDate'] = pd.to_datetime(df['OrderDate'], format='%m/%d/%Y')

In [6]:
# Confirm that OrderDate now has a datetime dtype.

order_date_column = df['OrderDate']
order_date_column.dtype

dtype('<M8[ns]')

In [7]:
# Re-list all column dtypes after the OrderDate conversion.
df.dtypes

OrderID                       float64
Quantity                        int64
UnitPrice(USD)                  int64
Status                         object
OrderDate              datetime64[ns]
Product_Category               object
Sales_Manager                  object
Shipping_Cost(USD)              int64
Delivery_Time(Days)           float64
Shipping_Address               object
Product_Code                   object
OrderCode                       int64
dtype: object

# ***check of null values***

In [8]:
# Count missing values per column.
missing_mask = df.isna()
missing_mask.sum()

OrderID                 0
Quantity                0
UnitPrice(USD)          0
Status                  0
OrderDate               0
Product_Category       36
Sales_Manager           0
Shipping_Cost(USD)      0
Delivery_Time(Days)    51
Shipping_Address        0
Product_Code            0
OrderCode               0
dtype: int64

In [9]:
# Preview rows whose Product_Category is missing, to see whether another column
# can supply the value.

category_missing = df['Product_Category'].isna()
df.loc[category_missing].head()

Unnamed: 0,OrderID,Quantity,UnitPrice(USD),Status,OrderDate,Product_Category,Sales_Manager,Shipping_Cost(USD),Delivery_Time(Days),Shipping_Address,Product_Code,OrderCode
142,4.76531e+18,59,186,Not Delivered,2021-12-30,,Kristen,28,15.0,USA,HC-101,445660
271,1.22371e+18,14,229,Not Shipped,2021-11-13,,Abdul,34,11.0,UK,F-203,445113
829,2.39241e+18,9,226,Not Shipped,2021-08-26,,Kristen,27,15.0,Italy,O-188,444334
907,1.66961e+18,51,161,Not Shipped,2021-09-10,,Jacob,32,17.0,Italy,ENT-101,444449
983,1.30671e+18,53,168,Delivered,2021-11-07,,Stella,35,21.0,India,O-630,445007


In [10]:
# Preview rows that do have a Product_Category, for comparison with the missing ones.
category_present = df['Product_Category'].notna()
df.loc[category_present].head()

Unnamed: 0,OrderID,Quantity,UnitPrice(USD),Status,OrderDate,Product_Category,Sales_Manager,Shipping_Cost(USD),Delivery_Time(Days),Shipping_Address,Product_Code,OrderCode
0,2.95111e+18,92,238,Not Delivered,2021-08-08,Healthcare,Pablo,21,25.0,Singapore,HC-188,444116
1,2.18191e+18,61,136,Not Delivered,2021-10-03,Office,Pablo,34,14.0,UK,O-555,444772
2,3.23911e+18,67,235,Not Delivered,2021-09-27,Office,Kristen,25,11.0,Kenya,O-188,444666
3,1.11261e+18,33,133,Not Shipped,2021-07-30,Fashion,Abdul,34,24.0,USA,F-555,444007
4,1.54831e+18,13,189,Not Delivered,2021-08-15,Fashion,Stella,24,19.0,Kenya,F-555,444223


In [11]:
# List the distinct Product_Category values (NaN included) before inferring the
# missing ones from Product_Code.

df['Product_Category'].unique()

array(['Healthcare', 'Office', 'Fashion', 'Entertainment', 'Home', nan],
      dtype=object)

In [12]:
# Split Product_Code (e.g. 'HC-188') into its category prefix and numeric part.
# n=1 splits on the first '-' only, so the result always has exactly two columns
# even if a code were to contain a second '-'.

df[['Category_code', 'num_code']] = df['Product_Code'].str.split('-', n=1, expand=True)

In [13]:
# Preview the frame with the new Category_code / num_code columns.
df.head(2)

Unnamed: 0,OrderID,Quantity,UnitPrice(USD),Status,OrderDate,Product_Category,Sales_Manager,Shipping_Cost(USD),Delivery_Time(Days),Shipping_Address,Product_Code,OrderCode,Category_code,num_code
0,2.95111e+18,92,238,Not Delivered,2021-08-08,Healthcare,Pablo,21,25.0,Singapore,HC-188,444116,HC,188
1,2.18191e+18,61,136,Not Delivered,2021-10-03,Office,Pablo,34,14.0,UK,O-555,444772,O,555


In [14]:
# List the distinct category prefixes extracted from Product_Code.
df['Category_code'].unique()

array(['HC', 'O', 'F', 'ENT', 'H'], dtype=object)

In [15]:
# Fill missing Product_Category values from the Product_Code prefix.
# Only null entries are filled; existing categories are left untouched.
# A prefix that is not in the mapping stays NaN, so the null check that follows
# surfaces it instead of silently labelling it 'Home'.

category_by_code = {
    'HC': 'Healthcare',
    'O': 'Office',
    'F': 'Fashion',
    'ENT': 'Entertainment',
    'H': 'Home',
}
inferred_category = df['Category_code'].map(category_by_code)
df['Product_Category'] = df['Product_Category'].fillna(inferred_category)

In [16]:
# Re-count missing Product_Category values after the fill.

df['Product_Category'].isna().sum()

0

In [17]:
# Count missing values per column again, now including the two helper columns.
df.isnull().sum()

OrderID                 0
Quantity                0
UnitPrice(USD)          0
Status                  0
OrderDate               0
Product_Category        0
Sales_Manager           0
Shipping_Cost(USD)      0
Delivery_Time(Days)    51
Shipping_Address        0
Product_Code            0
OrderCode               0
Category_code           0
num_code                0
dtype: int64

In [18]:
# Print the row count, per-column non-null counts and dtypes.
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 9999 entries, 0 to 9998
Data columns (total 14 columns):
 #   Column               Non-Null Count  Dtype         
---  ------               --------------  -----         
 0   OrderID              9999 non-null   float64       
 1   Quantity             9999 non-null   int64         
 2   UnitPrice(USD)       9999 non-null   int64         
 3   Status               9999 non-null   object        
 4   OrderDate            9999 non-null   datetime64[ns]
 5   Product_Category     9999 non-null   object        
 6   Sales_Manager        9999 non-null   object        
 7   Shipping_Cost(USD)   9999 non-null   int64         
 8   Delivery_Time(Days)  9948 non-null   float64       
 9   Shipping_Address     9999 non-null   object        
 10  Product_Code         9999 non-null   object        
 11  OrderCode            9999 non-null   int64         
 12  Category_code        9999 non-null   object        
 13  num_code             9999 non-nul

In [19]:
# Percentage of rows with a missing Delivery_Time(Days), computed from the data
# rather than from hard-coded row counts.

df['Delivery_Time(Days)'].isna().mean() * 100

0.51005100510051

In [20]:
# Preview two rows whose Delivery_Time(Days) is missing.
delivery_time_missing = df['Delivery_Time(Days)'].isna()
df.loc[delivery_time_missing].head(2)

Unnamed: 0,OrderID,Quantity,UnitPrice(USD),Status,OrderDate,Product_Category,Sales_Manager,Shipping_Cost(USD),Delivery_Time(Days),Shipping_Address,Product_Code,OrderCode,Category_code,num_code
30,1.40891e+18,7,130,Delivered,2021-07-04,Healthcare,Sofia,20,,UK,HC-630,443881,HC,630
79,2.19931e+18,35,219,Not Delivered,2021-07-30,Office,Stella,24,,Kenya,O-630,444007,O,630


In [21]:
# Impute missing Delivery_Time(Days) values with the column mean.
# Series.mean() skips NaN by default, so the mean comes from the known values only.
# The result is assigned back to the column instead of calling fillna(inplace=True)
# on a column selection, which is not guaranteed to update df.

delivery_time_mean = df['Delivery_Time(Days)'].mean()
df['Delivery_Time(Days)'] = df['Delivery_Time(Days)'].fillna(delivery_time_mean)

In [22]:
# Re-count missing Delivery_Time(Days) values after the fill.

df['Delivery_Time(Days)'].isnull().sum()

0

In [23]:
# Final per-column count of missing values.
df.isnull().sum()

OrderID                0
Quantity               0
UnitPrice(USD)         0
Status                 0
OrderDate              0
Product_Category       0
Sales_Manager          0
Shipping_Cost(USD)     0
Delivery_Time(Days)    0
Shipping_Address       0
Product_Code           0
OrderCode              0
Category_code          0
num_code               0
dtype: int64

# ***check duplicated values***

In [24]:
# Count rows that are exact duplicates of an earlier row.
df.duplicated().sum()

0

# ***check of white spaces***

In [25]:
# List the column labels to spot stray whitespace in the names.
df.columns

Index(['OrderID', 'Quantity', 'UnitPrice(USD)', 'Status', 'OrderDate',
       'Product_Category', 'Sales_Manager', 'Shipping_Cost(USD)',
       'Delivery_Time(Days)', 'Shipping_Address', 'Product_Code', 'OrderCode',
       'Category_code', 'num_code'],
      dtype='object')

In [26]:
# Strip leading/trailing whitespace from the column labels.
# The stripped labels must be assigned to df.columns itself; assigning to
# df.columns.str leaves the labels unchanged.

df.columns = df.columns.str.strip()

In [27]:
# Strip leading/trailing whitespace from the values of every text column.
# str.strip() returns a new Series, so the result is written back to df;
# only object-dtype columns are touched, numeric and datetime columns stay as they are.

text_columns = df.select_dtypes(include='object').columns
df[text_columns] = df[text_columns].apply(lambda column: column.str.strip())

In [28]:
# Preview the frame after the whitespace clean-up.
df.head(2)

Unnamed: 0,OrderID,Quantity,UnitPrice(USD),Status,OrderDate,Product_Category,Sales_Manager,Shipping_Cost(USD),Delivery_Time(Days),Shipping_Address,Product_Code,OrderCode,Category_code,num_code
0,2.95111e+18,92,238,Not Delivered,2021-08-08,Healthcare,Pablo,21,25.0,Singapore,HC-188,444116,HC,188
1,2.18191e+18,61,136,Not Delivered,2021-10-03,Office,Pablo,34,14.0,UK,O-555,444772,O,555


In [29]:
# Drop the helper columns created from Product_Code; they are no longer needed.
# errors='ignore' keeps the cell re-runnable once the columns are already gone,
# and columns= already names the axis, so no axis argument is passed.

df = df.drop(columns=['Category_code', 'num_code'], errors='ignore')

# ***check of outliers***

In [30]:
# Summary statistics of the numeric columns, rounded to two decimals.
df.describe().round(2)

Unnamed: 0,OrderID,Quantity,UnitPrice(USD),Shipping_Cost(USD),Delivery_Time(Days),OrderCode
count,9999.0,9999.0,9999.0,9999.0,9999.0,9999.0
mean,3.020951e+18,50.74,175.62,27.57,17.52,444682.92
std,1.167549e+18,29.01,43.53,4.61,4.6,525.34
min,1.01001e+18,1.0,100.0,20.0,10.0,443880.0
25%,2.01041e+18,26.0,138.0,24.0,14.0,444222.0
50%,3.00241e+18,51.0,176.0,28.0,17.52,444772.0
75%,4.02451e+18,76.0,213.0,32.0,22.0,445113.0
max,5.04961e+18,100.0,250.0,35.0,25.0,445660.0


In [31]:
# Keep only the four measure columns for the outlier check.

measure_columns = ['Quantity', 'UnitPrice(USD)', 'Shipping_Cost(USD)', 'Delivery_Time(Days)']
df_num = df[measure_columns]

In [32]:
# One box per numeric column, overlaid, to look for outliers.
outlier_fig = px.box(df_num, boxmode='overlay', color='variable')
outlier_fig.update_yaxes(matches=None)

# Data Analysis Step

In [33]:
# Heatmap of the pairwise correlations between the selected numeric columns.
# NOTE(review): px.imshow documents aspect as 'equal' / 'auto' / None; True is kept
# unchanged here — confirm the intended value.
corr_matrix = df_num.corr(numeric_only=True)
px.imshow(
    corr_matrix,
    color_continuous_scale='Blues',
    text_auto=True,
    aspect=True,
    height=400,
)

- There is no strong correlation between the numeric columns, so look for another variable that can explain them.

In [34]:
# Preview the cleaned frame before deriving new columns.
df.head(2)

Unnamed: 0,OrderID,Quantity,UnitPrice(USD),Status,OrderDate,Product_Category,Sales_Manager,Shipping_Cost(USD),Delivery_Time(Days),Shipping_Address,Product_Code,OrderCode
0,2.95111e+18,92,238,Not Delivered,2021-08-08,Healthcare,Pablo,21,25.0,Singapore,HC-188,444116
1,2.18191e+18,61,136,Not Delivered,2021-10-03,Office,Pablo,34,14.0,UK,O-555,444772


In [35]:
# List the distinct order statuses before deriving a profit column from Status.

df['Status'].unique()

array(['Not Delivered', 'Not Shipped', 'Delivered', 'Shipped'],
      dtype=object)

In [36]:
# Profit per order: Quantity * UnitPrice(USD) - Shipping_Cost(USD) for orders with
# Status 'Delivered', and 0 for every other status.
# Computed column-wise instead of row by row; cast to float64 so the column keeps
# the float dtype the row-wise version produced.

is_delivered = df['Status'] == 'Delivered'
order_value = df['Quantity'] * df['UnitPrice(USD)'] - df['Shipping_Cost(USD)']
df['profit'] = order_value.where(is_delivered, 0).astype('float64')

In [37]:
# Rebuild the numeric subset, this time including the new profit column.

measure_columns = ['Quantity', 'UnitPrice(USD)', 'Shipping_Cost(USD)', 'Delivery_Time(Days)', 'profit']
df_num = df[measure_columns]

In [38]:
# Correlation heatmap of the numeric columns, profit included.
corr_matrix = df_num.corr(numeric_only=True)
px.imshow(
    corr_matrix,
    height=400,
    text_auto=True,
    aspect=True,
    color_continuous_scale='Blues',
)

# **Q: How much profit for each Product_Category that has Delivered Status?**

In [39]:
# Total profit per product category for delivered orders, highest first.

delivered_profit = df.loc[df['Status'] == 'Delivered', ['Product_Category', 'profit']]
profit_by_category = delivered_profit.groupby('Product_Category').sum()
profit_by_category.sort_values(by='profit', ascending=False)

Unnamed: 0_level_0,profit
Product_Category,Unnamed: 1_level_1
Healthcare,4907158.0
Office,4376335.0
Fashion,4282279.0
Entertainment,3984410.0
Home,3968696.0


In [81]:
# Bar chart of delivered profit per product category, one colour per category.
# The grouped table is built once and reused for both the bars and the colour labels.
profit_by_category = (
    df.loc[df['Status'] == 'Delivered', ['Product_Category', 'profit']]
    .groupby('Product_Category')
    .sum()
)
chart_color = profit_by_category.index
px.bar(profit_by_category, color=chart_color, height=400)

In [41]:
# Count orders per status, largest first.
# NOTE(review): value_counts() already sorts in descending order by default, so the
# extra sort_values looks redundant — confirm before removing.
df['Status'].value_counts().sort_values(ascending= False)

Not Delivered    2572
Shipped          2554
Delivered        2451
Not Shipped      2422
Name: Status, dtype: int64

- A lot of orders were not delivered; exclude them by keeping only orders with status 'Delivered', i.e. orders that have a profit.

In [42]:
# Keep only the orders with a non-zero profit and preview them.

has_profit = df['profit'] != 0
df.loc[has_profit].head()

Unnamed: 0,OrderID,Quantity,UnitPrice(USD),Status,OrderDate,Product_Category,Sales_Manager,Shipping_Cost(USD),Delivery_Time(Days),Shipping_Address,Product_Code,OrderCode,profit
7,4.79751e+18,48,240,Delivered,2021-10-04,Fashion,Abdul,22,21.0,Kenya,F-203,444773,11498.0
11,2.59601e+18,44,246,Delivered,2021-07-11,Office,Stella,20,12.0,Kenya,O-555,443888,10804.0
12,3.88231e+18,78,219,Delivered,2021-10-29,Entertainment,Emma,24,19.0,USA,ENT-901,444998,17058.0
16,2.80881e+18,96,115,Delivered,2021-10-17,Home,Jacob,30,25.0,Kenya,H-901,444886,11010.0
17,1.93251e+18,62,237,Delivered,2021-08-24,Fashion,Stella,32,13.0,Italy,F-203,444332,14662.0


In [43]:
# Correlation heatmap restricted to orders with a non-zero profit.
profitable_orders = df.loc[df['profit'] != 0]
profitable_corr = profitable_orders.corr(numeric_only=True)
px.imshow(
    profitable_corr,
    height=400,
    aspect=True,
    text_auto=True,
    color_continuous_scale='Blues',
)

- **Very strong correlation**  
- Quantity and profit: 0.89.

In [82]:
# Scatter of Quantity against profit for orders with a non-zero profit, with an OLS trendline.
profitable_orders = df.loc[df['profit'] != 0]
px.scatter(profitable_orders, x='Quantity', y='profit', trendline='ols', height=400)

# **Q: What is the total Quantity ordered for each Product Category in Delivered orders?**

In [45]:
# Total quantity per product category for orders with a non-zero profit, highest first.

profitable_orders = df.loc[df['profit'] != 0]
quantity_by_category = profitable_orders.groupby('Product_Category')[['Quantity']].sum()
quantity_by_category.sort_values(by='Quantity', ascending=False)

Unnamed: 0_level_0,Quantity
Product_Category,Unnamed: 1_level_1
Healthcare,28342
Office,24866
Fashion,24633
Home,23225
Entertainment,22371


# **Q: What is the total Quantity ordered for each Shipping Address in Delivered orders?**

In [46]:
# Total quantity per shipping address for orders with a non-zero profit, highest first.
profitable_orders = df.loc[df['profit'] != 0]
quantity_by_address = profitable_orders.groupby('Shipping_Address')[['Quantity']].sum()
quantity_by_address.sort_values(by='Quantity', ascending=False)

Unnamed: 0_level_0,Quantity
Shipping_Address,Unnamed: 1_level_1
USA,16492
Kenya,16214
China,16172
Singapore,15666
UK,15294
Italy,14854
India,14806
Germany,13939


# **Q: How much profit does each sales manager have, relative to quantity?**

In [47]:
# Total quantity and profit per sales manager (non-zero-profit orders), highest profit first.
profitable_orders = df.loc[df['profit'] != 0]
manager_totals = profitable_orders.groupby('Sales_Manager')[['Quantity', 'profit']].sum()
manager_totals.sort_values(by='profit', ascending=False)

Unnamed: 0_level_0,Quantity,profit
Sales_Manager,Unnamed: 1_level_1,Unnamed: 2_level_1
Stella,13568,2368747.0
Emma,13263,2293650.0
Abdul,12817,2172082.0
John,12202,2170210.0
Jacob,12199,2140757.0
Maria,12074,2140647.0
Anthony,12453,2138622.0
Kristen,12067,2098779.0
Pablo,11573,2010760.0
Sofia,11221,1984624.0


In [48]:
# Bar charts of quantity and profit per sales manager, one facet per measure,
# each facet with its own y-axis scale.
manager_totals = (
    df.loc[df['profit'] != 0, ['Sales_Manager', 'Quantity', 'profit']]
    .groupby('Sales_Manager')
    .sum()
)
manager_fig = px.bar(
    manager_totals,
    facet_col='variable',
    barmode='group',
    text_auto=True,
    height=400,
)
manager_fig.update_yaxes(matches=None)

- Orders with Status = 'Shipped' are considered waiting (pending) profit; let's analyze them.

# **Q: How much waiting profit do orders with status 'Shipped' hold for each product category?**

In [49]:
# List the distinct order statuses again before deriving waiting_profit.
df['Status'].unique()

array(['Not Delivered', 'Not Shipped', 'Delivered', 'Shipped'],
      dtype=object)

In [50]:
# Waiting profit per order: Quantity * UnitPrice(USD) - Shipping_Cost(USD) for
# orders with Status 'Shipped'; every other row is left as NaN.
# Computed column-wise, so re-running the cell always rebuilds the whole column.

is_shipped = df['Status'] == 'Shipped'
pending_value = df['Quantity'] * df['UnitPrice(USD)'] - df['Shipping_Cost(USD)']
df['waiting_profit'] = pending_value.where(is_shipped)

In [51]:
# Display the new waiting_profit column.
df['waiting_profit']

0          NaN
1          NaN
2          NaN
3          NaN
4          NaN
         ...  
9994       NaN
9995    4965.0
9996       NaN
9997    2084.0
9998       NaN
Name: waiting_profit, Length: 9999, dtype: float64

In [52]:
# Count the rows where waiting_profit is NaN.
df['waiting_profit'].isnull().sum()

7445

In [53]:
# Replace the NaN waiting_profit values with 0, in a single column-wise operation.

df['waiting_profit'] = df['waiting_profit'].fillna(0)

In [54]:
# Confirm no NaN values remain in waiting_profit.
df['waiting_profit'].isnull().sum()

0

In [55]:
# Correlation heatmap restricted to orders with Status 'Shipped'.
shipped_orders = df.loc[df['Status'] == 'Shipped']
shipped_corr = shipped_orders.corr(numeric_only=True)
px.imshow(
    shipped_corr,
    text_auto=True,
    aspect=True,
    color_continuous_scale='Blues',
)

In [56]:
# Total waiting profit per product category for shipped orders, highest first.
shipped_orders = df.loc[df['Status'] == 'Shipped']
waiting_by_category = shipped_orders.groupby('Product_Category')[['waiting_profit']].sum()
waiting_by_category.sort_values(by='waiting_profit', ascending=False)

Unnamed: 0_level_0,waiting_profit
Product_Category,Unnamed: 1_level_1
Home,4716650.0
Fashion,4659861.0
Office,4649698.0
Entertainment,4503695.0
Healthcare,4284157.0


# **Q: What is the total Quantity for each Product Category in orders with Shipped status?**

In [57]:
# Total quantity per product category for shipped orders, highest first.
shipped_orders = df.loc[df['Status'] == 'Shipped']
shipped_quantity = shipped_orders.groupby('Product_Category')[['Quantity']].sum()
shipped_quantity.sort_values(by='Quantity', ascending=False)

Unnamed: 0_level_0,Quantity
Product_Category,Unnamed: 1_level_1
Home,26895
Office,26853
Fashion,26162
Entertainment,25897
Healthcare,24433


# **Q: How much waiting profit does each sales manager have, relative to Quantity?**

In [58]:
# select data

# Total waiting profit and quantity per sales manager (rows with non-zero
# waiting_profit), highest waiting profit first.
pending_orders = df.loc[df['waiting_profit'] != 0]
pending_totals = pending_orders.groupby('Sales_Manager')[['waiting_profit', 'Quantity']].sum()
pending_totals.sort_values(by='waiting_profit', ascending=False)

Unnamed: 0_level_0,waiting_profit,Quantity
Sales_Manager,Unnamed: 1_level_1,Unnamed: 2_level_1
John,2549533.0,14454
Sofia,2549033.0,15299
Pablo,2457760.0,14024
Anthony,2413003.0,13778
Abdul,2357872.0,12938
Kristen,2156051.0,12482
Jacob,2114856.0,12025
Maria,2105778.0,11642
Emma,2095168.0,12248
Stella,2015007.0,11350


In [59]:
# Bar charts of waiting profit and quantity per sales manager, one facet per
# measure, each facet with its own y-axis scale.
pending_totals = (
    df.loc[df['waiting_profit'] != 0, ['Sales_Manager', 'waiting_profit', 'Quantity']]
    .groupby('Sales_Manager')
    .sum()
)
pending_fig = px.bar(pending_totals, facet_col='variable', barmode='group', height=400)
pending_fig.update_yaxes(matches=None)

# **Q: How much waiting profit and profit does each sales manager have, and who has the biggest expected profit?**

In [60]:
# select data

# Total profit and waiting profit per sales manager, over all orders.
manager_groups = df.groupby('Sales_Manager')
manager_groups[['profit', 'waiting_profit']].sum()

Unnamed: 0_level_0,profit,waiting_profit
Sales_Manager,Unnamed: 1_level_1,Unnamed: 2_level_1
Abdul,2172082.0,2357872.0
Anthony,2138622.0,2413003.0
Emma,2293650.0,2095168.0
Jacob,2140757.0,2114856.0
John,2170210.0,2549533.0
Kristen,2098779.0,2156051.0
Maria,2140647.0,2105778.0
Pablo,2010760.0,2457760.0
Sofia,1984624.0,2549033.0
Stella,2368747.0,2015007.0


In [83]:
# Bar chart of profit and waiting profit per sales manager.
profit_vs_waiting = df.groupby('Sales_Manager')[['profit', 'waiting_profit']].sum()
px.bar(profit_vs_waiting, height=400)

# **Q: What percentage of the Delivered and Not Delivered orders combined are Not Delivered?**

In [62]:
# Count orders per status.
df['Status'].value_counts()

Not Delivered    2572
Shipped          2554
Delivered        2451
Not Shipped      2422
Name: Status, dtype: int64

In [63]:
# Share of 'Not Delivered' orders among 'Delivered' + 'Not Delivered' orders, in percent.
# The counts are read from the data instead of being typed in by hand.
status_counts = df['Status'].value_counts()
not_delivered_count = status_counts['Not Delivered']
delivered_count = status_counts['Delivered']
not_delivered_count / (delivered_count + not_delivered_count) * 100

51.20445948636273

# **Q: How much profit by Date?**

In [64]:
# Preview the frame, now including the profit and waiting_profit columns.
df.head(2)

Unnamed: 0,OrderID,Quantity,UnitPrice(USD),Status,OrderDate,Product_Category,Sales_Manager,Shipping_Cost(USD),Delivery_Time(Days),Shipping_Address,Product_Code,OrderCode,profit,waiting_profit
0,2.95111e+18,92,238,Not Delivered,2021-08-08,Healthcare,Pablo,21,25.0,Singapore,HC-188,444116,0.0,0.0
1,2.18191e+18,61,136,Not Delivered,2021-10-03,Office,Pablo,34,14.0,UK,O-555,444772,0.0,0.0


In [65]:
# Total profit per order date, for orders with a non-zero profit.
profitable_orders = df.loc[df['profit'] != 0]
profitable_orders.groupby('OrderDate')[['profit']].sum()

Unnamed: 0_level_0,profit
OrderDate,Unnamed: 1_level_1
2021-07-03,307473.0
2021-07-04,202839.0
2021-07-05,200942.0
2021-07-06,161709.0
2021-07-07,119741.0
...,...
2021-12-23,124056.0
2021-12-24,268616.0
2021-12-26,195802.0
2021-12-29,146160.0


In [66]:
# Line chart of total profit per order date (non-zero-profit orders).
daily_profit = df.loc[df['profit'] != 0].groupby('OrderDate')[['profit']].sum()
px.line(daily_profit, height=400, markers=True)

# **Q: What is the total Quantity by Date?**

In [67]:
# Total quantity per order date (non-zero-profit orders), in date order.
profitable_orders = df.loc[df['profit'] != 0]
daily_quantity = profitable_orders.groupby('OrderDate')[['Quantity']].sum()
daily_quantity.sort_values(by='OrderDate')

Unnamed: 0_level_0,Quantity
OrderDate,Unnamed: 1_level_1
2021-07-03,1783
2021-07-04,1141
2021-07-05,1073
2021-07-06,915
2021-07-07,677
...,...
2021-12-23,647
2021-12-24,1544
2021-12-26,1093
2021-12-29,793


In [68]:
# Line chart of total quantity per order date (non-zero-profit orders).
daily_quantity = (
    df.loc[df['profit'] != 0]
    .groupby('OrderDate')[['Quantity']]
    .sum()
    .sort_values(by='OrderDate')
)
px.line(daily_quantity, height=400, markers=True)

- All orders were placed in 2021, so analyze by month.

# **Q: How much profit by month, relative to Quantity, in 2021?**

In [69]:
# Add the calendar month number of each order date as a new column.
df['month'] = df['OrderDate'].dt.month

In [70]:
# Preview the frame with the new month column.
df.head(2)

Unnamed: 0,OrderID,Quantity,UnitPrice(USD),Status,OrderDate,Product_Category,Sales_Manager,Shipping_Cost(USD),Delivery_Time(Days),Shipping_Address,Product_Code,OrderCode,profit,waiting_profit,month
0,2.95111e+18,92,238,Not Delivered,2021-08-08,Healthcare,Pablo,21,25.0,Singapore,HC-188,444116,0.0,0.0,8
1,2.18191e+18,61,136,Not Delivered,2021-10-03,Office,Pablo,34,14.0,UK,O-555,444772,0.0,0.0,10


In [71]:
# Total profit and quantity per month (non-zero-profit orders), in month order.
profitable_orders = df.loc[df['profit'] != 0]
monthly_totals = profitable_orders.groupby('month')[['profit', 'Quantity']].sum()
monthly_totals.sort_values(by='month')

Unnamed: 0_level_0,profit,Quantity
month,Unnamed: 1_level_1,Unnamed: 2_level_1
7,4004405.0,22568
8,2965285.0,17095
9,2922015.0,16606
10,4847770.0,27927
11,3065669.0,17820
12,3713734.0,21421


In [72]:
# Line charts of monthly profit and quantity, one facet per measure, each facet
# with its own y-axis scale.
monthly_totals = (
    df.loc[df['profit'] != 0, ['month', 'profit', 'Quantity']]
    .groupby('month')
    .sum()
)
monthly_fig = px.line(monthly_totals, facet_col='variable', markers=True, height=400)
monthly_fig.update_yaxes(matches=None)

In [73]:
# Preview the frame again before the per-date, per-category breakdown.
df.head(2)

Unnamed: 0,OrderID,Quantity,UnitPrice(USD),Status,OrderDate,Product_Category,Sales_Manager,Shipping_Cost(USD),Delivery_Time(Days),Shipping_Address,Product_Code,OrderCode,profit,waiting_profit,month
0,2.95111e+18,92,238,Not Delivered,2021-08-08,Healthcare,Pablo,21,25.0,Singapore,HC-188,444116,0.0,0.0,8
1,2.18191e+18,61,136,Not Delivered,2021-10-03,Office,Pablo,34,14.0,UK,O-555,444772,0.0,0.0,10


In [74]:
# Total profit per (order date, product category) pair, over all orders.
date_category_groups = df.groupby(['OrderDate', 'Product_Category'])
date_category_groups[['profit']].sum()

Unnamed: 0_level_0,Unnamed: 1_level_0,profit
OrderDate,Product_Category,Unnamed: 2_level_1
2021-07-03,Entertainment,98465.0
2021-07-03,Fashion,50883.0
2021-07-03,Healthcare,61017.0
2021-07-03,Home,67860.0
2021-07-03,Office,29248.0
...,...,...
2021-12-30,Entertainment,48026.0
2021-12-30,Fashion,23324.0
2021-12-30,Healthcare,36439.0
2021-12-30,Home,30727.0


In [75]:
# Same totals in wide form: one row per order date, one column per product category.
pd.pivot_table(
    df,
    values='profit',
    index='OrderDate',
    columns='Product_Category',
    aggfunc='sum',
)

Product_Category,Entertainment,Fashion,Healthcare,Home,Office
OrderDate,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
2021-07-03,98465.0,50883.0,61017.0,67860.0,29248.0
2021-07-04,42884.0,14803.0,29019.0,62414.0,53719.0
2021-07-05,45248.0,28237.0,55360.0,9722.0,62375.0
2021-07-06,35760.0,30948.0,30684.0,15770.0,48547.0
2021-07-07,31327.0,35166.0,13946.0,7250.0,32052.0
...,...,...,...,...,...
2021-12-23,46228.0,30967.0,35755.0,0.0,11106.0
2021-12-24,68415.0,44993.0,65064.0,35189.0,54955.0
2021-12-26,49971.0,19233.0,52139.0,56881.0,17578.0
2021-12-29,17032.0,10637.0,22044.0,40847.0,55600.0


In [76]:
# Line chart of daily profit, one facet per product category.
daily_profit_by_category = df.pivot_table(
    values='profit',
    index='OrderDate',
    columns='Product_Category',
    aggfunc='sum',
)
px.line(daily_profit_by_category, height=400, facet_col='Product_Category')