In [2]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import os

# Display settings for rendered frames: no cap on the number of columns shown,
# and a 1000-character display width.
for option_key, option_value in (
    ('display.max_columns', None),
    ('display.width', 1000),
):
    pd.set_option(option_key, option_value)


In [4]:
# Load the sales workbook into a DataFrame; the path is relative to the
# kernel's working directory.
sales_workbook = "Sales Data.xlsx"
sales_set = pd.read_excel(sales_workbook)

# Preview the first five rows as the cell's output.
sales_set.head()

Unnamed: 0,Sales Person,Geography,Product,Date,Amount,Boxes
0,Barr Faughny,New Zealand,Raspberry Choco,2021-01-01,8414,495
1,Dennison Crosswaite,Canada,White Choc,2021-01-01,532,54
2,Karlen McCaffrey,USA,99% Dark & Pure,2021-01-01,5376,269
3,Barr Faughny,New Zealand,Baker's Choco Chips,2021-01-01,259,22
4,Beverie Moffet,USA,Manuka Honey Choco,2021-01-01,5530,179


In [6]:
# info() prints the column dtypes, non-null counts and memory usage.
sales_set.info()

# describe() yields the summary statistics, shown as the cell's result.
summary_stats = sales_set.describe()
summary_stats

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 3791 entries, 0 to 3790
Data columns (total 6 columns):
 #   Column        Non-Null Count  Dtype         
---  ------        --------------  -----         
 0   Sales Person  3791 non-null   object        
 1   Geography     3791 non-null   object        
 2   Product       3791 non-null   object        
 3   Date          3791 non-null   datetime64[ns]
 4   Amount        3791 non-null   int64         
 5   Boxes         3791 non-null   int64         
dtypes: datetime64[ns](1), int64(2), object(3)
memory usage: 177.8+ KB


Unnamed: 0,Date,Amount,Boxes
count,3791,3791.0,3791.0
mean,2021-07-24 07:40:22.474281472,5724.537589,354.675284
min,2021-01-01 00:00:00,0.0,0.0
25%,2021-04-08 00:00:00,2352.0,120.5
50%,2021-07-23 00:00:00,4844.0,265.0
75%,2021-11-12 00:00:00,8302.0,480.0
max,2022-01-31 00:00:00,23268.0,3075.0
std,,4296.484695,335.358698


In [10]:
# Count missing values per column.
# isnull() is an alias of isna(), and a cell only displays its last expression,
# so a single call gives the same output without a discarded duplicate.
sales_set.isna().sum()

Sales Person    0
Geography       0
Product         0
Date            0
Amount          0
Boxes           0
dtype: int64

In [11]:
# Dimensions of the frame as (rows, columns).
n_rows, n_cols = sales_set.shape
(n_rows, n_cols)

(3791, 6)

In [14]:
# .loc selects rows by index *label*.
row_labels = [1, 4, 6, 7]
sales_set.loc[row_labels]

Unnamed: 0,Sales Person,Geography,Product,Date,Amount,Boxes
1,Dennison Crosswaite,Canada,White Choc,2021-01-01,532,54
4,Beverie Moffet,USA,Manuka Honey Choco,2021-01-01,5530,179
6,Oby Sorrel,UK,Raspberry Choco,2021-01-01,1057,71
7,Dotty Strutley,Australia,Organic Choco Syrup,2021-01-01,1036,37


In [15]:
# .iloc selects rows by zero-based integer *position*.
row_positions = [1, 4, 6, 7]
sales_set.iloc[row_positions]

Unnamed: 0,Sales Person,Geography,Product,Date,Amount,Boxes
1,Dennison Crosswaite,Canada,White Choc,2021-01-01,532,54
4,Beverie Moffet,USA,Manuka Honey Choco,2021-01-01,5530,179
6,Oby Sorrel,UK,Raspberry Choco,2021-01-01,1057,71
7,Dotty Strutley,Australia,Organic Choco Syrup,2021-01-01,1036,37


In [18]:
# Label-based selection on both axes: row labels first, then column names.
row_labels = [1, 4, 6, 7]
column_names = ['Product', 'Amount', 'Boxes']
sales_set.loc[row_labels, column_names]

Unnamed: 0,Product,Amount,Boxes
1,White Choc,532,54
4,Manuka Honey Choco,5530,179
6,Raspberry Choco,1057,71
7,Organic Choco Syrup,1036,37


In [30]:
# Position-based selection on both axes.
# Column positions 0, 2 and 4 are 'Sales Person', 'Product' and 'Amount'.
row_positions = [1, 4, 6, 7]
column_positions = [0, 2, 4]
sales_set.iloc[row_positions, column_positions]

Unnamed: 0,Sales Person,Product,Amount
1,Dennison Crosswaite,White Choc,532
4,Beverie Moffet,Manuka Honey Choco,5530
6,Oby Sorrel,Raspberry Choco,1057
7,Dotty Strutley,Organic Choco Syrup,1036


In [29]:
# Derive calendar parts from the 'Date' column via the datetime accessor.
date_parts = sales_set['Date'].dt

sales_set['Year'] = date_parts.year            # e.g. 2021
sales_set['Month'] = date_parts.month_name()   # e.g. 'January'
# Note: 'Day' holds the weekday name (e.g. 'Friday'), not the day of the month.
sales_set['Day'] = date_parts.day_name()

sales_set.head()

Unnamed: 0,Sales Person,Geography,Product,Date,Amount,Boxes,Year,Month,Day
0,Barr Faughny,New Zealand,Raspberry Choco,2021-01-01,8414,495,2021,January,Friday
1,Dennison Crosswaite,Canada,White Choc,2021-01-01,532,54,2021,January,Friday
2,Karlen McCaffrey,USA,99% Dark & Pure,2021-01-01,5376,269,2021,January,Friday
3,Barr Faughny,New Zealand,Baker's Choco Chips,2021-01-01,259,22,2021,January,Friday
4,Beverie Moffet,USA,Manuka Honey Choco,2021-01-01,5530,179,2021,January,Friday


In [33]:
# Positional slicing with iloc: an empty slice selects every row.
# `[:]` and `[::]` both build the same slice(None, None, None).
sales_set.iloc[:]

Unnamed: 0,Sales Person,Geography,Product,Date,Amount,Boxes,Year,Month,Day
0,Barr Faughny,New Zealand,Raspberry Choco,2021-01-01,8414,495,2021,January,Friday
1,Dennison Crosswaite,Canada,White Choc,2021-01-01,532,54,2021,January,Friday
2,Karlen McCaffrey,USA,99% Dark & Pure,2021-01-01,5376,269,2021,January,Friday
3,Barr Faughny,New Zealand,Baker's Choco Chips,2021-01-01,259,22,2021,January,Friday
4,Beverie Moffet,USA,Manuka Honey Choco,2021-01-01,5530,179,2021,January,Friday
...,...,...,...,...,...,...,...,...,...
3786,Van Tuxwell,India,Drinking Coco,2022-01-19,10682,1526,2022,January,Wednesday
3787,Rafaelita Blaksland,Australia,Mint Chip Choco,2022-01-18,11746,511,2022,January,Tuesday
3788,Kelci Walkden,Australia,Spicy Special Slims,2022-01-03,8862,1477,2022,January,Monday
3789,Gunar Cockshoot,New Zealand,Mint Chip Choco,2022-01-21,11592,527,2022,January,Friday
