# TASK #1. IMPORT AND EXPLORE DATASET

In [1]:
import pandas as pd

In [2]:
# Import dataset using Pandas
# Link to Dataset: https://www.kaggle.com/carrie1/ecommerce-data
# Data contains transactions details between 01/12/2010 and 09/12/2011 for a UK-based non-store online retail.
# The company specializes in selling unique gifts

sales_df = pd.read_csv('ecommerce_sales.csv', encoding = 'unicode_escape')
sales_df

Unnamed: 0,InvoiceNo,StockCode,Description,Quantity,InvoiceDate,UnitPrice,CustomerID,Country
0,536365,85123A,WHITE HANGING HEART T-LIGHT HOLDER,6,12/1/2010 8:26,2.55,17850.0,United Kingdom
1,536365,71053,WHITE METAL LANTERN,6,12/1/2010 8:26,3.39,17850.0,United Kingdom
2,536365,84406B,CREAM CUPID HEARTS COAT HANGER,8,12/1/2010 8:26,2.75,17850.0,United Kingdom
3,536365,84029G,KNITTED UNION FLAG HOT WATER BOTTLE,6,12/1/2010 8:26,3.39,17850.0,United Kingdom
4,536365,84029E,RED WOOLLY HOTTIE WHITE HEART.,6,12/1/2010 8:26,3.39,17850.0,United Kingdom
...,...,...,...,...,...,...,...,...
541904,581587,22613,PACK OF 20 SPACEBOY NAPKINS,12,12/9/2011 12:50,0.85,12680.0,France
541905,581587,22899,CHILDREN'S APRON DOLLY GIRL,6,12/9/2011 12:50,2.10,12680.0,France
541906,581587,23254,CHILDRENS CUTLERY DOLLY GIRL,4,12/9/2011 12:50,4.15,12680.0,France
541907,581587,23255,CHILDRENS CUTLERY CIRCUS PARADE,4,12/9/2011 12:50,4.15,12680.0,France


In [4]:
# Convert Invoice date to datetime format ----> yyyy-mm-dd hh:mm:ss

sales_df['InvoiceDate'] = pd.to_datetime(sales_df['InvoiceDate'])
sales_df

Unnamed: 0,InvoiceNo,StockCode,Description,Quantity,InvoiceDate,UnitPrice,CustomerID,Country
0,536365,85123A,WHITE HANGING HEART T-LIGHT HOLDER,6,2010-12-01 08:26:00,2.55,17850.0,United Kingdom
1,536365,71053,WHITE METAL LANTERN,6,2010-12-01 08:26:00,3.39,17850.0,United Kingdom
2,536365,84406B,CREAM CUPID HEARTS COAT HANGER,8,2010-12-01 08:26:00,2.75,17850.0,United Kingdom
3,536365,84029G,KNITTED UNION FLAG HOT WATER BOTTLE,6,2010-12-01 08:26:00,3.39,17850.0,United Kingdom
4,536365,84029E,RED WOOLLY HOTTIE WHITE HEART.,6,2010-12-01 08:26:00,3.39,17850.0,United Kingdom
...,...,...,...,...,...,...,...,...
541904,581587,22613,PACK OF 20 SPACEBOY NAPKINS,12,2011-12-09 12:50:00,0.85,12680.0,France
541905,581587,22899,CHILDREN'S APRON DOLLY GIRL,6,2011-12-09 12:50:00,2.10,12680.0,France
541906,581587,23254,CHILDRENS CUTLERY DOLLY GIRL,4,2011-12-09 12:50:00,4.15,12680.0,France
541907,581587,23255,CHILDRENS CUTLERY CIRCUS PARADE,4,2011-12-09 12:50:00,4.15,12680.0,France


**MINI CHALLENGE #1:**
- **How many unique countries are present in the dataset? List all countries**

In [8]:
sales_df['Country'].unique()

array(['United Kingdom', 'France', 'Australia', 'Netherlands', 'Germany',
       'Norway', 'EIRE', 'Switzerland', 'Spain', 'Poland', 'Portugal',
       'Italy', 'Belgium', 'Lithuania', 'Japan', 'Iceland',
       'Channel Islands', 'Denmark', 'Cyprus', 'Sweden', 'Austria',
       'Israel', 'Finland', 'Bahrain', 'Greece', 'Hong Kong', 'Singapore',
       'Lebanon', 'United Arab Emirates', 'Saudi Arabia',
       'Czech Republic', 'Canada', 'Unspecified', 'Brazil', 'USA',
       'European Community', 'Malta', 'RSA'], dtype=object)

In [9]:
# no of unique countries

sales_df['Country'].nunique()

38

In [10]:
# to know no of unqiue values in each column i.e. across entire data frame

sales_df.nunique()

InvoiceNo      25900
StockCode       4070
Description     4223
Quantity         722
InvoiceDate    23260
UnitPrice       1630
CustomerID      4372
Country           38
dtype: int64

# TASK #2. GROUPBY

In [11]:
sales_df

Unnamed: 0,InvoiceNo,StockCode,Description,Quantity,InvoiceDate,UnitPrice,CustomerID,Country
0,536365,85123A,WHITE HANGING HEART T-LIGHT HOLDER,6,2010-12-01 08:26:00,2.55,17850.0,United Kingdom
1,536365,71053,WHITE METAL LANTERN,6,2010-12-01 08:26:00,3.39,17850.0,United Kingdom
2,536365,84406B,CREAM CUPID HEARTS COAT HANGER,8,2010-12-01 08:26:00,2.75,17850.0,United Kingdom
3,536365,84029G,KNITTED UNION FLAG HOT WATER BOTTLE,6,2010-12-01 08:26:00,3.39,17850.0,United Kingdom
4,536365,84029E,RED WOOLLY HOTTIE WHITE HEART.,6,2010-12-01 08:26:00,3.39,17850.0,United Kingdom
...,...,...,...,...,...,...,...,...
541904,581587,22613,PACK OF 20 SPACEBOY NAPKINS,12,2011-12-09 12:50:00,0.85,12680.0,France
541905,581587,22899,CHILDREN'S APRON DOLLY GIRL,6,2011-12-09 12:50:00,2.10,12680.0,France
541906,581587,23254,CHILDRENS CUTLERY DOLLY GIRL,4,2011-12-09 12:50:00,4.15,12680.0,France
541907,581587,23255,CHILDRENS CUTLERY CIRCUS PARADE,4,2011-12-09 12:50:00,4.15,12680.0,France


In [12]:
# A groupby operation involves some combination of splitting the object, applying a function, and combining the results. 
# This can be used to group large amounts of data and compute operations on these groups.
# Link: https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.DataFrame.groupby.html

# get Average unit price of each country

sales_df.groupby('Country')['UnitPrice'].mean()

Country
Australia                 3.220612
Austria                   4.243192
Bahrain                   4.556316
Belgium                   3.644335
Brazil                    4.456250
Canada                    6.030331
Channel Islands           4.932124
Cyprus                    6.302363
Czech Republic            2.938333
Denmark                   3.256941
EIRE                      5.911077
European Community        4.820492
Finland                   5.448705
France                    5.028864
Germany                   3.966930
Greece                    4.885548
Hong Kong                42.505208
Iceland                   2.644011
Israel                    3.633131
Italy                     4.831121
Japan                     2.276145
Lebanon                   5.387556
Lithuania                 2.841143
Malta                     5.244173
Netherlands               2.738317
Norway                    6.012026
Poland                    4.170880
Portugal                  8.582976
RSA         

**MINI CHALLENGE #2:**
- **What is the maximum and minimum prices at 2011-12-09 12:25:00**

In [20]:
sales_df.head()

Unnamed: 0,InvoiceNo,StockCode,Description,Quantity,InvoiceDate,UnitPrice,CustomerID,Country
0,536365,85123A,WHITE HANGING HEART T-LIGHT HOLDER,6,2010-12-01 08:26:00,2.55,17850.0,United Kingdom
1,536365,71053,WHITE METAL LANTERN,6,2010-12-01 08:26:00,3.39,17850.0,United Kingdom
2,536365,84406B,CREAM CUPID HEARTS COAT HANGER,8,2010-12-01 08:26:00,2.75,17850.0,United Kingdom
3,536365,84029G,KNITTED UNION FLAG HOT WATER BOTTLE,6,2010-12-01 08:26:00,3.39,17850.0,United Kingdom
4,536365,84029E,RED WOOLLY HOTTIE WHITE HEART.,6,2010-12-01 08:26:00,3.39,17850.0,United Kingdom


In [21]:
## Approach 1  - using groupby()

sales_df.groupby('InvoiceDate')['UnitPrice'].min()     #  2011-12-09 12:25:00  --->  0.72

InvoiceDate
2010-12-01 08:26:00    2.55
2010-12-01 08:28:00    1.85
2010-12-01 08:34:00    1.65
2010-12-01 08:35:00    5.95
2010-12-01 08:45:00    0.42
                       ... 
2011-12-09 12:23:00    1.45
2011-12-09 12:25:00    0.72
2011-12-09 12:31:00    0.19
2011-12-09 12:49:00    1.25
2011-12-09 12:50:00    0.85
Name: UnitPrice, Length: 23260, dtype: float64

In [22]:
## Approach 1  - using groupby()

sales_df.groupby('InvoiceDate')['UnitPrice'].max()    # 2011-12-09 12:25:00  --->   1.85

InvoiceDate
2010-12-01 08:26:00     7.65
2010-12-01 08:28:00     1.85
2010-12-01 08:34:00     9.95
2010-12-01 08:35:00     5.95
2010-12-01 08:45:00    18.00
                       ...  
2011-12-09 12:23:00     1.85
2011-12-09 12:25:00     1.85
2011-12-09 12:31:00     5.95
2011-12-09 12:49:00     8.95
2011-12-09 12:50:00     4.95
Name: UnitPrice, Length: 23260, dtype: float64

In [23]:
# Approach 2  -- min()

mask = (sales_df['InvoiceDate'] == '2011-12-09 12:25:00')
sales_df[mask]['UnitPrice'].min()

0.72

In [24]:
# Approach 2  -- max()

sales_df[mask]['UnitPrice'].max()

1.85

# TASK #3. CREATE MULTI-INDEXED DATAFRAME

In [25]:
sales_df = pd.read_csv('ecommerce_sales.csv', encoding = 'unicode_escape')
sales_df

Unnamed: 0,InvoiceNo,StockCode,Description,Quantity,InvoiceDate,UnitPrice,CustomerID,Country
0,536365,85123A,WHITE HANGING HEART T-LIGHT HOLDER,6,12/1/2010 8:26,2.55,17850.0,United Kingdom
1,536365,71053,WHITE METAL LANTERN,6,12/1/2010 8:26,3.39,17850.0,United Kingdom
2,536365,84406B,CREAM CUPID HEARTS COAT HANGER,8,12/1/2010 8:26,2.75,17850.0,United Kingdom
3,536365,84029G,KNITTED UNION FLAG HOT WATER BOTTLE,6,12/1/2010 8:26,3.39,17850.0,United Kingdom
4,536365,84029E,RED WOOLLY HOTTIE WHITE HEART.,6,12/1/2010 8:26,3.39,17850.0,United Kingdom
...,...,...,...,...,...,...,...,...
541904,581587,22613,PACK OF 20 SPACEBOY NAPKINS,12,12/9/2011 12:50,0.85,12680.0,France
541905,581587,22899,CHILDREN'S APRON DOLLY GIRL,6,12/9/2011 12:50,2.10,12680.0,France
541906,581587,23254,CHILDRENS CUTLERY DOLLY GIRL,4,12/9/2011 12:50,4.15,12680.0,France
541907,581587,23255,CHILDRENS CUTLERY CIRCUS PARADE,4,12/9/2011 12:50,4.15,12680.0,France


**MINI CHALLENGE #3:**
- **Sort the DataFrame in a descending order (countries and dates)**

In [35]:
sales_df = pd.read_csv('ecommerce_sales.csv', encoding = 'unicode_escape')
sales_df.set_index( keys = ['Country', 'InvoiceDate'], inplace = True)
sales_df

Unnamed: 0_level_0,Unnamed: 1_level_0,InvoiceNo,StockCode,Description,Quantity,UnitPrice,CustomerID
Country,InvoiceDate,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
United Kingdom,12/1/2010 8:26,536365,85123A,WHITE HANGING HEART T-LIGHT HOLDER,6,2.55,17850.0
United Kingdom,12/1/2010 8:26,536365,71053,WHITE METAL LANTERN,6,3.39,17850.0
United Kingdom,12/1/2010 8:26,536365,84406B,CREAM CUPID HEARTS COAT HANGER,8,2.75,17850.0
United Kingdom,12/1/2010 8:26,536365,84029G,KNITTED UNION FLAG HOT WATER BOTTLE,6,3.39,17850.0
United Kingdom,12/1/2010 8:26,536365,84029E,RED WOOLLY HOTTIE WHITE HEART.,6,3.39,17850.0
...,...,...,...,...,...,...,...
France,12/9/2011 12:50,581587,22613,PACK OF 20 SPACEBOY NAPKINS,12,0.85,12680.0
France,12/9/2011 12:50,581587,22899,CHILDREN'S APRON DOLLY GIRL,6,2.10,12680.0
France,12/9/2011 12:50,581587,23254,CHILDRENS CUTLERY DOLLY GIRL,4,4.15,12680.0
France,12/9/2011 12:50,581587,23255,CHILDRENS CUTLERY CIRCUS PARADE,4,4.15,12680.0


In [36]:
sales_df.sort_index( ascending = False, inplace = True )
sales_df

Unnamed: 0_level_0,Unnamed: 1_level_0,InvoiceNo,StockCode,Description,Quantity,UnitPrice,CustomerID
Country,InvoiceDate,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
Unspecified,9/2/2011 12:17,565303,22904,CALENDAR PAPER CUT DESIGN,1,2.95,
Unspecified,9/2/2011 12:17,565303,21329,DINOSAURS WRITING SET,1,1.65,
Unspecified,9/2/2011 12:17,565303,21992,VINTAGE PAISLEY STATIONERY SET,1,1.25,
Unspecified,9/2/2011 12:17,565303,20772,GARDEN PATH JOURNAL,1,2.55,
Unspecified,9/2/2011 12:17,565303,23196,VINTAGE LEAF MAGNETIC NOTEPAD,1,1.45,
...,...,...,...,...,...,...,...
Australia,1/11/2011 9:47,540700,21578,WOODLAND DESIGN COTTON TOTE BAG,12,2.25,12393.0
Australia,1/11/2011 9:47,540700,21577,SAVE THE PLANET COTTON TOTE BAG,12,2.25,12393.0
Australia,1/11/2011 9:47,540700,22245,"HOOK, 1 HANGER ,MAGIC GARDEN",12,0.85,12393.0
Australia,1/11/2011 9:47,540700,22244,3 HOOK HANGER MAGIC GARDEN,12,1.95,12393.0


# TASK #4. MULTI-INDEXING OPERATIONS - PART #1

**MINI CHALLENGE #4:**
- **Use InvoiceDate and Country in order as the multi-index**

In [43]:
sales_df = pd.read_csv('ecommerce_sales.csv', encoding = 'unicode_escape')
sales_df.set_index( keys = ['InvoiceDate', 'Country'], inplace = True)
sales_df

Unnamed: 0_level_0,Unnamed: 1_level_0,InvoiceNo,StockCode,Description,Quantity,UnitPrice,CustomerID
InvoiceDate,Country,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
12/1/2010 8:26,United Kingdom,536365,85123A,WHITE HANGING HEART T-LIGHT HOLDER,6,2.55,17850.0
12/1/2010 8:26,United Kingdom,536365,71053,WHITE METAL LANTERN,6,3.39,17850.0
12/1/2010 8:26,United Kingdom,536365,84406B,CREAM CUPID HEARTS COAT HANGER,8,2.75,17850.0
12/1/2010 8:26,United Kingdom,536365,84029G,KNITTED UNION FLAG HOT WATER BOTTLE,6,3.39,17850.0
12/1/2010 8:26,United Kingdom,536365,84029E,RED WOOLLY HOTTIE WHITE HEART.,6,3.39,17850.0
...,...,...,...,...,...,...,...
12/9/2011 12:50,France,581587,22613,PACK OF 20 SPACEBOY NAPKINS,12,0.85,12680.0
12/9/2011 12:50,France,581587,22899,CHILDREN'S APRON DOLLY GIRL,6,2.10,12680.0
12/9/2011 12:50,France,581587,23254,CHILDRENS CUTLERY DOLLY GIRL,4,4.15,12680.0
12/9/2011 12:50,France,581587,23255,CHILDRENS CUTLERY CIRCUS PARADE,4,4.15,12680.0


# TASK #5. MULTI-INDEXING OPERATIONS - PART #2

**MINI CHALLENGE #5:**
- **Calculate the average unit price for transactions occured in "United Kingdom" at "12/1/2010 8:26"**

In [59]:
sales_df = pd.read_csv('ecommerce_sales.csv', encoding = 'unicode_escape')
sales_df.set_index(keys = ["Country", "InvoiceDate"], inplace = True)
# Sort countries to start with alphabetical order
sales_df.sort_index(inplace = True)
sales_df

Unnamed: 0_level_0,Unnamed: 1_level_0,InvoiceNo,StockCode,Description,Quantity,UnitPrice,CustomerID
Country,InvoiceDate,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
Australia,1/10/2011 9:58,540557,22523,CHILDS GARDEN FORK PINK,96,0.85,12415.0
Australia,1/11/2011 9:47,540700,21581,SKULLS DESIGN COTTON TOTE BAG,6,2.25,12393.0
Australia,1/11/2011 9:47,540700,22619,SET OF 6 SOLDIER SKITTLES,8,3.75,12393.0
Australia,1/11/2011 9:47,540700,84997B,RED 3 PIECE RETROSPOT CUTLERY SET,6,3.75,12393.0
Australia,1/11/2011 9:47,540700,20727,LUNCH BAG BLACK SKULL.,20,1.65,12393.0
...,...,...,...,...,...,...,...
Unspecified,9/2/2011 12:17,565303,85227,SET OF 6 3D KIT CARDS FOR KIDS,4,0.85,
Unspecified,9/2/2011 12:17,565303,22138,BAKING SET 9 PIECE RETROSPOT,2,4.95,
Unspecified,9/2/2011 12:17,565303,21889,WOODEN BOX OF DOMINOES,5,1.25,
Unspecified,9/2/2011 12:17,565303,22550,HOLIDAY FUN LUDO,2,3.75,


In [60]:
sales_df.loc[('United Kingdom','12/1/2010 8:26'), 'UnitPrice']

Country         InvoiceDate   
United Kingdom  12/1/2010 8:26    2.55
                12/1/2010 8:26    3.39
                12/1/2010 8:26    2.75
                12/1/2010 8:26    3.39
                12/1/2010 8:26    3.39
                12/1/2010 8:26    7.65
                12/1/2010 8:26    4.25
Name: UnitPrice, dtype: float64

In [61]:
sales_df.loc[('United Kingdom','12/1/2010 8:26'), 'UnitPrice'].mean()

3.91

# TASK #6. DEALING WITH DATE TIME - BASIC PYTHON DATETIME MODULE

In [62]:
# datetime is one of Python's core standard libraries 
# We are going to use two methods to deal with dates/times: (1) date and (2) dateime
# date: helps us define dates only without including time (month, day, year)
# datetime: helps us define times and dates together (month, day, year, hour, second, microsecond)
# Let's import datetime module as dt

import datetime as dt

# Pick a date using Python's date method inside the datetime module

my_date = dt.date(2020, 3, 22)
my_date

datetime.date(2020, 3, 22)

In [78]:
# print out calendar!

import calendar

print(calendar.month(2024, 3))

     March 2024
Mo Tu We Th Fr Sa Su
             1  2  3
 4  5  6  7  8  9 10
11 12 13 14 15 16 17
18 19 20 21 22 23 24
25 26 27 28 29 30 31



**MINI CHALLENGE #6:**
- **Use Python's datetime method to write your date and time of your birth! Convert it into string format**


In [89]:
my_dob = dt.datetime(1983,3,22,1,5,0)
my_dob

datetime.datetime(1983, 3, 22, 1, 5)

In [90]:
my_dob_str = str(my_dob)
my_dob_str

'1983-03-22 01:05:00'

# TASK #7. DEALING WITH DATE TIME - HANDLING DATES AND TIMES USING PANDAS 

**MINI CHALLENGE #7:**
- **Obtain the business days between 2020-01-01 and 2020-04-01**

In [115]:
business_days = pd.date_range( start = '2024-08-01', end = '2024-08-31', freq = 'B' )
business_days

DatetimeIndex(['2024-08-01', '2024-08-02', '2024-08-05', '2024-08-06',
               '2024-08-07', '2024-08-08', '2024-08-09', '2024-08-12',
               '2024-08-13', '2024-08-14', '2024-08-15', '2024-08-16',
               '2024-08-19', '2024-08-20', '2024-08-21', '2024-08-22',
               '2024-08-23', '2024-08-26', '2024-08-27', '2024-08-28',
               '2024-08-29', '2024-08-30'],
              dtype='datetime64[ns]', freq='B')

# MINI CHALLENGE SOLUTIONS

**MINI CHALLENGE #1 SOLUTION:**
- **How many unique countries are present in the dataset? List all countries**

In [63]:
# Import dataset using Pandas
# Link to Dataset: https://www.kaggle.com/carrie1/ecommerce-data
# Data contains transactions details between 01/12/2010 and 09/12/2011 for a UK-based non-store online retail.
# The company specializes in selling unique gifts
sales_df = pd.read_csv('ecommerce_sales.csv', encoding = 'unicode_escape')
sales_df

Unnamed: 0,InvoiceNo,StockCode,Description,Quantity,InvoiceDate,UnitPrice,CustomerID,Country
0,536365,85123A,WHITE HANGING HEART T-LIGHT HOLDER,6,12/1/2010 8:26,2.55,17850.0,United Kingdom
1,536365,71053,WHITE METAL LANTERN,6,12/1/2010 8:26,3.39,17850.0,United Kingdom
2,536365,84406B,CREAM CUPID HEARTS COAT HANGER,8,12/1/2010 8:26,2.75,17850.0,United Kingdom
3,536365,84029G,KNITTED UNION FLAG HOT WATER BOTTLE,6,12/1/2010 8:26,3.39,17850.0,United Kingdom
4,536365,84029E,RED WOOLLY HOTTIE WHITE HEART.,6,12/1/2010 8:26,3.39,17850.0,United Kingdom
...,...,...,...,...,...,...,...,...
541904,581587,22613,PACK OF 20 SPACEBOY NAPKINS,12,12/9/2011 12:50,0.85,12680.0,France
541905,581587,22899,CHILDREN'S APRON DOLLY GIRL,6,12/9/2011 12:50,2.10,12680.0,France
541906,581587,23254,CHILDRENS CUTLERY DOLLY GIRL,4,12/9/2011 12:50,4.15,12680.0,France
541907,581587,23255,CHILDRENS CUTLERY CIRCUS PARADE,4,12/9/2011 12:50,4.15,12680.0,France


In [64]:
sales_df['Country'].unique()

array(['United Kingdom', 'France', 'Australia', 'Netherlands', 'Germany',
       'Norway', 'EIRE', 'Switzerland', 'Spain', 'Poland', 'Portugal',
       'Italy', 'Belgium', 'Lithuania', 'Japan', 'Iceland',
       'Channel Islands', 'Denmark', 'Cyprus', 'Sweden', 'Austria',
       'Israel', 'Finland', 'Bahrain', 'Greece', 'Hong Kong', 'Singapore',
       'Lebanon', 'United Arab Emirates', 'Saudi Arabia',
       'Czech Republic', 'Canada', 'Unspecified', 'Brazil', 'USA',
       'European Community', 'Malta', 'RSA'], dtype=object)

In [65]:
# Obtain the number of unique values in each column
sales_df.nunique()

InvoiceNo      25900
StockCode       4070
Description     4223
Quantity         722
InvoiceDate    23260
UnitPrice       1630
CustomerID      4372
Country           38
dtype: int64

**MINI CHALLENGE #2 SOLUTION:**
- **What is the maximum and minimum prices at 2011-12-09 12:25:00**

In [66]:
# Min = 0.72 and Max = 1.85
sales_df.groupby('InvoiceDate')['UnitPrice'].min()

InvoiceDate
1/10/2011 10:04    0.00
1/10/2011 10:07    3.75
1/10/2011 10:08    0.00
1/10/2011 10:32    0.21
1/10/2011 10:35    0.19
                   ... 
9/9/2011 8:48      0.85
9/9/2011 9:03      0.42
9/9/2011 9:13      0.42
9/9/2011 9:38      0.42
9/9/2011 9:52      0.39
Name: UnitPrice, Length: 23260, dtype: float64

In [67]:
sales_df.groupby('InvoiceDate')['UnitPrice'].max()

InvoiceDate
1/10/2011 10:04     0.00
1/10/2011 10:07     3.75
1/10/2011 10:08     0.00
1/10/2011 10:32     5.95
1/10/2011 10:35    18.00
                   ...  
9/9/2011 8:48       8.95
9/9/2011 9:03       5.45
9/9/2011 9:13      15.00
9/9/2011 9:38      45.33
9/9/2011 9:52       5.95
Name: UnitPrice, Length: 23260, dtype: float64

**MINI CHALLENGE #3 SOLUTION:**
- **Sort the DataFrame in a descending order (countries and dates)**

In [68]:
# Please note that there is no point of adding [False, False] or [True, True]
sales_df.sort_index(ascending = False, inplace = True)
sales_df

Unnamed: 0,InvoiceNo,StockCode,Description,Quantity,InvoiceDate,UnitPrice,CustomerID,Country
541908,581587,22138,BAKING SET 9 PIECE RETROSPOT,3,12/9/2011 12:50,4.95,12680.0,France
541907,581587,23255,CHILDRENS CUTLERY CIRCUS PARADE,4,12/9/2011 12:50,4.15,12680.0,France
541906,581587,23254,CHILDRENS CUTLERY DOLLY GIRL,4,12/9/2011 12:50,4.15,12680.0,France
541905,581587,22899,CHILDREN'S APRON DOLLY GIRL,6,12/9/2011 12:50,2.10,12680.0,France
541904,581587,22613,PACK OF 20 SPACEBOY NAPKINS,12,12/9/2011 12:50,0.85,12680.0,France
...,...,...,...,...,...,...,...,...
4,536365,84029E,RED WOOLLY HOTTIE WHITE HEART.,6,12/1/2010 8:26,3.39,17850.0,United Kingdom
3,536365,84029G,KNITTED UNION FLAG HOT WATER BOTTLE,6,12/1/2010 8:26,3.39,17850.0,United Kingdom
2,536365,84406B,CREAM CUPID HEARTS COAT HANGER,8,12/1/2010 8:26,2.75,17850.0,United Kingdom
1,536365,71053,WHITE METAL LANTERN,6,12/1/2010 8:26,3.39,17850.0,United Kingdom


**MINI CHALLENGE #4 SOLUTION:**
- **Use InvoiceDate and Country in order as the multi-index**

In [69]:
sales_df = pd.read_csv('ecommerce_sales.csv', encoding = 'unicode_escape')
sales_df.set_index(keys = ["InvoiceDate", "Country"], inplace = True)
sales_df

Unnamed: 0_level_0,Unnamed: 1_level_0,InvoiceNo,StockCode,Description,Quantity,UnitPrice,CustomerID
InvoiceDate,Country,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
12/1/2010 8:26,United Kingdom,536365,85123A,WHITE HANGING HEART T-LIGHT HOLDER,6,2.55,17850.0
12/1/2010 8:26,United Kingdom,536365,71053,WHITE METAL LANTERN,6,3.39,17850.0
12/1/2010 8:26,United Kingdom,536365,84406B,CREAM CUPID HEARTS COAT HANGER,8,2.75,17850.0
12/1/2010 8:26,United Kingdom,536365,84029G,KNITTED UNION FLAG HOT WATER BOTTLE,6,3.39,17850.0
12/1/2010 8:26,United Kingdom,536365,84029E,RED WOOLLY HOTTIE WHITE HEART.,6,3.39,17850.0
...,...,...,...,...,...,...,...
12/9/2011 12:50,France,581587,22613,PACK OF 20 SPACEBOY NAPKINS,12,0.85,12680.0
12/9/2011 12:50,France,581587,22899,CHILDREN'S APRON DOLLY GIRL,6,2.10,12680.0
12/9/2011 12:50,France,581587,23254,CHILDRENS CUTLERY DOLLY GIRL,4,4.15,12680.0
12/9/2011 12:50,France,581587,23255,CHILDRENS CUTLERY CIRCUS PARADE,4,4.15,12680.0


**MINI CHALLENGE #5 SOLUTION:**
- **Calculate the average unit price for transactions occured in "United Kingdom" at "12/1/2010 8:26"**

In [1]:
# Let's import the dataset again using Pandas
sales_df = pd.read_csv('ecommerce_sales.csv', encoding = 'unicode_escape')
sales_df.set_index(keys = ["Country", "InvoiceDate"], inplace = True)


print(sales_df.loc[("United Kingdom", "12/1/2010 8:26"), "UnitPrice"])

# feed index as a tuple (important to avoid confusion)
# first argument references rows and the second argument references a column
sales_df.loc[("United Kingdom", "12/1/2010 8:26"), "UnitPrice"].mean()


NameError: name 'pd' is not defined

**MINI CHALLENGE #6 SOLUTION:**
- **Use Python's datetime method to write your date and time of your birth. Convert it into string format!!**


In [71]:
# Let's define a datetime using datetime method as follows
my_birth = dt.datetime(1992, 10, 25, 8, 9, 20)
str(my_birth)


'1992-10-25 08:09:20'

**MINI CHALLENGE #7 SOLUTION:**
- **Obtain the business days between 2020-01-01 and 2020-04-01**


In [72]:
# you can also define a range of dates using B which stands for business days as follows:
my_days = pd.date_range(start = "2020-01-01", end = "2020-04-01", freq = "B")
my_days


DatetimeIndex(['2020-01-01', '2020-01-02', '2020-01-03', '2020-01-06',
               '2020-01-07', '2020-01-08', '2020-01-09', '2020-01-10',
               '2020-01-13', '2020-01-14', '2020-01-15', '2020-01-16',
               '2020-01-17', '2020-01-20', '2020-01-21', '2020-01-22',
               '2020-01-23', '2020-01-24', '2020-01-27', '2020-01-28',
               '2020-01-29', '2020-01-30', '2020-01-31', '2020-02-03',
               '2020-02-04', '2020-02-05', '2020-02-06', '2020-02-07',
               '2020-02-10', '2020-02-11', '2020-02-12', '2020-02-13',
               '2020-02-14', '2020-02-17', '2020-02-18', '2020-02-19',
               '2020-02-20', '2020-02-21', '2020-02-24', '2020-02-25',
               '2020-02-26', '2020-02-27', '2020-02-28', '2020-03-02',
               '2020-03-03', '2020-03-04', '2020-03-05', '2020-03-06',
               '2020-03-09', '2020-03-10', '2020-03-11', '2020-03-12',
               '2020-03-13', '2020-03-16', '2020-03-17', '2020-03-18',
      

In [116]:
sales_df.groupby('InvoiceDate')['UnitPrice'].average()

AttributeError: 'SeriesGroupBy' object has no attribute 'average'