## 2024 Week 10 - Preppin' for Pulse
![image.png](attachment:57b8de64-7975-4bd0-8b7f-884e6a367f99.png)
![image.png](attachment:97240be3-0776-464e-96d6-a05e3dbf0d9d.png)
![image.png](attachment:b7afabe8-520a-4040-9ddd-81396cd2face.png)

In [479]:
import pandas as pd

In [480]:
# Read every worksheet in one pass; sheet_name=None returns a dict of
# DataFrames keyed by worksheet name.
with pd.ExcelFile('2024W10 Input.xlsx') as workbook:
    excel_sheets = pd.read_excel(workbook, sheet_name=None)

In [481]:
# Pull the three input tables out of the workbook dict by worksheet name.
product, transaction, loyalty = (
    excel_sheets[sheet]
    for sheet in ('Product Table', 'Transaction Data', 'Loyalty Table')
)

In [482]:
# Preview the first five product rows.
product.head(n=5)

Unnamed: 0,Product_Type,Product_Scent,Pack_Size,Product_Size,Unit_Cost,Selling_Price
0,Bar,Lavender Fields,1x,,1.25,1.77
1,Bar,Citrus Breeze,1x,,0.75,0.81
2,Bar,Ocean Mist,1x,,0.66,1.2
3,Bar,Fresh Rain,1x,,0.94,1.61
4,Bar,Rose Garden,1x,,1.55,2.45


In [483]:
# Preview the first five transaction rows.
transaction.head(n=5)

Unnamed: 0,Transaction_Date,Transanction_Number,Product_ID,Cash_or_Card,Loyalty_Number,Sales_Before_Discount
0,"Sat, January 02, 2021",20121001,Bar-Ocean_Mist-1x,1,1004721.0,6.0
1,"Sat, January 02, 2021",20121001,Liquid-Rose_Garden-0.5L,1,1004721.0,14.1
2,"Sat, January 02, 2021",20121002,Bar-Citrus_Breeze-4x,2,1009280.0,8.12
3,"Sat, January 02, 2021",20121002,Liquid-Coconut_Dream-0.5L,2,1009280.0,12.36
4,"Sat, January 02, 2021",20121003,Bar-Ocean_Mist-4x,1,1009022.0,13.95


In [484]:
# Preview the first five loyalty rows.
loyalty.head(n=5)

Unnamed: 0,Loyalty_Number,Customer_Name,Loyalty_Tier,Loyalty_Discount
0,1000012,"trimmill, leeanne",Bronz,
1,1000026,"kobierski, teador",,
2,1000028,"plues, jenelle",Bronz,
3,1000032,"firmager, gabriell",Bronz,
4,1000038,"chiles, nicolea",,


#### Filter to the last 2 years of data i.e. only 2023 and 2024 transactions
This will allow for year on year comparison

In [486]:
# Inspect column dtypes and non-null counts of the transaction table.
transaction.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 105495 entries, 0 to 105494
Data columns (total 6 columns):
 #   Column                 Non-Null Count   Dtype  
---  ------                 --------------   -----  
 0   Transaction_Date       105495 non-null  object 
 1   Transanction_Number    105495 non-null  int64  
 2   Product_ID             105495 non-null  object 
 3   Cash_or_Card           105495 non-null  int64  
 4   Loyalty_Number         73688 non-null   float64
 5   Sales_Before_Discount  105495 non-null  float64
dtypes: float64(2), int64(2), object(2)
memory usage: 4.8+ MB


In [487]:
# Convert the text dates (e.g. "Sat, January 02, 2021") to datetime64 so the
# .dt accessor can be used for the year filter.
transaction['Transaction_Date'] = pd.to_datetime(transaction['Transaction_Date'])

In [488]:
# Keep only the 2023 and 2024 transactions for the year-on-year comparison.
df_last_2_years = transaction[
    transaction['Transaction_Date'].dt.year.isin([2023, 2024])
]

#### Create additional rows of data for the days the store was closed, ensuring all other fields will have null values
The store is closed on Sundays and Public Holidays

In [490]:
# One row per calendar day between the first and last retained transaction;
# days with no transactions will surface as null rows after the left join.
date_range = (
    pd.date_range(
        df_last_2_years['Transaction_Date'].min(),
        df_last_2_years['Transaction_Date'].max(),
        freq='D',
        name='Transaction_Date',
    )
    .to_series()
    .reset_index(drop=True)
)
date_range.head()

0   2023-01-03
1   2023-01-04
2   2023-01-05
3   2023-01-06
4   2023-01-07
Name: Transaction_Date, dtype: datetime64[ns]

In [491]:
# Left-join transactions onto the daily scaffold so that days without any
# transaction are kept with nulls in every other field.
df = date_range.to_frame().merge(df_last_2_years, how='left', on='Transaction_Date')
df.isna().sum()

Transaction_Date             0
Transanction_Number         70
Product_ID                  70
Cash_or_Card                70
Loyalty_Number           11819
Sales_Before_Discount       70
dtype: int64

#### Update the Cash_or_Card field so that:
- 1=Card
- 2=Cash

In [493]:
# Check which payment codes occur before recoding them.
df['Cash_or_Card'].value_counts()

Cash_or_Card
1.0    19677
2.0    19590
Name: count, dtype: int64

In [494]:
# Recode the payment flag: 1 -> 'Card', 2 -> 'Cash'. Anything not in the
# lookup (including NaN on closed days) is passed through unchanged.
cash_or_card = {1: 'Card', 2: 'Cash'}
df['Cash_or_Card'] = df['Cash_or_Card'].map(
    lambda code: cash_or_card.get(code, code)
)
df['Cash_or_Card'].value_counts()

Cash_or_Card
Card    19677
Cash    19590
Name: count, dtype: int64

#### Join the Product Table
You'll need to prepare the join clause fields first

In [496]:
# Bars take their size from Pack_Size, liquids from Product_Size; each series
# keeps the original row index of the product table.
product_bar = product.loc[product['Product_Type'] == 'Bar', 'Pack_Size']
product_liquid = product.loc[product['Product_Type'] == 'Liquid', 'Product_Size']

In [497]:
# Stack the two size series; the assignment aligns on the row index, so each
# product row receives the size that matches its type.
product['Product_Size'] = pd.concat([product_bar, product_liquid], axis=0)

In [498]:
# Build the join key in the same shape as the transaction Product_ID,
# e.g. "Liquid-Rose_Garden-0.5L": type, scent (spaces -> underscores), size.
product['Product_ID'] = product['Product_Type'].str.cat(
    [product['Product_Scent'].str.replace(' ', '_'), product['Product_Size']],
    sep='-',
)

In [499]:
# Attach product attributes to every transaction row; the left join keeps the
# closed-day rows, which have no Product_ID.
df = pd.merge(df, product, how='left', on='Product_ID')

In [500]:
# List the columns present after joining the product table.
df.columns

Index(['Transaction_Date', 'Transanction_Number', 'Product_ID', 'Cash_or_Card',
       'Loyalty_Number', 'Sales_Before_Discount', 'Product_Type',
       'Product_Scent', 'Pack_Size', 'Product_Size', 'Unit_Cost',
       'Selling_Price'],
      dtype='object')

In [501]:
# Output column order; Product_ID and Pack_Size are dropped here.
cols = [
    'Transaction_Date',
    'Transanction_Number',
    'Product_Type',
    'Product_Scent',
    'Product_Size',
    'Cash_or_Card',
    'Loyalty_Number',
    'Sales_Before_Discount',
    'Unit_Cost',
    'Selling_Price',
]
df = df.loc[:, cols]

In [502]:
# Preview the joined and reordered frame.
df.head(n=5)

Unnamed: 0,Transaction_Date,Transanction_Number,Product_Type,Product_Scent,Product_Size,Cash_or_Card,Loyalty_Number,Sales_Before_Discount,Unit_Cost,Selling_Price
0,2023-01-03,30123001.0,Liquid,Sandalwood Spice,0.25L,Cash,1005245.0,8.5,2.45,4.25
1,2023-01-03,30123001.0,Liquid,Vanilla Bean,0.5L,Cash,1005245.0,14.7,5.97,7.35
2,2023-01-03,30123002.0,Liquid,Sandalwood Spice,1L,Card,1007270.0,13.19,9.67,13.19
3,2023-01-03,30123003.0,Liquid,Eucalyptus Mint,0.25L,Cash,1009750.0,9.0,2.27,4.5
4,2023-01-03,30123003.0,Liquid,Eucalyptus Mint,1L,Cash,1009750.0,39.81,8.96,13.27


#### Calculate the Quantity of each transaction 
Defined as the Sales_Before_Discount / Selling_Price

In [504]:
# Quantity = Sales_Before_Discount / Selling_Price (NaN on closed-day rows).
df = df.assign(
    Transaction_Quantity=df['Sales_Before_Discount'].div(df['Selling_Price'])
)

#### In the Loyalty Table:
- The Customer_Name is currently reading Last Name, First Name. Update it to read First Name Last Name in Title case
    - e.g. knell, libby becomes Libby Knell
- Group together the Loyalty_Tiers into Gold, Silver and Bronze
- Update the Loyalty_Discount to be a numeric field

In [535]:
def fix_customer_name(name):
    """Convert a 'last, first' customer name to 'First Last' in title case.

    Parameters
    ----------
    name : str or any
        Name in the form ``"last, first"``. Whitespace around either part is
        ignored, so ``"last,first"`` is accepted as well.

    Returns
    -------
    str or any
        ``"First Last"`` in title case. A string without a comma is only
        stripped and title-cased, so applying the function to an
        already-converted name returns it unchanged (the cell can be re-run
        safely). Non-string values such as NaN/None are returned untouched.
    """
    # Missing values (NaN/None) have no .title(); pass them through as-is.
    if not isinstance(name, str):
        return name

    # Split on the first comma only, so extra commas cannot break unpacking.
    lname, comma, fname = name.partition(',')
    if not comma:
        # No "last, first" structure (e.g. already converted): normalise case only.
        return name.strip().title()

    return f'{fname.strip()} {lname.strip()}'.title()

# Rewrite every customer name element-wise with fix_customer_name.
loyalty['Customer_Name'] = loyalty['Customer_Name'].map(fix_customer_name)

In [537]:
# Show the reformatted names.
loyalty['Customer_Name']

0          Leeanne Trimmill
1          Teador Kobierski
2             Jenelle Plues
3         Gabriell Firmager
4            Nicolea Chiles
               ...         
9784    Thacher Rattenberie
9785        Rosanna Mariaud
9786       Edgar D'Ambrogio
9787         Mark Scrivener
9788            Mendie Fisk
Name: Customer_Name, Length: 9789, dtype: object