# Day 14: Loyalty Program's Impact on Transaction Patterns

You are a Business Analyst on the Starbucks Rewards team investigating customer transaction behavior. Your team wants to understand how loyalty program membership influences purchasing patterns. The goal is to compare transaction metrics between loyalty members and non-members.

In [None]:
import pandas as pd
import numpy as np

# Customer dimension: customer ids run 1..12, each paired with a 0/1 flag
# telling whether that customer is enrolled in the loyalty program.
dim_customers_data = [
    {"customer_id": customer_id, "is_loyalty_member": loyalty_flag}
    for customer_id, loyalty_flag in enumerate(
        (1, 0, 1, 0, 1, 1, 0, 1, 0, 0, 1, 0),
        start=1,
    )
]
dim_customers = pd.DataFrame(dim_customers_data)

# Transaction fact table: one record per purchase, written as
# (customer_id, transaction_id, transaction_date, transaction_value) rows
# and expanded into dicts so the DataFrame keeps that column order.
fct_transactions_data = [
    {
        "customer_id": customer_id,
        "transaction_id": transaction_id,
        "transaction_date": transaction_date,
        "transaction_value": transaction_value,
    }
    for customer_id, transaction_id, transaction_date, transaction_value in (
        (1, 101, "2024-07-05", 5.5),
        (1, 102, "2024-07-15", 7.25),
        (2, 103, "2024-07-10", 4),
        (3, 104, "2024-07-20", 8.75),
        (4, 105, "2024-07-03", 6.5),
        (5, 106, "2024-07-22", 9),
        (6, 107, "2024-07-11", 10.5),
        (7, 108, "2024-07-18", 4.25),
        (8, 109, "2024-07-25", 12),
        (9, 110, "2024-07-07", 3.75),
        (10, 111, "2024-07-12", 5),
        (11, 112, "2024-07-27", 11.25),
        (12, 113, "2024-07-08", 6),
        (3, 114, "2024-07-30", 7.5),
        (5, 115, "2024-07-29", 10),
        (1, 116, "2024-07-31", 6.25),
    )
]
fct_transactions = pd.DataFrame(fct_transactions_data)


## Question 1

For the month of July 2024, how many transactions did loyalty program members and non-members make? Compare the transaction counts between these two groups.

In [None]:
# Note: pandas and numpy are already imported as pd and np
# The following tables are loaded as pandas DataFrames with the same names: fct_transactions, dim_customers
# Please print your final result or dataframe

# Work on copies so the shared source tables are never modified by this cell
fct_transactions_df = fct_transactions.copy()
dim_customers_df = dim_customers.copy()

# Preview each table: schema/dtypes first, then the first few rows
for preview_df in (fct_transactions_df, dim_customers_df):
    print(preview_df.info())
    print()
    print(preview_df.head())
    print()
print("=" * 150)

################################################################################
# Section banner (blank line, two rules, blank line)
print("", "=" * 150, "=" * 150, "", sep="\n")
################################################################################
# Question 1 of 3
# For the month of July 2024, how many transactions did loyalty program members and non-members make? Compare the transaction counts between these two groups.

# Attach the loyalty flag to every transaction by joining on customer_id.
# The right join keeps every customer from dim_customers, even without transactions.
merged_fct_df = fct_transactions_df.merge(dim_customers_df, on='customer_id', how='right')
print(merged_fct_df.info())
print("", merged_fct_df, "", "=" * 150, sep="\n")

# Parse the date strings into real datetimes; anything unparseable becomes NaT
parsed_dates = pd.to_datetime(merged_fct_df['transaction_date'], format='%Y-%m-%d', errors='coerce')
merged_fct_df['transaction_date'] = parsed_dates
print(merged_fct_df.info())
print("", "=" * 150, sep="\n")

# Keep only July 2024: from the 1st of July (inclusive) up to the 1st of August (exclusive)
on_or_after_july_1 = merged_fct_df['transaction_date'] >= '2024-07-01'
before_august_1 = merged_fct_df['transaction_date'] < '2024-08-01'
jul_fct_df = merged_fct_df[on_or_after_july_1 & before_august_1]
print(jul_fct_df.info())
print("", jul_fct_df, "", "=" * 150, sep="\n")

# Tally July transactions per loyalty flag (1 = member, 0 = non-member)
july_txn_counts = jul_fct_df['is_loyalty_member'].value_counts()
print("Number of transactions made by members with and without loyalty membership:")
print(july_txn_counts)

## Question 2

What is the average transaction value for loyalty program members and non-members during July 2024? Use this to identify which group has a higher average transaction value.

In [None]:
# Note: pandas and numpy are already imported as pd and np
# The following tables are loaded as pandas DataFrames with the same names: fct_transactions, dim_customers
# Please print your final result or dataframe

# Work on copies so the shared source tables are never modified by this cell
fct_transactions_df = fct_transactions.copy()
dim_customers_df = dim_customers.copy()

# Preview each table: schema/dtypes first, then the first few rows
for preview_df in (fct_transactions_df, dim_customers_df):
    print(preview_df.info())
    print()
    print(preview_df.head())
    print()
print("=" * 150)

################################################################################
# Section banner (blank line, two rules, blank line)
print("", "=" * 150, "=" * 150, "", sep="\n")
################################################################################
# Question 1 of 3
# For the month of July 2024, how many transactions did loyalty program members and non-members make? Compare the transaction counts between these two groups.

# Attach the loyalty flag to every transaction by joining on customer_id.
# The right join keeps every customer from dim_customers, even without transactions.
merged_fct_df = fct_transactions_df.merge(dim_customers_df, on='customer_id', how='right')
print(merged_fct_df.info())
print("", merged_fct_df, "", "=" * 150, sep="\n")

# Parse the date strings into real datetimes; anything unparseable becomes NaT
parsed_dates = pd.to_datetime(merged_fct_df['transaction_date'], format='%Y-%m-%d', errors='coerce')
merged_fct_df['transaction_date'] = parsed_dates
print(merged_fct_df.info())
print("", "=" * 150, sep="\n")

# Keep only July 2024: from the 1st of July (inclusive) up to the 1st of August (exclusive)
on_or_after_july_1 = merged_fct_df['transaction_date'] >= '2024-07-01'
before_august_1 = merged_fct_df['transaction_date'] < '2024-08-01'
jul_fct_df = merged_fct_df[on_or_after_july_1 & before_august_1]
print(jul_fct_df.info())
print("", jul_fct_df, "", "=" * 150, sep="\n")

# Tally July transactions per loyalty flag (1 = member, 0 = non-member)
july_txn_counts = jul_fct_df['is_loyalty_member'].value_counts()
print("Number of transactions made by members with and without loyalty membership:")
print(july_txn_counts)

################################################################################
# Section banner (blank line, two rules, blank line)
print("", "=" * 150, "=" * 150, "", sep="\n")
################################################################################
# Question 2 of 3
# What is the average transaction value for loyalty program members and non-members during July 2024? Use this to identify which group has a higher average transaction value.

# jul_fct_df is already restricted to July 2024, so a mean per loyalty flag answers the question.
# The result is turned into a two-column table and rounded to cents for display.
mean_value_by_flag = jul_fct_df.groupby('is_loyalty_member')['transaction_value'].mean()
jul_avg_txn_value = mean_value_by_flag.reset_index(name='average_transaction_value').round(2)
print("Average transaction value for members with and without loyalty membership during July 2024:")
print(jul_avg_txn_value)

## Question 3

Determine the percentage difference in average transaction value between loyalty program members and non-members for July 2024.

In [None]:
# Load the datasets (as copies, so the shared source tables stay untouched) and display them
fct_transactions_df = fct_transactions.copy()
dim_customers_df = dim_customers.copy()

print(fct_transactions_df.info())
print()
print(fct_transactions_df.head())
print()
print(dim_customers_df.info())
print()
print(dim_customers_df.head())
print()
print("=" * 150)

################################################################################
print()
print("=" * 150)
print("=" * 150)
print()
################################################################################
# Question 1 of 3
# For the month of July 2024, how many transactions did loyalty program members and non-members make? Compare the transaction counts between these two groups.

# Merge both dataframes so that every transaction carries its customer's loyalty flag
merged_fct_df = pd.merge(fct_transactions_df, dim_customers_df, how='right', on='customer_id')
print(merged_fct_df.info())
print()
print(merged_fct_df)
print()
print("=" * 150)

# Convert 'transaction_date' to datetime so it can be filtered by date range
# (errors='coerce' turns unparseable values into NaT, which the July filter below drops)
merged_fct_df['transaction_date'] = pd.to_datetime(merged_fct_df['transaction_date'], format='%Y-%m-%d', errors='coerce')
print(merged_fct_df.info())
print()
print("=" * 150)

# Keep only July 2024 transactions: 2024-07-01 inclusive up to 2024-08-01 exclusive
jul_fct_df = merged_fct_df[(merged_fct_df['transaction_date'] >= '2024-07-01') & (merged_fct_df['transaction_date'] < '2024-08-01')]
print(jul_fct_df.info())
print()
print(jul_fct_df)
print()
print("=" * 150)

# Count July transactions per loyalty flag (1 = member, 0 = non-member)
print("Number of transactions made by members with and without loyalty membership:")
print(jul_fct_df['is_loyalty_member'].value_counts())

################################################################################
print()
print("=" * 150)
print("=" * 150)
print()
################################################################################
# Question 2 of 3
# What is the average transaction value for loyalty program members and non-members during July 2024? Use this to identify which group has a higher average transaction value.

# The data is already filtered for July 2024, so group by loyalty flag and average the values.
# Keep the full-precision means for Question 3; only the displayed table is rounded to 2 decimals.
avg_txn_value_by_membership = jul_fct_df.groupby('is_loyalty_member')['transaction_value'].mean()
jul_avg_txn_value = avg_txn_value_by_membership.reset_index(name='average_transaction_value').round(2)
print("Average transaction value for members with and without loyalty membership during July 2024:")
print(jul_avg_txn_value)
print()
print("=" * 150)

################################################################################
print()
print("=" * 150)
print("=" * 150)
print()
################################################################################
# Question 3 of 3
# Determine the percentage difference in average transaction value between loyalty program members and non-members for July 2024.

# Percentage difference = (member average - non-member average) / non-member average * 100,
# i.e. how much higher (or lower) the member average is relative to the non-member average.
# It is computed from the unrounded means: dividing already-rounded averages (4.92 instead of
# 4.9167) would shift the result (78.86 % instead of 78.98 %). Rounding happens only on output.
member_avg = avg_txn_value_by_membership.get(1)      # is_loyalty_member == 1
non_member_avg = avg_txn_value_by_membership.get(0)  # is_loyalty_member == 0

print("Percentage difference in average transaction value between members and non-members during July 2024:")
if member_avg is None or non_member_avg is None:
    # One of the groups has no July transactions, so there is nothing to compare
    print("Not available: one of the groups has no transactions in July 2024")
elif non_member_avg == 0:
    # A zero baseline makes the relative difference undefined
    print("Not available: the average transaction value of non-members is 0")
else:
    percentage_diff = (member_avg - non_member_avg) / non_member_avg * 100
    print(round(percentage_diff, 2), "%")

Made with ❤️ by [Interview Master](https://www.interviewmaster.ai)