In [1]:
'''
Project Overview:
This project supports the Disney Parks Revenue Team by analyzing guest spending 
behavior across different park experiences. As a data analyst, the goal is to 
derive actionable insights that reveal nuanced visitor purchasing patterns.

'''

'\nProject Overview:\nThis project supports the Disney Parks Revenue Team by analyzing guest spending \nbehavior across different park experiences. As a data analyst, the goal is to \nderive actionable insights that reveal nuanced visitor purchasing patterns.\n\n'

In [2]:
# Importing necessary libraries for analysis
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from datetime import date

In [5]:
# Getting the Dataset
# NOTE(review): this is an absolute, machine-specific Windows path; the cell only
# runs where the CSV exists at exactly this location.
fct_guest_spending = pd.read_csv(r'D:\Data Journey\Python-Summer-Party\DataSets\fct_guest_spending.csv')

# Display the first few rows to understand the data
print(fct_guest_spending.head(10))  # Shows the first 10 rows with columns
# List the column names, then report the frame's (rows, columns) shape
print(list(fct_guest_spending.columns))
print('Number of rows and columns is:', fct_guest_spending.shape)

   guest_id visit_date  amount_spent park_experience_type
0         1   7/5/2024          50.0           Attraction
1         2   7/6/2024          30.0               Dining
2         3  7/10/2024          20.5               Retail
3         4  7/12/2024          40.0        Entertainment
4         1  7/15/2024          35.0               Dining
5         5  7/20/2024          60.0           Attraction
6         6  7/25/2024          25.0               Retail
7         1   8/3/2024          55.0           Attraction
8         1  8/15/2024          45.0               Dining
9         2   8/5/2024          22.0               Retail
['guest_id', 'visit_date', 'amount_spent', 'park_experience_type']
Number of rows and columns is: (18, 4)


In [6]:
# Question One:
# Average amount spent per park experience type in July 2024

# Parse visit_date into datetimes so the .dt accessors below can be used
fct_guest_spending['visit_date'] = pd.to_datetime(fct_guest_spending['visit_date'])

# Every experience type present anywhere in the dataset (not just in July)
all_experience_types = fct_guest_spending['park_experience_type'].unique()

# Keep only the rows whose visit falls in July 2024
July_2024 = fct_guest_spending[
    (fct_guest_spending['visit_date'].dt.year == 2024) &
    (fct_guest_spending['visit_date'].dt.month == 7)
]

# Mean amount_spent per experience type for July; experience types with no
# July rows are still listed and reported as 0.0 via the reindex
money_by_park_experience = (
    July_2024
    .groupby('park_experience_type')['amount_spent']
    .mean()
    .reindex(all_experience_types, fill_value=0.0)
)

print("Average spending per guest per visit for each park experience type (July 2024):")
print(money_by_park_experience)

Average spending per guest per visit for each park experience type (July 2024):
park_experience_type
Attraction        55.00
Dining            32.50
Retail            22.75
Entertainment     40.00
Character Meet     0.00
Name: amount_spent, dtype: float64


In [7]:
# Question Two
# For guests who visited our parks more than once in August 2024,
# what is the difference in spending between their first and their last visit?

# Subsetting for August 2024 only
August_2024 = fct_guest_spending[
    (fct_guest_spending['visit_date'].dt.year == 2024) &
    (fct_guest_spending['visit_date'].dt.month == 8)
]

# Guests with more than one August row (each row is counted as one visit)
visit_counts = August_2024['guest_id'].value_counts()
multi_visitors = visit_counts[visit_counts > 1].index

# Keep only those guests' rows, ordered chronologically within each guest.
# sort_values returns a new frame, so August_2024 itself is left untouched.
multi_visits = (
    August_2024[August_2024['guest_id'].isin(multi_visitors)]
    .sort_values(['guest_id', 'visit_date'])
)

# Take the amount from the chronologically first / last ROW of each guest.
# GroupBy.first()/last() skip missing values by default, which would silently
# substitute another visit's amount when the first or last amount is NaN.
# Selecting by position keeps the NaN, so the gap shows up in the result.
first_visits = (
    multi_visits
    .drop_duplicates('guest_id', keep='first')
    .set_index('guest_id')['amount_spent']
)
last_visits = (
    multi_visits
    .drop_duplicates('guest_id', keep='last')
    .set_index('guest_id')['amount_spent']
)

# Last-visit spend minus first-visit spend, aligned on guest_id
spending_diff = last_visits - first_visits

print(spending_diff)

guest_id
1   -10.0
2    16.0
3     4.0
Name: amount_spent, dtype: float64


In [8]:
# Question 3
# Categorizing each guest's total September 2024 spending into segments:
# Low (under $50), Medium ($50 up to but not including $100), High ($100 and above)

# Segmenting guests for September only
Sept_2024 = fct_guest_spending[
    (fct_guest_spending['visit_date'].dt.year == 2024) &
    (fct_guest_spending['visit_date'].dt.month == 9)
]

# Grouping by guest_id to get total amount of money spent by each guest in September
Sept_spending = Sept_2024.groupby('guest_id')['amount_spent'].sum().reset_index()

# Exclude guests who did not make any purchases (total amount_spent == 0).
# .copy() makes the filtered result an independent frame, so adding a column
# below does not raise SettingWithCopyWarning when the filter drops rows.
Sept_spending = Sept_spending[Sept_spending['amount_spent'] > 0].copy()

# Label each guest's total. The checks run in order, so reaching the second
# test already implies total >= 50.
Sept_spending['Spending_segment'] = Sept_spending['amount_spent'].apply(
    lambda total: 'Low' if total < 50 else ('Medium' if total < 100 else 'High')
)

Sept_spending

Unnamed: 0,guest_id,amount_spent,Spending_segment
0,1,65.0,Medium
1,8,50.0,Medium
2,9,40.0,Low
3,10,70.0,Medium
