In [1]:
# Add project root to Python path
import sys
import os

# Get the project root directory (parent of notebooks directory)
current_dir = os.getcwd()
if current_dir.endswith('notebooks'):
    project_root = os.path.dirname(current_dir)
else:
    project_root = current_dir

# Add project root to Python path
if project_root not in sys.path:
    sys.path.insert(0, project_root)

print(f"Project root: {project_root}")
print(f"Python path includes: {project_root in sys.path}")


Project root: /Users/paulrodriguez/Documents/Documents - Paul’s MacBook Pro/Data Analyst School/_DataCamp/github/tailwagg
Python path includes: True


In [2]:
# Import TailWagg utilities
from src.utils.database import get_database_engine, test_connection
from src.utils.validation import validate_environment
import pandas as pd
import holidays

# Validate environment and get database engine
validate_environment()
engine = get_database_engine()

# Test connection
if test_connection(engine):
    print("✅ Connected to PostgreSQL successfully!")
else:
    print("❌ Failed to connect to PostgreSQL")

# -------------------------------
# CONFIGURATION
# -------------------------------
START_DATE = "2022-01-01"
END_DATE = "2025-12-31"

calendar = pd.DataFrame({"date": pd.date_range(start=START_DATE, end=END_DATE)})
calendar["year"] = calendar["date"].dt.year
calendar["month"] = calendar["date"].dt.month
calendar["day"] = calendar["date"].dt.day
calendar["weekday"] = calendar["date"].dt.day_name()

# -------------------------------
# FEDERAL HOLIDAYS (US)
# -------------------------------
us_holidays = holidays.US(years=range(2022, 2026))
calendar["is_federal_holiday"] = calendar["date"].isin(us_holidays)
calendar["event_name"] = calendar["date"].map(us_holidays)
calendar["event_description"] = calendar["event_name"]

# -------------------------------
# PET & RETAIL EVENTS
# -------------------------------
pet_events = [
    {"name": "National Pet Day", "month": 4, "day": 11, "description": "Celebrates pets and pet owners."},
    {"name": "National Dog Day", "month": 8, "day": 26, "description": "Honors all dogs and encourages adoption."},
    {"name": "National Puppy Day", "month": 3, "day": 23, "description": "Celebrates the unconditional love puppies bring."},
    {"name": "National Cat Day", "month": 10, "day": 29, "description": "Acknowledges cats and adoption efforts."},
]

# Helper for movable events
def get_special_event_dates(year):
    us_h = holidays.US(years=[year])
    # Find Thanksgiving (4th Thursday of November)
    thanksgiving = [d for d, name in us_h.items() if name == "Thanksgiving"]
    if thanksgiving:
        thanksgiving_date = thanksgiving[0]
        black_friday = thanksgiving_date + pd.Timedelta(days=1)
        cyber_monday = thanksgiving_date + pd.Timedelta(days=4)
    else:
        black_friday = cyber_monday = None

    # Father's Day: third Sunday in June
    june_sundays = pd.date_range(f"{year}-06-01", f"{year}-06-30", freq="W-SUN")
    fathers_day = june_sundays[2]
    take_dog_to_work_day = fathers_day + pd.Timedelta(days=5)  # Friday after Father's Day

    events = []
    if black_friday:
        events.append({"name": "Black Friday", "date": black_friday, "description": "Major retail sales event following Thanksgiving."})
    if cyber_monday:
        events.append({"name": "Cyber Monday", "date": cyber_monday, "description": "E-commerce-focused sales day following Thanksgiving weekend."})
    events.append({"name": "Take Your Dog to Work Day", "date": take_dog_to_work_day, "description": "Encourages bringing pets to work (Friday after Father’s Day)."})
    return events

# Build event list
movable_events = []
for y in range(2022, 2026):
    movable_events.extend(get_special_event_dates(y))
    for e in pet_events:
        movable_events.append({
            "name": e["name"],
            "date": pd.Timestamp(year=y, month=e["month"], day=e["day"]),
            "description": e["description"]
        })

# Combine events into one DataFrame
pet_calendar = pd.DataFrame(movable_events)

# -------------------------------
# MERGE INTO MAIN CALENDAR
# -------------------------------
calendar = calendar.merge(
    pet_calendar,
    on="date",
    how="left",
    suffixes=("", "_pet")
)

# Fill missing values from either source
calendar["event_name"] = calendar["event_name"].fillna(calendar["name"])
calendar["event_description"] = calendar["event_description"].fillna(calendar["description"])

calendar["is_holiday"] = calendar["is_federal_holiday"] | calendar["event_name"].notna()

# Cleanup
calendar = calendar.drop(columns=["name", "description"], errors="ignore")
calendar = calendar.sort_values("date").reset_index(drop=True)

# -------------------------------
# OUTPUT
# -------------------------------
print(calendar.head(15))


✅ Connected to PostgreSQL successfully!
         date  year  month  day    weekday  is_federal_holiday  \
0  2022-01-01  2022      1    1   Saturday                True   
1  2022-01-02  2022      1    2     Sunday               False   
2  2022-01-03  2022      1    3     Monday               False   
3  2022-01-04  2022      1    4    Tuesday               False   
4  2022-01-05  2022      1    5  Wednesday               False   
5  2022-01-06  2022      1    6   Thursday               False   
6  2022-01-07  2022      1    7     Friday               False   
7  2022-01-08  2022      1    8   Saturday               False   
8  2022-01-09  2022      1    9     Sunday               False   
9  2022-01-10  2022      1   10     Monday               False   
10 2022-01-11  2022      1   11    Tuesday               False   
11 2022-01-12  2022      1   12  Wednesday               False   
12 2022-01-13  2022      1   13   Thursday               False   
13 2022-01-14  2022      1   14     

  calendar["is_federal_holiday"] = calendar["date"].isin(us_holidays)


# Calendar Events Analysis

This notebook contains comprehensive calendar and seasonal event analysis for the TailWagg pet retail dataset, including:
- US federal holidays
- Pet industry events
- Seasonal event flagging
- Event categorization
- Pre-holiday analysis


## Advanced Calendar Analysis


In [3]:
# Enhanced seasonal event flagging with 2-day pre-holiday capture and event categorization

# Create a comprehensive event categorization function
def categorize_event(event_name):
    """Categorize events by type for better analysis"""
    if pd.isna(event_name):
        return "None"
    
    event_lower = str(event_name).lower()
    
    # Major retail events
    if any(term in event_lower for term in ["black friday", "cyber monday"]):
        return "Major Retail"
    
    # Federal holidays
    elif any(term in event_lower for term in ["christmas", "new year", "thanksgiving", "independence day", "memorial day", "labor day", "presidents day", "martin luther king", "veterans day", "columbus day"]):
        return "Federal Holiday"
    
    # Pet industry events
    elif any(term in event_lower for term in ["pet", "dog", "cat", "puppy", "kitten"]):
        return "Pet Industry"
    
    # Seasonal/retail events
    elif any(term in event_lower for term in ["valentine", "mother", "father", "halloween", "easter", "back to school"]):
        return "Seasonal"
    
    # Other events
    else:
        return "Other"

# Create a function to flag holidays and 2 days before
def create_seasonal_flags(calendar_df):
    """Create flags for holidays and 2 days before each holiday"""
    calendar_df = calendar_df.copy()
    
    # Initialize flags
    calendar_df["is_holiday"] = False
    calendar_df["is_pre_holiday"] = False
    calendar_df["event_category"] = calendar_df["event_name"].apply(categorize_event)
    
    # Get all unique holidays (non-null event names)
    holidays = calendar_df[calendar_df["event_name"].notna()]["event_name"].unique()
    
    for holiday in holidays:
        # Flag the actual holiday
        holiday_mask = calendar_df["event_name"] == holiday
        calendar_df.loc[holiday_mask, "is_holiday"] = True
        
        # Flag 2 days before the holiday
        holiday_dates = calendar_df[holiday_mask]["date"]
        for holiday_date in holiday_dates:
            pre_holiday_dates = [
                holiday_date - pd.Timedelta(days=1),
                holiday_date - pd.Timedelta(days=2)
            ]
            pre_holiday_mask = calendar_df["date"].isin(pre_holiday_dates)
            calendar_df.loc[pre_holiday_mask, "is_pre_holiday"] = True
    
    # Create combined seasonal event flag (holiday OR pre-holiday)
    calendar_df["seasonal_event_flag"] = calendar_df["is_holiday"] | calendar_df["is_pre_holiday"]
    
    return calendar_df

# Apply the enhanced seasonal flagging to calendar
calendar["date"] = pd.to_datetime(calendar["date"])
calendar = create_seasonal_flags(calendar)

print("Enhanced calendar with seasonal flags:")
print(calendar[["date", "event_name", "event_category", "is_holiday", "is_pre_holiday", "seasonal_event_flag"]].head(10))


Enhanced calendar with seasonal flags:
        date      event_name   event_category  is_holiday  is_pre_holiday  \
0 2022-01-01  New Year's Day  Federal Holiday        True           False   
1 2022-01-02             NaN             None       False           False   
2 2022-01-03             NaN             None       False           False   
3 2022-01-04             NaN             None       False           False   
4 2022-01-05             NaN             None       False           False   
5 2022-01-06             NaN             None       False           False   
6 2022-01-07             NaN             None       False           False   
7 2022-01-08             NaN             None       False           False   
8 2022-01-09             NaN             None       False           False   
9 2022-01-10             NaN             None       False           False   

   seasonal_event_flag  
0                 True  
1                False  
2                False  
3            

In [4]:
# Summary of calendar events
print(f"Calendar events processed: {calendar['event_name'].nunique()} unique events")
print(f"Event categories: {calendar['event_category'].value_counts().to_dict()}")
print(f"Seasonal event days: {calendar['seasonal_event_flag'].sum()}")
print(f"Pre-holiday days: {calendar['is_pre_holiday'].sum()}")
print(f"Holiday days: {calendar['is_holiday'].sum()}")

# Show events by category
print("\nEvents by category:")
event_summary = calendar[calendar['event_name'].notna()].groupby('event_category').agg({
    'event_name': 'count',
    'date': ['min', 'max']
}).round(2)
print(event_summary)

# Save calendar for reuse in other notebooks
import os
os.makedirs('data/interim', exist_ok=True)
calendar.to_csv('data/interim/calendar_events.csv', index=False)
print("\n✅ Calendar saved to data/interim/calendar_events.csv")


Calendar events processed: 20 unique events
Event categories: {'None': 1393, 'Federal Holiday': 44, 'Pet Industry': 20, 'Other': 4}
Seasonal event days: 191
Pre-holiday days: 129
Holiday days: 68

Events by category:
                event_name       date           
                     count        min        max
event_category                                  
Federal Holiday         44 2022-01-01 2025-12-25
Other                    4 2022-02-21 2025-02-17
Pet Industry            20 2022-03-23 2025-10-29

✅ Calendar saved to data/interim/calendar_events.csv
