# 04 — Presentation Assets

**Objective**: Generate presentation-ready visualizations and summaries for executive reporting.

**Outputs**:
- Executive segment summary tables
- High-quality visualizations (PNG/SVG)
- Segment performance metrics
- Recommendations dashboard

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from pathlib import Path

# Presentation styling: whitegrid background with the 'husl' colour palette.
plt.style.use('seaborn-v0_8-whitegrid')
sns.set_palette('husl')

# Locations of the parquet artifacts read by this notebook.
segments_dir = Path('../artifacts/outputs/segments')
features_path = Path('../artifacts/outputs/customer_features.parquet')

# Segmentation results (assignments + per-segment summary) and customer features.
segment_assignments = pd.read_parquet(segments_dir / 'segment_assignments.parquet')
segment_summary = pd.read_parquet(segments_dir / 'segment_summary.parquet')
customer_features = pd.read_parquet(features_path)

# Inner join on user_id: only customers present in both tables are analysed.
data = pd.merge(customer_features, segment_assignments, on='user_id', how='inner')

# Quick sanity report: overall size and the number of customers per segment.
n_customers = len(data)
n_segments = data['segment'].nunique()
print(f'Loaded {n_customers:,} customers across {n_segments} segments')
print('Segment distribution:')
segment_counts = data['segment'].value_counts().sort_index()
print(segment_counts)

## Executive Summary Tables

In [None]:
# Executive summary: one row per segment with business metrics.
# Named aggregation gives every output column its final name directly, so no
# separate flatten/rename step is needed.
exec_summary = (
    data.groupby('segment')
    .agg(
        customers=('user_id', 'count'),
        avg_sessions_per_customer=('n_sessions', 'mean'),
        total_sessions=('n_sessions', 'sum'),
        avg_session_duration_min=('avg_session_duration_sec', 'mean'),
        avg_page_clicks=('avg_page_clicks', 'mean'),
        flight_conversion_rate=('p_flight_booked', 'mean'),
        hotel_conversion_rate=('p_hotel_booked', 'mean'),
        cancellation_rate=('p_cancellation', 'mean'),
        avg_flight_value=('avg_base_fare_usd', 'mean'),
        avg_hotel_value=('avg_hotel_per_room_usd', 'mean'),
    )
    .round(3)
)

# The duration column still holds seconds at this point; convert to minutes.
exec_summary['avg_session_duration_min'] /= 60

# Each segment's share of all customers, in percent with one decimal.
customer_total = exec_summary['customers'].sum()
exec_summary['customer_percentage'] = (
    exec_summary['customers'].div(customer_total).mul(100).round(1)
)

# When the segment summary carries human-readable names, show them as the
# first column of the table.
if 'segment_name' in segment_summary.columns:
    name_mapping = dict(zip(segment_summary['segment'], segment_summary['segment_name']))
    exec_summary.insert(0, 'segment_name', exec_summary.index.map(name_mapping))

print('Executive Summary by Segment:')
display(exec_summary)

## High-Impact Visualizations

In [None]:
# Directory that receives every exported presentation asset.
output_dir = Path('../artifacts/step4_presentation')
output_dir.mkdir(parents=True, exist_ok=True)

# 1. Segment Size and Value Distribution
# Two panels side by side: customer share (pie) and conversion rates (bars).
fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(16, 6))

# Left panel: share of customers per segment, one palette colour per segment.
sizes = exec_summary['customers']
colors = sns.color_palette('husl', len(sizes))
segment_labels = [f'Segment {i}' for i in sizes.index]
wedges, texts, autotexts = ax1.pie(
    sizes,
    labels=segment_labels,
    autopct='%1.1f%%',
    colors=colors,
    startangle=90,
)
ax1.set_title('Customer Distribution by Segment', fontsize=14, fontweight='bold')

# Right panel: transposed so booking types sit on the x-axis and each
# segment becomes one bar series (columns of the transposed frame).
conversion_data = exec_summary[['flight_conversion_rate', 'hotel_conversion_rate']].transpose()
conversion_data.plot.bar(ax=ax2, color=colors[:conversion_data.shape[1]])
ax2.set_title('Conversion Rates by Segment', fontsize=14, fontweight='bold')
ax2.set(xlabel='Booking Type', ylabel='Conversion Rate')
ax2.legend(title='Segment', bbox_to_anchor=(1.05, 1), loc='upper left')
ax2.tick_params(axis='x', rotation=0)

# Save a 300-dpi raster and a vector version, then render inline.
fig.tight_layout()
fig.savefig(output_dir / 'segment_overview.png', dpi=300, bbox_inches='tight')
fig.savefig(output_dir / 'segment_overview.svg', bbox_inches='tight')
plt.show()

In [None]:
# 2. Segment Performance Heatmap
plt.figure(figsize=(14, 8))

# Metric column -> display label. Keeping both in one mapping guarantees the
# heatmap rows and their tick labels stay paired and in the same order.
metric_display_names = {
    'avg_sessions_per_customer': 'Avg Sessions/Customer',
    'avg_session_duration_min': 'Avg Session Duration (min)',
    'avg_page_clicks': 'Avg Page Clicks',
    'flight_conversion_rate': 'Flight Conversion Rate',
    'hotel_conversion_rate': 'Hotel Conversion Rate',
    'cancellation_rate': 'Cancellation Rate',
    'avg_flight_value': 'Avg Flight Value ($)',
    'avg_hotel_value': 'Avg Hotel Value ($)',
}
heatmap_metrics = list(metric_display_names)
metric_labels = list(metric_display_names.values())

# Min-max normalise every metric to a 0-1 scale across segments.
# A metric that is identical for all segments (always true when there is a
# single segment) has a zero range; dividing by it would give 0/0 = NaN and
# blank cells, so the range is replaced by 1 and such a metric shows as 0.
heatmap_data = exec_summary[heatmap_metrics].copy()
metric_min = heatmap_data.min()
metric_range = heatmap_data.max() - metric_min
safe_range = metric_range.mask(metric_range == 0, 1.0)
heatmap_data = (heatmap_data - metric_min) / safe_range

# Create heatmap (transposed: metrics as rows, segments as columns)
sns.heatmap(heatmap_data.T, annot=True, cmap='RdYlBu_r', fmt='.2f',
            cbar_kws={'label': 'Normalized Score (0-1)'})
plt.title('Segment Performance Heatmap\n(Normalized Metrics)', fontsize=16, fontweight='bold')
plt.xlabel('Segment')
plt.ylabel('Metrics')

# Replace the raw column names on the y-axis with readable labels.
plt.gca().set_yticklabels(metric_labels)

plt.tight_layout()
plt.savefig(output_dir / 'segment_performance_heatmap.png', dpi=300, bbox_inches='tight')
plt.savefig(output_dir / 'segment_performance_heatmap.svg', bbox_inches='tight')
plt.show()

In [None]:
# 3. Revenue Potential Analysis
# 2x2 grid: revenue per customer, engagement vs conversion, cancellation
# rate, and a lifetime-value proxy.
fig, axes = plt.subplots(2, 2, figsize=(16, 12))

# Per-segment averages of the price/volume inputs. Missing averages are
# replaced by 0 so the derived estimates below come out as 0 instead of NaN.
revenue_metrics = data.groupby('segment').agg({
    'avg_base_fare_usd': 'mean',
    'avg_hotel_per_room_usd': 'mean',
    'avg_nights': 'mean',
    'n_sessions': 'mean'
}).fillna(0)

# Estimated revenue per customer = average price x conversion rate
# (hotel price is additionally multiplied by the average number of nights).
# Both frames are indexed by segment, so the products align row by row.
revenue_metrics['est_flight_revenue'] = (
    revenue_metrics['avg_base_fare_usd'] * exec_summary['flight_conversion_rate']
)
revenue_metrics['est_hotel_revenue'] = (
    revenue_metrics['avg_hotel_per_room_usd']
    * revenue_metrics['avg_nights']
    * exec_summary['hotel_conversion_rate']
)
revenue_metrics['total_est_revenue'] = (
    revenue_metrics['est_flight_revenue'] + revenue_metrics['est_hotel_revenue']
)

# Plot 1: Revenue per customer by segment (flight + hotel stacked)
revenue_metrics[['est_flight_revenue', 'est_hotel_revenue']].plot(kind='bar', stacked=True, ax=axes[0,0])
axes[0,0].set_title('Estimated Revenue per Customer by Segment')
axes[0,0].set_ylabel('Revenue ($)')
axes[0,0].tick_params(axis='x', rotation=0)

# Plot 2: Session engagement vs conversion
# Bubble area scales with the segment's customer count; colour is the
# segment's position in the table.
scatter_data = exec_summary[['avg_sessions_per_customer', 'flight_conversion_rate', 'customers']]
scatter = axes[0,1].scatter(scatter_data['avg_sessions_per_customer'],
                           scatter_data['flight_conversion_rate'],
                           s=scatter_data['customers']/10, alpha=0.7, c=range(len(scatter_data)))
axes[0,1].set_xlabel('Avg Sessions per Customer')
axes[0,1].set_ylabel('Flight Conversion Rate')
axes[0,1].set_title('Engagement vs Conversion\n(Bubble size = Customer count)')

# Plot 3: Cancellation rates
exec_summary['cancellation_rate'].plot(kind='bar', ax=axes[1,0], color='coral')
axes[1,0].set_title('Cancellation Rate by Segment')
axes[1,0].set_ylabel('Cancellation Rate')
axes[1,0].tick_params(axis='x', rotation=0)

# Plot 4: Customer lifetime value proxy (estimated revenue x avg sessions)
clv_proxy = revenue_metrics['total_est_revenue'] * exec_summary['avg_sessions_per_customer']
clv_proxy.plot(kind='bar', ax=axes[1,1], color='lightgreen')
# The title previously contained a mis-encoded multiplication sign.
axes[1,1].set_title('Customer Lifetime Value Proxy\n(Revenue × Avg Sessions)')
axes[1,1].set_ylabel('CLV Proxy ($)')
axes[1,1].tick_params(axis='x', rotation=0)

plt.tight_layout()
plt.savefig(output_dir / 'revenue_analysis.png', dpi=300, bbox_inches='tight')
plt.savefig(output_dir / 'revenue_analysis.svg', bbox_inches='tight')
plt.show()

## Export Executive Assets

In [None]:
# Export executive summary table
# NOTE: to_excel needs an Excel writer engine installed (e.g. openpyxl).
exec_summary.to_csv(output_dir / 'executive_summary.csv')
exec_summary.to_excel(output_dir / 'executive_summary.xlsx')

# Export revenue analysis: summary metrics and revenue estimates side by side
revenue_analysis = pd.concat([exec_summary, revenue_metrics], axis=1)
revenue_analysis.to_csv(output_dir / 'revenue_analysis.csv')

# Create segment recommendations
# A segment counts as "converting" for a product when its conversion rate
# exceeds this threshold.
CONVERSION_THRESHOLD = 0.1
# Loop-invariant: computed once instead of on every iteration.
median_sessions = exec_summary['avg_sessions_per_customer'].median()

recommendations = []

for seg in exec_summary.index:
    profile = exec_summary.loc[seg]
    converts_flights = profile['flight_conversion_rate'] > CONVERSION_THRESHOLD
    converts_hotels = profile['hotel_conversion_rate'] > CONVERSION_THRESHOLD

    # Rules are checked in order; the first match decides the recommendation.
    if converts_flights and converts_hotels:
        rec = "Focus on package deals and loyalty programs"
        priority = "High"
    elif converts_flights:
        rec = "Cross-sell hotel bookings with flight promotions"
        priority = "Medium"
    elif converts_hotels:
        rec = "Cross-sell flight bookings with hotel promotions"
        priority = "Medium"
    elif profile['avg_sessions_per_customer'] > median_sessions:
        # Engaged but not converting: worth a targeted push.
        rec = "Implement targeted conversion campaigns"
        priority = "High"
    else:
        rec = "Re-engagement campaigns and special offers"
        priority = "Low"

    recommendations.append({
        'segment': seg,
        'customers': int(profile['customers']),
        'customer_percentage': profile['customer_percentage'],
        'recommendation': rec,
        'priority': priority,
        'key_metric': f"Flight: {profile['flight_conversion_rate']:.1%}, Hotel: {profile['hotel_conversion_rate']:.1%}"
    })

# Explicit columns keep the frame's schema intact even when there are no
# segments, so later lookups such as recommendations_df['priority'] still work.
recommendation_columns = [
    'segment', 'customers', 'customer_percentage',
    'recommendation', 'priority', 'key_metric',
]
recommendations_df = pd.DataFrame(recommendations, columns=recommendation_columns)
recommendations_df.to_csv(output_dir / 'segment_recommendations.csv', index=False)

# Console summary of what was written (mis-encoded symbols repaired).
print(f'✅ Presentation assets exported to {output_dir}')
print('   • executive_summary.csv: Business metrics by segment')
print('   • segment_overview.png/svg: Customer distribution and conversion')
print('   • segment_performance_heatmap.png/svg: Performance comparison')
print('   • revenue_analysis.png/svg: Revenue potential analysis')
print('   • segment_recommendations.csv: Actionable recommendations')

print('\n📋 Segment Recommendations:')
display(recommendations_df)

## Key Insights Summary

In [None]:
# Generate key insights for executive presentation
def _customer_weighted_mean(rates, weights):
    """Return the mean of per-segment *rates* weighted by *weights*.

    Segments whose rate is missing are left out of both the numerator and
    the denominator. Returns NaN when no weight remains.
    """
    valid = rates.notna()
    total_weight = weights[valid].sum()
    if total_weight == 0:
        return float('nan')
    return (rates[valid] * weights[valid]).sum() / total_weight


print('=== KEY INSIGHTS FOR EXECUTIVE PRESENTATION ===')

# Overall metrics
# Segment rates are weighted by segment size: a plain mean of the
# per-segment rates would let a tiny segment count as much as a huge one
# and misstate the overall conversion rate.
total_customers = exec_summary['customers'].sum()
avg_conversion_flight = _customer_weighted_mean(
    exec_summary['flight_conversion_rate'], exec_summary['customers']
)
avg_conversion_hotel = _customer_weighted_mean(
    exec_summary['hotel_conversion_rate'], exec_summary['customers']
)

print('📊 BUSINESS OVERVIEW:')
print(f'   • Total customers analyzed: {total_customers:,}')
print(f'   • Number of distinct segments: {len(exec_summary)}')
print(f'   • Average flight conversion rate: {avg_conversion_flight:.1%}')
print(f'   • Average hotel conversion rate: {avg_conversion_hotel:.1%}')

# Segment insights: index label of the top segment for each metric
largest_segment = exec_summary['customers'].idxmax()
highest_flight_conv = exec_summary['flight_conversion_rate'].idxmax()
highest_hotel_conv = exec_summary['hotel_conversion_rate'].idxmax()
highest_sessions = exec_summary['avg_sessions_per_customer'].idxmax()

print('🎯 SEGMENT HIGHLIGHTS:')
print(f'   • Largest segment: Segment {largest_segment} ({exec_summary.loc[largest_segment, "customer_percentage"]:.1f}% of customers)')
print(f'   • Best flight conversion: Segment {highest_flight_conv} ({exec_summary.loc[highest_flight_conv, "flight_conversion_rate"]:.1%})')
print(f'   • Best hotel conversion: Segment {highest_hotel_conv} ({exec_summary.loc[highest_hotel_conv, "hotel_conversion_rate"]:.1%})')
print(f'   • Most engaged: Segment {highest_sessions} ({exec_summary.loc[highest_sessions, "avg_sessions_per_customer"]:.1f} avg sessions)')

# Revenue opportunities
if 'total_est_revenue' in revenue_metrics.columns:
    highest_revenue_seg = revenue_metrics['total_est_revenue'].idxmax()
    # Per-customer estimate scaled by segment size, summed over all segments.
    total_revenue_potential = (revenue_metrics['total_est_revenue'] * exec_summary['customers']).sum()

    print('💰 REVENUE OPPORTUNITIES:')
    print(f'   • Highest revenue potential: Segment {highest_revenue_seg} (${revenue_metrics.loc[highest_revenue_seg, "total_est_revenue"]:.0f}/customer)')
    print(f'   • Total estimated revenue potential: ${total_revenue_potential:,.0f}')

# Priority recommendations: filter once, reuse for both figures
high_priority = recommendations_df[recommendations_df['priority'] == 'High']
high_priority_segments = high_priority['segment'].tolist()
high_priority_customers = high_priority['customers'].sum()

print('🚀 PRIORITY ACTIONS:')
print(f'   • High-priority segments: {len(high_priority_segments)} segments')
print(f'   • Customers in high-priority segments: {high_priority_customers:,} ({high_priority_customers/total_customers*100:.1f}%)')
print('   • Focus areas: Package deals, loyalty programs, targeted conversion campaigns')