# In [None]:
# Generate mock analytics data for testing
def generate_mock_analytics_data(seed=42):
    """Generate mock analytics data for exploration.

    Produces one row per page path from a fixed list of 13 paths. Each row
    carries randomly drawn sessions, pageviews, bounce rate and average
    session duration; the ranges depend on the kind of page (home page,
    product pages, cart, checkout, everything else).

    Args:
        seed: Seed for the private random generator. The default (42)
            yields the same values as seeding the global NumPy RNG with 42
            and drawing through ``np.random.*``. Passing ``None`` gives
            non-reproducible data.

    Returns:
        pandas.DataFrame with the columns ``ga:pagePath``, ``ga:sessions``
        (int), ``ga:pageviews`` (int, truncated), ``ga:bounceRate`` (float,
        percent) and ``ga:avgSessionDuration`` (float, seconds).
    """
    # A private RandomState produces the same stream as
    # ``np.random.seed(seed)`` followed by ``np.random.*`` calls, without
    # resetting the process-wide NumPy RNG as a side effect.
    rng = np.random.RandomState(seed)

    # Create page paths
    page_paths = [
        '/',
        '/products',
        '/products/widget-x',
        '/products/widget-y',
        '/products/widget-z',
        '/about',
        '/contact',
        '/blog',
        '/blog/post-1',
        '/blog/post-2',
        '/cart',
        '/checkout',
        '/thank-you',
    ]

    # Each profile: (sessions range, pageviews-per-session range,
    # bounce-rate range in percent).
    home_profile = ((800, 1200), (1.5, 2.5), (30, 50))
    product_profile = ((300, 700), (1.2, 2.0), (40, 60))
    cart_profile = ((200, 400), (1.0, 1.5), (20, 40))
    checkout_profile = ((150, 300), (1.0, 1.3), (15, 30))
    default_profile = ((100, 300), (1.1, 1.8), (50, 70))

    rows = []
    for page in page_paths:
        if page == '/':
            profile = home_profile
        elif page.startswith('/products'):
            profile = product_profile
        elif page == '/cart':
            profile = cart_profile
        elif page == '/checkout':
            profile = checkout_profile
        else:
            profile = default_profile

        (sessions_lo, sessions_hi), (ratio_lo, ratio_hi), (bounce_lo, bounce_hi) = profile

        # The draw order (sessions, pageviews ratio, bounce rate, duration)
        # determines the generated values, so it must not be rearranged.
        sessions = rng.randint(sessions_lo, sessions_hi)
        pageviews = sessions * rng.uniform(ratio_lo, ratio_hi)
        bounce_rate = rng.uniform(bounce_lo, bounce_hi)

        # Session duration in seconds
        avg_session_duration = rng.uniform(30, 180)

        rows.append({
            'ga:pagePath': page,
            'ga:sessions': int(sessions),
            'ga:pageviews': int(pageviews),
            'ga:bounceRate': bounce_rate,
            'ga:avgSessionDuration': avg_session_duration,
        })

    return pd.DataFrame(rows)

# Generate the mock data
# The resulting frame is the input for every plot and calculation below.
mock_analytics_df = generate_mock_analytics_data()
# Bare expression: a notebook renders it only when it is the last statement
# of its cell; run as a plain script the result is discarded.
mock_analytics_df.head()


# Visualize pageviews
# Horizontal bars, busiest page first.
pageviews_sorted = mock_analytics_df.sort_values('ga:pageviews', ascending=False)
plt.figure(figsize=(12, 8))
pageviews_ax = sns.barplot(x='ga:pageviews', y='ga:pagePath', data=pageviews_sorted)
pageviews_ax.set_title('Page Views by Page')
pageviews_ax.set_xlabel('Page Views')
pageviews_ax.set_ylabel('Page Path')
plt.tight_layout()
plt.show()

# Visualize bounce rates
# Horizontal bars, highest bounce rate first.
bounce_df = mock_analytics_df.sort_values('ga:bounceRate', ascending=False)
plt.figure(figsize=(12, 8))
bars = sns.barplot(x='ga:bounceRate', y='ga:pagePath', data=bounce_df)
bars.set_title('Bounce Rate by Page')
bars.set_xlabel('Bounce Rate (%)')
bars.set_ylabel('Page Path')

# Add value labels
# Bars are matched to rows of the sorted frame by position.
bounce_rates = bounce_df['ga:bounceRate']
for i, bar in enumerate(bars.patches):
    label_x = bar.get_width() + 1  # just right of the bar's end
    label_y = bar.get_y() + bar.get_height() / 2  # vertical centre of the bar
    bars.text(label_x, label_y, f'{bounce_rates.iloc[i]:.1f}%', va='center')

plt.tight_layout()
plt.show()


# Identify underperforming pages
# Project-local helper; how it scores pages is not visible in this notebook.
from src.processors.analytics import identify_underperforming_pages

# NOTE(review): threshold=0.4 is the same value as the red threshold line
# drawn at x=0.4 in the performance-score plot below; presumably the two are
# meant to stay in sync.
underperforming = identify_underperforming_pages(mock_analytics_df, threshold=0.4)
print(f"Found {len(underperforming)} underperforming pages:")
# The column selection requires the helper's result to expose these four
# columns, including 'performance_score'. Bare expression: only rendered when
# it is the last statement of a notebook cell.
underperforming[['ga:pagePath', 'ga:bounceRate', 'ga:avgSessionDuration', 'performance_score']]

# Visualize the performance score
# bounce_score:      1 - bounce rate as a fraction (lower bounce is better)
# duration_score:    session duration relative to the longest one in the data
# performance_score: plain average of the two
perf_df = mock_analytics_df.assign(
    bounce_score=lambda frame: 1 - (frame['ga:bounceRate'] / 100),
    duration_score=lambda frame: (
        frame['ga:avgSessionDuration'] / frame['ga:avgSessionDuration'].max()
    ),
    performance_score=lambda frame: (
        (frame['bounce_score'] + frame['duration_score']) / 2
    ),
)
perf_df = perf_df.sort_values('performance_score')

plt.figure(figsize=(12, 8))
perf_ax = sns.barplot(x='performance_score', y='ga:pagePath', data=perf_df)
perf_ax.set_title('Performance Score by Page (Higher is Better)')
perf_ax.set_xlabel('Performance Score')
perf_ax.set_ylabel('Page Path')
# Dashed red marker at the 0.4 cut-off.
perf_ax.axvline(x=0.4, color='red', linestyle='--', label='Threshold')
perf_ax.legend()
plt.tight_layout()
plt.show()


# Simulate a conversion funnel
funnel_steps = ['/', '/products', '/cart', '/checkout', '/thank-you']
funnel_data = mock_analytics_df[mock_analytics_df['ga:pagePath'].isin(funnel_steps)].copy()
# Put the rows in funnel order; reset_index restores a 0..n-1 index so that
# row i corresponds to funnel_steps[i].
funnel_data = funnel_data.set_index('ga:pagePath').loc[funnel_steps].reset_index()

plt.figure(figsize=(12, 8))
ax = sns.barplot(x='ga:pagePath', y='ga:sessions', data=funnel_data, order=funnel_steps)
# Title and axis labels are applied after plotting, as in the other charts:
# some seaborn versions set the axis labels from the column names while
# drawing, which would replace labels assigned beforehand.
plt.title('Conversion Funnel')
plt.xlabel('Funnel Step')
plt.ylabel('Sessions')

# Add conversion rate labels
for i in range(1, len(funnel_steps)):
    prev_sessions = funnel_data.loc[i-1, 'ga:sessions']
    curr_sessions = funnel_data.loc[i, 'ga:sessions']
    # Step-to-step conversion: share of the previous step's sessions that
    # reach this step, in percent.
    conv_rate = (curr_sessions / prev_sessions) * 100

    # Write the conversion rate slightly above the current bar
    plt.annotate(
        f'{conv_rate:.1f}%',
        xy=(i, curr_sessions + 20),
        ha='center',
        va='bottom',
        fontsize=12
    )

    # Draw an arrow from the top of the previous bar to the top of this one
    ax.annotate(
        '',
        xy=(i, curr_sessions),
        xytext=(i-1, prev_sessions),
        arrowprops=dict(arrowstyle='->', lw=2, color='gray')
    )

plt.tight_layout()
plt.show()

# File: notebooks/analytics_exploration.ipynb (continued)

# Calculate overall funnel conversion rate
# Sessions on the last funnel step as a percentage of sessions on the first.
funnel_sessions = funnel_data['ga:sessions']
first_step_sessions = funnel_sessions.loc[0]
last_step_sessions = funnel_sessions.loc[len(funnel_steps) - 1]
overall_conv_rate = last_step_sessions / first_step_sessions * 100

print(
    f"Overall funnel conversion rate: {overall_conv_rate:.2f}%",
    f"Visitors: {first_step_sessions}",
    f"Conversions: {last_step_sessions}",
    sep='\n',
)