# Advanced Visualizations

This notebook creates advanced visualizations (heatmaps and comparative charts) with matplotlib and seaborn, and exports aggregated CSV datasets for the Tableau dashboard.

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import warnings
# Silence every warning for the rest of the session so library notices do not
# clutter cell output.
# NOTE(review): this blanket filter also hides warnings that may point at real
# problems (e.g. pandas/seaborn deprecations) — consider narrowing it.
warnings.filterwarnings('ignore')

# Set plotting style
# The matplotlib style sheet is applied first and the seaborn palette is set
# afterwards, so the "husl" palette is the one that ends up in effect.
# NOTE(review): the 'seaborn-v0_8' style name presumably requires
# matplotlib >= 3.6 — confirm against the pinned environment.
plt.style.use('seaborn-v0_8')
sns.set_palette("husl")

In [None]:
# Read the cleaned posts export and convert its two date columns to datetimes.
# `.assign` replaces the existing columns in place, so column order is unchanged.
df = pd.read_csv('../data/cleaned_posts.csv').assign(
    timestamp=lambda frame: pd.to_datetime(frame['timestamp']),
    day=lambda frame: pd.to_datetime(frame['day']),
)
print(f"Loaded {len(df)} rows of cleaned data")

# Preview the first rows as the cell's rich output
df.head()

In [None]:
# 1. Heatmap of hour vs weekday engagement
# NOTE(review): weekday codes are assumed to be integers 0 (Monday) .. 6 (Sunday),
# matching how the aggregation cell of this notebook indexes a Monday-first list
# with them — confirm against the cleaning step.
weekday_labels = ['Monday', 'Tuesday', 'Wednesday', 'Thursday', 'Friday', 'Saturday', 'Sunday']

# Mean engagement rate for every (hour, weekday) pair, in long form
heatmap_data = df.groupby(['hour', 'weekday'])['engagement_rate'].mean().reset_index()

# Wide form: one row per weekday, one column per hour.
# Reindex to all seven weekdays so each row always lines up with its label,
# even when a weekday has no posts (that row is all-NaN and drawn blank).
# Previously seven labels were forced onto however many rows existed.
pivot_table = (
    heatmap_data
    .pivot(index='weekday', columns='hour', values='engagement_rate')
    .reindex(range(len(weekday_labels)))
)

plt.figure(figsize=(15, 8))
sns.heatmap(
    pivot_table,
    annot=True,
    fmt='.3f',
    cmap='YlGnBu',
    yticklabels=weekday_labels,  # labels are tied to the rows, not to fixed tick positions
)
plt.title('Engagement Rate Heatmap: Hour vs Weekday')
plt.xlabel('Hour of Day')
plt.ylabel('Day of Week')
plt.yticks(rotation=0)  # keep weekday names horizontal
plt.tight_layout()
plt.show()

In [None]:
# 2. Platform vs Post Type Performance
# Long form: mean engagement rate for every (platform, post type) pair
platform_post_performance = (
    df.groupby(['platform', 'post_type'])['engagement_rate']
    .mean()
    .reset_index()
)

# Wide form: one row per platform, one column per post type
pivot_platform_post = platform_post_performance.pivot(
    index='platform',
    columns='post_type',
    values='engagement_rate',
)

fig, ax = plt.subplots(figsize=(12, 8))
sns.heatmap(pivot_platform_post, annot=True, fmt='.3f', cmap='YlOrRd', ax=ax)
ax.set_title('Engagement Rate: Platform vs Post Type')
ax.set_xlabel('Post Type')
ax.set_ylabel('Platform')
fig.tight_layout()
plt.show()

In [None]:
# 3. Top performing posts
# The ten rows with the highest engagement rate, already sorted best-first
top_posts = df.nlargest(10, 'engagement_rate')

# Plot the IDs as strings: with numeric IDs seaborn cannot tell which axis is
# categorical and may draw vertical bars or re-sort the posts by ID value.
# String IDs keep the ranking order produced by `nlargest`.
top_posts_labeled = top_posts.assign(post_id=top_posts['post_id'].astype(str))

plt.figure(figsize=(12, 8))
sns.barplot(
    data=top_posts_labeled,
    x='engagement_rate',
    y='post_id',
    orient='h',  # force horizontal bars regardless of the post_id dtype
)
plt.title('Top 10 Performing Posts by Engagement Rate')
plt.xlabel('Engagement Rate')
plt.ylabel('Post ID')
plt.tight_layout()
plt.show()

In [None]:
# 4. Engagement trends over time by platform
# Mean engagement rate per (day, platform); groupby returns the rows sorted by
# day, so each platform's line is drawn in chronological order.
# (A stray literal "\n" after this statement used to make the cell a SyntaxError.)
daily_platform_engagement = df.groupby(['day', 'platform'])['engagement_rate'].mean().reset_index()

plt.figure(figsize=(14, 8))
# One line per platform, all on the same axes
for platform in daily_platform_engagement['platform'].unique():
    platform_data = daily_platform_engagement[daily_platform_engagement['platform'] == platform]
    plt.plot(platform_data['day'], platform_data['engagement_rate'], marker='o', label=platform, linewidth=2)

plt.title('Engagement Trends Over Time by Platform')
plt.xlabel('Date')
plt.ylabel('Average Engagement Rate')
plt.legend()
plt.xticks(rotation=45)  # slant date labels so they do not overlap
plt.tight_layout()
plt.show()

In [None]:
# 5. Distribution of post types across platforms
# Long form: number of posts for every (platform, post type) pair
post_type_platform = (
    df.groupby(['platform', 'post_type'])
    .size()
    .reset_index(name='count')
)

# Wide form: platforms as rows, post types as columns; combinations that never
# occur are shown as 0 rather than left empty
pivot_post_platform = post_type_platform.pivot(
    index='platform',
    columns='post_type',
    values='count',
).fillna(0)

fig, ax = plt.subplots(figsize=(12, 8))
sns.heatmap(pivot_post_platform, annot=True, fmt='.0f', cmap='Blues', ax=ax)
ax.set_title('Distribution of Post Types Across Platforms')
ax.set_xlabel('Post Type')
ax.set_ylabel('Platform')
fig.tight_layout()
plt.show()

In [None]:
# 6. Box plot of engagement rates by platform
# One box per platform, showing the spread of per-post engagement rates
fig, ax = plt.subplots(figsize=(12, 8))
sns.boxplot(data=df, x='platform', y='engagement_rate', ax=ax)
ax.set_title('Distribution of Engagement Rates by Platform')
ax.set_xlabel('Platform')
ax.set_ylabel('Engagement Rate')
fig.tight_layout()
plt.show()

In [None]:
# 7. Scatter plot: Reach vs Interactions with engagement rate as color
fig, ax = plt.subplots(figsize=(12, 8))

# Each point is one post; its color encodes that post's engagement rate
scatter = ax.scatter(
    df['reach'],
    df['interactions'],
    c=df['engagement_rate'],
    cmap='viridis',
    alpha=0.7,
)
fig.colorbar(scatter, ax=ax, label='Engagement Rate')

ax.set_title('Reach vs Interactions (colored by Engagement Rate)')
ax.set_xlabel('Reach')
ax.set_ylabel('Interactions')
fig.tight_layout()
plt.show()

In [None]:
# Prepare data for Tableau dashboard
# Build four small summary tables from the cleaned posts and write each one to
# ../data as a CSV (without the index) right after it is computed.

# Hourly aggregation: mean engagement rate and number of rated posts per hour
hourly_agg = (
    df.groupby('hour')
    .agg(
        avg_engagement_rate=('engagement_rate', 'mean'),
        post_count=('engagement_rate', 'count'),
    )
    .reset_index()
)
hourly_agg.to_csv('../data/hourly_aggregation.csv', index=False)

# Weekday aggregation: same two metrics per weekday code, plus a readable name
weekday_names = ['Monday', 'Tuesday', 'Wednesday', 'Thursday', 'Friday', 'Saturday', 'Sunday']
weekday_agg = (
    df.groupby('weekday')
    .agg(
        avg_engagement_rate=('engagement_rate', 'mean'),
        post_count=('engagement_rate', 'count'),
    )
    .reset_index()
)
# The weekday code is used directly as a position in the Monday-first list
weekday_agg['weekday_name'] = [weekday_names[day_code] for day_code in weekday_agg['weekday']]
weekday_agg.to_csv('../data/weekday_aggregation.csv', index=False)

# Platform aggregation: mean engagement, summed interactions, and post count
platform_agg = (
    df.groupby('platform')
    .agg(
        avg_engagement_rate=('engagement_rate', 'mean'),
        total_interactions=('interactions', 'sum'),
        total_posts=('post_id', 'count'),
    )
    .reset_index()
)
platform_agg.to_csv('../data/platform_aggregation.csv', index=False)

# Post type aggregation: mean engagement and post count per post type
post_type_agg = (
    df.groupby('post_type')
    .agg(
        avg_engagement_rate=('engagement_rate', 'mean'),
        post_count=('post_id', 'count'),
    )
    .reset_index()
)
post_type_agg.to_csv('../data/post_type_aggregation.csv', index=False)

# Summarize what was written
print("Aggregated datasets saved for Tableau dashboard:")
print("- hourly_aggregation.csv")
print("- weekday_aggregation.csv")
print("- platform_aggregation.csv")
print("- post_type_aggregation.csv")