Step 1: Simulate the Dataset

In [1]:
import pandas as pd
import numpy as np
from datetime import datetime, timedelta
import random

# Seed NumPy's global generator so a fresh "Run All" regenerates the same synthetic posts.
np.random.seed(42)

n = 1000
handles = [f"@influencer{idx}" for idx in range(1, 21)]
content_types = ['Reel', 'Story', 'Carousel', 'Image']
hashtags_pool = ['#fitness', '#fashion', '#travel', '#food', '#beauty', '#tech', '#lifestyle']


def _simulate_post(post_number):
    """Draw one synthetic post record as a dict keyed by the CSV column names.

    The random draws are made in a fixed order (handle, content type, day
    offset, likes, comments, shares, followers, reach fraction, hashtag count,
    hashtags). Reordering them would change the seeded output.
    """
    author = np.random.choice(handles)
    kind = np.random.choice(content_types)
    day_offset = np.random.randint(0, 365)
    like_count = np.random.randint(100, 10000)
    comment_count = np.random.randint(5, 500)
    share_count = np.random.randint(0, 300)
    follower_count = np.random.randint(5000, 500000)
    # Reach is a random 10%-90% slice of the follower count, truncated to an int.
    reach_fraction = np.random.uniform(0.1, 0.9)
    # Between one and three distinct hashtags per post.
    tag_count = np.random.randint(1, 4)
    chosen_tags = np.random.choice(hashtags_pool, size=tag_count, replace=False)

    return {
        'Post ID': f'POST{post_number:04}',
        'Influencer Handle': author,
        'Content Type': kind,
        'Post Date': datetime(2023, 1, 1) + timedelta(days=day_offset),
        'Likes': like_count,
        'Comments': comment_count,
        'Shares': share_count,
        'Followers': follower_count,
        'Post Reach': int(follower_count * reach_fraction),
        'Hashtags': ','.join(chosen_tags),
    }


# Build all records first, then construct the frame once and persist it for the next step.
data = [_simulate_post(post_number) for post_number in range(n)]

df = pd.DataFrame(data)
df.to_csv('influencer_posts.csv', index=False)


Step 2: Load and Prepare Data

In [2]:
# Reload the simulated posts from disk (parsing 'Post Date' as datetimes) and
# append the derived metric in one chained expression.
# Engagement Rate (%) = (Likes + Comments + Shares) / Followers * 100
df = pd.read_csv('influencer_posts.csv', parse_dates=['Post Date']).assign(
    **{
        'Engagement Rate (%)': lambda posts: (
            posts[['Likes', 'Comments', 'Shares']].sum(axis=1)
            .div(posts['Followers'])
            .mul(100)
        )
    }
)


Step 3: Engagement Rate by Content Type

In [3]:
# Mean engagement rate per content type, flattened into a two-column frame
# ('Content Type', 'Avg_Engagement') for plotting.
engagement_by_type = (
    df.groupby('Content Type')['Engagement Rate (%)']
    .mean()
    .rename('Avg_Engagement')
    .reset_index()
)


Plot: Avg. Engagement Rate by Content Type

In [4]:
import plotly.express as px

# Bar chart of the mean engagement rate for each content type.
fig1 = px.bar(
    engagement_by_type,
    x='Content Type',
    y='Avg_Engagement',
    title='Average Engagement Rate by Content Type',
    text='Avg_Engagement',
    color='Avg_Engagement',
    # Show a readable label (axis, colorbar, hover) instead of the raw column name.
    labels={'Avg_Engagement': 'Avg. Engagement Rate (%)'},
)
# The bar labels are bound to a float column; without a template they render
# at full float precision. Round them to two decimals.
fig1.update_traces(texttemplate='%{text:.2f}')
fig1.show()


Plot: Avg. Engagement Rate by Content Type

In [5]:
import plotly.express as px

# NOTE(review): this cell builds the same chart as fig1 (same data, columns and
# title), so the exported report shows it twice. Confirm which distinct view
# was intended here; fig2 is kept because the export step references it.
fig2 = px.bar(
    engagement_by_type,
    x='Content Type',
    y='Avg_Engagement',
    title='Average Engagement Rate by Content Type',
    text='Avg_Engagement',
    color='Avg_Engagement',
    # Show a readable label (axis, colorbar, hover) instead of the raw column name.
    labels={'Avg_Engagement': 'Avg. Engagement Rate (%)'},
)
# The bar labels are bound to a float column; without a template they render
# at full float precision. Round them to two decimals.
fig2.update_traces(texttemplate='%{text:.2f}')
fig2.show()


Step 4: Follower Growth Trends

In [6]:
# Synthetic monthly follower totals for one influencer. These numbers are drawn
# independently here; they are not derived from the posts table.
handle = '@influencer1'
dates = pd.date_range(start='2023-01-01', periods=12, freq='MS')

# Each month contributes a random gain in [100, 3000); the running total is the follower curve.
monthly_gains = np.random.randint(100, 3000, size=12)
followers = monthly_gains.cumsum()

follower_df = pd.DataFrame(
    dict(Month=dates, Followers=followers)
).assign(**{'Influencer Handle': handle})


Plot: Follower Growth

In [7]:
# Line chart (with point markers) of the simulated monthly follower totals.
growth_plot_options = dict(
    x='Month',
    y='Followers',
    title=f'Follower Growth for {handle}',
    markers=True,
)
fig3 = px.line(follower_df, **growth_plot_options)
fig3.show()


Step 5: Hashtag vs Performance

In [8]:
from collections import defaultdict

# Map each hashtag to the engagement rates of every post that used it.
hashtag_engagement = defaultdict(list)

# Walk only the two columns that are needed. df.iterrows() would build a full
# Series for every row, which is slow and unnecessary here.
tag_lists = df['Hashtags'].str.split(',')
for tags, rate in zip(tag_lists, df['Engagement Rate (%)']):
    for tag in tags:
        hashtag_engagement[tag].append(rate)

# One row per hashtag with its mean engagement rate.
avg_hashtag_perf = pd.DataFrame([
    {'Hashtag': tag, 'Avg Engagement Rate': np.mean(rates)}
    for tag, rates in hashtag_engagement.items()
])

# Keep the best performers (at most ten; fewer if fewer distinct hashtags exist).
top_hashtags = avg_hashtag_perf.sort_values(by='Avg Engagement Rate', ascending=False).head(10)


Plot: Top 10 Hashtags by Avg. Engagement

In [9]:
# Bar chart of the best-performing hashtags by mean engagement rate.
fig4 = px.bar(
    top_hashtags,
    x='Hashtag',
    y='Avg Engagement Rate',
    title='Top 10 Hashtags by Engagement Rate',
    text='Avg Engagement Rate',
    color='Avg Engagement Rate',
    # Make the unit explicit on the axis, colorbar and hover labels.
    labels={'Avg Engagement Rate': 'Avg. Engagement Rate (%)'},
)
# The bar labels are bound to a float column; without a template they render
# at full float precision. Round them to two decimals.
fig4.update_traces(texttemplate='%{text:.2f}')
fig4.show()


Key Insights You Can Offer to Influencers/Agencies:
Which content formats drive the most engagement?

What are their follower growth trends?

Which hashtags consistently boost engagement?

Which posts are underperforming given reach size?

Exporting All Plots to a Single HTML File

In [10]:
import plotly.io as pio

# Figures to include in the report, in display order.
figures = [fig1, fig2, fig3, fig4]

# Render each figure as an HTML fragment. Only the first fragment carries the
# plotly.js <script> tag (loaded from the CDN); the later fragments reuse it,
# so the library is not referenced once per figure.
html_parts = [
    pio.to_html(
        fig,
        full_html=False,
        include_plotlyjs='cdn' if position == 0 else False,
    )
    for position, fig in enumerate(figures)
]

# The page title matches the report. It previously read "Spotify Analysis",
# left over from a different project.
report_title = "Instagram Influencer Engagement Analysis"

# Combine the fragments into one full HTML document. The charset is declared
# because the file is written as UTF-8 below.
full_html = f"""
<html>
<head>
    <meta charset="utf-8">
    <title>{report_title}</title>
</head>
<body>
    {''.join(html_parts)}
</body>
</html>
"""

# Save to file
output_path = f"{report_title}.html"
with open(output_path, "w", encoding="utf-8") as f:
    f.write(full_html)

print(f"Saved to {output_path}")

Saved to Instagram Influencer Engagement Analysis.html
