In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
sns.set()
import warnings
warnings.filterwarnings('ignore')

In [None]:
df = pd.read_csv('social_media_engagement1.csv')
df.head()

Unnamed: 0,post_id,platform,post_type,post_time,likes,comments,shares,post_day,sentiment_score
0,1,Facebook,image,8/17/2023 14:45,2121,474,628,Thursday,positive
1,2,Facebook,carousel,5/14/2023 0:45,3660,432,694,Sunday,neutral
2,3,Instagram,poll,2/21/2023 16:15,4955,408,688,Tuesday,negative
3,4,Twitter,image,11/16/2023 0:45,1183,90,187,Thursday,negative
4,5,Twitter,video,5/23/2023 0:30,3499,247,286,Tuesday,positive


# 1. Engagement Analysis

# What is the total number of likes, comments, and shares across all platforms?

In [None]:
df['df_engagement'] = df['likes'] + df['comments'] + df['shares']

total_likes = df['likes'].sum()
total_comments = df['comments'].sum()
total_shares = df['shares'].sum()

print('Total Likes:', total_likes)
print('Total Comments:', total_comments)
print('Total Shares:', total_shares)

Total Likes: 238181
Total Comments: 20266
Total Shares: 41565


# Which platform has the highest total engagement (likes + comments + shares)?

In [None]:
platform_engagement = df.groupby('platform')['df_engagement'].sum()
print(platform_engagement)

highest_platform = platform_engagement.idxmax()
print('Highest platform with [likes, comments, shares]===>:',highest_platform)

platform
Facebook     109537
Instagram    135268
Twitter       55207
Name: df_engagement, dtype: int64
Highest platform with [likes, comments, shares]===>: Instagram


# Which post received the highest total engagement? Provide its Post ID and Platform.

In [None]:
top_post = df.loc[df['df_engagement'].idxmax()]
print(top_post)

post_id                       39
platform                Facebook
post_type                  video
post_time          1/1/2023 6:00
likes                       5000
comments                     430
shares                       980
post_day                  Sunday
sentiment_score          neutral
df_engagement               6410
Name: 38, dtype: object


# 2. Platform Comparison

### Calculate the average likes, comments, and shares per platform.

In [None]:
avg_engagement = df.groupby('platform')[['likes', 'comments', 'shares']].mean()
print('Average Engagement per Platform:\n',avg_engagement)

Average Engagement per Platform:
                  likes    comments      shares
platform                                      
Facebook   2699.750000  248.906250  474.375000
Instagram  2999.833333  232.444444  525.166667
Twitter    1368.593750  122.906250  233.718750


### Which platform has the highest average sentiment score (you can map sentiment categories to numerical values: positive=1, neutral=0, negative=-1)?



In [None]:
sentiment_map = {'positive': 1, 'neutral': 0, 'negative': -1}
df['sentiment_score'] = df['sentiment_score'].map(sentiment_map)

avg_sentiment = df.groupby('platform')['sentiment_score'].mean()
print('Average sentiment scores for each platform:', avg_sentiment)

Average sentiment scores for each platform: platform
Facebook    NaN
Instagram   NaN
Twitter     NaN
Name: sentiment_score, dtype: float64


# 3. Post Type Insights

### Which post type generates the highest average number of likes?


In [None]:
avg_likes_posts = df.groupby('post_type')['likes'].mean()
print("Average Likes by Post Type:\n", avg_likes_posts)

print('---------------------------------------------------------------')

total_engagement_post_type = df.groupby('post_type')['df_engagement'].sum()
print('Total engagement by post types: \n',total_engagement_post_type)

Average Likes by Post Type:
 post_type
carousel    2263.576923
image       2032.764706
poll        3061.076923
text        1815.285714
video       2906.782609
Name: likes, dtype: float64
---------------------------------------------------------------
Total engagement by post types: 
 post_type
carousel    78799
image       44225
poll        48703
text        46801
video       81484
Name: df_engagement, dtype: int64


### Compare total engagements across different post types—identify the most engaging post type overall.



In [None]:
post_type_engagement = df.groupby('post_type')['total_engagement'].sum()
post_type_engagement_sorted = post_type_engagement.sort_values(ascending=False)

print('Total engagement by post types: \n',post_type_engagement_sorted)

most_engagement_post_type = post_type_engagement_sorted.idxmax()
most_engagement_value = post_type_engagement_sorted.max()

print(f"\n Most engagement post type is '{most_engagement_post_type}' with total engagement of {most_engagement_value}")

Total engagement by post types: 
 post_type
video       81484
carousel    78799
poll        48703
text        46801
image       44225
Name: total_engagement, dtype: int64

 Most engagement post type is 'video' with total engagement of 81484


# 4. Time-based Analysis

### Which day of the week yields the highest average engagement?



In [None]:
avg_day_engagement = df.groupby('post_day')['total_engagement'].mean()
print(avg_day_engagement)

print('-------------------------------------------------------------')

df['post_hour'] = pd.to_datetime(df['post_time'],errors='coerce').dt.hour

day_hour_engagement = df.groupby(['post_day', 'post_hour'])['total_engagement'].mean()
print("Best Day-Hour Slot:\n", day_hour_engagement.sort_values(ascending=False).head(1))

post_day
Friday       3316.687500
Monday       2208.454545
Saturday     2467.400000
Sunday       3478.142857
Thursday     2854.500000
Tuesday      2989.533333
Wednesday    3264.944444
Name: total_engagement, dtype: float64
-------------------------------------------------------------
Best Day-Hour Slot:
 post_day  post_hour
Sunday    6            6410.0
Name: total_engagement, dtype: float64


### Identify the best time of the week (post day + hour) to post for maximum likes.



In [None]:
avg_day_engagement = df.groupby('post_day')['total_engagement'].mean()
print(avg_day_engagement)

print('-------------------------------------------------------------')



post_day
Friday       3316.687500
Monday       2208.454545
Saturday     2467.400000
Sunday       3478.142857
Thursday     2854.500000
Tuesday      2989.533333
Wednesday    3264.944444
Name: total_engagement, dtype: float64
-------------------------------------------------------------


# 5. Sentiment Insights

### Count the number of posts for each sentiment score category.



In [None]:
print(df['sentiment_score'].value_counts())

Series([], Name: count, dtype: int64)


### What is the average engagement (likes + comments + shares) for posts with:

1. Positive sentiment?

2. Neutral sentiment?

3. Negative sentiment?

In [None]:
avg_engagement_by_sentiment = df.groupby('sentiment_score')['total_engagement'].mean()

print(avg_engagement_by_sentiment)

Series([], Name: total_engagement, dtype: float64)


# 6. Custom Challenge (Advanced)


### Derive a new column called “Total Engagement” as the sum of likes, comments, and shares.



### Find the top 3 posts with highest total engagement—report their post IDs, platforms, and sentiment.



In [None]:
top_3_post = df.nlargest(3, 'total_engagement')
print('Top 3 post by engagement \n', top_3_post[['post_id','platform','total_engagement']])

Top 3 post by engagement 
     post_id   platform  total_engagement
38       39   Facebook              6410
30       31   Facebook              6222
73       74  Instagram              6198
