In [2]:
import numpy as np
import pandas as pd

In [3]:
sm = pd.read_csv('F:/Skill Up/Coding Practice/Np and pd/social_media_engagement.csv')
sm.head()

Unnamed: 0,PostID,UserID,PostDate,ContentType,Likes,Comments,Shares
0,101,201,2025-07-01,Image,120,10,5
1,102,202,2025-07-01,Video,340,45,30
2,103,203,2025-07-02,Text,80,5,2
3,104,201,2025-07-02,Image,150,12,6
4,105,204,2025-07-03,Video,300,40,25


# **1. Basic Aggregations:**

In [97]:
# Total number of posts
desc = sm.describe()
post_count = sm.PostID.count()
post_count

10

In [5]:
# Total number of likes, shares and comments
like_count = sm.Likes.sum()
share_count = sm.Shares.sum()
comment_count = sm.Comments.sum()
interaction_count = like_count + share_count + comment_count
interaction_count

2106

In [6]:
# average likes, shares, comments per post
avg_likes = sm.Likes.mean()
avg_shares = sm.Shares.mean()
avg_comments = sm.Comments.mean()



# 2. Content Type Analysis

In [65]:
#Content based total likes, shares and comments
content_like_count = sm.groupby(['ContentType']).Likes.sum()
content_share_count = sm.groupby(['ContentType']).Shares.sum()
content_comment_count =sm.groupby(['ContentType']).Comments.sum()

In [70]:
#The content type that has the highest average engagement per post

content_post_count = sm.groupby(['ContentType']).PostID.count()  
l = content_like_count.to_frame()
p = content_post_count.to_frame()
s = content_share_count.to_frame()
c = content_comment_count.to_frame()
content_details = pd.concat([l, s, c, p], axis=1)  #creating a new DataFrame with Content and its total engagement 
content_details = content_details.rename(columns={'PostID' : 'TotalPosts'})

#Max Avg Like Content
avg_likes_per_post = ((content_details.Likes) /  (content_details.TotalPosts))
max_avg_likes = avg_likes_per_post.idxmax()
print("Like:", max_avg_likes)

 #Max Avg share content
avg_shares_per_post = ((content_details.Shares) /  (content_details.TotalPosts))
max_avg_shares = avg_shares_per_post.idxmax()
print("Shares:", max_avg_shares)

 #Max Avg comment content
avg_comments_per_post = ((content_details.Comments) /  (content_details.TotalPosts))
max_avg_comments = avg_comments_per_post.idxmax()
print("Comments:", max_avg_comments)


Like: Video
Shares: Video
Comments: Video


# 3. User Engagement Analysis

In [108]:
#Identify top 3 users with the highest cumulative engagement (Likes + Comments + Shares)
# Calculate Total Engagement per post
sm['TotalEngagement'] = sm['Likes'] + sm['Shares'] + sm['Comments']

# Group by UserID to get cumulative engagement per user
user_total_engagement = sm.groupby('UserID')['TotalEngagement'].sum()

# Top 3 users by cumulative engagement
top_3_users = user_total_engagement.nlargest(3)
print("Top 3 Users with Highest Cumulative Engagement:\n", top_3_users)

Top 3 Users with Highest Cumulative Engagement:
 UserID
202    795
204    365
201    303
Name: TotalEngagement, dtype: int64


In [109]:
#Calculate average engagement per post per user
# Number of posts per user
user_post_counts = sm.groupby('UserID')['PostID'].count()

# Average engagement per post per user
avg_engagement_per_user = user_total_engagement / user_post_counts

print("\nAverage Engagement per Post per User:\n", avg_engagement_per_user)


Average Engagement per Post per User:
 UserID
201    151.5
202    397.5
203     87.0
204    365.0
205     65.0
206    230.0
207     54.0
208    207.0
dtype: float64


# 4. Temporal Trends

In [118]:
#Analyze daily trends: total likes, comments, shares per day.
engagement_date_count = sm.groupby('PostDate').TotalEngagement.sum()
engagement_date_count
# sm.head(1)


PostDate
2025-07-01    550
2025-07-02    255
2025-07-03    430
2025-07-04    610
2025-07-05    261
Name: TotalEngagement, dtype: int64

In [116]:
# Find which date had the highest total engagement.
hi_eng_date = engagement_date_count.idxmax()
hi_eng_date

'2025-07-04'

# 5. Engagement Ratios

In [122]:
# Compute Like-to-Comment ratio and Share-to-Like ratio for each post
sm['like_to_comment'] = (sm.Likes) / (sm.Comments)
sm['share_to_like'] = (sm.Shares) / (sm.Likes)
sm

Unnamed: 0,PostID,UserID,PostDate,ContentType,Likes,Comments,Shares,TotalEngagement,like_to_comment,share_to_like
0,101,201,2025-07-01,Image,120,10,5,135,12.0,0.041667
1,102,202,2025-07-01,Video,340,45,30,415,7.555556,0.088235
2,103,203,2025-07-02,Text,80,5,2,87,16.0,0.025
3,104,201,2025-07-02,Image,150,12,6,168,12.5,0.04
4,105,204,2025-07-03,Video,300,40,25,365,7.5,0.083333
5,106,205,2025-07-03,Text,60,4,1,65,15.0,0.016667
6,107,206,2025-07-04,Image,200,20,10,230,10.0,0.05
7,108,202,2025-07-04,Video,310,42,28,380,7.380952,0.090323
8,109,207,2025-07-05,Text,50,3,1,54,16.666667,0.02
9,110,208,2025-07-05,Image,180,18,9,207,10.0,0.05


In [128]:
# Identify posts with unusually high or low ratios.
lc_mean = sm.like_to_comment.mean()
lc_std = sm.like_to_comment.std()
sm['lc_z_score'] = ((sm.like_to_comment - lc_mean) / lc_std)


sl_mean = sm.share_to_like.mean()
sl_std = sm.share_to_like.std()
sm['sl_z_score'] = ((sm.share_to_like - sl_mean) / sl_std)

unusual_lc_post = sm.loc[((sm.lc_z_score > 1) | (sm.lc_z_score < -1))]
unusual_sl_post = sm.loc[((sm.sl_z_score > 1) | (sm.sl_z_score < -1))]

Unnamed: 0,PostID,UserID,PostDate,ContentType,Likes,Comments,Shares,TotalEngagement,like_to_comment,share_to_like,lc_z_score,sl_z_score
1,102,202,2025-07-01,Video,340,45,30,415,7.555556,0.088235,-1.099781,1.352092
2,103,203,2025-07-02,Text,80,5,2,87,16.0,0.025,1.278607,-0.915038
4,105,204,2025-07-03,Video,300,40,25,365,7.5,0.083333,-1.115428,1.176345
7,108,202,2025-07-04,Video,310,42,28,380,7.380952,0.090323,-1.148958,1.426926
8,109,207,2025-07-05,Text,50,3,1,54,16.666667,0.02,1.466374,-1.0943


# 6. Engagement Metric

In [144]:
# Create a new metric: Engagement Score = (Likes * 0.5) + (Comments * 2) + (Shares * 3).
# Rank posts by Engagement Score and list the top 5.
sm['EngagementMetric'] = sm.apply(lambda x: ((x.Likes * 0.5 ) + (x.Shares * 2) + (x.Comments * 3)), axis=1)
sm['rank'] = sm.EngagementMetric.rank(ascending = False, method = 'dense').astype(int)
top_5_posts = sm.nsmallest(5, 'rank')[['PostID', 'UserID', 'Likes', 'Comments', 'Shares', 'EngagementMetric', 'rank']]
print(top_5_posts)

   PostID  UserID    PostDate ContentType  Likes  Comments  Shares  \
1     102     202  2025-07-01       Video    340        45      30   
7     108     202  2025-07-04       Video    310        42      28   
4     105     204  2025-07-03       Video    300        40      25   
6     107     206  2025-07-04       Image    200        20      10   
9     110     208  2025-07-05       Image    180        18       9   

   TotalEngagement  like_to_comment  share_to_like  lc_z_score  sl_z_score  \
1              415         7.555556       0.088235   -1.099781    1.352092   
7              380         7.380952       0.090323   -1.148958    1.426926   
4              365         7.500000       0.083333   -1.115428    1.176345   
6              230        10.000000       0.050000   -0.411300   -0.018731   
9              207        10.000000       0.050000   -0.411300   -0.018731   

   EngagementMetric  rank  
1             365.0     1  
7             337.0     2  
4             320.0     3 