In [2]:
import numpy as np
import pandas as pd

In [3]:
ig = pd.read_csv('instagram.csv', sep=',')
ig

Unnamed: 0,post_id,account_id,account_type,follower_count,media_type,content_category,traffic_source,has_call_to_action,post_datetime,post_date,...,comments,shares,saves,reach,impressions,engagement_rate,followers_gained,caption_length,hashtags_count,performance_bucket_label
0,IG0000001,7,brand,3551,reel,Technology,Home Feed,1,2024-11-30 06:00:00,2024-11-30,...,5,7,34,4327,6230,0.0385,899,100,7,medium
1,IG0000002,20,creator,31095,image,Fitness,Hashtags,1,2025-08-15 15:00:00,2025-08-15,...,10,21,68,7451,8268,0.0663,805,122,5,viral
2,IG0000003,15,brand,8167,reel,Beauty,Reels Feed,0,2025-09-11 16:00:00,2025-09-11,...,2,1,22,1639,2616,0.0531,758,115,8,high
3,IG0000004,11,creator,9044,carousel,Music,External,0,2025-09-18 03:00:00,2025-09-18,...,0,7,0,2877,3171,0.0309,402,115,7,medium
4,IG0000005,8,creator,15986,reel,Technology,Profile,0,2025-03-21 09:00:00,2025-03-21,...,8,5,21,5350,8503,0.0221,155,112,9,low
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
29994,IG0029995,5,brand,10739,carousel,Travel,Reels Feed,0,2024-12-18 10:00:00,2024-12-18,...,1,2,5,1564,2493,0.0032,124,127,8,low
29995,IG0029996,3,brand,10018,image,Beauty,Hashtags,0,2025-05-05 15:00:00,2025-05-05,...,2,1,7,2042,2492,0.0209,310,114,12,low
29996,IG0029997,18,creator,7486,image,Photography,Explore,1,2025-05-26 10:00:00,2025-05-26,...,10,16,59,5887,7528,0.0558,223,115,4,high
29997,IG0029998,6,creator,10034,carousel,Technology,Explore,1,2025-08-02 19:00:00,2025-08-02,...,3,0,19,5372,6312,0.0333,978,124,4,medium


In [27]:
ig.columns

Index(['post_id', 'account_id', 'account_type', 'follower_count', 'media_type',
       'content_category', 'traffic_source', 'has_call_to_action',
       'post_datetime', 'post_date', 'post_hour', 'day_of_week', 'likes',
       'comments', 'shares', 'saves', 'reach', 'impressions',
       'engagement_rate', 'followers_gained', 'caption_length',
       'hashtags_count', 'performance_bucket_label'],
      dtype='object')

## A. ACCOUNT & AUDIENCE INSIGHTS

### Q1. How many unique accounts are present?


In [8]:
ig['account_id'].nunique()

20

### Q2. How many posts are created by brands vs creators?

In [9]:
ig['account_type'].value_counts()

account_type
creator    20944
brand       9055
Name: count, dtype: int64

### Q3. What is the average follower count by account type?


In [11]:
ig.groupby('account_type')['follower_count'].mean()

account_type
brand      10229.793043
creator    10299.279507
Name: follower_count, dtype: float64

### Q4. Do creators or brands achieve higher engagement rates?


In [12]:
ig.groupby('account_type')['engagement_rate'].mean()

account_type
brand      0.042074
creator    0.042122
Name: engagement_rate, dtype: float64

### Q5. Which account type gains more followers per post?

In [13]:
ig.groupby('account_type')['followers_gained'].mean()

account_type
brand      502.632247
creator    501.945521
Name: followers_gained, dtype: float64

### Q6. Is follower growth correlated with follower base size?

In [15]:
ig[['follower_count','followers_gained']].corr()

Unnamed: 0,follower_count,followers_gained
follower_count,1.0,0.012654
followers_gained,0.012654,1.0


### Q7. Do smaller accounts sometimes outperform larger ones in engagement?

In [24]:
ig.assign(size=pd.cut(ig['follower_count'], bins=[0,5000,17000,35000], labels=['Small','Mid','Large'])).groupby('size')['engagement_rate'].mean()

  ig.assign(size=pd.cut(ig['follower_count'], bins=[0,5000,17000,35000], labels=['Small','Mid','Large'])).groupby('size')['engagement_rate'].mean()


size
Small    0.042636
Mid      0.041907
Large    0.042099
Name: engagement_rate, dtype: float64

### Q8. What percentage of posts come from high-follower accounts?


In [25]:
(ig['follower_count'] > ig['follower_count'].quantile(0.75)).mean() * 100

np.float64(24.987499583319444)

### Q9. Which account types dominate viral posts?


In [30]:
ig[ig['performance_bucket_label']=='viral']['account_type'].value_counts()

account_type
creator    5230
brand      2270
Name: count, dtype: int64

### Q10. Are viral posts more common among creators or brands?

In [31]:
ig.assign(virality=ig['performance_bucket_label']=='viral').groupby('account_type')['virality'].mean()

account_type
brand      0.250690
creator    0.249714
Name: virality, dtype: float64

## B. CONTENT TYPE & FORMAT INSIGHTS

### Q11. Which media type (reel, image, carousel) is posted most frequently?


In [32]:
ig['media_type'].value_counts()

media_type
image       11927
carousel    10627
reel         7445
Name: count, dtype: int64

### Q12. Which media type generates the highest average engagement rate?


In [33]:
ig.groupby('media_type')['engagement_rate'].mean()

media_type
carousel    0.041829
image       0.042256
reel        0.042266
Name: engagement_rate, dtype: float64

### Q13. Which media type leads to the highest follower gain?


In [34]:
ig.groupby('media_type')['followers_gained'].mean()

media_type
carousel    506.861014
image       500.914312
reel        497.416387
Name: followers_gained, dtype: float64

### Q14. Do reels consistently outperform images and carousels?


In [35]:
ig.groupby('media_type')['reach'].mean()

media_type
carousel    6329.100499
image       6268.616249
reel        6197.831296
Name: reach, dtype: float64

### Q15. Which media type has the highest reach-to-impression ratio?


In [38]:
(ig['reach'] / ig['impressions']).groupby(ig['media_type']).mean()

media_type
carousel    0.749923
image       0.749146
reel        0.749793
dtype: float64

### Q16. Which content formats are more likely to go viral?


In [41]:
(ig['performance_bucket_label']=='viral').groupby(ig['media_type']).mean()

media_type
carousel    0.249083
image       0.252536
reel        0.247280
Name: performance_bucket_label, dtype: float64

### Q17. Is there a trade-off between reach and engagement by media type?


In [43]:
ig.groupby('media_type')[['reach','engagement_rate']].mean()

Unnamed: 0_level_0,reach,engagement_rate
media_type,Unnamed: 1_level_1,Unnamed: 2_level_1
carousel,6329.100499,0.041829
image,6268.616249,0.042256
reel,6197.831296,0.042266


### Q18. Which media type produces the highest saves per post?


In [44]:
ig.groupby('media_type')['saves'].mean()

media_type
carousel    42.440764
image       42.687180
reel        42.354332
Name: saves, dtype: float64

### Q19. Do carousel posts encourage more comments and shares?


In [45]:
ig.groupby('media_type')[['comments','shares']].mean()

Unnamed: 0_level_0,comments,shares
media_type,Unnamed: 1_level_1,Unnamed: 2_level_1
carousel,8.499294,14.333302
image,8.548336,14.568793
reel,8.511887,14.332035


### Q20. Are certain media types underutilized despite strong performance?


In [46]:
ig.groupby('media_type').agg(posts = ('post_id', 'count'), avg_engagement = ('engagement_rate', 'mean'))

Unnamed: 0_level_0,posts,avg_engagement
media_type,Unnamed: 1_level_1,Unnamed: 2_level_1
carousel,10627,0.041829
image,11927,0.042256
reel,7445,0.042266


## C. CONTENT CATEGORY INSIGHTS

### Q21. Which content categories are most common?


In [47]:
ig['content_category'].value_counts()

content_category
Photography    3035
Fashion        3034
Technology     3025
Lifestyle      3017
Food           3010
Fitness        3004
Music          3003
Travel         2968
Beauty         2953
Comedy         2950
Name: count, dtype: int64

### Q22. Which category receives the highest average engagement rate?


In [48]:
ig.groupby('content_category')['engagement_rate'].mean()

content_category
Beauty         0.042197
Comedy         0.041854
Fashion        0.042615
Fitness        0.042720
Food           0.042078
Lifestyle      0.041646
Music          0.042808
Photography    0.041452
Technology     0.042023
Travel         0.041677
Name: engagement_rate, dtype: float64

### Q23. Which categories generate the most reach?


In [49]:
ig.groupby('content_category')['reach'].mean()

content_category
Beauty         6340.979343
Comedy         6303.413898
Fashion        6347.159855
Fitness        6218.894807
Food           6248.638538
Lifestyle      6261.529997
Music          6274.301365
Photography    6417.292916
Technology     6092.297521
Travel         6220.456873
Name: reach, dtype: float64

### Q24. Which categories lead to higher follower growth?


In [50]:
ig.groupby('content_category')['followers_gained'].mean()

content_category
Beauty         505.479512
Comedy         498.470508
Fashion        497.576796
Fitness        497.250000
Food           503.049502
Lifestyle      508.992377
Music          512.873793
Photography    503.241516
Technology     491.641983
Travel         503.033019
Name: followers_gained, dtype: float64

### Q25. Are some categories consistently low-performing?


In [51]:
ig.groupby('content_category')['engagement_rate'].mean().sort_values()

content_category
Photography    0.041452
Lifestyle      0.041646
Travel         0.041677
Comedy         0.041854
Technology     0.042023
Food           0.042078
Beauty         0.042197
Fashion        0.042615
Fitness        0.042720
Music          0.042808
Name: engagement_rate, dtype: float64

### Q26. Which categories produce the most viral posts?


In [52]:
ig[ig['performance_bucket_label']=='viral']['content_category'].value_counts()

content_category
Fashion        804
Music          768
Fitness        758
Food           757
Comedy         745
Travel         743
Photography    739
Beauty         736
Technology     733
Lifestyle      717
Name: count, dtype: int64

### Q27. Do niche categories outperform broad categories?


In [54]:
ig.groupby('content_category')['engagement_rate'].describe()

Unnamed: 0_level_0,count,mean,std,min,25%,50%,75%,max
content_category,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
Beauty,2953.0,0.042197,0.02485,0.0013,0.024,0.0403,0.0569,0.2531
Comedy,2950.0,0.041854,0.023277,0.002,0.0238,0.041,0.0571,0.2658
Fashion,3034.0,0.042615,0.02404,0.0014,0.0247,0.0412,0.0578,0.2421
Fitness,3004.0,0.04272,0.024505,0.0008,0.024675,0.041,0.0574,0.2274
Food,3010.0,0.042078,0.024832,0.0015,0.023325,0.0407,0.057,0.271
Lifestyle,3017.0,0.041646,0.023047,0.0004,0.0243,0.0404,0.0562,0.2414
Music,3003.0,0.042808,0.024926,0.0016,0.02425,0.0415,0.05775,0.2476
Photography,3035.0,0.041452,0.024037,0.0003,0.0234,0.0399,0.0563,0.2544
Technology,3025.0,0.042023,0.023952,0.0015,0.024,0.0406,0.0566,0.2234
Travel,2968.0,0.041677,0.023386,0.0,0.0239,0.04,0.0567,0.2371


### Q28. Is engagement evenly distributed across categories?


In [59]:
ig.groupby('content_category')['likes'].sum()

content_category
Beauty         851764
Comedy         852554
Fashion        900538
Fitness        868575
Food           865766
Lifestyle      862091
Music          877275
Photography    881337
Technology     840236
Travel         829184
Name: likes, dtype: int64

### Q29. Which categories generate more saves than likes?


In [61]:
(ig['saves'] / ig['likes']).groupby(ig['content_category']).mean()

content_category
Beauty         inf
Comedy         inf
Fashion        inf
Fitness        inf
Food           inf
Lifestyle      inf
Music          inf
Photography    inf
Technology     inf
Travel         inf
dtype: float64

### 30. Are certain categories more sensitive to posting time?


In [62]:
ig.groupby(['content_category','post_hour'])['engagement_rate'].mean()

content_category  post_hour
Beauty            0            0.039599
                  1            0.038694
                  2            0.040859
                  3            0.042227
                  4            0.042111
                                 ...   
Travel            19           0.042880
                  20           0.040216
                  21           0.039164
                  22           0.039563
                  23           0.041447
Name: engagement_rate, Length: 240, dtype: float64

## D. TRAFFIC SOURCE INSIGHTS

### Q31. Which traffic source drives the most reach?


In [64]:
ig.groupby('traffic_source')['reach'].sum()

traffic_source
Explore       30254148
External      31938510
Hashtags      32048786
Home Feed     31389769
Profile       30873169
Reels Feed    31663609
Name: reach, dtype: int64

### Q32. Which traffic source has the highest engagement rate?


In [65]:
ig.groupby('traffic_source')['engagement_rate'].mean()

traffic_source
Explore       0.041917
External      0.042345
Hashtags      0.041821
Home Feed     0.041548
Profile       0.042568
Reels Feed    0.042453
Name: engagement_rate, dtype: float64

### Q33. Are Home Feed posts more reliable than Reels Feed posts?


In [67]:
(ig['performance_bucket_label']=='viral').groupby(ig['traffic_source']).mean()

traffic_source
Explore       0.246615
External      0.258941
Hashtags      0.246494
Home Feed     0.240284
Profile       0.258565
Reels Feed    0.249304
Name: performance_bucket_label, dtype: float64

### Q34. Which traffic source contributes most to viral performance?


In [68]:
ig.groupby('traffic_source')['followers_gained'].mean()

traffic_source
Explore       502.752975
External      502.770629
Hashtags      492.170057
Home Feed     502.544092
Profile       511.135228
Reels Feed    501.749105
Name: followers_gained, dtype: float64

### Q36. Which traffic sources drive higher saves and shares?


In [69]:
ig.groupby('traffic_source')[['saves', 'shares']].mean()

Unnamed: 0_level_0,saves,shares
traffic_source,Unnamed: 1_level_1,Unnamed: 2_level_1
Explore,41.974354,14.145055
External,43.645355,14.803397
Hashtags,42.22062,14.409046
Home Feed,41.358059,14.025843
Profile,42.898428,14.601169
Reels Feed,43.012137,14.574015


### Q37. Are external traffic posts worth the effort?


In [70]:
ig[ig['traffic_source']=='External']['engagement_rate'].mean()

np.float64(0.04234453546453546)

### Q38. Which traffic sources underperform despite high impressions?


In [72]:
ig.groupby('traffic_source')[['engagement_rate','impressions']].mean()

Unnamed: 0_level_0,engagement_rate,impressions
traffic_source,Unnamed: 1_level_1,Unnamed: 2_level_1
Explore,0.041917,8379.606894
External,0.042345,8599.33047
Hashtags,0.041821,8566.884258
Home Feed,0.041548,8370.805287
Profile,0.042568,8385.872229
Reels Feed,0.042453,8496.181854


### Q39. Is traffic source effectiveness dependent on media type?


In [73]:
ig.groupby(['traffic_source','media_type'])['engagement_rate'].mean()

traffic_source  media_type
Explore         carousel      0.041434
                image         0.042209
                reel          0.042121
External        carousel      0.042329
                image         0.042277
                reel          0.042480
Hashtags        carousel      0.042209
                image         0.041684
                reel          0.041493
Home Feed       carousel      0.041250
                image         0.041368
                reel          0.042264
Profile         carousel      0.041874
                image         0.042791
                reel          0.043165
Reels Feed      carousel      0.041871
                image         0.043222
                reel          0.042102
Name: engagement_rate, dtype: float64

### Q40. Which traffic source provides the best reach-to-engagement ratio?


In [75]:
(ig['reach'] / ig['engagement_rate']).groupby(ig['traffic_source']).mean()

traffic_source
Explore       2.281055e+05
External      2.281408e+05
Hashtags      2.291097e+05
Home Feed              inf
Profile       2.102783e+05
Reels Feed    2.285449e+05
dtype: float64

## E. CALL-TO-ACTION (CTA) INSIGHTS

### Q41. Do posts with a call-to-action have higher engagement?


In [77]:
ig.groupby('has_call_to_action')['engagement_rate'].mean()

has_call_to_action
0    0.042232
1    0.041875
Name: engagement_rate, dtype: float64

### Q42. How much additional follower growth comes from CTA posts?


In [78]:
ig.groupby('has_call_to_action')['followers_gained'].mean()

has_call_to_action
0    501.838247
1    502.740132
Name: followers_gained, dtype: float64

### Q43. Do CTA posts increase comments more than likes?


In [79]:
ig.groupby('has_call_to_action')[['comments','likes']].mean()

Unnamed: 0_level_0,comments,likes
has_call_to_action,Unnamed: 1_level_1,Unnamed: 2_level_1
0,8.513565,288.165438
1,8.537513,286.697888


### Q44. Are CTA posts more likely to be viral?


In [80]:
(ig['performance_bucket_label']=='viral').groupby(ig['has_call_to_action']).mean()

has_call_to_action
0    0.251331
1    0.247539
Name: performance_bucket_label, dtype: float64

### Q45. Does CTA effectiveness depend on media type?


In [82]:
ig.groupby(['has_call_to_action','media_type'])['engagement_rate'].mean()

has_call_to_action  media_type
0                   carousel      0.041984
                    image         0.042322
                    reel          0.042440
1                   carousel      0.041539
                    image         0.042133
                    reel          0.041941
Name: engagement_rate, dtype: float64

### Q46. Do CTAs improve saves and shares?


In [83]:
ig.groupby('has_call_to_action')[['saves','shares']].mean()

Unnamed: 0_level_0,saves,shares
has_call_to_action,Unnamed: 1_level_1,Unnamed: 2_level_1
0,42.544891,14.453522
1,42.465736,14.376374


### Q47. Are CTAs more effective for creators or brands?


In [85]:
ig.groupby(['has_call_to_action','account_type'])['engagement_rate'].mean()

has_call_to_action  account_type
0                   brand           0.042199
                    creator         0.042246
1                   brand           0.041836
                    creator         0.041892
Name: engagement_rate, dtype: float64

### Q48. Is there any downside to overusing CTAs?


In [86]:
ig.groupby('has_call_to_action')['caption_length'].mean()

has_call_to_action
0    119.995086
1    120.241900
Name: caption_length, dtype: float64

### Q49. Do CTA posts perform better in specific content categories?


In [87]:
ig.groupby(['has_call_to_action','content_category'])['engagement_rate'].mean()

has_call_to_action  content_category
0                   Beauty              0.042598
                    Comedy              0.042517
                    Fashion             0.042263
                    Fitness             0.042367
                    Food                0.041812
                    Lifestyle           0.042303
                    Music               0.042785
                    Photography         0.041413
                    Technology          0.042321
                    Travel              0.041967
1                   Beauty              0.041451
                    Comedy              0.040588
                    Fashion             0.043295
                    Fitness             0.043364
                    Food                0.042558
                    Lifestyle           0.040464
                    Music               0.042849
                    Photography         0.041528
                    Technology          0.041471
                    Travel      

### Q50. What is the optimal use of CTAs for engagement growth?


In [89]:
ig.groupby('has_call_to_action')['performance_bucket_label'].value_counts(normalize=True)

has_call_to_action  performance_bucket_label
0                   high                        0.252764
                    viral                       0.251331
                    medium                      0.249437
                    low                         0.246468
1                   low                         0.256619
                    medium                      0.250980
                    viral                       0.247539
                    high                        0.244863
Name: proportion, dtype: float64

## F. POST TIMING & TEMPORAL INSIGHTS

### Q51. Which hour of the day yields the highest engagement rate?


In [92]:
ig.groupby('post_hour')['engagement_rate'].mean().sort_values(ascending=False)

post_hour
3     0.043425
8     0.043282
2     0.043279
17    0.042693
20    0.042684
14    0.042640
4     0.042377
5     0.042355
16    0.042245
9     0.042196
21    0.042152
12    0.042066
18    0.042046
0     0.041930
13    0.041898
11    0.041882
1     0.041882
6     0.041605
22    0.041549
19    0.041523
23    0.041367
10    0.041301
15    0.041271
7     0.041018
Name: engagement_rate, dtype: float64

### Q52. Which day of the week produces the most reach?


In [93]:
ig.groupby('day_of_week')['engagement_rate'].mean().sort_values(ascending=False)

day_of_week
Tuesday      0.042652
Sunday       0.042484
Friday       0.042361
Saturday     0.042093
Thursday     0.041941
Wednesday    0.041630
Monday       0.041600
Name: engagement_rate, dtype: float64

### Q53. Are weekends better than weekdays for engagement?


In [94]:
ig.assign(weekend=ig['day_of_week'].isin(['Saturday','Sunday'])).groupby('weekend')['engagement_rate'].mean()

weekend
False    0.042037
True     0.042287
Name: engagement_rate, dtype: float64

### Q54. Is there a consistent best posting window?


In [95]:
ig.groupby(['day_of_week','post_hour'])['engagement_rate'].mean()

day_of_week  post_hour
Friday       0            0.042891
             1            0.041221
             2            0.043596
             3            0.045778
             4            0.042958
                            ...   
Wednesday    19           0.042347
             20           0.041667
             21           0.042436
             22           0.041352
             23           0.041016
Name: engagement_rate, Length: 168, dtype: float64

### Q55. Do viral posts cluster around specific times?


In [97]:
ig[ig['performance_bucket_label']=='viral'].groupby('post_hour').size()

post_hour
0     318
1     310
2     325
3     326
4     334
5     299
6     304
7     288
8     316
9     318
10    295
11    341
12    319
13    313
14    332
15    322
16    306
17    314
18    291
19    293
20    320
21    299
22    323
23    294
dtype: int64

### Q56. Does posting time affect follower growth?


In [98]:
ig.groupby('post_hour')['followers_gained'].mean()

post_hour
0     515.816758
1     491.766044
2     506.441841
3     501.150478
4     496.232467
5     512.358553
6     497.473469
7     510.821821
8     482.923510
9     495.334932
10    499.439465
11    507.698363
12    504.531275
13    501.159375
14    492.606864
15    507.043070
16    506.182645
17    495.410628
18    508.714170
19    500.228868
20    497.775943
21    499.280234
22    497.989062
23    523.914333
Name: followers_gained, dtype: float64

### Q57. Are late-night posts underperforming?


In [102]:
ig[ig['post_hour']>=22]['engagement_rate'].mean()

np.float64(0.04146054530874098)

### Q58. Which posting hours lead to higher saves?


In [103]:
ig.groupby('post_hour')['saves'].mean().sort_values(ascending=False).head(5)

post_hour
3     47.321656
21    44.849041
2     44.745607
17    44.725443
8     44.265700
Name: saves, dtype: float64

### Q59. Does posting time matter more for reels than images?


In [105]:
ig.groupby(['media_type','post_hour'])['engagement_rate'].mean()

media_type  post_hour
carousel    0            0.042067
            1            0.042821
            2            0.042506
            3            0.042426
            4            0.041826
                           ...   
reel        19           0.041702
            20           0.040836
            21           0.043733
            22           0.042383
            23           0.042395
Name: engagement_rate, Length: 72, dtype: float64

### Q60. Are certain days oversaturated with posts?


In [108]:
ig['day_of_week'].value_counts()

day_of_week
Thursday     4351
Tuesday      4351
Wednesday    4341
Monday       4279
Saturday     4254
Friday       4218
Sunday       4205
Name: count, dtype: int64

## G. CAPTION & HASHTAG STRATEGY INSIGHTS

### Q61. Is there a relationship between caption length and engagement?


In [109]:
ig.groupby('caption_length')['engagement_rate'].mean()

caption_length
70     0.011800
79     0.008500
80     0.036633
81     0.063400
82     0.033400
         ...   
160    0.014500
161    0.026800
163    0.026550
165    0.012750
166    0.012700
Name: engagement_rate, Length: 87, dtype: float64

### Q62. Do longer captions generate more comments?


In [111]:
ig[['caption_length','comments']].corr()

Unnamed: 0,caption_length,comments
caption_length,1.0,0.002478
comments,0.002478,1.0


### Q63. Is there an optimal hashtag count for engagement?


In [113]:
ig.groupby('hashtags_count')['engagement_rate'].mean()

hashtags_count
0     0.036070
1     0.047177
2     0.042819
3     0.041679
4     0.040964
5     0.042145
6     0.042490
7     0.042119
8     0.042325
9     0.042316
10    0.041753
11    0.042622
12    0.041709
13    0.041198
14    0.042686
15    0.040637
16    0.041024
17    0.044574
18    0.040041
19    0.050937
20    0.028633
21    0.087100
Name: engagement_rate, dtype: float64

### Q64. Do more hashtags always mean higher reach?


In [114]:
ig[['hashtags_count','reach']].corr()

Unnamed: 0,hashtags_count,reach
hashtags_count,1.0,0.002207
reach,0.002207,1.0


### Q65. Are viral posts associated with specific hashtag ranges?


In [116]:
ig[ig['performance_bucket_label']=='viral']['hashtags_count'].describe()

count    7500.00000
mean        8.00960
std         2.83298
min         0.00000
25%         6.00000
50%         8.00000
75%        10.00000
max        21.00000
Name: hashtags_count, dtype: float64

### Q66. Do hashtags impact impressions more than engagement?


In [117]:
ig[['hashtags_count','impressions']].corr()

Unnamed: 0,hashtags_count,impressions
hashtags_count,1.0,0.0018
impressions,0.0018,1.0


### Q67. Which hashtag range maximizes follower growth?


In [118]:
ig.groupby('hashtags_count')['followers_gained'].mean()

hashtags_count
0     440.500000
1     580.704225
2     495.932945
3     506.686164
4     500.270223
5     504.377274
6     499.824205
7     501.402456
8     506.569231
9     504.019832
10    492.508059
11    508.430978
12    510.365751
13    482.394105
14    486.622093
15    543.480620
16    497.285714
17    449.000000
18    505.250000
19    409.000000
20    585.666667
21    525.000000
Name: followers_gained, dtype: float64

### Q68. Is hashtag effectiveness dependent on content category?


In [119]:
ig.groupby(['hashtags_count','content_category'])['engagement_rate'].mean()

hashtags_count  content_category
0               Fashion             0.0570
                Fitness             0.0279
                Food                0.0667
                Music               0.0229
                Technology          0.0365
                                     ...  
20              Fitness             0.0536
                Food                0.0320
                Music               0.0211
                Technology          0.0119
21              Technology          0.0871
Name: engagement_rate, Length: 197, dtype: float64

### Q69. Are hashtags less important for reels?


In [121]:
ig[ig['media_type']=='reel'][['hashtags_count','engagement_rate']].corr()

Unnamed: 0,hashtags_count,engagement_rate
hashtags_count,1.0,0.007528
engagement_rate,0.007528,1.0


### Q70. Do excessive hashtags hurt performance?


In [122]:
ig[ig['hashtags_count']>20]['engagement_rate'].mean()

np.float64(0.0871)

## H. PERFORMANCE & VIRALITY INSIGHTS

### Q71. What percentage of posts fall into each performance bucket?


In [124]:
ig['performance_bucket_label'].value_counts(normalize=True)

performance_bucket_label
viral     0.250008
high      0.250008
low       0.250008
medium    0.249975
Name: proportion, dtype: float64

### Q72. What differentiates viral posts from high-performing posts?


In [125]:
ig.groupby('performance_bucket_label')[['reach','engagement_rate']].mean()

Unnamed: 0_level_0,reach,engagement_rate
performance_bucket_label,Unnamed: 1_level_1,Unnamed: 2_level_1
high,6334.0428,0.049708
low,6221.994,0.017316
medium,6295.096413,0.033279
viral,6238.7716,0.068124


### Q73. Do viral posts have higher reach or higher engagement rate?


In [126]:
ig.groupby('performance_bucket_label')[['reach','engagement_rate']].corr()

Unnamed: 0_level_0,Unnamed: 1_level_0,reach,engagement_rate
performance_bucket_label,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
high,reach,1.0,0.020538
high,engagement_rate,0.020538,1.0
low,reach,1.0,-0.007772
low,engagement_rate,-0.007772,1.0
medium,reach,1.0,-0.007254
medium,engagement_rate,-0.007254,1.0
viral,reach,1.0,0.013154
viral,engagement_rate,0.013154,1.0


### Q74. Are viral posts driven more by shares or saves?


In [127]:
ig.groupby('performance_bucket_label')[['shares','saves']].mean()

Unnamed: 0_level_0,shares,saves
performance_bucket_label,Unnamed: 1_level_1,Unnamed: 2_level_1
high,17.596,51.889333
low,5.921867,17.6932
medium,11.679957,34.285905
viral,22.508267,66.1996


### Q75. Is follower growth significantly higher for viral posts?


In [133]:
ig.assign(is_viral = ig['performance_bucket_label'] == 'viral').groupby('is_viral')['followers_gained'].mean()

is_viral
False    501.93333
True     502.81120
Name: followers_gained, dtype: float64

### Q76. Do viral posts rely on timing, content, or format?


In [136]:
ig.assign(is_viral = ig['performance_bucket_label'] == 'viral').groupby(['post_hour','content_category','media_type'])['is_viral'].mean()

post_hour  content_category  media_type
0          Beauty            carousel      0.224490
                             image         0.190476
                             reel          0.156250
           Comedy            carousel      0.276596
                             image         0.316667
                                             ...   
23         Technology        image         0.282609
                             reel          0.172414
           Travel            carousel      0.230769
                             image         0.340909
                             reel          0.212121
Name: is_viral, Length: 720, dtype: float64

### Q78. Are viral posts repeatable by the same accounts?


In [139]:
ig.assign(is_viral = ig['performance_bucket_label'] == 'viral')['account_id'].value_counts()

account_id
15    1599
13    1599
17    1572
16    1569
4     1530
19    1517
12    1508
10    1502
5     1498
18    1494
3     1493
9     1480
20    1473
7     1472
8     1472
2     1471
1     1462
6     1452
14    1444
11    1392
Name: count, dtype: int64

### Q79. Do viral posts sacrifice engagement quality for reach?


In [140]:
ig.groupby('performance_bucket_label')['engagement_rate'].mean()

performance_bucket_label
high      0.049708
low       0.017316
medium    0.033279
viral     0.068124
Name: engagement_rate, dtype: float64

### Q80. What combination of factors most often leads to virality?


In [145]:
ig[ig['performance_bucket_label']=='viral'][['media_type','content_category']].value_counts()

media_type  content_category
image       Fashion             324
            Fitness             319
            Music               313
            Food                308
            Beauty              297
            Lifestyle           297
            Comedy              296
            Travel              292
            Technology          286
            Photography         280
carousel    Comedy              273
            Food                269
            Travel              268
            Photography         268
            Fashion             268
            Lifestyle           264
            Beauty              263
            Technology          263
            Music               263
            Fitness             248
reel        Fashion             212
            Music               192
            Photography         191
            Fitness             191
            Technology          184
            Travel              183
            Food                180

## I. ADVANCED BUSINESS / STRATEGY INSIGHTS

### Q81. What defines a high-performing Instagram post in this dataset?


In [148]:
ig[ig['performance_bucket_label']=='viral'].describe()

Unnamed: 0,account_id,follower_count,has_call_to_action,post_hour,likes,comments,shares,saves,reach,impressions,engagement_rate,followers_gained,caption_length,hashtags_count
count,7500.0,7500.0,7500.0,7500.0,7500.0,7500.0,7500.0,7500.0,7500.0,7500.0,7500.0,7500.0,7500.0,7500.0
mean,10.577467,10282.279733,0.345333,11.421467,445.670533,13.2272,22.508267,66.1996,6238.7716,8066.452933,0.068124,502.8112,119.998667,8.0096
std,5.761148,6719.50501,0.475509,6.90938,421.286415,13.320572,21.614142,63.549266,4997.894881,6523.576938,0.021513,291.499699,10.842629,2.83298
min,1.0,3083.0,0.0,0.0,0.0,0.0,0.0,0.0,380.0,476.0,0.0029,0.0,81.0,0.0
25%,5.0,4972.0,0.0,5.0,208.0,6.0,10.0,30.0,3049.75,3925.25,0.0603,251.0,113.0,6.0
50%,11.0,9044.0,0.0,11.0,340.0,10.0,17.0,51.0,4853.5,6308.5,0.065,502.0,120.0,8.0
75%,16.0,13798.0,1.0,17.0,553.0,17.0,28.0,83.0,7829.5,10094.25,0.0718,756.25,127.0,10.0
max,20.0,31095.0,1.0,23.0,10632.0,339.0,516.0,1542.0,54015.0,76171.0,0.271,1000.0,157.0,21.0


### Q82. Which factors contribute most to follower growth?


In [150]:
ig[['followers_gained','reach','impressions','engagement_rate','shares',
    'saves','likes','comments']].corr()['followers_gained'].sort_values(ascending=False)

followers_gained    1.000000
engagement_rate     0.000509
shares             -0.002571
saves              -0.003144
reach              -0.004840
likes              -0.005274
comments           -0.005703
impressions        -0.006178
Name: followers_gained, dtype: float64

### Q83. Can engagement rate alone predict performance buckets?


In [151]:
ig.groupby('performance_bucket_label')['engagement_rate'].mean()

performance_bucket_label
high      0.049708
low       0.017316
medium    0.033279
viral     0.068124
Name: engagement_rate, dtype: float64

### Q84. Are impressions or reach more important for virality?


In [152]:
ig.groupby('performance_bucket_label')[['reach','impressions']].corr()

Unnamed: 0_level_0,Unnamed: 1_level_0,reach,impressions
performance_bucket_label,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
high,reach,1.0,0.985786
high,impressions,0.985786,1.0
low,reach,1.0,0.986377
low,impressions,0.986377,1.0
medium,reach,1.0,0.9868
medium,impressions,0.9868,1.0
viral,reach,1.0,0.986339
viral,impressions,0.986339,1.0


### Q85. Which metric should creators optimize first?


In [153]:
ig[['followers_gained', 'engagement_rate', 'reach', 'impressions', 'shares',
    'saves']].corr()['followers_gained'].sort_values(ascending=False)

followers_gained    1.000000
engagement_rate     0.000509
shares             -0.002571
saves              -0.003144
reach              -0.004840
impressions        -0.006178
Name: followers_gained, dtype: float64

### Q86. Are there diminishing returns on posting frequency?


In [154]:
avg_engg = ig.groupby('account_id')['engagement_rate'].mean()
posting_freq = ig.groupby('account_id').size()
posting_freq.corr(avg_engg)

np.float64(0.15966759125039065)

### Q87. What is the best strategy for small accounts?


In [158]:
small = ig[ig['follower_count'] < ig['follower_count'].quantile(0.25)]

In [159]:
small.groupby(['media_type','content_category'])['engagement_rate'].mean()

media_type  content_category
carousel    Beauty              0.039022
            Comedy              0.044632
            Fashion             0.042988
            Fitness             0.040176
            Food                0.044305
            Lifestyle           0.040269
            Music               0.046097
            Photography         0.040612
            Technology          0.041399
            Travel              0.041428
image       Beauty              0.046309
            Comedy              0.041369
            Fashion             0.040952
            Fitness             0.042999
            Food                0.044436
            Lifestyle           0.043104
            Music               0.043998
            Photography         0.042977
            Technology          0.041609
            Travel              0.041712
reel        Beauty              0.044713
            Comedy              0.041003
            Fashion             0.045045
            Fitness         

### Q88. What content strategy should brands follow?


In [161]:
branded = ig[ig['account_type'] == 'brand']

In [162]:
branded.groupby(['media_type','content_category'])['engagement_rate'].mean()

media_type  content_category
carousel    Beauty              0.039172
            Comedy              0.041385
            Fashion             0.041045
            Fitness             0.043350
            Food                0.042533
            Lifestyle           0.042325
            Music               0.043416
            Photography         0.039467
            Technology          0.041498
            Travel              0.041020
image       Beauty              0.043180
            Comedy              0.042638
            Fashion             0.042897
            Fitness             0.041946
            Food                0.042815
            Lifestyle           0.040427
            Music               0.042774
            Photography         0.042134
            Technology          0.041394
            Travel              0.042620
reel        Beauty              0.043280
            Comedy              0.043708
            Fashion             0.045581
            Fitness         

### Q89. What mistakes lead to consistently low performance?


In [163]:
ig[ig['performance_bucket_label']=='Low'][['engagement_rate','hashtags_count','caption_length','reach','impressions']].describe()

Unnamed: 0,engagement_rate,hashtags_count,caption_length,reach,impressions
count,0.0,0.0,0.0,0.0,0.0
mean,,,,,
std,,,,,
min,,,,,
25%,,,,,
50%,,,,,
75%,,,,,
max,,,,,


### Q90. How can this data guide content planning decisions?

In [164]:
ig[ig['performance_bucket_label']=='high'].groupby(
    ['media_type','content_category','traffic_source']
).size().sort_values(ascending=False)


media_type  content_category  traffic_source
image       Technology        Reels Feed        64
                              Hashtags          63
            Fashion           Hashtags          60
            Lifestyle         Home Feed         59
carousel    Travel            Hashtags          58
                                                ..
reel        Photography       Profile           25
            Comedy            Explore           23
                              Home Feed         23
            Travel            Profile           21
            Food              Explore           18
Length: 180, dtype: int64

In [165]:
ig.groupby(['media_type','content_category'])['engagement_rate'].mean()


media_type  content_category
carousel    Beauty              0.040606
            Comedy              0.042030
            Fashion             0.041883
            Fitness             0.042297
            Food                0.042583
            Lifestyle           0.041702
            Music               0.043050
            Photography         0.040699
            Technology          0.041742
            Travel              0.041753
image       Beauty              0.042915
            Comedy              0.041382
            Fashion             0.042613
            Fitness             0.042992
            Food                0.042508
            Lifestyle           0.041673
            Music               0.042935
            Photography         0.041544
            Technology          0.042092
            Travel              0.041930
reel        Beauty              0.043432
            Comedy              0.042385
            Fashion             0.043610
            Fitness         