In [17]:
# Cluster level user identification and profiling

import pandas as pd
df = pd.read_csv("C:\\Users\\User\\.vscode\\APP USER BEHAVIOR SEGMENTATION SYSTEM\\outputs\\final_clustered_df.csv")

# Extracted user lists for each cluster (High, Moderate, Low, Occasional users)

users_per_cluster = df.groupby('user_category')['user_id'].apply(list)

for category, users in users_per_cluster.items():
    print(f"\n{category} (sample users):")
    print(users)




high_engagement_users (sample users):
[100003, 100005, 100007, 100009, 100011, 100018, 100019, 100022, 100023, 100024, 100032, 100033, 100034, 100035, 100038, 100039, 100040, 100045, 100049, 100050, 100056, 100057, 100059, 100067, 100084, 100088, 100092, 100093, 100097, 100103, 100108, 100109, 100113, 100114, 100116, 100117, 100119, 100120, 100123, 100125, 100133, 100138, 100139, 100145, 100146, 100149, 100151, 100152, 100154, 100156, 100161, 100162, 100168, 100169, 100170, 100179, 100180, 100185, 100188, 100190, 100193, 100194, 100195, 100198, 100199, 100202, 100204, 100216, 100220, 100222, 100224, 100225, 100226, 100227, 100229, 100233, 100237, 100243, 100250, 100257, 100265, 100270, 100272, 100276, 100280, 100281, 100282, 100283, 100285, 100286, 100287, 100290, 100292, 100293, 100295, 100297, 100298, 100301, 100307, 100312, 100314, 100318, 100319, 100321, 100326, 100328, 100332, 100334, 100338, 100342, 100361, 100363, 100367, 100369, 100375, 100382, 100383, 100385, 100386, 100388, 

In [18]:
# Counted the number of customers in each group

cluster_counts = df['user_category'].value_counts()
print(cluster_counts)


user_category
high_engagement_users        15167
moderate_engagement_users    14724
low_engagement_users         11716
occasional_users              8393
Name: count, dtype: int64


In [19]:
# Analyzed average engagement, session behavior, and churn risk per cluster

cluster_analysis = df.groupby('cluster')[
    ['sessions_per_week',
     'avg_session_duration_min',
     'daily_active_minutes',
     'feature_clicks_per_session',
     'in_app_search_count',
     'pages_viewed_per_session',
     'notifications_opened_per_week',
     'churn_risk_score',
     'engagement_score']
].mean()

print(cluster_analysis)

         sessions_per_week  avg_session_duration_min  daily_active_minutes  \
cluster                                                                      
0                 7.982759                 13.038483             44.993354   
1                 8.023829                 13.205416             45.113927   
2                 8.071516                 13.093237             45.093480   
3                 7.929650                 13.260061             45.393757   

         feature_clicks_per_session  in_app_search_count  \
cluster                                                    
0                         11.993940             4.009133   
1                         11.959013             4.000953   
2                         11.982138             4.013244   
3                         12.090591             3.976462   

         pages_viewed_per_session  notifications_opened_per_week  \
cluster                                                            
0                       13.501024 

In [20]:
# Created customer-level profiles to understand behavior patterns within each group
customer_profile = df.groupby('cluster').agg({
    'user_id': 'count',
    'age': 'count',
    'engagement_score' : 'mean',
    'churn_risk_score' : 'mean'
})

print(customer_profile.sort_values(by='engagement_score'))

         user_id    age  engagement_score  churn_risk_score
cluster                                                    
1           8393   8393         64.841125          0.498885
0          11716  11716         64.939334          0.506661
2          14724  14724         64.949150          0.501902
3          15167  15167         64.987693          0.496376


In [21]:
# 10. Business Insight Generation & Customer Action Mapping
# Identified high-value customers for loyalty and premium offers

high_value_customers = df[df['user_category'] == 'high_engagement_users']
print(high_value_customers)

       user_id  age  gender    country device_type  app_version  \
3       100003   25    male      india     android          2.0   
5       100005   56    male      india     android          1.0   
7       100007   40  female         uk     android          2.1   
9       100009   28    male  singapore     android          1.2   
11      100011   53    male     canada     android          2.1   
...        ...  ...     ...        ...         ...          ...   
49984   149984   50  female         uk     android          2.0   
49987   149987   42  female        usa         web          2.1   
49988   149988   49  female      india         ios          1.2   
49991   149991   51    male      india     android          1.1   
49999   149999   26  female  singapore         web          1.0   

       sessions_per_week  avg_session_duration_min  daily_active_minutes  \
3                      5                      3.98                 65.38   
5                     12                   

INSIGHT : In the given dataset, it is found that the high number of users fall under high engagement category which shows their highest engagement score and reduced churn risk score. These customers need to be given exciting offers to increase their usability. They are the premium users of the application, hence should be rewarded / provided explicit discounts while utilizing the features of the application.

In [22]:
# Flagged low-engagement and at-risk customers for retention campaigns
at_risk_customers = df[df['user_category'] == 'occasional_users']
print(at_risk_customers)

       user_id  age  gender    country device_type  app_version  \
8       100008   28  female    germany     android          1.0   
13      100013   41    male        usa     android          1.2   
28      100028   45  female    germany     android          1.1   
41      100041   26  female  singapore         web          1.1   
48      100048   52  female      india     android          1.0   
...        ...  ...     ...        ...         ...          ...   
49963   149963   29  female      india     android          2.0   
49974   149974   25    male      india     android          2.1   
49977   149977   42    male      india         web          1.0   
49995   149995   23    male    germany     android          1.1   
49996   149996   34  female      india         ios          1.1   

       sessions_per_week  avg_session_duration_min  daily_active_minutes  \
8                      7                     26.47                 64.74   
13                     8                   

INSIGHT : In our dataset, compared to low engagement users, occasional / at risk users' count is high. This is because some people just use the app only when they need it. Besides, some users just open often but dont utlize its features properly. To make them retain / utilize the application, new features and ads on their relevant products can be introduced,  hence increasing their interests towards the application.

In [23]:
# Designed personalized engagement strategies for moderate users
moderate_customers = df[df['user_category'] == 'moderate_engagement_users']
print(moderate_customers)

       user_id  age  gender    country device_type  app_version  \
1       100001   46    male         uk         ios          1.0   
4       100004   38    male  australia     android          1.0   
6       100006   36  female      india     android          1.2   
12      100012   57    male        usa         ios          1.1   
14      100014   20    male        usa         ios          1.1   
...        ...  ...     ...        ...         ...          ...   
49982   149982   28    male  australia     android          2.0   
49989   149989   21   other      india         ios          1.1   
49990   149990   56  female    germany         ios          2.0   
49997   149997   43    male      india         ios          1.2   
49998   149998   41    male      india         ios          1.2   

       sessions_per_week  avg_session_duration_min  daily_active_minutes  \
1                      8                     24.44                 42.03   
4                     10                   

INSIGHT : These moderate usera are the real asset. Because they neither use much nor they just be as such. They know to utlize the application wisely. They utilize the features of the app thoroughly but still dont use often. Gifts, rewards and some accomplishments might make these users still more engaging.

# Supported marketing, product, and retention decisions based on customer segments

INSIGHT : For high engagement users, no new retention campaigna are needed, instead they could be upgraded to loyalty subcribers . gold users by providng subscription for free of cost for some period of time.
For moderate users, new products related to their specific interests can be introduced / special features for the existing app specifications can be improvised.
For low and occasional users, marketing need to be done such as imrpoved number of notifications, providing recommendations, advertising products related to their specific interests.

# Enabled customer-level targeting instead of generic campaigns
Marketing camapigns should be more focused and targetted towards customers to increase users' app engagement, reduce risks, app usability. The campaign should mainly focus on the 'at risk users' to make them retain, utilize app features properly. 