In [1]:
# 06_final_business_recommendations.ipynb
import pandas as pd

In [31]:
# Load cleaned dataset
df = pd.read_csv('../data/customer_behavior_clean.csv')

In [25]:
# If 'Churned' exists
df['churned'] = df['churned'].map({'Yes': 1, 'No': 0})
df['churned'].head()

0    1
1    0
2    0
3    1
4    0
Name: churned, dtype: int64

In [22]:
# At-risk segment: customers who have churned or are likely to churn
# 1 → customer has stopped using your service or is at risk of leaving.
# 0 → customer did not churn / still active
at_risk = df[df['churned'] == 1]

# Target with retention emails & coupons
print("Send retention emails/coupons to these customers:")
print(at_risk[['customer_id','age','gender','annual_income','spending_score']])

# Improve support responsiveness
# You can flag these customers for priority support
at_risk['priority_support'] = True

# Loyalty program tied to spending_score
# High spending_score but at-risk customers get loyalty rewards
loyalty_targets = at_risk[at_risk['spending_score'] > 50]  # example threshold
print("\nLoyalty program targets based on spending_score:")
print(loyalty_targets[['customer_id','spending_score']].head(5))

Send retention emails/coupons to these customers:
     customer_id  age  gender  annual_income  spending_score
0           1001   56  Female          70990              35
3           1004   60    Male         104791              49
7           1008   36  Female         115084              50
10          1011   28  Female          81813               9
11          1012   41  Female          47712              34
..           ...  ...     ...            ...             ...
274         1275   49  Female          87584              54
286         1287   28  Female         107958              46
288         1289   53  Female          22219              68
296         1297   42  Female         113070              82
298         1299   35  Female         117829              74

[85 rows x 5 columns]

Loyalty program targets based on spending_score:
    customer_id  spending_score
12         1013              76
17         1018              64
20         1021              64
21         1022  

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  at_risk['priority_support'] = True


In [28]:
# Define clusters
def assign_cluster(row):
    if row['annual_income'] > 80000 and row['spending_score'] < 50:
        return 'High-income/Low-spend'
    elif 50000 <= row['annual_income'] <= 80000 and 50 <= row['spending_score'] <= 80:
        return 'Mid-income/Mid-spend'
    elif row['annual_income'] < 50000 and row['spending_score'] > 70:
        return 'Low-income/High-spend'
    else:
        return 'Other'

df['cluster'] = df.apply(assign_cluster, axis=1)

# Assign marketing strategies
def marketing_strategy(cluster):
    if cluster == 'High-income/Low-spend':
        return 'Premium bundles & concierge chat'
    elif cluster == 'Mid-income/Mid-spend':
        return 'Seasonal discounts'
    elif cluster == 'Low-income/High-spend':
        return 'Membership deals'
    else:
        return 'General marketing'

df['marketing_strategy'] = df['cluster'].apply(marketing_strategy)

# Show results
print("\nSegmented marketing strategies:")
df[['customer_id','annual_income','spending_score','cluster','marketing_strategy']]


Segmented marketing strategies:


Unnamed: 0,customer_id,annual_income,spending_score,cluster,marketing_strategy
0,1001,70990,35,Other,General marketing
1,1002,95672,94,Other,General marketing
2,1003,85545,95,Other,General marketing
3,1004,104791,49,High-income/Low-spend,Premium bundles & concierge chat
4,1005,78141,69,Mid-income/Mid-spend,Seasonal discounts
...,...,...,...,...,...
295,1296,39216,48,Other,General marketing
296,1297,113070,82,Other,General marketing
297,1298,99767,7,High-income/Low-spend,Premium bundles & concierge chat
298,1299,117829,74,Other,General marketing


In [None]:
# Identify specific review insights 
# Pricing complaints from negative reviews
pricing_complaints = df[df['sentiment'] == 'negative']
pricing_complaints = pricing_complaints[pricing_complaints['clean_review']
                                        .str.contains('expensive|price|cost', na=False)]
print("\nPricing complaints to address:")
print(pricing_complaints[['customer_id','review_text']].head(10))

# Positive highlights (delivery, quality)
positive_highlights = df[df['sentiment'] == 'positive']
positive_highlights = positive_highlights[positive_highlights['clean_review']
                                          .str.contains('fast|delivery|good|quality|excellent', na=False)]
print("\nPositive highlights to emphasize:")
print(positive_highlights[['customer_id','review_text']].head(10))


Pricing complaints to address:
     customer_id                    review_text
2           1003  Too expensive for the quality
7           1008  Too expensive for the quality
11          1012  Too expensive for the quality
12          1013  Too expensive for the quality
31          1032  Too expensive for the quality
36          1037  Too expensive for the quality
43          1044  Too expensive for the quality
63          1064  Too expensive for the quality
106         1107  Too expensive for the quality
107         1108  Too expensive for the quality

Positive highlights to emphasize:
    customer_id                       review_text
5          1006  Fast delivery and good packaging
10         1011  Fast delivery and good packaging
13         1014     Great quality, very satisfied
23         1024     Great quality, very satisfied
24         1025  Fast delivery and good packaging
28         1029     Great quality, very satisfied
34         1035  Fast delivery and good packaging
50   

In [None]:
"""
6. Final Business Recommendations 
This section integrates insights from churn modeling, clustering, and NLP analysis to provide actionable strategies.
1. Steps to Reduce Churn:
    •	Target at-risk customers (churned or likely to churn) with personalized retention emails and discount coupons.
    •	Flag these customers for priority support to improve responsiveness and customer satisfaction.
    •	Launch a loyalty program tied to spending_score, rewarding high-value at-risk customers to encourage continued engagement.
2. Segment-Specific Marketing Strategies:
    •	High-income / Low-spend: Offer premium bundles, concierge services, and exclusive experiences to encourage higher spending.
    •	Mid-income / Mid-spend: Run seasonal discounts and value-added promotions to maintain engagement.
    •	Low-income / High-spend: Provide membership deals, flash sales, and buy-now-pay-later schemes to enhance loyalty.
3. Operational / Product Improvements:
    •	Address pricing complaints highlighted in negative reviews, ensuring products are perceived as good value.
    •	Emphasize strengths from positive reviews, such as fast delivery, quality packaging, and product reliability, in marketing campaigns.
    
These recommendations combine predictive insights, customer segmentation, and textual feedback to reduce churn, boost engagement, and improve overall customer satisfaction.

"""