In [1]:
import pandas as pd

# Load the dataset
data = pd.read_csv('XGB_predictions_JFM(Final)_with_reasonsbuckets_clv.csv')

# Define thresholds for high/mid/low based on the overall dataset
high_payment_threshold = data['Total Premium Payable '].quantile(0.75)
mid_payment_threshold = data['Total Premium Payable '].quantile(0.5)
low_payment_threshold = data['Total Premium Payable '].quantile(0.25)

high_discount_threshold = data['Applicable Discount with NCB'].quantile(0.75)
mid_discount_threshold = data['Applicable Discount with NCB'].quantile(0.5)
low_discount_threshold = data['Applicable Discount with NCB'].quantile(0.25)

high_clv_threshold = data['CLV'].quantile(0.75)
mid_clv_threshold = data['CLV'].quantile(0.5)
low_clv_threshold = data['CLV'].quantile(0.25)

# Assuming 'Churn Probability' is a column in the dataset
high_churn_probability_threshold = data['Churn Probability'].quantile(0.75)
mid_churn_probability_threshold = data['Churn Probability'].quantile(0.5)
low_churn_probability_threshold = data['Churn Probability'].quantile(0.25)

#Assign CLV and Payment categories
data['clv_category'] = data['CLV'].apply(
    lambda x: 'High' if x > high_clv_threshold else ('Mid' if x > mid_clv_threshold else 'Low')
)
data['payment_category'] = data['Total Premium Payable '].apply(
    lambda x: 'High' if x > high_payment_threshold else ('Mid' if x > mid_payment_threshold else 'Low')
)
# Assign discount categories
data['discount_category'] = data['Applicable Discount with NCB'].apply(
    lambda x: 'High' if x > high_discount_threshold else ('Mid' if x > mid_discount_threshold else 'Low')
)
#Determine churn status from a column indicating churn, assuming 'Not Renewed' indicates churn
data['churn_category'] = data['Churn Probability'].apply(
     lambda x: 'High' if x > high_churn_probability_threshold else ('Mid' if x > mid_churn_probability_threshold else 'Low')
)

# Filter the dataset for Not Renewed policies
not_renewed_data = data[data['Predicted Status'] == 'Not Renewed']

# Segment Single Loan Customers using all possible combinations
def segment_policy(row):
    # High Value Policy combinations
    if row['churn_category'] == 'Low':
        if row['payment_category'] == 'High':
            if row['discount_category'] == 'High':
                return 'Mid Value Policy'
            elif row['discount_category'] == 'Mid':
                if row['clv_category'] in ['High', 'Mid']:
                    return 'High Value Policy'
                elif row['clv_category'] == 'Low':
                    return 'High Value Policy'
            elif row['discount_category'] == 'Low':
                if row['clv_category'] in ['High', 'Mid']:
                    return 'High Value Policy'
                elif row['clv_category'] == 'Low':
                    return 'High Value Policy'
        elif row['payment_category'] == 'Mid':
            if row['discount_category'] == 'High':
                return 'Mid Value Policy'
            elif row['discount_category'] == 'Mid':
                if row['clv_category'] in ['Mid', 'Low']:
                    return 'High Value Policy'
                elif row['clv_category'] == 'High':
                    return 'High Value Policy'
            elif row['discount_category'] == 'Low':
                if row['clv_category'] in ['Low', 'Mid']:
                    return 'High Value Policy'
                elif row['clv_category'] == 'High':
                    return 'High Value Policy'
        elif row['payment_category'] == 'Low':
            if row['discount_category'] == 'High':
                return 'Mid Value Policy'
            elif row['discount_category'] in ['Mid', 'Low']:
                if row['clv_category'] in ['Low', 'Mid']:
                    return 'High Value Policy'
                elif row['clv_category'] == 'High':
                    return 'High Value Policy'
    
    # Mid Value Policy combinations
    if row['churn_category'] == 'Mid':
        if row['payment_category'] == 'High':
            if row['discount_category'] == 'High':
                return 'Mid Value Policy'
            elif row['discount_category'] == 'Mid':
                if row['clv_category'] in ['High', 'Mid']:
                    return 'Mid Value Policy'
                elif row['clv_category'] == 'Low':
                    return 'Mid Value Policy'
            elif row['discount_category'] == 'Low':
                if row['clv_category'] in ['High', 'Mid']:
                    return 'Mid Value Policy'
                elif row['clv_category'] == 'Low':
                    return 'Mid Value Policy'
        elif row['payment_category'] == 'Mid':
            if row['discount_category'] == 'High':
                return 'Mid Value Policy'
            elif row['discount_category'] == 'Mid':
                if row['clv_category'] in ['Mid', 'Low']:
                    return 'Mid Value Policy'
                elif row['clv_category'] == 'High':
                    return 'Mid Value Policy'
            elif row['discount_category'] == 'Low':
                if row['clv_category'] in ['Low', 'Mid']:
                    return 'Mid Value Policy'
                elif row['clv_category'] == 'High':
                    return 'Mid Value Policy'
        elif row['payment_category'] == 'Low':
            if row['discount_category'] == 'High':
                return 'Mid Value Policy'
            elif row['discount_category'] in ['Mid', 'Low']:
                if row['clv_category'] in ['Low', 'Mid']:
                    return 'Mid Value Policy'
                elif row['clv_category'] == 'High':
                    return 'Mid Value Policy'

    # Low value Policy combinations
    elif row['churn_category'] == 'High':
        if row['payment_category'] == 'High':
            if row['discount_category'] == 'High':
                if row['clv_category'] == 'High':
                    return 'Low Value Policy'
                elif row['clv_category'] == 'Mid':
                    return 'Low Value Policy'
                elif row['clv_category'] == 'Low':
                    return 'Low Value Policy'
            elif row['discount_category'] == 'Mid':
                if row['clv_category'] in ['High', 'Mid']:
                    return 'Low Value Policy'
                elif row['clv_category'] == 'Low':
                    return 'Low Value Policy'
            elif row['discount_category'] == 'Low':
                if row['clv_category'] == 'High':
                    return 'Low Value Policy'
                elif row['clv_category'] in ['Mid', 'Low']:
                    return 'Low Value Policy'
        elif row['payment_category'] == 'Mid':
            if row['discount_category'] == 'High':
                if row['clv_category'] == 'High':
                    return 'Low Value Policy'
                elif row['clv_category'] in ['Mid', 'Low']:
                    return 'Low Value Policy'
            elif row['discount_category'] == 'Mid':
                if row['clv_category'] in ['Mid', 'Low']:
                    return 'Low Value Policy'
                elif row['clv_category'] == 'High':
                    return 'Low Value Policy'
            elif row['discount_category'] == 'Low':
                if row['clv_category'] in ['Low', 'Mid']:
                    return 'Low Value Policy'
                elif row['clv_category'] == 'High':
                    return 'Low Value Policy'
        elif row['payment_category'] == 'Low':
            if row['discount_category'] in ['High', 'Mid', 'Low']:
                if row['clv_category'] in ['High', 'Mid', 'Low']:
                    return 'Low Value Policy'
    
    # Default fallback
    return 'Other'


# Apply the segmentation function to the Not Renewed dataset
not_renewed_data['policy_segment'] = not_renewed_data.apply(segment_policy, axis=1)

# If you want to merge this back to the original dataset
data = data.merge(not_renewed_data[['Policy No', 'policy_segment']], on='Policy No', how='left')

# Save the results to a CSV file
data.to_csv('XGB_predictions_JFM(Final)_policysegment.csv', index=False)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  not_renewed_data['policy_segment'] = not_renewed_data.apply(segment_policy, axis=1)


In [2]:
# Print all threshold values
print("Threshold Values:")
print(f"Total Premium Payable - High: {high_payment_threshold}, Mid: {mid_payment_threshold}, Low: {low_payment_threshold}")
print(f"Applicable Discount with NCB - High: {high_discount_threshold}, Mid: {mid_discount_threshold}, Low: {low_discount_threshold}")
print(f"CLV - High: {high_clv_threshold}, Mid: {mid_clv_threshold}, Low: {low_clv_threshold}")
print(f"Churn Probability - High: {high_churn_probability_threshold}, Mid: {mid_churn_probability_threshold}, Low: {low_churn_probability_threshold}")

Threshold Values:
Total Premium Payable - High: 19057.0, Mid: 14031.0, Low: 9959.0
Applicable Discount with NCB - High: 77.5, Mid: 70.0, Low: 57.5
CLV - High: 0.04951747597725593, Mid: 0.02967564122724975, Low: 0.0202908386648237
Churn Probability - High: 0.709570535, Mid: 0.5985222800000001, Low: 0.4987499125


In [1]:
import pandas as pd

data = pd.read_csv('XGB_predictions_JFM(Final)_policysegment.csv') 


# Function to determine customer segment
def determine_customer_segment(group):
    policy_counts = group['policy_segment'].fillna('Blank').value_counts()

    # If only one policy, map directly
    if len(group) == 1:
        policy = group.iloc[0]['policy_segment']
        if policy == 'High Value Policy':
            return 'Elite Retainers'
        elif policy == 'Mid Value Policy':
            return 'Potential Growth Customers'
        elif policy == 'Low Value Policy':
            return 'Low Value Customers'
        else:
            return ''  # Empty if no policy

    # Count different policy types
    high_count = policy_counts.get('High Value Policy', 0)
    mid_count = policy_counts.get('Mid Value Policy', 0)
    low_count = policy_counts.get('Low Value Policy', 0)
    blank_count = policy_counts.get('Blank', 0)  # Count empty cells

    # Assign segment based on highest count
    if high_count > max(mid_count, low_count, blank_count):
        return 'Elite Retainers'
    elif mid_count > max(high_count, low_count, blank_count):
        return 'Potential Growth Customers'
    elif low_count > max(high_count, mid_count, blank_count):
        return 'Low Value Customers'
    elif blank_count > max(high_count, mid_count, low_count):
        return 'Low Value Customers'

    # Tie-breaking logic
    if high_count == mid_count and high_count > 0:
        return 'Elite Retainers'
    elif high_count == low_count and high_count > 0:
        return 'Potential Growth Customers'
    elif low_count == mid_count and low_count > 0:
        return 'Potential Growth Customers'
    elif high_count == mid_count == low_count == blank_count:
        return 'Low Value Customers'

    return 'Low Value Customers'  # Default case

# Apply function correctly to each customer group and assign back
customer_segments = data.groupby('CustomerID').apply(determine_customer_segment)
data['customer_segment'] = data['CustomerID'].map(customer_segments)

# Save the result to CSV
data.to_csv('XGB_predictions_JFM(Final)_cussegment.csv', index=False)

  customer_segments = data.groupby('CustomerID').apply(determine_customer_segment)


In [1]:
import pandas as pd

df = pd.read_csv('XGB_predictions_JFM(Final)_with_reasonsbuckets_clv.csv')

# Compute customer-wise average values
customer_avg = df.groupby('CustomerID', as_index=False).agg({
    'Total Premium Payable ': 'mean',
    'Applicable Discount with NCB': 'mean',
    'Churn Probability': 'mean'
})

# Rename columns for clarity
customer_avg.rename(columns={
    'Total Premium Payable ': 'Avg_Premium',
    'Applicable Discount with NCB': 'Avg_Discount',
    'Churn Probability': 'Avg_Churn_Probability'
}, inplace=True)

# Merge back to original dataframe
df = df.merge(customer_avg, on='CustomerID', how='left')

df.to_csv('XGB_predictions_JFM(Final)_with_avgvalues.csv', index=False)

In [2]:
import pandas as pd

# Load the dataset
data = pd.read_csv('XGB_predictions_JFM(Final)_with_avgvalues.csv')

# Define thresholds for high/mid/low based on the overall dataset
high_payment_threshold = data['Avg_Premium'].quantile(0.75)
mid_payment_threshold = data['Avg_Premium'].quantile(0.5)
low_payment_threshold = data['Avg_Premium'].quantile(0.25)

high_discount_threshold = data['Avg_Discount'].quantile(0.75)
mid_discount_threshold = data['Avg_Discount'].quantile(0.5)
low_discount_threshold = data['Avg_Discount'].quantile(0.25)

high_clv_threshold = data['CLV'].quantile(0.75)
mid_clv_threshold = data['CLV'].quantile(0.5)
low_clv_threshold = data['CLV'].quantile(0.25)

# Assuming 'Churn Probability' is a column in the dataset
high_churn_probability_threshold = data['Avg_Churn_Probability'].quantile(0.75)
mid_churn_probability_threshold = data['Avg_Churn_Probability'].quantile(0.5)
low_churn_probability_threshold = data['Avg_Churn_Probability'].quantile(0.25)

#Assign CLV and Payment categories
data['clv_category'] = data['CLV'].apply(
    lambda x: 'High' if x > high_clv_threshold else ('Mid' if x > mid_clv_threshold else 'Low')
)
data['payment_category'] = data['Avg_Premium'].apply(
    lambda x: 'High' if x > high_payment_threshold else ('Mid' if x > mid_payment_threshold else 'Low')
)
# Assign discount categories
data['discount_category'] = data['Avg_Discount'].apply(
    lambda x: 'High' if x > high_discount_threshold else ('Mid' if x > mid_discount_threshold else 'Low')
)
#Determine churn status from a column indicating churn, assuming 'Not Renewed' indicates churn
data['churn_category'] = data['Avg_Churn_Probability'].apply(
     lambda x: 'High' if x > high_churn_probability_threshold else ('Mid' if x > mid_churn_probability_threshold else 'Low')
)

# Filter the dataset for Not Renewed policies
not_renewed_data = data[data['Predicted Status'] == 'Not Renewed']

# Segment Single Loan Customers using all possible combinations
def segment_policy(row):
    # High Value Policy combinations
    if row['churn_category'] == 'Low':
        if row['payment_category'] == 'High':
            if row['discount_category'] == 'High':
                return 'Mid Value Policy'
            elif row['discount_category'] == 'Mid':
                if row['clv_category'] in ['High', 'Mid']:
                    return 'High Value Policy'
                elif row['clv_category'] == 'Low':
                    return 'High Value Policy'
            elif row['discount_category'] == 'Low':
                if row['clv_category'] in ['High', 'Mid']:
                    return 'High Value Policy'
                elif row['clv_category'] == 'Low':
                    return 'High Value Policy'
        elif row['payment_category'] == 'Mid':
            if row['discount_category'] == 'High':
                return 'Mid Value Policy'
            elif row['discount_category'] == 'Mid':
                if row['clv_category'] in ['Mid', 'Low']:
                    return 'High Value Policy'
                elif row['clv_category'] == 'High':
                    return 'High Value Policy'
            elif row['discount_category'] == 'Low':
                if row['clv_category'] in ['Low', 'Mid']:
                    return 'High Value Policy'
                elif row['clv_category'] == 'High':
                    return 'High Value Policy'
        elif row['payment_category'] == 'Low':
            if row['discount_category'] == 'High':
                return 'Mid Value Policy'
            elif row['discount_category'] in ['Mid', 'Low']:
                if row['clv_category'] in ['Low', 'Mid']:
                    return 'High Value Policy'
                elif row['clv_category'] == 'High':
                    return 'High Value Policy'
    
    # Mid Value Policy combinations
    if row['churn_category'] == 'Mid':
        if row['payment_category'] == 'High':
            if row['discount_category'] == 'High':
                return 'Mid Value Policy'
            elif row['discount_category'] == 'Mid':
                if row['clv_category'] in ['High', 'Mid']:
                    return 'Mid Value Policy'
                elif row['clv_category'] == 'Low':
                    return 'Mid Value Policy'
            elif row['discount_category'] == 'Low':
                if row['clv_category'] in ['High', 'Mid']:
                    return 'Mid Value Policy'
                elif row['clv_category'] == 'Low':
                    return 'Mid Value Policy'
        elif row['payment_category'] == 'Mid':
            if row['discount_category'] == 'High':
                return 'Mid Value Policy'
            elif row['discount_category'] == 'Mid':
                if row['clv_category'] in ['Mid', 'Low']:
                    return 'Mid Value Policy'
                elif row['clv_category'] == 'High':
                    return 'Mid Value Policy'
            elif row['discount_category'] == 'Low':
                if row['clv_category'] in ['Low', 'Mid']:
                    return 'Mid Value Policy'
                elif row['clv_category'] == 'High':
                    return 'Mid Value Policy'
        elif row['payment_category'] == 'Low':
            if row['discount_category'] == 'High':
                return 'Mid Value Policy'
            elif row['discount_category'] in ['Mid', 'Low']:
                if row['clv_category'] in ['Low', 'Mid']:
                    return 'Mid Value Policy'
                elif row['clv_category'] == 'High':
                    return 'Mid Value Policy'

    # Low value Policy combinations
    elif row['churn_category'] == 'High':
        if row['payment_category'] == 'High':
            if row['discount_category'] == 'High':
                if row['clv_category'] == 'High':
                    return 'Low Value Policy'
                elif row['clv_category'] == 'Mid':
                    return 'Low Value Policy'
                elif row['clv_category'] == 'Low':
                    return 'Low Value Policy'
            elif row['discount_category'] == 'Mid':
                if row['clv_category'] in ['High', 'Mid']:
                    return 'Low Value Policy'
                elif row['clv_category'] == 'Low':
                    return 'Low Value Policy'
            elif row['discount_category'] == 'Low':
                if row['clv_category'] == 'High':
                    return 'Low Value Policy'
                elif row['clv_category'] in ['Mid', 'Low']:
                    return 'Low Value Policy'
        elif row['payment_category'] == 'Mid':
            if row['discount_category'] == 'High':
                if row['clv_category'] == 'High':
                    return 'Low Value Policy'
                elif row['clv_category'] in ['Mid', 'Low']:
                    return 'Low Value Policy'
            elif row['discount_category'] == 'Mid':
                if row['clv_category'] in ['Mid', 'Low']:
                    return 'Low Value Policy'
                elif row['clv_category'] == 'High':
                    return 'Low Value Policy'
            elif row['discount_category'] == 'Low':
                if row['clv_category'] in ['Low', 'Mid']:
                    return 'Low Value Policy'
                elif row['clv_category'] == 'High':
                    return 'Low Value Policy'
        elif row['payment_category'] == 'Low':
            if row['discount_category'] in ['High', 'Mid', 'Low']:
                if row['clv_category'] in ['High', 'Mid', 'Low']:
                    return 'Low Value Policy'
    
    # Default fallback
    return 'Other'


# Apply the segmentation function to the Not Renewed dataset
not_renewed_data['policy_segment'] = not_renewed_data.apply(segment_policy, axis=1)

# If you want to merge this back to the original dataset
data = data.merge(not_renewed_data[['Policy No', 'policy_segment']], on='Policy No', how='left')

# Save the results to a CSV file
data.to_csv('XGB_predictions_JFM(Final)_with_avgvalues_seg.csv', index=False)

# Print all threshold values
print("Threshold Values:")
print(f"Total Premium Payable - High: {high_payment_threshold}, Mid: {mid_payment_threshold}, Low: {low_payment_threshold}")
print(f"Applicable Discount with NCB - High: {high_discount_threshold}, Mid: {mid_discount_threshold}, Low: {low_discount_threshold}")
print(f"CLV - High: {high_clv_threshold}, Mid: {mid_clv_threshold}, Low: {low_clv_threshold}")
print(f"Churn Probability - High: {high_churn_probability_threshold}, Mid: {mid_churn_probability_threshold}, Low: {low_churn_probability_threshold}")

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  not_renewed_data['policy_segment'] = not_renewed_data.apply(segment_policy, axis=1)


Threshold Values:
Total Premium Payable - High: 18921.25, Mid: 14121.0, Low: 10230.25
Applicable Discount with NCB - High: 77.5, Mid: 69.92, Low: 57.5
CLV - High: 0.04951747597725593, Mid: 0.02967564122724975, Low: 0.0202908386648237
Churn Probability - High: 0.704417744, Mid: 0.59857885, Low: 0.5022930875


In [1]:
import pandas as pd

# Load the dataset
data = pd.read_csv('XGB_predictions_JFM(Final)_with_avgvalues.csv')

high_discount_threshold = data['Avg_Discount'].quantile(0.75)
mid_discount_threshold = data['Avg_Discount'].quantile(0.5)
low_discount_threshold = data['Avg_Discount'].quantile(0.25)

high_clv_threshold = data['CLV'].quantile(0.75)
mid_clv_threshold = data['CLV'].quantile(0.5)
low_clv_threshold = data['CLV'].quantile(0.25)

high_churn_probability_threshold = data['Avg_Churn_Probability'].quantile(0.75)
mid_churn_probability_threshold = data['Avg_Churn_Probability'].quantile(0.5)
low_churn_probability_threshold = data['Avg_Churn_Probability'].quantile(0.25)

# Assign CLV, Payment, Discount, and Churn categories
data['clv_category'] = data['CLV'].apply(
    lambda x: 'High' if x > high_clv_threshold else ('Mid' if x > mid_clv_threshold else 'Low')
)
data['discount_category'] = data['Avg_Discount'].apply(
    lambda x: 'High' if x > high_discount_threshold else ('Mid' if x > mid_discount_threshold else 'Low')
)
data['churn_category'] = data['Avg_Churn_Probability'].apply(
    lambda x: 'High' if x > high_churn_probability_threshold else ('Mid' if x > mid_churn_probability_threshold else 'Low')
)

# Count the number of policies per customer
data['Policy Count'] = data.groupby('CustomerID')['Policy No'].transform('count')

# Ensure 'Not All Renewed' column exists properly
customer_policy_status = data.groupby('CustomerID')['Predicted Status'].apply(
    lambda x: 'All Renewed' if all(x == 'Renewed') else 'Not All Renewed'
).reset_index(name='Renewal Status')  # Renaming column explicitly

# Merge back with the main dataset
data = data.merge(customer_policy_status, on='CustomerID', how='left')

# Fill any missing values in 'Renewal Status'
data['Renewal Status'] = data['Renewal Status'].fillna('All Renewed')

# Function to segment customers based on the new criteria
def segment_policy(row):
    if row['Renewal Status'] == 'Not All Renewed':  # Process only those who have at least one Not Renewed policy
        if row['churn_category'] == 'Mid' and row['discount_category'] in ['Mid', 'Low'] and row['clv_category'] in ['High', 'Mid']:
            return 'Elite Retainers'
        elif row['churn_category'] in ['Low', 'Mid'] and row['discount_category'] in ['High', 'Mid', 'Low'] and row['clv_category'] in ['High', 'Mid', 'Low']:
            return 'Potential Customers'
        elif row['churn_category'] == 'High' and row['discount_category'] in ['High', 'Mid', 'Low'] and row['clv_category'] in ['High', 'Mid', 'Low']:
            return 'Low Value Customers'
    return None

# Apply the segmentation function
data['Customer Segment'] = data.apply(segment_policy, axis=1)

# Save the results to a CSV file
data.to_csv('XGB_predictions_JFM(Final)_with_avgvalues_seg.csv', index=False)

# Print threshold values
print("Threshold Values:")
print(f"Applicable Discount with NCB - High: {high_discount_threshold}, Mid: {mid_discount_threshold}, Low: {low_discount_threshold}")
print(f"CLV - High: {high_clv_threshold}, Mid: {mid_clv_threshold}, Low: {low_clv_threshold}")
print(f"Churn Probability - High: {high_churn_probability_threshold}, Mid: {mid_churn_probability_threshold}, Low: {low_churn_probability_threshold}")

Threshold Values:
Applicable Discount with NCB - High: 77.5, Mid: 69.92, Low: 57.5
CLV - High: 0.04951747597725593, Mid: 0.02967564122724975, Low: 0.0202908386648237
Churn Probability - High: 0.704417744, Mid: 0.59857885, Low: 0.5022930875
