In [1]:
import pandas as pd

# Input locations, relative to the notebook's working directory.
complaints_path = "../data/amex_complaints_sentiment.csv"
churn_path = "../outputs/churn_predictions.csv"

# Read both CSV files into DataFrames used by the summary cells below.
complaints = pd.read_csv(complaints_path)
churn = pd.read_csv(churn_path)


In [2]:
# Dataset sizes: number of rows in each loaded frame.
n_complaints = len(complaints)
n_churn = len(churn)
print("Complaint records:", n_complaints)
print("Churn records:", n_churn)

# Escalation rate: fraction of rows whose 'consumer_disputed?' value equals
# "yes" after lower-casing.
# NOTE(review): values other than "yes" (including missing ones, which do not
# compare equal) are treated as not disputed — confirm this is intended.
disputed_flags = complaints['consumer_disputed?'].str.lower() == 'yes'
escalation_rate = disputed_flags.mean()
print("Escalation rate: {:.2%}".format(escalation_rate))

# Churn rate: mean of the 'actual' column
# (presumably a 0/1 churn indicator — TODO confirm against the predictions file).
actual_labels = churn['actual']
churn_rate = actual_labels.mean()
print("Churn rate: {:.2%}".format(churn_rate))

# Sentiment breakdown: relative frequency of each label, rounded to 3 decimals.
sentiment_shares = complaints['sentiment_label'].value_counts(normalize=True)
sentiment_counts = sentiment_shares.round(3)
print("Sentiment distribution:\n", sentiment_counts)


Complaint records: 5409
Churn records: 2026
Escalation rate: 9.30%
Churn rate: 16.14%
Sentiment distribution:
 sentiment_label
positive    0.524
negative    0.381
neutral     0.095
Name: proportion, dtype: float64


In [3]:
import os

# List every entry in the outputs directory (files and any subdirectories).
# os.listdir() returns names in arbitrary, filesystem-dependent order, so the
# names are sorted to keep the printed listing stable across machines and re-runs.
output_dir = "../outputs"
outputs = sorted(os.listdir(output_dir))

print("Final output files:")
for filename in outputs:
    print("-", filename)


Final output files:
- churn_model.pkl
- churn_predictions.csv
- complaints_by_channel.csv
- escalation_model.pkl
- monthly_complaint_volume.csv
- response_vs_dispute.csv
- sentiment_by_issue.csv
- top_10_issues.csv


This project demonstrates an end-to-end analytics solution inspired by real American Express business problems. It includes:

- Cleaning and analyzing more than 50,000 consumer complaint records
- Using NLP to detect sentiment from complaint narratives
- Building a machine learning model to predict complaint escalation risk
- Building a separate model to predict customer churn
- Preparing dashboard-ready summaries for visualization in Tableau or Power BI

The tools and techniques used include Python, pandas, scikit-learn, VADER sentiment analysis, joblib, and one-hot encoding. The project mimics real-world data analyst workflows and delivers insights for improving customer experience and reducing regulatory risk.
