In [1]:
import pandas as pd

# Load the sentiment-scored data
df = pd.read_csv("../data/amex_complaints_sentiment.csv")

# Confirm target column
print("✅ Unique labels in consumer_disputed?:", df['consumer_disputed?'].unique())


✅ Unique labels in consumer_disputed?: ['Unknown' 'Yes' 'No']


In [2]:
# Map target to binary: Yes → 1, No/Unknown → 0
df['disputed_flag'] = df['consumer_disputed?'].apply(lambda x: 1 if str(x).strip().lower() == 'yes' else 0)

print("✅ Class distribution:")
print(df['disputed_flag'].value_counts())


✅ Class distribution:
disputed_flag
0    4906
1     503
Name: count, dtype: int64


In [3]:
# Select features
features = ['vader_sentiment', 'submitted_via', 'issue']
X = df[features]
y = df['disputed_flag']

# One-hot encode categorical features
X_encoded = pd.get_dummies(X, columns=['submitted_via', 'issue'])

print("✅ Encoded feature shape:", X_encoded.shape)


✅ Encoded feature shape: (5409, 46)


In [4]:
from sklearn.model_selection import train_test_split

X_train, X_test, y_train, y_test = train_test_split(X_encoded, y, test_size=0.2, random_state=42)

print("✅ Training set size:", X_train.shape)
print("✅ Test set size:", X_test.shape)


✅ Training set size: (4327, 46)
✅ Test set size: (1082, 46)


In [5]:
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import classification_report

model = RandomForestClassifier(n_estimators=100, random_state=42)
model.fit(X_train, y_train)

y_pred = model.predict(X_test)

print("✅ Model trained.")
print("📋 Classification Report:\n", classification_report(y_test, y_pred, target_names=["Not Disputed", "Disputed"]))


✅ Model trained.
📋 Classification Report:
               precision    recall  f1-score   support

Not Disputed       0.94      0.94      0.94      1001
    Disputed       0.27      0.30      0.28        81

    accuracy                           0.89      1082
   macro avg       0.61      0.62      0.61      1082
weighted avg       0.89      0.89      0.89      1082



In [7]:
import joblib

joblib.dump(model, "../outputs/escalation_model.pkl")
print("Model saved to: ../outputs/escalation_model.pkl")


Model saved to: ../outputs/escalation_model.pkl
