In [2]:
import joblib
from datetime import datetime
import pandas as pd 

In [3]:
# Load the master feature table and the tracking table (customer IDs + churn labels).
X_master = pd.read_csv('../data/processed/master_features.csv')
tracking = pd.read_csv('../data/processed/tracking_labels.csv')

# The loyalty numbers taken from `tracking` are later paired with predictions made
# from `X_master` purely by row position, so both files must have the same number
# of rows. Fail here with a clear message instead of at DataFrame assembly time.
# NOTE(review): equal length does not prove identical row order — confirm both
# files are exported from the same frame upstream.
assert len(X_master) == len(tracking), (
    f"Row count mismatch: master_features has {len(X_master)} rows, "
    f"tracking_labels has {len(tracking)} rows"
)

# Keep loyalty numbers and churn for analysis later
loyalty_numbers = tracking['Loyalty Number'].copy()
churn_labels = tracking['Churn'].copy()

# Focus on behavioral and value-based features.
# NOTE(review): presumably this must match the columns (and order) the persisted
# scaler/model were fitted on — confirm against the training notebook.
kmeans_features = [
    'Salary', 'CLV',
    'Total Flights', 'Distance',
    'Points Accumulated', 'Points Redeemed', 'Dollar Cost Points Redeemed',
    'Points Most Recent', 'Avg Monthly Points', 'Activity Volatility',
    'Overall Trend',
    'Customer Age (Years)'
]

# Select features (copy so later steps never touch X_master)
X_cluster = X_master[kmeans_features].copy()

print(f"\nClustering data shape: {X_cluster.shape}")


Clustering data shape: (15176, 12)


In [4]:
# Restore the persisted clustering model and its companion scaler in one step.
# joblib.load unpickles the file, which can execute arbitrary code: only load
# artifacts produced by this project.
kmeans_model, scaler = (
    joblib.load(artifact_path)
    for artifact_path in (
        '../models/clustering_model.pkl',
        '../models/scaler.pkl',
    )
)

In [5]:
# Apply the persisted scaler to the selected features, then let the persisted
# clustering model assign a cluster ID to every row.
# NOTE(review): assumes the scaler and model were fitted on these same columns
# in this order — confirm against the training notebook.
X_cluster_scaled = scaler.transform(X_cluster)
clusters = kmeans_model.predict(X_cluster_scaled)

In [6]:
# Human-readable segment names keyed by numeric cluster ID (0, 1, 2).
# The position in the list is the cluster ID the name is attached to.
# NOTE(review): the ID-to-name pairing is hard-coded; presumably it was chosen
# for the specific persisted model — re-check it whenever the model is refitted.
cluster_names = dict(enumerate([
    'Super User',     # cluster 0
    'Loyal Regular',  # cluster 1
    'Flight Risk',    # cluster 2
]))

In [7]:
# Translate each predicted cluster ID into its human-readable segment name.
# The name mapping is maintained by hand while the IDs come from the loaded
# model, so report any ID without a name explicitly instead of failing with a
# bare KeyError on the first offending row.
unmapped_ids = sorted({int(c) for c in clusters} - set(cluster_names))
if unmapped_ids:
    raise KeyError(f"No cluster name defined for cluster ID(s): {unmapped_ids}")

cluster_names_list = [cluster_names[int(c)] for c in clusters]


In [8]:
# Assemble one row per customer: loyalty number, numeric cluster ID,
# readable cluster name, and the date this scoring run was executed.
prediction_date = datetime.now().strftime('%Y-%m-%d')
kmeans_predictions_df = pd.DataFrame({
    'Loyalty Number': loyalty_numbers,
    'Cluster ID': clusters,
    'Cluster Name': cluster_names_list,
    'Prediction Date': prediction_date,
})

# Sanity check: every column should hold the same number of non-null values.
# A differing count means at least one column contains missing entries.
column_counts = kmeans_predictions_df.count()
if column_counts.nunique() == 1:
    print('Perfect File Created!')
else:
    print('Error: Mismatching Number of Values!')

# Per-column non-null counts for a quick visual confirmation.
print('===== Predictions File =====')
print('Value      |      Count')
print(column_counts)

# Preview the first rows as the cell's rich output.
kmeans_predictions_df.head(10)

Perfect File Created!
===== Predictions File =====
Value      |      Count
Loyalty Number     15176
Cluster ID         15176
Cluster Name       15176
Prediction Date    15176
dtype: int64


Unnamed: 0,Loyalty Number,Cluster ID,Cluster Name,Prediction Date
0,100018,1,Loyal Regular,2026-02-04
1,100102,1,Loyal Regular,2026-02-04
2,100140,1,Loyal Regular,2026-02-04
3,100214,2,Flight Risk,2026-02-04
4,100272,1,Loyal Regular,2026-02-04
5,100301,1,Loyal Regular,2026-02-04
6,100364,1,Loyal Regular,2026-02-04
7,100380,1,Loyal Regular,2026-02-04
8,100428,1,Loyal Regular,2026-02-04
9,100504,2,Flight Risk,2026-02-04


In [9]:
# Persist the cluster assignments. index=False keeps the positional row index
# out of the file, so re-reading the CSV does not produce a spurious
# 'Unnamed: 0' column; 'Loyalty Number' is the row identifier.
kmeans_predictions_df.to_csv('../predictions/cluster_predictions.csv', index=False)