In [1]:
import pandas as pd
import pickle

In [11]:
def _load_pickle(path):
    """Unpickle and return the object stored at ``path``, closing the file afterwards."""
    # NOTE(security): pickle.load can execute arbitrary code while
    # deserialising, so only point this at artifacts from a trusted source.
    with open(path, "rb") as handle:
        return pickle.load(handle)


def customer_segment_api(data):
    """Assign a customer segment to every customer in a transaction table.

    The function derives per-customer RFM features (Recency, Frequency,
    Monetary), scales them with the saved scaler, predicts a cluster with
    the saved clustering model and maps the cluster id to a segment label.

    The three model artifacts are read from the current working directory:
    ``scaler.pkl`` (object with a ``transform`` method), ``cluster.pkl``
    (object with a ``predict`` method) and ``cluster_encode.pkl`` (a mapping
    accepted by ``Series.map`` that turns cluster ids into labels).

    Parameters
    ----------
    data : pandas.DataFrame
        Transaction-level rows with the columns ``InvoiceNo``,
        ``CustomerID``, ``Quantity``, ``UnitPrice`` and ``InvoiceDate``
        (``InvoiceDate`` must be datetime-like). The frame is not modified.

    Returns
    -------
    pandas.DataFrame
        One row per ``CustomerID`` with the columns ``CustomerID``,
        ``Recency``, ``Frequency``, ``Monetary`` (all three scaled),
        ``Cluster`` and ``Customer_Segment``.
    """
    # Load saved model artifacts
    scaler = _load_pickle("scaler.pkl")
    cluster = _load_pickle("cluster.pkl")
    cluster_encode = _load_pickle("cluster_encode.pkl")

    rfm_columns = ['Recency', 'Frequency', 'Monetary']

    # Recency is measured in days relative to the latest invoice date in the
    # data. Series.max() is used rather than describe()['last'] because
    # pandas 2.x no longer reports 'first'/'last' for datetime columns.
    last_invoice_date = data['InvoiceDate'].max()

    # assign() returns a new frame, so the caller's DataFrame is left
    # untouched and the function can be re-run on the same input.
    transactions = data.assign(
        Monetary=data['Quantity'] * data['UnitPrice'],
        Recency=(last_invoice_date - data['InvoiceDate']).dt.days,
    )

    # RFM feature extraction: most recent purchase, number of distinct
    # invoices, and total spend per customer.
    rfm_data = (
        transactions.groupby('CustomerID')
        .agg(
            Recency=('Recency', 'min'),
            Frequency=('InvoiceNo', 'nunique'),
            Monetary=('Monetary', 'sum'),
        )
        .reset_index()
    )

    # Feature scaling (overwrites the raw RFM values with the scaled ones)
    rfm_data[rfm_columns] = scaler.transform(rfm_data[rfm_columns])

    # Predict the cluster label from the scaled features
    rfm_data['Cluster'] = cluster.predict(rfm_data[rfm_columns])

    # Translate the numeric cluster id into its segment label
    rfm_data['Customer_Segment'] = rfm_data['Cluster'].map(cluster_encode)

    return rfm_data

In [3]:
# Read the raw transaction workbook; customer_segment_api reads its InvoiceNo,
# CustomerID, Quantity, UnitPrice and InvoiceDate columns.
data = pd.read_excel(io='Online Retail.xlsx')

In [12]:
# Segment every customer in the loaded transactions, then preview the first rows.
rfm = customer_segment_api(data=data)
rfm.head(n=5)

Unnamed: 0,CustomerID,Recency,Frequency,Monetary,Cluster,Customer_Segment
0,12346.0,0.871314,0.021739,0.0,3,Bronze
1,12347.0,0.002681,0.130435,0.201126,2,Gold
2,12348.0,0.198391,0.065217,0.083868,2,Gold
3,12349.0,0.048257,0.0,0.082016,2,Gold
4,12350.0,0.828418,0.0,0.015605,3,Bronze


In [14]:
# Count how many customers were assigned to each segment.
rfm.loc[:, 'Customer_Segment'].value_counts()

Gold        2587
Silver       770
Bronze       625
Platinum     390
Name: Customer_Segment, dtype: int64