# Model Inference

In [2]:
# import library 
import pandas as pd
import pickle
from datetime import datetime

In [3]:
# load the files ! 
with open('model_scaler.pkl', 'rb') as file_1 : # rb = read binary
    model_scaler = pickle.load(file_1)
    
with open('model_pca.pkl', 'rb') as file_2 : # rb = read binary
    model_pca = pickle.load(file_2)
    
with open('model_km.pkl', 'rb') as file_3 : # rb = read binary
    model_km = pickle.load(file_3)

In [4]:
#Create new data
df_inf = {
    "InvoiceNo" : '50000',
    "StockCode" : '20000A',
    "Description" : 'WHITE HANGING HEART T-LIGHT HOLDER',
    "Quantity" :6,
    "InvoiceDate" :'12/1/2010 14:45',
    "UnitPrice" :2.5,
    "CustomerID" :18000,
    "Country" : 'France'}

# save to dataframe
df_inf = pd.DataFrame([df_inf])
df_inf

Unnamed: 0,InvoiceNo,StockCode,Description,Quantity,InvoiceDate,UnitPrice,CustomerID,Country
0,50000,20000A,WHITE HANGING HEART T-LIGHT HOLDER,6,12/1/2010 14:45,2.5,18000,France


In [5]:
dateLengths = df_inf.InvoiceDate.str.len()
dateLengths.value_counts()

df_inf["InvoiceDate"] = pd.to_datetime(df_inf["InvoiceDate"], format="%m/%d/%Y %H:%M")
df_inf["InvoiceDates"] = df_inf["InvoiceDate"].dt.strftime("%m/%d/%Y")
df_inf["InvoiceDates"] = pd.to_datetime(df_inf["InvoiceDates"], format="%m/%d/%Y")
df_inf["InvoiceTime"] = df_inf["InvoiceDate"].dt.strftime("%H:%M")


#Calculating the TotalPrice per order (UnitPrice and Quantity)
df_inf["TotalPrice"] = df_inf["Quantity"]*df_inf["UnitPrice"]


df_inf

Unnamed: 0,InvoiceNo,StockCode,Description,Quantity,InvoiceDate,UnitPrice,CustomerID,Country,InvoiceDates,InvoiceTime,TotalPrice
0,50000,20000A,WHITE HANGING HEART T-LIGHT HOLDER,6,2010-12-01 14:45:00,2.5,18000,France,2010-12-01,14:45,15.0


In [6]:
current = pd.Timestamp(datetime(2011, 11, 28))

In [7]:
df_inf.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1 entries, 0 to 0
Data columns (total 11 columns):
 #   Column        Non-Null Count  Dtype         
---  ------        --------------  -----         
 0   InvoiceNo     1 non-null      object        
 1   StockCode     1 non-null      object        
 2   Description   1 non-null      object        
 3   Quantity      1 non-null      int64         
 4   InvoiceDate   1 non-null      datetime64[ns]
 5   UnitPrice     1 non-null      float64       
 6   CustomerID    1 non-null      int64         
 7   Country       1 non-null      object        
 8   InvoiceDates  1 non-null      datetime64[ns]
 9   InvoiceTime   1 non-null      object        
 10  TotalPrice    1 non-null      float64       
dtypes: datetime64[ns](2), float64(2), int64(2), object(5)
memory usage: 220.0+ bytes


In [8]:
RFMScore = df_inf.groupby('CustomerID').agg({'InvoiceDates': lambda x: (current - x.max()).days,
                                            'InvoiceNo': lambda x: x.count(),
                                            'TotalPrice': lambda x: x.sum()
                                            })

RFMScore

Unnamed: 0_level_0,InvoiceDates,InvoiceNo,TotalPrice
CustomerID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
18000,362,1,15.0


In [9]:
RFMScore.rename(columns={'InvoiceDates':'Recency','InvoiceNo':'Frequency','TotalPrice':'Monetary'},inplace = True)
RFMScore

Unnamed: 0_level_0,Recency,Frequency,Monetary
CustomerID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
18000,362,1,15.0


In [10]:
# Function

def RecencyScore(x):
    if x <= (-357.0):
        return 1
    elif x <= (-323.0):
        return 2
    elif x <= (-230.0):
        return 3
    else:
        return 4

def FreqScore(x):
    if x <= 17.00:
        return 4
    elif x <= 41.00:
        return 3
    elif x <= 99.25:
        return 2
    else:
        return 1

def MonetScore(x):
    if x <= 291.795:
        return 4
    elif x <= 644.070:
        return 3
    elif x <= 1608.335:
        return 2
    else:
        return 1

In [11]:
# Applying the created function on the respective columns
'''
Recency_Score(x,p,d):
x = value
p = recency, monetary_value, frequency
d = quartiles dict
'''
RFMScore['R'] = RFMScore['Recency'].apply(RecencyScore )
RFMScore['F'] = RFMScore['Frequency'].apply(FreqScore  )
RFMScore['M'] = RFMScore['Monetary'].apply(MonetScore)

# Display the DataFrame of RFM
RFMScore

Unnamed: 0_level_0,Recency,Frequency,Monetary,R,F,M
CustomerID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
18000,362,1,15.0,4,4,4


In [12]:
#Creating a new field 'RFMValue' to split the customers into 10 segments
RFMScore['RFMValue'] = RFMScore[['R','F','M']].sum(axis = 1)
RFMScore['RFMGroup'] = RFMScore.R.map(str) + RFMScore.F.map(str) + RFMScore.M.map(str)
RFMScore['RFMGroup'] = RFMScore['RFMGroup'].astype(int)
RFMScore.reset_index()

Unnamed: 0,CustomerID,Recency,Frequency,Monetary,R,F,M,RFMValue,RFMGroup
0,18000,362,1,15.0,4,4,4,12,444


In [13]:
# melakukan scaling data
RFMS_scaled = model_scaler.transform(RFMScore)
RFMS_scaled = pd.DataFrame(RFMS_scaled, columns=RFMScore.columns)
RFMS_scaled = RFMScore.drop(['RFMValue','RFMGroup'],axis=1)
RFMS_scaled

Unnamed: 0_level_0,Recency,Frequency,Monetary,R,F,M
CustomerID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
18000,362,1,15.0,4,4,4


In [14]:
# menggunakan model PCA
df_inf_pca = model_pca.transform(RFMS_scaled)

In [15]:
# prediksi mengggunakan model Kmeans
df_prediksi = model_km.predict(df_inf_pca)

if df_prediksi == 0:
    print("Customer class : Platinum")
elif df_prediksi == 1:
    print("Customer class : Diamond")
elif df_prediksi == 2:
    print("Customer class : Gold")
elif df_prediksi == 3:
    print("Customer class : Silver")


Customer class : Silver
