# Productionize the Customer Segment Prediction Model

1. Model Registration
2. Model Tracking 
3. Auto Logging


In [5]:
pip install --upgrade mlflow

Note: you may need to restart the kernel to use updated packages.


# MLflow Tracking: Set Up a New Experiment

In [6]:
import mlflow
import mlflow.sklearn
import mlflow.pyfunc

# MLflow tracking: point the client at the local tracking server, then select
# the experiment that the runs in this notebook are logged under.
TRACKING_URI = "http://127.0.0.1:5000"
EXPERIMENT_NAME = "Customer_Segment_Prediction_Model"

mlflow.set_tracking_uri(TRACKING_URI)
# Kept as the last expression so the cell still displays the Experiment object.
mlflow.set_experiment(EXPERIMENT_NAME)

<Experiment: artifact_location='mlflow-artifacts:/991329419849384240', creation_time=1714001602979, experiment_id='991329419849384240', last_update_time=1714001602979, lifecycle_stage='active', name='Customer_Segment_Prediction_Model', tags={}>

# Model Training

In [7]:
import streamlit as st
import requests
import json

# NOTE(review): this cell is a Streamlit front end, but the notebook section it
# sits in is about model training — confirm this is the intended cell content.
#
# Endpoints of the model-serving API on localhost:8000, one per model.
prediction_api_URL = "http://localhost:8000/predict/"
Causal_api_URL = "http://localhost:8000/causal/"
# NOTE(review): "clsuter" looks like a misspelling of "cluster" — confirm against
# the route the server actually exposes before changing it.
cluster_api_URL = "http://localhost:8000/clsuter/"

# Streamlit app layout: page title
st.title('Customer-Personality-Analysis-2.0 Customer Insight Hub')

# Sidebar radio button that selects which model's controls are rendered
tabs = ["Causal Inference", "Clustering", "Segment Prediction"]
selected_tab = st.sidebar.radio("Select Model", tabs)

# Shared HTTP settings for every model call below.
_JSON_HEADERS = {'Content-Type': 'application/json'}
# (connect, read) timeouts in seconds. Without a timeout `requests` can block
# forever on an unresponsive server; raise the read value if a model is slow.
_REQUEST_TIMEOUT = (5, 120)


def _call_model(url, payload=None):
    """POST to a model endpoint and render the outcome in the Streamlit page.

    Args:
        url: Endpoint to call.
        payload: JSON-serialisable body. When ``None`` the request is sent
            without a body (used by the clustering endpoint).

    Shows ``st.success`` with the decoded JSON on HTTP 200, and ``st.error``
    for connection failures, timeouts, non-200 statuses, or a body that is
    not valid JSON. Nothing is returned; the result is only displayed.
    """
    try:
        if payload is None:
            response = requests.post(url, timeout=_REQUEST_TIMEOUT)
        else:
            response = requests.post(
                url,
                data=json.dumps(payload),
                headers=_JSON_HEADERS,
                timeout=_REQUEST_TIMEOUT,
            )
    except requests.RequestException as exc:
        # Connection refused, DNS failure, timeout, ... — report instead of
        # letting the traceback take over the page.
        st.error(f'Could not reach the model service: {exc}')
        return

    if response.status_code != 200:
        st.error('Failed to get prediction from the model.')
        return

    try:
        result = response.json()
    except ValueError:
        # requests raises a ValueError subclass when the body is not JSON.
        st.error('The model service returned a response that is not valid JSON.')
        return

    st.success(f'Result: {result}')


# Display content based on selected tab
if selected_tab == "Causal Inference":
    # Collect the target, treatment and confounders for the causal model
    target_options = ['Recency', 'Total_purchase', 'Total_amount']
    Target = st.selectbox("Select Target", target_options)

    treatment_options = ['Income_Category_High', 'Is_Parent', 'Cmp_Attitude', 'Complain']
    Treatment = st.selectbox("Select Treatment", treatment_options)

    confounding_options = ['Income_Category_High', 'Income_Category_Low', 'Income_Category_Medium',
                           'Complain', 'Is_Parent', 'Cmp_Attitude', 'Family_Size', 'Age',
                           'Member_Year', 'Total_amount', 'Total_purchase',
                           'NumWebVisitsMonth', 'NumDealsPurchases', 'Recency']
    ConfoundingVar = st.multiselect("Select Confounding Variables", confounding_options)

    # Button to call the model
    if st.button('Run Causal Inference Model'):
        # Column-oriented payload: one row holding the three selections
        _call_model(Causal_api_URL, {
            "columns": ["Target", "Treatment", "Confounding Variables"],
            "data": [[Target, Treatment, ConfoundingVar]]
        })
elif selected_tab == "Clustering":
    # Button to run clustering; the endpoint is called without a request body
    if st.button('Clustering'):
        _call_model(cluster_api_URL)
elif selected_tab == "Segment Prediction":
    # Input fields for user data
    Total_amount = st.number_input("Total Amount", value=0.0)
    Is_Parent = st.selectbox("Is Parent", [True, False])
    Total_Children = st.number_input("Total Children", value=0, step=1)
    NumDealsPurchases = st.number_input("Number of Deals/Purchases", value=0, step=1)
    Income = st.number_input("Income", value=0.0)
    Family_Size = st.number_input("Family Size", value=0, step=1)
    NumWebVisitsMonth = st.number_input("Number of Web Visits per Month", value=0, step=1)
    Total_purchase = st.number_input("Total Purchase", value=0.0)
    MntWines = st.number_input("Amount Spent on Wines", value=0.0)
    Teenhome = st.number_input("Number of Teenagers at Home", value=0, step=1)

    # Button to make prediction
    if st.button('Predict'):
        # Flat JSON object keyed by feature name
        _call_model(prediction_api_URL, {
            "Total_amount": Total_amount,
            "Is_Parent": Is_Parent,
            "Total_Children": Total_Children,
            "NumDealsPurchases": NumDealsPurchases,
            "Income": Income,
            "Family_Size": Family_Size,
            "NumWebVisitsMonth": NumWebVisitsMonth,
            "Total_purchase": Total_purchase,
            "MntWines": MntWines,
            "Teenhome": Teenhome
        })


2024/04/26 02:12:29 INFO mlflow.utils.autologging_utils: Created MLflow autologging run with ID '54691f065809466d81eb459b6682bd2d', which will track hyperparameters, performance metrics, model artifacts, and lineage information for the current xgboost workflow


Fitting 2 folds for each of 24 candidates, totalling 48 fits


2024/04/26 02:12:31 INFO mlflow.utils.autologging_utils: Created MLflow autologging run with ID '43aeb7046acb485ca60ec8e6b2c59dbb', which will track hyperparameters, performance metrics, model artifacts, and lineage information for the current xgboost workflow
2024/04/26 02:12:32 INFO mlflow.utils.autologging_utils: Created MLflow autologging run with ID 'b19313b6dbe64b50aca0d5d9fa10270c', which will track hyperparameters, performance metrics, model artifacts, and lineage information for the current xgboost workflow
2024/04/26 02:12:33 INFO mlflow.utils.autologging_utils: Created MLflow autologging run with ID '9a15e12949e84e6b89fd3cf3f54bb648', which will track hyperparameters, performance metrics, model artifacts, and lineage information for the current xgboost workflow
2024/04/26 02:12:35 INFO mlflow.utils.autologging_utils: Created MLflow autologging run with ID '6bec833cf6b64caea4beb77fbb171218', which will track hyperparameters, performance metrics, model artifacts, and lineage i

              precision    recall  f1-score   support

           0       0.84      1.00      0.92       237
           1       1.00      1.00      1.00       118
           2       0.00      0.00      0.00         2
           3       1.00      1.00      1.00         9
           4       1.00      0.48      0.65        81

    accuracy                           0.90       447
   macro avg       0.77      0.70      0.71       447
weighted avg       0.91      0.90      0.89       447

Best parameters found:  {'learning_rate': 0.1, 'max_depth': 3, 'n_estimators': 50, 'subsample': 1.0}
Accuracy:  0.901565995525727


# Model Logging and Registering

In [10]:
# Imported here because no visible cell brings infer_signature into scope.
from mlflow.models import infer_signature

# NOTE(review): params, accuracy, best_clf, X_train_scaled and y_pred are not
# defined in any visible cell — presumably produced by the training cell; confirm.
option = "REGISTRY"

if option == "REGISTRY":
    # End whatever run is currently active so the run below starts cleanly.
    mlflow.end_run()
    with mlflow.start_run():
        # Log parameters and metrics
        mlflow.log_params(params)
        mlflow.log_metric("accuracy", accuracy)

        # Set a tag describing the run
        mlflow.set_tag("Training Info", "Model for Customer Segment Prediction")

        # Infer signature of the input and output of the model
        signature = infer_signature(X_train_scaled, y_pred)

        # Log the model and register it in the MLflow model registry under the
        # name "Customer_Segment_Prediction_Model". The inferred signature is
        # passed along; previously it was computed and then discarded, so the
        # logged model carried no input/output schema.
        mlflow.sklearn.log_model(
            sk_model=best_clf,
            artifact_path="model",
            signature=signature,
            registered_model_name="Customer_Segment_Prediction_Model"
        )


Registered model 'Customer_Segment_Prediction_Model' already exists. Creating a new version of this model...
2024/04/26 02:15:32 INFO mlflow.store.model_registry.abstract_store: Waiting up to 300 seconds for model version to finish creation. Model name: Customer_Segment_Prediction_Model, version 4
Created version '4' of model 'Customer_Segment_Prediction_Model'.


In [11]:
# Model retrieval and prediction check
# NOTE(review): this resolves whatever version is in the "Production" stage,
# which is not necessarily the version registered by this notebook — confirm.
# Stage-based URIs are also deprecated in recent MLflow releases in favour of
# aliases; verify against the installed MLflow version before migrating.
loaded_model = mlflow.pyfunc.load_model("models:/Customer_Segment_Prediction_Model/Production")
# Slice (rather than index) so the model receives a 2-D, single-row input.
input_data = X_test_scaled[0:1]
predictions = loaded_model.predict(input_data)

  latest = client.get_latest_versions(name, None if stage is None else [stage])


Downloading artifacts:   0%|          | 0/9 [00:00<?, ?it/s]

In [12]:
# Show input_data, the one-row slice of X_test_scaled sent to the loaded model.
print(input_data)

[[0.25793651 1.         0.33333333 0.26666667 0.29469305 0.5
  0.25       0.53125    0.24061662 0.5       ]]


In [13]:
# Show what the registry-loaded model returned for input_data.
print("Model predictions: ", predictions)

Model predictions:  [0]
