#### Customer segmentation - assigning segments to new customers

In [14]:
import os
import psycopg2
from sqlalchemy import create_engine
import numpy as np
import pandas as pd
from sklearn.preprocessing import StandardScaler
from sklearn_pandas import DataFrameMapper
from customer_segmentation_helper import *
import pickle
import warnings
warnings.filterwarnings("ignore")

#### Define parameters to load saved model, scaler and new customers

In [15]:
# ---------------------------------------------------------------------------
# File locations
# ---------------------------------------------------------------------------
# dataset with the customers
path_to_file_data = "Mall_Customers-New.csv"

# saved model
path_to_file_model = "model.pkl"

# saved mapper of the feature scaler
path_to_file_scaler_mapper = "scaler_mapper.pkl"

# ---------------------------------------------------------------------------
# Data source
# ---------------------------------------------------------------------------
# True  -> the dataset should be pulled from the database (PostgreSQL)
# False -> the dataset is not pulled from the database
from_database = False

## Credentials used to open the database connection with psycopg2 and sqlalchemy.
## They are read from environment variables; uncomment when working against the database.
#database_credentials = pd.DataFrame({
#    'host': [os.environ['HOST_WAREHOUSE']],
#    'database': [os.environ['NAME_WAREHOUSE']],
#    'user': [os.environ['USER_WAREHOUSE']],
#    'password': [os.environ['PASSWORD_WAREHOUSE']],
#    'engine': [os.environ['ENGINE_WAREHOUSE']]
#})

## SQL query that downloads the customers table.
## Select the whole table, not only the features listed below:
## the original table gets replaced when the results are saved.
#sql_query =  """
#               SELECT *
#               FROM customers
#             """

# ---------------------------------------------------------------------------
# Feature names
# ---------------------------------------------------------------------------
# names of the categorical features
cat_features = ["gender"]

# names of the numerical features
numerical_features = ["age", "annual_income_thousands", "spending_score"]


#### Load data, model and feature scaler

In [16]:
# Load everything the rest of the notebook works with; the helper returns four values
# that are unpacked into `data`, `segmented_data`, `model` and `scaler_mapper`.
#
# `from_database` is taken from the parameters cell instead of a hard-coded False,
# so that loading uses the same source setting as the `save_data` call at the end
# of the notebook. With the default `from_database = False` the behaviour is unchanged.
data, segmented_data, model, scaler_mapper = load_data_model(
    from_database=from_database,
    path_to_file_data=path_to_file_data,
    path_to_file_scaler_mapper=path_to_file_scaler_mapper,
    path_to_file_model=path_to_file_model,
)

#### Check whether there are new customers

In [17]:
# Nothing to do when `data` is empty: stop the run with an explanatory message.
if not len(data):
    raise SystemExit("There are no new customers to assign segment to!")

#### Preprocess data

In [19]:
# Dummy-encode the categorical features of the loaded data.
# The second return value (`cat_dummy`) is handed to the following steps as `dummy_cols`.
new_data, cat_dummy = dummy_encode(
    data,
    cat_cols=cat_features,
)
print('Data with extra dummy columns:')
new_data.head()

Data with extra dummy columns:


Unnamed: 0,customer_id,gender,age,annual_income_thousands,spending_score,segment,segment_origin,Female,Male
4,5,Female,31,17,40,,,1,0
5,6,Female,22,17,76,,,1,0
6,7,Female,35,18,6,,,1,0
7,8,Female,23,18,94,,,1,0
8,9,Male,64,19,3,,,0,1


In [20]:
# Run the preprocessing helper with the scaler mapper loaded earlier.
# The second return value (`numerical_scaled`) is handed to `assign_segments` as `scaled_cols`.
# NOTE(review): `new_data` is both input and output here, so re-running this cell on its own
# feeds already preprocessed data back in - re-run from the encoding cell instead.
print('Data with scaled features:')
new_data, numerical_scaled = preprocess_data(
    new_data,
    dummy_cols=cat_dummy,
    num_cols=numerical_features,
    scaler_mapper=scaler_mapper,
)

Data with scaled features:


Unnamed: 0,customer_id,gender,age,annual_income_thousands,spending_score,segment,segment_origin,Female,Male,age_scaled,annual_income_thousands_scaled,spending_score_scaled
4,5,Female,31,17,40,,,0.886405,-0.886405,-0.563369,-1.66266,-0.39598
5,6,Female,22,17,76,,,0.886405,-0.886405,-1.209269,-1.66266,1.001596
6,7,Female,35,18,6,,,0.886405,-0.886405,-0.276302,-1.624491,-1.715913
7,8,Female,23,18,94,,,0.886405,-0.886405,-1.137502,-1.624491,1.700384
8,9,Male,64,19,3,,,-1.128152,1.128152,1.804932,-1.586321,-1.832378


#### Assign segments to the new customers

In [21]:
# Let the loaded model assign a segment to every row, using the dummy and scaled columns
# produced by the two preprocessing steps.
# NOTE(review): `new_data` is rebound to the helper's result, so this cell is not meant to be
# re-run in isolation.
print('Data with assigned segments and segment origin:')
new_data = assign_segments(
    model=model,
    data=new_data,
    dummy_cols=cat_dummy,
    scaled_cols=numerical_scaled,
)

Data with assigned segments and segment origin:


Unnamed: 0,customer_id,gender,age,annual_income_thousands,spending_score,segment,segment_origin
4,5,Female,31,17,40,2,assigned
5,6,Female,22,17,76,2,assigned
6,7,Female,35,18,6,1,assigned
7,8,Female,23,18,94,2,assigned
8,9,Male,64,19,3,0,assigned


#### Save data

In [22]:
# Hand the newly segmented customers and the previously loaded `segmented_data`
# to the saving helper.
# NOTE(review): `path_to_file_data` is the same path the data was loaded from, so this
# presumably overwrites the input file - confirm this is intended.
save_data(
    from_database=from_database,
    path_to_file_data=path_to_file_data,
    data=new_data,
    segmented_data=segmented_data,
)

Segments were assigned and saved
