In [26]:
# Import libraries
import numpy as np
import pandas as pd

import joblib
import matplotlib.pyplot as plt

In [6]:
# load dataset

def load_data(file_name, sheet=0):
    return pd.read_excel(file_name, sheet_name=sheet)

In [7]:
# get model features

def get_feat():

    state = input('State: ')
    property_valuation = input('Property Valuation: ')
    gender = input('Gender')
    owns_car = input('Own a car (Yes/No): ')
    past_3_years_bike_related_purchases = input('past_3_years_bike_related_purchases: ')
    job_industry_category = input('job_industry_category: ')
    wealth_segment = input('Wealth segment: ')
    tenure = int(input('Tenure: '))
    
    dataframe = pd.DataFrame({'state':state, 'property_valuation':property_valuation, 'owns_car':owns_car, \
                            'past_3_years_bike_related_purchases':past_3_years_bike_related_purchases, \
                             'job_industry_category':job_industry_category, 'wealth_segment':wealth_segment, \
                             'tenure':tenure, 'gender':gender}, index=[0])
    
    return dataframe

In [8]:
# df = get_feat()

In [9]:
# perform label encoder

def encode_cat(data:pd.DataFrame):
    data['gender'].replace({'Female' : 1, 'Male' : 2}, inplace=True)
    data['job_industry_category'].replace({'Entertainment' : 1, 'Telecommunications' : 2, 'IT' : 3, 'Manufacturing' : 4, 'Financial Services' : 5, 'Retail' : 6, 'Health' : 7, 'Property' : 8, 'Argiculture' : 9}, inplace=True)
    data['state'].replace({'NSW' : 1, 'VIC' : 2, 'QLD' : 3}, inplace=True)
    data['wealth_segment'].replace({'Affluent Customer' : 1, 'Mass Customer' : 2, 'High Net Worth' : 3}, inplace=True)
    data['owns_car'].replace({'Yes' : 1, 'No' : 2}, inplace=True)
    
    return data

In [10]:
# fill missing values

def fill_missing_value(data:pd.DataFrame):
    fill_with_vals = {'state':1, 'property_valuation':7.52, 'gender':1, 'past_3_years_bike_related_purchases':48.81, \
               'wealth_segment': 2, 'owns_car':1, 'tenure':10.68, 'job_industry_category':4}
    features = data.columns
    for feat in features:
        if data[feat].values=='':
            data[feat] = fill_with_vals[feat]
    return data

In [21]:
def scale(data):
    scaler = joblib.load('scaler.pickle')
    data = scaler.transform(data)
    
    return data

In [24]:
def lasso_predict(data):
    model = joblib.load('Lasso_model.pkl')
    value = model.predict(data)
    
    return f'The estimated profit for this customer is: {value}'

In [13]:
data = get_feat()

State:  VIC
Property Valuation:  75
Gender Female
Own a car (Yes/No):  Yes
past_3_years_bike_related_purchases:  45
job_industry_category:  IT
Wealth segment:  Mass Customer
Tenure:  5


In [14]:
data

Unnamed: 0,state,property_valuation,owns_car,past_3_years_bike_related_purchases,job_industry_category,wealth_segment,tenure,gender
0,VIC,75,Yes,45,IT,Mass Customer,5,Female


In [15]:
encoded_data = encode_cat(data)
encoded_data

Unnamed: 0,state,property_valuation,owns_car,past_3_years_bike_related_purchases,job_industry_category,wealth_segment,tenure,gender
0,2,75,1,45,3,2,5,1


In [22]:
scaled_data = scale(encoded_data)
scaled_data

array([[ 0.40123378, 23.89413673, -0.94891267, 87.02259091, -1.60214831,
        -1.73985382,  4.22693897, -1.7255905 ]])

In [25]:
lasso_predict(scaled_data)

'The estimated profit for this customer is: [-1202.66569868]'