In [1]:
# Import libraries
import numpy as np
import pandas as pd

import joblib
import matplotlib.pyplot as plt

In [2]:
# load dataset

def load_data(file_name, sheet=0):
    return pd.read_excel(file_name, sheet_name=sheet)

In [3]:
# get model features

def get_feat():

    state = input('State: ')
    property_valuation = input('Property Valuation: ')
    gender = input('Gender')
    owns_car = input('Own a car (Yes/No): ')
    past_3_years_bike_related_purchases = input('past_3_years_bike_related_purchases: ')
    job_industry_category = input('job_industry_category: ')
    wealth_segment = input('Wealth segment: ')
    tenure = int(input('Tenure: '))
    
    dataframe = pd.DataFrame({'state':state, 'property_valuation':property_valuation, 'owns_car':owns_car, \
                            'past_3_years_bike_related_purchases':past_3_years_bike_related_purchases, \
                             'job_industry_category':job_industry_category, 'wealth_segment':wealth_segment, \
                             'tenure':tenure, 'gender':gender}, index=[0])
    
    return dataframe

In [4]:
# perform label encoder

def encode_cat(data:pd.DataFrame):
    data['gender'].replace({'Female' : 1, 'Male' : 2}, inplace=True)
    data['job_industry_category'].replace({'Entertainment' : 1, 'Telecommunications' : 2, 'IT' : 3, 'Manufacturing' : 4, 'Financial Services' : 5, 'Retail' : 6, 'Health' : 7, 'Property' : 8, 'Argiculture' : 9}, inplace=True)
    data['state'].replace({'NSW' : 1, 'VIC' : 2, 'QLD' : 3}, inplace=True)
    data['wealth_segment'].replace({'Affluent Customer' : 1, 'Mass Customer' : 2, 'High Net Worth' : 3}, inplace=True)
    data['owns_car'].replace({'Yes' : 1, 'No' : 2}, inplace=True)
    
    return data

In [69]:
# fill missing values

def fill_missing_value(data:pd.DataFrame):
    fill_with_vals = {'state':1, 'property_valuation':7.52, 'gender':1, 'past_3_years_bike_related_purchases':48.81, \
               'wealth_segment': 2, 'owns_car':1, 'tenure':10.68, 'job_industry_category':4}
    features = data.columns
    for feat in features:
        data[feat].fillna(fill_with_vals[feat], inplace=True)
    return data

In [7]:
def scale(data):
    scaler = joblib.load('scaler.pickle')
    data = scaler.transform(data)
    
    return data

In [8]:
def lasso_predict(data):
    model = joblib.load('Lasso_model.pkl')
    value = model.predict(data)
    
    return f'The estimated profit for this customer is: {value}'

In [9]:
def get_price():
    data = get_feat()
    data = encode_cat(data)
    data = fill_missing_value(data)
    data = scale(data)
    price = lasso_predict(data)
    
    return price

In [10]:
get_price()

State:  QLD
Property Valuation:  6
Gender Male
Own a car (Yes/No):  Yes
past_3_years_bike_related_purchases:  86
job_industry_category:  Manufacturing
Wealth segment:  Mass Customer
Tenure:  14


  


'The estimated profit for this customer is: [-5469.45474242]'

In [14]:
data = get_feat()

In [15]:
data

Unnamed: 0,state,property_valuation,owns_car,past_3_years_bike_related_purchases,job_industry_category,wealth_segment,tenure,gender
0,QLD,,Yes,45,Agriculture,Mass Customer,3,Female


In [9]:
data2 = fill_missing_value(data)

NameError: name 'data' is not defined

In [17]:
data2

Unnamed: 0,state,property_valuation,owns_car,past_3_years_bike_related_purchases,job_industry_category,wealth_segment,tenure,gender
0,QLD,7.52,Yes,45,Agriculture,Mass Customer,3,Female


In [81]:
data = pd.read_csv('training_data.csv')

In [70]:
final_data = data[['state', 'property_valuation', 'gender', 'owns_car', 'past_3_years_bike_related_purchases', 'job_industry_category',\
                  'wealth_segment', 'tenure']]

In [73]:
final_data.isnull().sum()

state                                  0
property_valuation                     0
gender                                 0
owns_car                               0
past_3_years_bike_related_purchases    0
job_industry_category                  0
wealth_segment                         0
tenure                                 0
dtype: int64

In [72]:
final_data = fill_missing_value(final_data)

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self._update_inplace(new_data)


In [63]:
final_data.head()

Unnamed: 0,state,property_valuation,gender,owns_car,past_3_years_bike_related_purchases,job_industry_category,wealth_segment,tenure
0,New South Wales,10.0,Female,Yes,93.0,Health,Mass Customer,11.0
1,New South Wales,10.0,Male,Yes,81.0,Financial Services,Mass Customer,16.0
2,QLD,9.0,Male,Yes,61.0,Property,Mass Customer,15.0
3,New South Wales,4.0,Male,No,33.0,IT,Mass Customer,7.0
4,VIC,9.0,Female,Yes,56.0,,Affluent Customer,8.0


In [57]:
final_data['state'].values==np.nan

array([False, False, False, ..., False, False, False])

In [77]:
!pwd

'pwd' is not recognized as an internal or external command,
operable program or batch file.


In [82]:
data = pd.read_csv('packages\\regression\\datasets\\train.csv')

In [84]:
data.state.unique()

array(['NSW', 'VIC', 'QLD', 'New South Wales', 'Victoria', nan],
      dtype=object)

In [85]:
data.head()

Unnamed: 0,customer_id,address,postcode,state,country,property_valuation,first_name,last_name,gender,past_3_years_bike_related_purchases,DOB,job_title,job_industry_category,wealth_segment,deceased_indicator,owns_car,tenure,profit
0,222,04139 Delladonna Place,2519.0,NSW,Australia,7.0,Nev,Larive,Male,89.0,1978-09-03,Clinical Specialist,Health,Mass Customer,N,No,16.0,1428.06
1,3283,4380 Rusk Terrace,2086.0,NSW,Australia,11.0,Rowe,Barbary,Female,46.0,1979-01-15,Administrative Officer,Property,Mass Customer,N,Yes,6.0,5058.9
2,1333,0 Fulton Drive,3806.0,VIC,Australia,8.0,Phillipp,McMonnies,Male,4.0,1973-08-23,Compensation Analyst,Financial Services,High Net Worth,N,No,9.0,1924.18
3,2688,8658 Maple Wood Plaza,3020.0,VIC,Australia,7.0,Shelden,Lorent,Male,53.0,1977-05-18,Database Administrator III,Property,Mass Customer,N,No,13.0,1017.75
4,1160,92867 Lukken Lane,3142.0,VIC,Australia,9.0,Trisha,Nisuis,Female,98.0,1965-04-08,Speech Pathologist,IT,Affluent Customer,N,No,10.0,684.52
