In [1]:
import os
# Move the working directory two levels up.
# NOTE(review): presumably this lands on the project root so that the relative
# 'data/...' paths and the `scr` package used below resolve -- confirm.
# The target is relative to the current working directory, so re-running this
# cell in the same kernel climbs two more levels; restart the kernel first.
os.chdir('../../')

In [2]:
import numpy as np
import pandas as pd

from scr.util import *
from scr.engineering import *

In [3]:
# Load the train / test feature tables (train is read first, then test).
# NOTE(review): the rendered preview further down shows an 'Unnamed: 0' column,
# which looks like a row index saved by an earlier step -- confirm whether it
# should be kept as a column.
df_train, df_test = map(
    pd.read_csv,
    (
        'data/feature_engineered/train_feature_ok.csv',
        'data/feature_engineered/test_feature_ok.csv',
    ),
)

In [9]:
# Datasets before the new features are created

# display(df_train.head())
# display(df_test.head())

Unnamed: 0,id,Age,TypeofContact,CityTier,DurationOfPitch,Occupation,Gender,NumberOfPersonVisiting,NumberOfFollowups,ProductPitched,...,NumberOfTrips,Passport,PitchSatisfactionScore,Designation,MonthlyIncome,ProdTaken,Marry,Car,Child,AgeGroup
0,0,50,Self Enquiry,2,6.803505,Large Business,male,1.0,4.0,Basic,...,5.0,1,4,Executive,12.444719,1,Single,No Car,0_child,50s
1,1,56,Company Invited,1,6.734592,Salaried,male,1.0,4.0,Standard,...,2.0,1,4,Senior Manager,12.910348,0,Divorced,Has Car,0_child,50s
2,2,35,Self Enquiry,1,6.398595,Large Business,female,1.0,3.0,Basic,...,4.0,0,4,Executive,12.535901,1,Married,No Car,0_child,30s
3,3,37,Self Enquiry,2,6.985642,Small Business,female,1.0,3.0,Standard,...,1.0,0,5,Senior Manager,12.697122,0,Divorced,Has Car,0_child,30s
4,4,48,Company Invited,3,6.928538,Small Business,female,1.0,3.0,Basic,...,4.0,0,4,Executive,12.462403,1,Single,Has Car,0_child,40s


Unnamed: 0,id,Age,TypeofContact,CityTier,DurationOfPitch,Occupation,Gender,NumberOfPersonVisiting,NumberOfFollowups,ProductPitched,PreferredPropertyStar,NumberOfTrips,Passport,PitchSatisfactionScore,Designation,MonthlyIncome,Marry,Car,Child,AgeGroup
0,3489,48,Self Enquiry,2,6.660575,Small Business,male,1.0,4.0,Super Deluxe,3.0,7.0,0,3,AVP,13.116247,Married,Has Car,0_child,40s
1,3490,30,Self Enquiry,2,6.580639,Small Business,female,1.0,4.0,Standard,3.0,4.0,1,3,Senior Manager,12.611541,Married,No Car,0_child,30s
2,3491,25,Self Enquiry,1,6.293419,Salaried,female,1.0,4.0,Basic,3.0,1.0,0,3,Executive,12.468441,Divorced,No Car,0_child,20s
3,3492,21,Company Invited,2,6.042633,Salaried,male,1.0,4.0,Basic,4.0,1.0,0,3,Senior Manager,12.46796,Divorced,Has Car,0_child,20s
4,3493,41,Company Invited,1,6.042633,Salaried,male,1.0,4.0,Basic,3.0,1.0,0,4,Executive,12.501838,Single,Has Car,0_child,40s


# 特徴量作成

## TypeofContactNULL
TypeofContact が欠損値であることを、モデルに明示的に学習させるための特徴量

In [13]:
# Derive the TypeofContactNULL column for both frames by applying
# make_TypeOfContactNULL to every TypeofContact value.
# NOTE(review): the helper is defined outside this notebook; presumably it
# flags missing values -- confirm against its definition.
for _frame in (df_train, df_test):
    _frame['TypeofContactNULL'] = _frame['TypeofContact'].apply(make_TypeOfContactNULL)

## Motivation
顧客の旅行に対する意欲を表す特徴量

In [18]:
# Motivation = NumberOfPersonVisiting * NumberOfFollowups + 10 * Passport,
# computed the same way for the train and the test frame.
for _frame in (df_train, df_test):
    _frame['Motivation'] = (
        _frame['NumberOfPersonVisiting'] * _frame['NumberOfFollowups']
        + (_frame['Passport'] * 10)
    )

## EconomicPower
顧客の経済力を表す特徴量

In [21]:
# EconomicPower = MonthlyIncome * (4 - CityTier) * gender factor,
# where the gender factor is make_motivation_gender applied to Gender.
# NOTE(review): make_motivation_gender is defined outside this notebook;
# its exact mapping is not visible here.
for _frame in (df_train, df_test):
    _frame['EconomicPower'] = (
        _frame['MonthlyIncome']
        * (4 - _frame['CityTier'])
        * _frame['Gender'].apply(make_motivation_gender)
    )

## Child01
顧客に子どもがいるかどうかを表す特徴量

In [6]:
# Derive the Child01 column for both frames by applying make_child01 to Child.
# NOTE(review): make_child01 is defined outside this notebook; presumably it
# returns a has-children indicator -- confirm.
for _frame in (df_train, df_test):
    _frame['Child01'] = _frame['Child'].apply(make_child01)

## TripEasier
旅行の行きやすさを表す特徴量

In [22]:
# TripEasier = (5 - NumberOfPersonVisiting)
#              * factor(ProductPitched) * factor(Marry) * factor(Child),
# with each factor produced by the matching make_TripEasier_* helper.
# NOTE(review): the helpers are defined outside this notebook; their
# mappings are not visible here.
for _frame in (df_train, df_test):
    _frame['TripEasier'] = (
        (5 - _frame['NumberOfPersonVisiting'])
        * _frame['ProductPitched'].apply(make_TripEasier_ProductPitched)
        * _frame['Marry'].apply(make_TripEasier_Marry)
        * _frame['Child'].apply(make_TripEasier_Child)
    )

## SalesPerformance
営業担当者の単位時間当たりのパフォーマンス

In [13]:
# SalesPerformance = NumberOfFollowups * PitchSatisfactionScore / DurationOfPitch.
#
# Each frame is computed from its own columns only. The test-set line used to
# multiply by df_train['NumberOfFollowups']; pandas aligns Series on the row
# index, so test rows were combined with follow-up counts of unrelated
# training rows. Looping over the frames keeps the formula in one place so the
# two sides cannot drift apart again.
for _frame in (df_train, df_test):
    _frame['SalesPerformance'] = (
        _frame['NumberOfFollowups'] * _frame['PitchSatisfactionScore']
        / _frame['DurationOfPitch']
    )

## LivingCost
生活コストで調整した収入を表す特徴量（MonthlyIncome を、都市ランク・子どもの数・婚姻状況・車の有無から求めた生活コスト係数で割った値）

In [5]:
def make_LivingCost_CityTier(citytier):
    """Return the living-cost multiplier for a city tier.

    Args:
        citytier: City tier value; compared with ``==`` against 1 and 2.

    Returns:
        1.5 for tier 1, 1.3 for tier 2, and 1 for any other value.
    """
    for tier, multiplier in ((1, 1.5), (2, 1.3)):
        if citytier == tier:
            return multiplier
    return 1

In [6]:
def make_LivingCost_Child(child):
    """Return the living-cost surcharge for a ``Child`` category label.

    Args:
        child: Category label such as ``'0_child'``, ``'1_child'`` or
            ``'2_child'``.

    Returns:
        0 for ``'0_child'``, 0.1 for ``'1_child'``, 0.2 for ``'2_child'``,
        and 0.3 for every other value (including missing values).
    """
    known_surcharges = (('0_child', 0), ('1_child', 0.1), ('2_child', 0.2))
    for label, surcharge in known_surcharges:
        if child == label:
            return surcharge
    return 0.3

In [7]:
def make_LivingCost_Marry(marry):
    """Return the living-cost surcharge for a marital status.

    Args:
        marry: Marital status label.

    Returns:
        0.1 when the label equals ``'Married'``, otherwise 0.
    """
    return 0.1 if marry == 'Married' else 0

In [8]:
def make_LivingCost_Car(car):
    """Return the living-cost surcharge for car ownership.

    Args:
        car: Car ownership label.

    Returns:
        0.05 when the label equals ``'Has Car'``, otherwise 0.
    """
    return 0.05 if car == 'Has Car' else 0

In [9]:
# LivingCost = MonthlyIncome / (city-tier multiplier * (1 + child + marriage + car surcharges)).
# The multiplier and surcharges come from the make_LivingCost_* helpers defined above.
for _frame in (df_train, df_test):
    _frame['LivingCost'] = _frame['MonthlyIncome'] / (
        _frame['CityTier'].apply(make_LivingCost_CityTier)
        * (
            1
            + _frame['Child'].apply(make_LivingCost_Child)
            + _frame['Marry'].apply(make_LivingCost_Marry)
            + _frame['Car'].apply(make_LivingCost_Car)
        )
    )

## EconomicStability
経済安定性指標

In [17]:
def make_EconomicStability_Occupation(occupation):
    """Return the economic-stability weight for an occupation label.

    Args:
        occupation: Occupation label.

    Returns:
        1.2 for ``'Large Business'``, 1.1 for ``'Salaried'``, and 0.9 for
        every other value.
    """
    for label, weight in (('Large Business', 1.2), ('Salaried', 1.1)):
        if occupation == label:
            return weight
    return 0.9

In [18]:
def make_EconomicStability_Disignation(designation):
    """Return the economic-stability weight for a job designation.

    The misspelled function name is kept as-is because the cells that build
    the EconomicStability column call it under this name.

    Args:
        designation: Designation label.

    Returns:
        1.5 for ``'VP'``, 1.4 for ``'AVP'``, 1.3 for ``'Senior Manager'``,
        1.2 for ``'Manager'``, and 1.1 for every other value.
    """
    ranked_weights = (
        ('VP', 1.5),
        ('AVP', 1.4),
        ('Senior Manager', 1.3),
        ('Manager', 1.2),
    )
    for title, weight in ranked_weights:
        if designation == title:
            return weight
    return 1.1

In [20]:
# EconomicStability = MonthlyIncome * occupation weight * designation weight,
# using the make_EconomicStability_* helpers defined above.
for _frame in (df_train, df_test):
    _frame['EconomicStability'] = (
        _frame['MonthlyIncome']
        * _frame['Occupation'].apply(make_EconomicStability_Occupation)
        * _frame['Designation'].apply(make_EconomicStability_Disignation)
    )

# CSV出力

In [22]:
# Remove the 'test' column from the training frame when it is present;
# errors='ignore' turns the drop into a no-op when the column is absent.
df_train = df_train.drop(columns=['test'], errors='ignore')

In [23]:
# Write both frames back, without the row index, to the same CSV files
# they were loaded from (the files are overwritten in place).
for _frame, _csv_path in (
    (df_train, 'data/feature_engineered/train_feature_ok.csv'),
    (df_test, 'data/feature_engineered/test_feature_ok.csv'),
):
    _frame.to_csv(_csv_path, index=False)