# Short-Term Customer Learning

In [13]:
import sys
sys.path.append('./learn')

from metric_auc import auc
from learn_ann import get_learner_params_all
from tensorflow import keras
import pandas as pd
import numpy as np
from sklearn.model_selection import StratifiedKFold


# Fix NumPy's global RNG up front so later NumPy-based randomness is repeatable.
seed = 7
np.random.seed(seed)

# Folder that holds the prepared CSV files (relative to the notebook).
DIR = './organized_dataset/'

# Read the feature-engineered table; the first row of the file is the header.
df = pd.read_csv(
    DIR + 'feature_engineered_test.csv',
    header=0,
)

print('Data loading Done')

Data loading Done


In [8]:
# Per-class loss weights handed to Keras `fit(class_weight=...)`:
# label 0 keeps weight 1, label 1 is weighted 40x.
class_weights = {
    0: 1,
    1: 40.0,
}
print(class_weights)

{0: 1, 1: 40.0}


In [14]:
# Customers with fewer than SHORT_TERM rows are treated as "short-term".
SHORT_TERM = 6

print('Term: ',SHORT_TERM)

# Number of rows per customer (CID), used as the contract length.
# `size()` counts rows directly instead of counting non-null values of
# whichever column happens to come first after the group-by.
cid_lengths = df.groupby('CID').size()

# Series (indexed by CID) of the customers whose contract length is
# strictly below SHORT_TERM.
short_cids = cid_lengths[cid_lengths < SHORT_TERM]
print('# of short-term CID: ',short_cids.shape[0])

short_cid_set = set(short_cids.index.tolist())

# Pull every row belonging to a short-term customer in one vectorized pass.
# This replaces the per-CID `DataFrame.append` loop, which copied the whole
# frame on each iteration (quadratic) and no longer exists in pandas >= 2.0.
# The boolean mask keeps the rows in their original file order (deterministic,
# unlike set iteration order) and preserves the column dtypes of `df`.
# `reset_index(drop=True)` gives the fresh 0..n-1 index that
# `ignore_index=True` used to produce.
short_df = df.loc[df['CID'].isin(short_cid_set)].reset_index(drop=True)

print('dataset: ',short_df.shape)

Term:  6
# of short-term CID:  47
dataset:  (131, 101)


## ANN

In [15]:
# Cross-validated evaluation of a small feed-forward network on the
# short-term customer subset. Harness adapted from:
# https://machinelearningmastery.com/evaluate-performance-deep-learning-models-keras/

# Features: every column from position 3 up to (excluding) the last one.
# Label: the last column, cast to int.
X = short_df.iloc[:,3:-1].values
y = short_df.iloc[:,-1].values.astype(int)

# Number of input features; also used to size the hidden layers.
M = X.shape[1]
print(X.shape)
print(y.shape)

print(y)

# define 10-fold cross validation test harness
# NOTE(review): if the minority class has fewer samples than n_splits, some
# test folds contain no positive example and their AUC is not meaningful —
# consider lowering n_splits for very small datasets.
kfold = StratifiedKFold(n_splits=10, shuffle=True, random_state=seed)
cvscores = []

for train, test in kfold.split(X,y):
    # Build a fresh model for every fold so no weights carry over between folds.
    model = keras.Sequential()
    model.add(keras.layers.BatchNormalization())
    model.add(keras.layers.Dense(M, input_dim=M, kernel_initializer='normal', activation='sigmoid'))
    model.add(keras.layers.Dense(M//2, kernel_initializer='normal', activation='relu'))
    model.add(keras.layers.Dense(1, kernel_initializer='normal', activation='sigmoid'))

    model.compile(optimizer='adam', loss='binary_crossentropy', metrics=[auc])

    # class_weight up-weights the positive class during training.
    model.fit(X[train], y[train], epochs=100, class_weight=class_weights, verbose=0)

    # evaluate() returns [loss, metric]; index 1 is the compiled metric.
    scores = model.evaluate(X[test],y[test])
    # The original format string "%.5" had no conversion type and raised
    # "ValueError: incomplete format"; use %.5f like the summary line below.
    print("%s: %.5f" % (model.metrics_names[1], scores[1]))
    cvscores.append(scores[1])

# Mean and standard deviation of the per-fold metric.
print("%.5f (+/- %.5f)" % (np.mean(cvscores), np.std(cvscores)))

print('Done')


(131, 97)
(131,)
[0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
 1 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0]




auc: 0.8783
auc: 0.8796
auc: 0.6239
auc: 0.7649
auc: 0.8975
auc: 0.9358
auc: 0.8144
auc: 0.5000
auc: 0.8869
auc: 0.9556
0.8137 (+/- 0.1388)
Done
