In [1]:
import keras
import tensorflow as tf
import pandas as pd
import numpy as np
import csv
import math
from sklearn import preprocessing
from sklearn.model_selection import StratifiedKFold
from sklearn.ensemble import RandomForestClassifier
from sklearn import metrics
import random
from sklearn.ensemble import IsolationForest
from xgboost import XGBClassifier

Using TensorFlow backend.


In [2]:
train_path = "train_processed.csv"
train = pd.read_csv(train_path ,sep=",", index_col=None, dtype = 'float32' )

In [3]:
test_path = "test_processed.csv"
test = pd.read_csv(test_path ,sep=",", index_col=None, dtype = 'float32' )

In [4]:
train.shape

(79853, 13)

In [5]:
def normalize_std(x):
    scaler = preprocessing.StandardScaler()
    scaler.fit(x)
    return scaler.transform(x)


def get_normed(train):
    
    train_cont = train[['perc_premium_paid_by_cash_credit', 'age_in_days', 'Income', 'Count_3-6_months_late', 
                   'Count_6-12_months_late', 'Count_more_than_12_months_late', 'application_underwriting_score',
                   'no_of_premiums_paid', 'premium']]
    
    train_dis = train[['sourcing_channel', 'residence_area_type']]
    
    
    train_cont_norm = normalize_std(train_cont)
    train_cont_norm = pd.DataFrame(train_cont_norm)
    
    train_cont_norm.reset_index(drop=True, inplace=True)
    train_dis.reset_index(drop=True, inplace=True)

    train_norm = pd.concat([train_cont_norm,train_dis], axis=1, ignore_index=True)
    
    return train_norm

In [6]:
# MAX_DEPTH = 5
# N_ESTIMATORS = 50
# MIN_SAMPLES_LEAF = 60

def random_forest(data_train, data_test,label_train, MAX_DEPTH,N_ESTIMATORS, MIN_SAMPLES_LEAF, MIN_SAMPLES_SPLIT,MAX_LEAF_NODES, MIN_WEIGHT_FRACTION_LEAF):
    
    df_data_train = pd.DataFrame(data_train)

    df_data_test = pd.DataFrame(data_test)

#     print ("data train shape is : [{}]".format(df_data_train.shape))
#     print ("data test shape is : [{}]".format(df_data_test.shape))

    regr = RandomForestClassifier(max_depth=MAX_DEPTH,random_state=6,n_estimators=N_ESTIMATORS,min_samples_leaf=MIN_SAMPLES_LEAF, min_samples_split=MIN_SAMPLES_SPLIT, max_leaf_nodes=MAX_LEAF_NODES, min_weight_fraction_leaf=MIN_WEIGHT_FRACTION_LEAF, n_jobs=-1) 
    regr.fit(df_data_train, label_train)
    pred_test = regr.predict_proba(df_data_test)
    pred_test = np.delete(pred_test,0,1) 


    return pred_test

In [7]:
def xgb(data_train, data_test,label_train, MAX_DEPTH,N_ESTIMATORS, MIN_CHILD_WEIGHT, LEARING_RATE):
    
    model = XGBClassifier( max_depth = MAX_DEPTH, n_estimators=N_ESTIMATORS, min_child_weight=MIN_CHILD_WEIGHT, learning_rate=LEARNING_RATE, random_state=6)
    model.fit(train_norm, train_label.ravel())
    
    pred_test = model.predict_proba(data_test)
    pred_test = np.delete(pred_test,0,1) 
    
    return pred_test

In [8]:
test_renewal = np.ones((test.shape[0], 1)) * 2
test_renewal = pd.DataFrame(test_renewal)
test_renewal.rename(columns = {0 : 'renewal'}, inplace = True)

new_test = pd.concat([test, test_renewal], axis = 1)

In [9]:
combined_data = pd.concat([train, new_test], axis = 0)

In [10]:
combined_data.mean()

id                                   57039.000000
perc_premium_paid_by_cash_credit         0.314339
age_in_days                          18839.952260
Income                              207039.010405
Count_3-6_months_late                    0.245689
Count_6-12_months_late                   0.078948
Count_more_than_12_months_late           0.059439
application_underwriting_score          99.065675
no_of_premiums_paid                     10.871850
sourcing_channel                         1.820744
residence_area_type                      1.603023
premium                              10897.570939
renewal                                  1.256195
dtype: float64

In [11]:
# impute missing data

combined_data = combined_data.fillna(combined_data.mean())

In [12]:
combined_data_norenewal = combined_data.iloc[:, :12].copy()

In [13]:
fake_renewal =  combined_data.iloc[:, 12].copy()

In [14]:
combined_data_norenewal.head(5)

Unnamed: 0,id,perc_premium_paid_by_cash_credit,age_in_days,Income,Count_3-6_months_late,Count_6-12_months_late,Count_more_than_12_months_late,application_underwriting_score,no_of_premiums_paid,sourcing_channel,residence_area_type,premium
0,110936.0,0.429,12058.0,355060.0,0.0,0.0,0.0,99.019997,13.0,3.0,2.0,3300.0
1,41492.0,0.01,21546.0,315150.0,0.0,0.0,0.0,99.889999,21.0,1.0,2.0,18000.0
2,31300.0,0.917,17531.0,84140.0,2.0,3.0,1.0,98.690002,7.0,3.0,1.0,3300.0
3,19415.0,0.049,15341.0,250510.0,0.0,0.0,0.0,99.57,9.0,1.0,2.0,9600.0
4,99379.0,0.052,31400.0,198680.0,0.0,0.0,0.0,99.870003,12.0,2.0,2.0,9600.0


In [15]:
combined_data_norenewal.shape

(114077, 12)

In [16]:
combined_data_norenewal_norm = get_normed(combined_data_norenewal)

In [17]:
combined_data_norenewal_norm.reset_index(drop=True, inplace=True)
fake_renewal.reset_index(drop=True, inplace=True)

combined_data_renewal_norm = pd.concat([combined_data_norenewal_norm, fake_renewal] , axis = 1, ignore_index= True)

In [18]:
combined_data_renewal_norm.rename(columns = {11 :'renewal'}, inplace = True)

train_pd = combined_data_renewal_norm.drop(combined_data_renewal_norm[(combined_data_renewal_norm.renewal == 2.0)].index)

In [19]:
test_pd = combined_data_renewal_norm.drop(combined_data_renewal_norm[(combined_data_renewal_norm.renewal != 2.0)].index)

In [20]:
train_norm = train_pd.iloc[:, :11].copy()
train_label = train_pd.iloc[:, 11].copy()

train_norm = np.array(train_norm)
train_label = np.array(train_label)

In [21]:
test_norm = np.array(test_pd.iloc[:, :11].copy())

In [25]:
#tune random forest using k-fold cross-validation

MAX_DEPTH = [10, 80, 90, 100, 200, 500]
N_ESTIMATORS = [100, 200, 300, 600, 1000]
MIN_SAMPLES_LEAF = [80,60,20]
MIN_SAMPLES_SPLIT = [2, 8, 12,22]
MAX_LEAF_NODES = [None, 40, 45, 50, 55, 60, 70]
MIN_WEIGHT_FRACTION_LEAF = [0.]

configsTried = []
iteration = 0

while iteration < 2520:

    max_depth = random.choice(MAX_DEPTH)
    n_estimators = random.choice(N_ESTIMATORS)
    min_samples_leaf = random.choice(MIN_SAMPLES_LEAF)
    min_samples_split = random.choice(MIN_SAMPLES_SPLIT)
    max_leaf_nodes = random.choice(MAX_LEAF_NODES)
    min_weight_fraction_leaf = random.choice(MIN_WEIGHT_FRACTION_LEAF)
    
    cfg = {
        "max_depth" : max_depth,
        "n_estimators": n_estimators,
        "min_samples_leaf": min_samples_leaf,
        'min_samples_split':min_samples_split,
        'max_leaf_nodes': max_leaf_nodes,
        'min_weight_fraction_leaf': min_weight_fraction_leaf}
    
    if cfg in configsTried:
        continue

    print "iteration : {}".format(iteration)
    print cfg

    splits = 5

    skf = StratifiedKFold(n_splits=splits, shuffle=True,  random_state=0)

    roc_score = 0

    train_norm = np.array(train_norm)
    train_label = np.array(train_label)

    for train_index, test_index in skf.split(train_norm, train_label):
        data_train , data_test = train_norm[train_index], train_norm[test_index]
        label_train , label_test = train_label[train_index], train_label[test_index]

        pred_test = random_forest(data_train, data_test, label_train.ravel(), max_depth,n_estimators,min_samples_leaf,min_samples_split, max_leaf_nodes, min_weight_fraction_leaf)
        roc_score += metrics.roc_auc_score(label_test, pred_test)

    configsTried.append(cfg)
    iteration += 1
    print "avg roc: {}".format(roc_score/splits)
    print "\n"

iteration : 0
{'max_leaf_nodes': None, 'min_samples_leaf': 80, 'n_estimators': 600, 'min_weight_fraction_leaf': 0.0, 'min_samples_split': 2, 'max_depth': 90}
avg roc: 0.84273787609


iteration : 1
{'max_leaf_nodes': 40, 'min_samples_leaf': 80, 'n_estimators': 600, 'min_weight_fraction_leaf': 0.0, 'min_samples_split': 22, 'max_depth': 200}
avg roc: 0.840708095618


iteration : 2
{'max_leaf_nodes': None, 'min_samples_leaf': 80, 'n_estimators': 100, 'min_weight_fraction_leaf': 0.0, 'min_samples_split': 2, 'max_depth': 100}
avg roc: 0.842298394774


iteration : 3
{'max_leaf_nodes': 70, 'min_samples_leaf': 60, 'n_estimators': 200, 'min_weight_fraction_leaf': 0.0, 'min_samples_split': 8, 'max_depth': 200}
avg roc: 0.84188640587


iteration : 4
{'max_leaf_nodes': 50, 'min_samples_leaf': 20, 'n_estimators': 100, 'min_weight_fraction_leaf': 0.0, 'min_samples_split': 22, 'max_depth': 90}
avg roc: 0.839759284453


iteration : 5
{'max_leaf_nodes': 40, 'min_samples_leaf': 20, 'n_estimators': 100, '

avg roc: 0.839369909439


iteration : 45
{'max_leaf_nodes': 45, 'min_samples_leaf': 80, 'n_estimators': 200, 'min_weight_fraction_leaf': 0.0, 'min_samples_split': 12, 'max_depth': 90}
avg roc: 0.840781083508


iteration : 46
{'max_leaf_nodes': None, 'min_samples_leaf': 20, 'n_estimators': 1000, 'min_weight_fraction_leaf': 0.0, 'min_samples_split': 22, 'max_depth': 500}
avg roc: 0.840990261412


iteration : 47
{'max_leaf_nodes': None, 'min_samples_leaf': 20, 'n_estimators': 300, 'min_weight_fraction_leaf': 0.0, 'min_samples_split': 2, 'max_depth': 100}
avg roc: 0.840726758834


iteration : 48
{'max_leaf_nodes': None, 'min_samples_leaf': 20, 'n_estimators': 1000, 'min_weight_fraction_leaf': 0.0, 'min_samples_split': 2, 'max_depth': 10}
avg roc: 0.84310773936


iteration : 49
{'max_leaf_nodes': 50, 'min_samples_leaf': 20, 'n_estimators': 300, 'min_weight_fraction_leaf': 0.0, 'min_samples_split': 12, 'max_depth': 90}
avg roc: 0.839937181273


iteration : 50
{'max_leaf_nodes': 45, 'min_samp

avg roc: 0.841895234169


iteration : 90
{'max_leaf_nodes': None, 'min_samples_leaf': 20, 'n_estimators': 1000, 'min_weight_fraction_leaf': 0.0, 'min_samples_split': 22, 'max_depth': 80}
avg roc: 0.840990261412


iteration : 91
{'max_leaf_nodes': 70, 'min_samples_leaf': 60, 'n_estimators': 600, 'min_weight_fraction_leaf': 0.0, 'min_samples_split': 2, 'max_depth': 200}
avg roc: 0.841938988785


iteration : 92
{'max_leaf_nodes': 45, 'min_samples_leaf': 60, 'n_estimators': 1000, 'min_weight_fraction_leaf': 0.0, 'min_samples_split': 2, 'max_depth': 80}
avg roc: 0.84070612577


iteration : 93
{'max_leaf_nodes': 40, 'min_samples_leaf': 60, 'n_estimators': 100, 'min_weight_fraction_leaf': 0.0, 'min_samples_split': 2, 'max_depth': 10}
avg roc: 0.840044839148


iteration : 94
{'max_leaf_nodes': 40, 'min_samples_leaf': 20, 'n_estimators': 600, 'min_weight_fraction_leaf': 0.0, 'min_samples_split': 22, 'max_depth': 500}
avg roc: 0.839455900246


iteration : 95
{'max_leaf_nodes': None, 'min_samples

KeyboardInterrupt: 

In [215]:
#run rf cv once
max_depth = 10
n_estimators = 1000
min_samples_leaf = 20
min_samples_split = 2
max_leaf_nodes = None
min_weight_fraction_leaf = 0.0

splits = 5

skf = StratifiedKFold(n_splits=splits, shuffle=True,  random_state=0)

roc_score = 0

train_norm = np.array(train_norm)
train_label = np.array(train_label)

for train_index, test_index in skf.split(train_norm, train_label):
    data_train , data_test = train_norm[train_index], train_norm[test_index]
    label_train , label_test = train_label[train_index], train_label[test_index]

    pred_test = random_forest(data_train, data_test, label_train.ravel(), max_depth,n_estimators,min_samples_leaf,min_samples_split, max_leaf_nodes, min_weight_fraction_leaf)
    roc_score += metrics.roc_auc_score(label_test, pred_test)


print "avg roc: {}".format(roc_score/splits)
print "\n"

avg roc: 0.843098274415




In [73]:
#tune xgboost

MAX_DEPTH = [3, 5, 6, 10, 50]
N_ESTIMATORS = [300, 600, 100]

configsTried = []
iteration = 0

while iteration < 2520:

    max_depth = random.choice(MAX_DEPTH)
    n_estimators = random.choice(N_ESTIMATORS)
    
    cfg = {
        "max_depth" : max_depth,
        "n_estimators": n_estimators}
    
    if cfg in configsTried:
        continue

    print "iteration : {}".format(iteration)
    print cfg

    splits = 5

    skf = StratifiedKFold(n_splits=splits, shuffle=True,  random_state=0)

    roc_score = 0

    train_norm = np.array(train_norm)
    train_label = np.array(train_label)

    for train_index, test_index in skf.split(train_norm, train_label):
        data_train , data_test = train_norm[train_index], train_norm[test_index]
        label_train , label_test = train_label[train_index], train_label[test_index]

        pred_test = xgb(data_train, data_test, label_train.ravel(), max_depth,n_estimators)
        roc_score += metrics.roc_auc_score(label_test, pred_test)

    configsTried.append(cfg)
    iteration += 1
    print "avg roc: {}".format(roc_score/splits)
    print "\n"

iteration : 0
{'n_estimators': 300, 'max_depth': 6}
avg roc: 0.920035125072


iteration : 1
{'n_estimators': 100, 'max_depth': 6}
avg roc: 0.882771758767


iteration : 2
{'n_estimators': 600, 'max_depth': 6}
avg roc: 0.955098941638


iteration : 3
{'n_estimators': 100, 'max_depth': 10}
avg roc: 0.956566666225


iteration : 4
{'n_estimators': 600, 'max_depth': 10}


KeyboardInterrupt: 

In [236]:
#run xgb cv once
max_depth = 10
n_estimators = 150


splits = 5

skf = StratifiedKFold(n_splits=splits, shuffle=True,  random_state=0)

roc_score = 0

train_norm = np.array(train_norm)
train_label = np.array(train_label)

for train_index, test_index in skf.split(train_norm, train_label):
    data_train , data_test = train_norm[train_index], train_norm[test_index]
    label_train , label_test = train_label[train_index], train_label[test_index]

    pred_test = xgb(data_train, data_test, label_train.ravel(), max_depth,n_estimators)
    roc_score += metrics.roc_auc_score(label_test, pred_test)


print "avg roc: {}".format(roc_score/splits)
print "\n"

avg roc: 0.970636274689




In [216]:
# run rf on test 

max_depth = 10
n_estimators = 1000
min_samples_leaf = 20
min_samples_split = 2
max_leaf_nodes = None
min_weight_fraction_leaf = 0.0

train_norm = np.array(train_norm)
train_label = np.array(train_label)
test_norm = np.array(test_norm)

pred_realtest_rf = random_forest(train_norm, test_norm, train_label.ravel(),max_depth,n_estimators,min_samples_leaf,min_samples_split, max_leaf_nodes, min_weight_fraction_leaf)

In [237]:
# run xgb on test 

max_depth = 10
n_estimators = 100

train_norm = np.array(train_norm)
train_label = np.array(train_label)
test_norm = np.array(test_norm)

pred_realtest_xgb = xgb(train_norm, test_norm, train_label.ravel(),max_depth,n_estimators)

In [238]:
# combine rf and xgb
pred_realtest = 0.9*pred_realtest_xgb + 0.1*pred_realtest_rf

In [240]:
# second optimization task 

incentives_init = np.random.randn(pred_realtest.shape[0],1)

In [241]:
premium = test[["premium"]]
premium = np.array(premium)

In [242]:
incentives = tf.Variable(incentives_init, dtype = tf.float32)


effort_2ndterm = - tf.divide(incentives, 400.)
effort = tf.multiply(10., tf.subtract(1. , tf.exp(effort_2ndterm)))

deltaP_2ndterm = - tf.divide(effort, 5.)
deltaP = tf.multiply(20., tf.subtract(1. , tf.exp(deltaP_2ndterm)))

pred_realtest_tf = tf.cast(pred_realtest, tf.float32)

deltaP = tf.multiply(pred_realtest_tf, deltaP)
premium_tf = tf.cast(premium, tf.float32)
net_revenue = -tf.reduce_sum(tf.subtract( tf.multiply( tf.add(pred_realtest_tf, deltaP), premium_tf), incentives))

In [243]:
train_tf = tf.train.GradientDescentOptimizer(0.01).minimize(net_revenue)
init = tf.global_variables_initializer()
sess = tf.Session()
sess.run(init)

In [244]:
for i in range(3000):
    sess.run(train_tf)
    print sess.run(net_revenue)

-930154940.0
-1338864400.0
-1657888500.0
-1919244800.0
-2139946000.0
-2330324700.0
-2497191200.0
-2645286400.0
-2778062000.0
-2898101000.0
-3007400000.0
-3107527200.0
-3199740000.0
-3285053700.0
-3364311600.0
-3438215400.0
-3507350800.0
-3572222200.0
-3633258000.0
-3690824200.0
-3745241600.0
-3796794400.0
-3845723000.0
-3892251100.0
-3936564200.0
-3978835500.0
-4019219000.0
-4057848300.0
-4094853600.0
-4130340900.0
-4164411600.0
-4197158000.0
-4228665000.0
-4259007500.0
-4288257800.0
-4316476400.0
-4343724000.0
-4370054000.0
-4395519000.0
-4420161000.0
-4444027000.0
-4467156000.0
-4489581600.0
-4511343600.0
-4532467700.0
-4552993000.0
-4572938000.0
-4592336000.0
-4611207000.0
-4629575700.0
-4647464000.0
-4664892400.0
-4681878000.0
-4698441700.0
-4714601500.0
-4730369000.0
-4745762000.0
-4760800000.0
-4775485400.0
-4789836300.0
-4803869000.0
-4817592000.0
-4831021600.0
-4844158000.0
-4857016300.0
-4869607000.0
-4881938400.0
-4894016500.0
-4905856000.0
-4917460000.0
-4928837600.0
-493999

-6038527000.0
-6038899700.0
-6039272400.0
-6039636000.0
-6040002600.0
-6040368000.0
-6040727600.0
-6041093000.0
-6041453600.0
-6041818000.0
-6042182700.0
-6042539500.0
-6042901000.0
-6043259000.0
-6043614000.0
-6043970000.0
-6044325400.0
-6044682000.0
-6045036000.0
-6045390000.0
-6045744600.0
-6046092300.0
-6046449700.0
-6046797000.0
-6047143000.0
-6047494000.0
-6047841300.0
-6048185300.0
-6048529400.0
-6048880000.0
-6049228000.0
-6049567000.0
-6049908700.0
-6050252000.0
-6050595000.0
-6050930700.0
-6051271000.0
-6051611000.0
-6051951600.0
-6052287500.0
-6052623400.0
-6052959000.0
-6053296000.0
-6053631500.0
-6053961000.0
-6054293500.0
-6054630000.0
-6054956000.0
-6055290000.0
-6055620600.0
-6055950300.0
-6056276500.0
-6056599000.0
-6056928000.0
-6057256000.0
-6057580000.0
-6057906000.0
-6058229000.0
-6058555400.0
-6058873000.0
-6059196400.0
-6059521000.0
-6059837400.0
-6060157000.0
-6060477400.0
-6060796400.0
-6061113300.0
-6061426700.0
-6061741000.0
-6062062600.0
-6062376400.0
-60626

-6180197400.0
-6180312000.0
-6180427300.0
-6180541000.0
-6180652500.0
-6180768300.0
-6180880400.0
-6180992000.0
-6181104000.0
-6181214000.0
-6181328000.0
-6181439500.0
-6181550600.0
-6181658000.0
-6181772300.0
-6181884000.0
-6181994500.0
-6182107000.0
-6182217000.0
-6182329000.0
-6182438000.0
-6182547500.0
-6182659000.0
-6182767600.0
-6182876000.0
-6182987000.0
-6183095300.0
-6183206000.0
-6183315500.0
-6183427000.0
-6183535600.0
-6183643000.0
-6183752700.0
-6183864000.0
-6183970300.0
-6184078000.0
-6184188000.0
-6184298500.0
-6184406000.0
-6184512500.0
-6184618500.0
-6184723500.0
-6184832000.0
-6184945000.0
-6185051000.0
-6185158000.0
-6185266000.0
-6185374000.0
-6185480700.0
-6185586000.0
-6185690000.0
-6185794600.0
-6185899000.0
-6186010600.0
-6186116000.0
-6186225000.0
-6186330000.0
-6186434600.0
-6186540000.0
-6186647600.0
-6186752000.0
-6186857000.0
-6186961000.0
-6187067400.0
-6187171000.0
-6187275300.0
-6187378700.0
-6187485000.0
-6187588600.0
-6187691500.0
-6187797000.0
-61879

-6233089000.0
-6233146400.0
-6233202000.0
-6233260000.0
-6233318400.0
-6233374000.0
-6233433000.0
-6233490400.0
-6233549000.0
-6233605000.0
-6233662000.0
-6233720000.0
-6233777000.0
-6233833500.0
-6233893000.0
-6233949700.0
-6234006500.0
-6234062000.0
-6234118700.0
-6234175500.0
-6234233000.0
-6234288600.0
-6234346000.0
-6234402300.0
-6234459600.0
-6234515500.0
-6234569700.0
-6234629000.0
-6234687500.0
-6234746000.0
-6234800600.0
-6234858000.0
-6234914000.0
-6234972000.0
-6235027500.0
-6235081700.0
-6235139600.0
-6235195400.0
-6235248600.0
-6235306000.0
-6235362300.0
-6235416600.0
-6235471000.0
-6235525000.0
-6235581400.0
-6235636700.0
-6235692000.0
-6235746300.0
-6235800600.0
-6235855000.0
-6235912700.0
-6235965400.0
-6236022000.0
-6236077000.0
-6236133400.0
-6236187600.0
-6236242400.0
-6236299300.0
-6236354600.0
-6236410400.0
-6236467000.0
-6236521000.0
-6236576300.0
-6236631000.0
-6236685000.0
-6236739600.0
-6236795000.0
-6236851000.0
-6236905500.0
-6236962000.0
-6237017600.0
-62370

-6261356500.0
-6261392400.0
-6261427700.0
-6261465000.0
-6261501000.0
-6261533700.0
-6261570600.0
-6261605400.0
-6261641000.0
-6261678000.0
-6261714000.0
-6261751300.0
-6261787000.0
-6261822500.0
-6261858300.0
-6261894000.0
-6261930000.0
-6261965000.0
-6261999600.0
-6262035000.0
-6262070300.0
-6262106000.0
-6262143000.0
-6262178000.0
-6262212600.0
-6262250500.0
-6262285300.0
-6262321000.0
-6262356000.0
-6262390000.0
-6262425000.0
-6262461000.0
-6262495700.0
-6262531000.0
-6262566000.0
-6262599000.0
-6262633500.0
-6262669000.0
-6262705700.0
-6262740500.0
-6262775300.0
-6262811600.0
-6262846500.0
-6262880000.0
-6262914600.0
-6262949000.0
-6262984700.0
-6263020500.0
-6263055400.0
-6263090000.0
-6263126000.0
-6263163000.0
-6263195000.0
-6263231000.0
-6263266300.0
-6263302000.0
-6263340000.0
-6263375000.0
-6263410700.0
-6263443500.0
-6263478300.0
-6263514000.0
-6263550000.0
-6263584300.0
-6263620600.0
-6263655400.0
-6263690000.0
-6263723000.0
-6263756000.0
-6263789600.0
-6263823400.0
-62638

In [245]:
final_incentives = sess.run(incentives)

In [246]:
final_incentives

array([[ 762.1772 ],
       [1112.3856 ],
       [1028.6694 ],
       ...,
       [ 515.47705],
       [1049.9154 ],
       [ 889.3432 ]], dtype=float32)

In [247]:
output_file_path = "sample_submission_sLex1ul.csv"
output_file = pd.read_csv(output_file_path ,sep=",", index_col=None, dtype = 'float32' )

In [248]:
output_file['renewal'] = pred_realtest_xgb
output_file['incentives'] = final_incentives

In [249]:
output_file.to_csv('output_file/output_xgb_3000iterations_newnormed.csv', sep=',', index = False)