# Notebook for building an XGBoost model for the Sendgrid sign up dataset
***
**Jake Mitchell Scott Schubert**

Initially using ______

In [18]:
import numpy as np
import pandas as pd
import matplotlib.pylab as plt

from numpy import loadtxt
from xgboost import XGBClassifier
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
from random import randint
from datetime import datetime

In [50]:
train_path = 'data/signup_train_data.csv'
test_path = 'data/signup_test_data.csv'


# Load the data into a DataFrame 
df = pd.read_csv(train_path, low_memory=False)
test_df = pd.read_csv(test_path, low_memory=False)

# Split into X and Y
dfLabels = df.pop("label")
#print (df.head())

#Feature Engineering
#for index, row in df.iterrows():
#    df['time_of_day_int'] = introw['created_at'][11:13])*3600 + int(row['created_at'][14:16])*60 + int(row['created_at'][17:19])

df['employee_count_low'] = int(df['employee_count'].str.split()[0][0])
df['employee_count_high'] = int(df['employee_count'].str.split()[0][2])
test_df['employee_count_low'] = int(df['employee_count'].str.split()[0][0])
test_df['employee_count_high'] = int(df['employee_count'].str.split()[0][2])

#Temporarily parse non numerical data
frames_to_remove = ['zip', 'city', 'website', 'state', 'country', 'registration_ip', 'company','multifactor_country_code','created_at','lead_source','marketing_channel','volume','user_persona','initial_package','employee_count','geolocation_notes','name_notes','ip_notes','community_notes','email_notes','activity_notes','fingerprint_notes']
df = df.drop(frames_to_remove, axis=1)
test_df = test_df.drop(frames_to_remove, axis=1)

df.head()




Unnamed: 0,id,banned_ip,ip_count,is_authy_verified,banned_email,is_transactional,is_marketing,is_behavioral,is_oem,risk,...,mfa_required,mfa_completed,whitelabel_required,whitelabel_completed,payment_required,payment_completed,profile_completed,email_completed,employee_count_low,employee_count_high
0,0,6,8,0,1,0,0,0,0,-100,...,1.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0,1,500
1,1,5,5,0,1,1,0,0,0,-90,...,,,,,,,,,1,500
2,2,0,1,0,0,1,0,0,0,-44,...,,,,,,,,,1,500
3,3,4,4,0,2,0,1,0,0,-72,...,1.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0,1,500
4,4,0,1,0,0,1,0,0,0,-34,...,,,,,,,,,1,500


In [54]:
def xgb_basic(seed_in, TO_TEST, eta=0.1, depth=3, n_estimators=100):
    # split data into train and test sets
    seed = seed_in
    test_size = 0.5
    X_train, X_test, y_train, y_test = train_test_split(df, dfLabels, test_size=test_size, random_state=seed)

    # fit model no training data
    model = XGBClassifier(learning_rate=eta, max_depth=depth, n_estimators=n_estimators)
    model.fit(X_train, y_train)
    
    # make predictions for test data
    y_pred = model.predict(X_test)
    predictions = [round(value) for value in y_pred]

    # evaluate predictions
    accuracy = accuracy_score(y_test, predictions)
    print("Seed: %i Eta: %f Depth: %i Estimators: %i  -  Accuracy: %.3f%%" % (seed, eta, depth, n_estimators, accuracy * 100.0))
    
    if TO_TEST:
        #make predictions for train data
        y_pred = model.predict(test_df)
        predictions = [round(value) for value in y_pred]

        d = {'id':test_df['id'], 'label':predictions}
        preds = pd.DataFrame(data=d)

        preds.to_csv('predictions.csv', index=False)

In [55]:
for n_estimators in [50,60,70,80,100,110,120,130,140,150]:
    xgb_basic(31,False, depth=9, eta=0.15, n_estimators=n_estimators)

Seed: 31 Eta: 0.150000 Depth: 9 Estimators: 50  -  Accuracy: 97.328%
Seed: 31 Eta: 0.150000 Depth: 9 Estimators: 60  -  Accuracy: 97.348%
Seed: 31 Eta: 0.150000 Depth: 9 Estimators: 70  -  Accuracy: 97.351%
Seed: 31 Eta: 0.150000 Depth: 9 Estimators: 80  -  Accuracy: 97.356%
Seed: 31 Eta: 0.150000 Depth: 9 Estimators: 100  -  Accuracy: 97.379%
Seed: 31 Eta: 0.150000 Depth: 9 Estimators: 110  -  Accuracy: 97.383%
Seed: 31 Eta: 0.150000 Depth: 9 Estimators: 120  -  Accuracy: 97.393%
Seed: 31 Eta: 0.150000 Depth: 9 Estimators: 130  -  Accuracy: 97.378%
Seed: 31 Eta: 0.150000 Depth: 9 Estimators: 140  -  Accuracy: 97.383%
Seed: 31 Eta: 0.150000 Depth: 9 Estimators: 150  -  Accuracy: 97.376%


In [56]:
xgb_basic(1, True, depth=9, eta=0.15, n_estimators=120)

Seed: 1 Eta: 0.150000 Depth: 9 Estimators: 120  -  Accuracy: 97.383%


In [None]:
28, 31
'''
Seed: 31 Eta: 0.090000 Depth: 1  -  Accuracy: 93.93%
Seed: 31 Eta: 0.095000 Depth: 1  -  Accuracy: 93.93%
Seed: 31 Eta: 0.100000 Depth: 1  -  Accuracy: 93.96%
Seed: 31 Eta: 0.105000 Depth: 1  -  Accuracy: 93.97%
Seed: 31 Eta: 0.110000 Depth: 1  -  Accuracy: 94.00%
Seed: 31 Eta: 0.115000 Depth: 1  -  Accuracy: 94.05%
Seed: 31 Eta: 0.120000 Depth: 1  -  Accuracy: 93.99%
Seed: 31 Eta: 0.125000 Depth: 1  -  Accuracy: 93.98%
Seed: 31 Eta: 0.090000 Depth: 2  -  Accuracy: 95.29%
Seed: 31 Eta: 0.095000 Depth: 2  -  Accuracy: 95.28%
Seed: 31 Eta: 0.100000 Depth: 2  -  Accuracy: 95.33%
Seed: 31 Eta: 0.105000 Depth: 2  -  Accuracy: 95.32%
Seed: 31 Eta: 0.110000 Depth: 2  -  Accuracy: 95.47%
Seed: 31 Eta: 0.115000 Depth: 2  -  Accuracy: 95.51%
Seed: 31 Eta: 0.120000 Depth: 2  -  Accuracy: 95.47%
Seed: 31 Eta: 0.125000 Depth: 2  -  Accuracy: 95.57%
Seed: 31 Eta: 0.090000 Depth: 3  -  Accuracy: 95.91%
Seed: 31 Eta: 0.095000 Depth: 3  -  Accuracy: 95.94%
Seed: 31 Eta: 0.100000 Depth: 3  -  Accuracy: 96.05%
Seed: 31 Eta: 0.105000 Depth: 3  -  Accuracy: 96.02%
Seed: 31 Eta: 0.110000 Depth: 3  -  Accuracy: 96.00%
Seed: 31 Eta: 0.115000 Depth: 3  -  Accuracy: 96.07%
Seed: 31 Eta: 0.120000 Depth: 3  -  Accuracy: 96.08%
Seed: 31 Eta: 0.125000 Depth: 3  -  Accuracy: 96.09%
Seed: 31 Eta: 0.090000 Depth: 4  -  Accuracy: 96.38%
Seed: 31 Eta: 0.095000 Depth: 4  -  Accuracy: 96.37%
Seed: 31 Eta: 0.100000 Depth: 4  -  Accuracy: 96.43%
Seed: 31 Eta: 0.105000 Depth: 4  -  Accuracy: 96.44%
Seed: 31 Eta: 0.110000 Depth: 4  -  Accuracy: 96.40%
Seed: 31 Eta: 0.115000 Depth: 4  -  Accuracy: 96.54%
Seed: 31 Eta: 0.120000 Depth: 4  -  Accuracy: 96.45%
Seed: 31 Eta: 0.125000 Depth: 4  -  Accuracy: 96.57%
Seed: 31 Eta: 0.090000 Depth: 5  -  Accuracy: 96.81%
Seed: 31 Eta: 0.095000 Depth: 5  -  Accuracy: 96.73%
Seed: 31 Eta: 0.100000 Depth: 5  -  Accuracy: 96.74%
Seed: 31 Eta: 0.105000 Depth: 5  -  Accuracy: 96.85%
Seed: 31 Eta: 0.110000 Depth: 5  -  Accuracy: 96.84%
Seed: 31 Eta: 0.115000 Depth: 5  -  Accuracy: 96.86%
Seed: 31 Eta: 0.120000 Depth: 5  -  Accuracy: 96.98%
Seed: 31 Eta: 0.125000 Depth: 5  -  Accuracy: 96.90%
Seed: 31 Eta: 0.090000 Depth: 6  -  Accuracy: 97.06%
Seed: 31 Eta: 0.095000 Depth: 6  -  Accuracy: 97.06%
Seed: 31 Eta: 0.100000 Depth: 6  -  Accuracy: 97.06%
Seed: 31 Eta: 0.105000 Depth: 6  -  Accuracy: 97.08%
Seed: 31 Eta: 0.110000 Depth: 6  -  Accuracy: 97.07%
Seed: 31 Eta: 0.115000 Depth: 6  -  Accuracy: 97.12%
Seed: 31 Eta: 0.120000 Depth: 6  -  Accuracy: 97.16%
Seed: 31 Eta: 0.125000 Depth: 6  -  Accuracy: 97.15%
Seed: 31 Eta: 0.090000 Depth: 7  -  Accuracy: 97.25%
Seed: 31 Eta: 0.095000 Depth: 7  -  Accuracy: 97.22%
Seed: 31 Eta: 0.100000 Depth: 7  -  Accuracy: 97.23%
Seed: 31 Eta: 0.105000 Depth: 7  -  Accuracy: 97.23%
Seed: 31 Eta: 0.110000 Depth: 7  -  Accuracy: 97.23%
Seed: 31 Eta: 0.115000 Depth: 7  -  Accuracy: 97.24%
Seed: 31 Eta: 0.120000 Depth: 7  -  Accuracy: 97.26%
Seed: 31 Eta: 0.125000 Depth: 7  -  Accuracy: 97.24%
Seed: 31 Eta: 0.090000 Depth: 8  -  Accuracy: 97.28%
Seed: 31 Eta: 0.095000 Depth: 8  -  Accuracy: 97.29%
Seed: 31 Eta: 0.100000 Depth: 8  -  Accuracy: 97.30%
Seed: 31 Eta: 0.105000 Depth: 8  -  Accuracy: 97.30%
Seed: 31 Eta: 0.110000 Depth: 8  -  Accuracy: 97.32%
Seed: 31 Eta: 0.115000 Depth: 8  -  Accuracy: 97.32%
Seed: 31 Eta: 0.120000 Depth: 8  -  Accuracy: 97.32%
Seed: 31 Eta: 0.125000 Depth: 8  -  Accuracy: 97.31%
Seed: 31 Eta: 0.090000 Depth: 9  -  Accuracy: 97.33%
Seed: 31 Eta: 0.095000 Depth: 9  -  Accuracy: 97.36%
Seed: 31 Eta: 0.100000 Depth: 9  -  Accuracy: 97.34%
Seed: 31 Eta: 0.105000 Depth: 9  -  Accuracy: 97.34%
Seed: 31 Eta: 0.110000 Depth: 9  -  Accuracy: 97.35%
Seed: 31 Eta: 0.115000 Depth: 9  -  Accuracy: 97.34%
Seed: 31 Eta: 0.120000 Depth: 9  -  Accuracy: 97.36%
Seed: 31 Eta: 0.125000 Depth: 9  -  Accuracy: 97.36%
Seed: 31 Eta: 0.090000 Depth: 10  -  Accuracy: 97.34%
Seed: 31 Eta: 0.095000 Depth: 10  -  Accuracy: 97.34%
Seed: 31 Eta: 0.100000 Depth: 10  -  Accuracy: 97.34%
Seed: 31 Eta: 0.105000 Depth: 10  -  Accuracy: 97.36%
Seed: 31 Eta: 0.110000 Depth: 10  -  Accuracy: 97.35%
Seed: 31 Eta: 0.115000 Depth: 10  -  Accuracy: 97.35%
Seed: 31 Eta: 0.120000 Depth: 10  -  Accuracy: 97.35%
Seed: 31 Eta: 0.125000 Depth: 10  -  Accuracy: 97.37%
Seed: 31 Eta: 0.090000 Depth: 11  -  Accuracy: 97.33%
Seed: 31 Eta: 0.095000 Depth: 11  -  Accuracy: 97.35%
Seed: 31 Eta: 0.100000 Depth: 11  -  Accuracy: 97.35%
Seed: 31 Eta: 0.105000 Depth: 11  -  Accuracy: 97.35%
Seed: 31 Eta: 0.110000 Depth: 11  -  Accuracy: 97.35%
Seed: 31 Eta: 0.115000 Depth: 11  -  Accuracy: 97.33%
Seed: 31 Eta: 0.120000 Depth: 11  -  Accuracy: 97.35%
Seed: 31 Eta: 0.125000 Depth: 11  -  Accuracy: 97.34%
Seed: 31 Eta: 0.090000 Depth: 12  -  Accuracy: 97.33%
Seed: 31 Eta: 0.095000 Depth: 12  -  Accuracy: 97.35%
Seed: 31 Eta: 0.100000 Depth: 12  -  Accuracy: 97.32%
Seed: 31 Eta: 0.105000 Depth: 12  -  Accuracy: 97.34%
Seed: 31 Eta: 0.110000 Depth: 12  -  Accuracy: 97.34%
Seed: 31 Eta: 0.115000 Depth: 12  -  Accuracy: 97.34%
Seed: 31 Eta: 0.120000 Depth: 12  -  Accuracy: 97.34%
Seed: 31 Eta: 0.125000 Depth: 12  -  Accuracy: 97.34%
Seed: 31 Eta: 0.090000 Depth: 13  -  Accuracy: 97.33%
Seed: 31 Eta: 0.095000 Depth: 13  -  Accuracy: 97.33%
Seed: 31 Eta: 0.100000 Depth: 13  -  Accuracy: 97.33%
Seed: 31 Eta: 0.105000 Depth: 13  -  Accuracy: 97.32%
Seed: 31 Eta: 0.110000 Depth: 13  -  Accuracy: 97.34%
Seed: 31 Eta: 0.115000 Depth: 13  -  Accuracy: 97.32%

Seed: 31 Eta: 0.050000 Depth: 8  -  Accuracy: 97.20%
Seed: 31 Eta: 0.055000 Depth: 8  -  Accuracy: 97.22%
Seed: 31 Eta: 0.060000 Depth: 8  -  Accuracy: 97.25%
Seed: 31 Eta: 0.065000 Depth: 8  -  Accuracy: 97.24%
Seed: 31 Eta: 0.070000 Depth: 8  -  Accuracy: 97.27%
Seed: 31 Eta: 0.075000 Depth: 8  -  Accuracy: 97.26%
Seed: 31 Eta: 0.080000 Depth: 8  -  Accuracy: 97.26%
Seed: 31 Eta: 0.085000 Depth: 8  -  Accuracy: 97.28%
Seed: 31 Eta: 0.090000 Depth: 8  -  Accuracy: 97.28%
Seed: 31 Eta: 0.095000 Depth: 8  -  Accuracy: 97.29%
Seed: 31 Eta: 0.100000 Depth: 8  -  Accuracy: 97.30%
Seed: 31 Eta: 0.105000 Depth: 8  -  Accuracy: 97.30%
Seed: 31 Eta: 0.110000 Depth: 8  -  Accuracy: 97.32%
Seed: 31 Eta: 0.115000 Depth: 8  -  Accuracy: 97.32%
Seed: 31 Eta: 0.120000 Depth: 8  -  Accuracy: 97.32%
Seed: 31 Eta: 0.125000 Depth: 8  -  Accuracy: 97.31%
Seed: 31 Eta: 0.130000 Depth: 8  -  Accuracy: 97.32%
Seed: 31 Eta: 0.135000 Depth: 8  -  Accuracy: 97.34%
Seed: 31 Eta: 0.140000 Depth: 8  -  Accuracy: 97.34%
Seed: 31 Eta: 0.145000 Depth: 8  -  Accuracy: 97.34%
Seed: 31 Eta: 0.150000 Depth: 8  -  Accuracy: 97.35%
Seed: 31 Eta: 0.050000 Depth: 9  -  Accuracy: 97.27%
Seed: 31 Eta: 0.055000 Depth: 9  -  Accuracy: 97.29%
Seed: 31 Eta: 0.060000 Depth: 9  -  Accuracy: 97.30%
Seed: 31 Eta: 0.065000 Depth: 9  -  Accuracy: 97.29%
Seed: 31 Eta: 0.070000 Depth: 9  -  Accuracy: 97.30%
Seed: 31 Eta: 0.075000 Depth: 9  -  Accuracy: 97.30%
Seed: 31 Eta: 0.080000 Depth: 9  -  Accuracy: 97.31%
Seed: 31 Eta: 0.085000 Depth: 9  -  Accuracy: 97.30%
Seed: 31 Eta: 0.090000 Depth: 9  -  Accuracy: 97.33%
Seed: 31 Eta: 0.095000 Depth: 9  -  Accuracy: 97.36%
Seed: 31 Eta: 0.100000 Depth: 9  -  Accuracy: 97.34%
Seed: 31 Eta: 0.105000 Depth: 9  -  Accuracy: 97.34%
Seed: 31 Eta: 0.110000 Depth: 9  -  Accuracy: 97.35%
Seed: 31 Eta: 0.115000 Depth: 9  -  Accuracy: 97.34%
Seed: 31 Eta: 0.120000 Depth: 9  -  Accuracy: 97.36%
Seed: 31 Eta: 0.125000 Depth: 9  -  Accuracy: 97.36%
Seed: 31 Eta: 0.130000 Depth: 9  -  Accuracy: 97.34%
Seed: 31 Eta: 0.135000 Depth: 9  -  Accuracy: 97.35%
Seed: 31 Eta: 0.140000 Depth: 9  -  Accuracy: 97.38%
Seed: 31 Eta: 0.145000 Depth: 9  -  Accuracy: 97.37%
Seed: 31 Eta: 0.150000 Depth: 9  -  Accuracy: 97.38%
Seed: 31 Eta: 0.050000 Depth: 10  -  Accuracy: 97.27%
Seed: 31 Eta: 0.055000 Depth: 10  -  Accuracy: 97.30%
Seed: 31 Eta: 0.060000 Depth: 10  -  Accuracy: 97.30%

Seed: 31 Eta: 0.050000 Depth: 9  -  Accuracy: 97.27%
Seed: 31 Eta: 0.090000 Depth: 9  -  Accuracy: 97.33%
Seed: 31 Eta: 0.100000 Depth: 9  -  Accuracy: 97.34%
Seed: 31 Eta: 0.120000 Depth: 9  -  Accuracy: 97.36%
Seed: 31 Eta: 0.140000 Depth: 9  -  Accuracy: 97.38%
Seed: 31 Eta: 0.145000 Depth: 9  -  Accuracy: 97.37%
Seed: 31 Eta: 0.150000 Depth: 9  -  Accuracy: 97.38%
Seed: 31 Eta: 0.160000 Depth: 9  -  Accuracy: 97.36%
Seed: 31 Eta: 0.170000 Depth: 9  -  Accuracy: 97.37%
Seed: 31 Eta: 0.180000 Depth: 9  -  Accuracy: 97.36%
Seed: 31 Eta: 0.200000 Depth: 9  -  Accuracy: 97.38%
'''