In [59]:
import pickle
import gzip

import pandas as pd
import numpy as np
import scipy.sparse as sp

from tqdm import tqdm_notebook as tqdm
from sklearn.metrics import roc_auc_score

import competition_utils as u

In [60]:
from numba import jit

In [2]:
# Sparse feature matrices saved by an earlier preprocessing step.
X_train = sp.load_npz('tmp/X_train_sparse.npz')
X_val = sp.load_npz('tmp/X_val_sparse.npz')

# Labels are cast to float32 right after loading.
y_train = np.load('tmp/y_train.npy').astype(np.float32)
y_val = np.load('tmp/y_val.npy').astype(np.float32)

In [3]:
import ftrl

In [None]:
# Model used for the validation experiment below; the hyperparameters are the
# same values passed to `model_full` later in this notebook.
model = ftrl.FtrlProximal(alpha=0.1, beta=1, l1=75, l2=25)

In [None]:
# Call fit 30 times on the same model object and print the validation AUC
# after each call, numbered from 1.
for epoch in tqdm(range(1, 31)):
    model.fit(X_train, y_train)
    auc = roc_auc_score(y_val, model.predict(X_val))
    print(epoch, auc)

In [4]:
# Combine the train and validation splits (rows of X, entries of y) so the
# final model can be fitted on all labelled data.
X = sp.vstack((X_train, X_val))
y = np.concatenate((y_train, y_val), axis=0)

Full model: retrain on the combined train + validation data

In [5]:
%%time

# Final model fitted on the stacked train + validation data (X, y), with the
# same hyperparameters as the validation model.
# NOTE(review): the recorded output of this cell is a KeyboardInterrupt, so
# `model_full` may not have completed all 22 passes — confirm before trusting
# predictions made with it.
model_full = ftrl.FtrlProximal(alpha=0.1, beta=1, l1=75, l2=25)
model_full.fit(X, y, num_passes=22)

KeyboardInterrupt: 

In [10]:
# Parameters for the (currently optional) sigmoid post-processing of scores.
shift = 1.1875
scale = 850100


def shifted_scaled_sigmoid(x, shift=0, scale=1):
    """Apply a shifted logistic function to `x` and scale the result.

    Computes ``scale / (1 + exp(-(x - shift)))`` element-wise (evaluated as
    sigmoid first, then multiplied by `scale`) and rounds to 2 decimals.

    Parameters
    ----------
    x : numpy array or numpy scalar
        Raw scores.
    shift : number, default 0
        Value of `x` at which the sigmoid equals 0.5.
    scale : number, default 1
        Multiplier applied to the sigmoid output.
    """
    sigmoid = 1 / (1 + np.exp(shift - x))
    scaled = sigmoid * scale
    return scaled.round(2)

In [6]:
# Iterable over the test set; the prediction loop below unpacks each item as
# (group id, lines), where every line exposes `.idx` and `.val`.
# NOTE(review): if read_grouped returns a one-shot generator, this cell must be
# re-run before iterating the test set a second time — confirm.
it_test = u.read_grouped('data/criteo_test_release.txt.gz')

In [12]:
# Constant added to the highest-scoring candidate of every group so that the
# arg-max stands out in the submitted scores.
ARGMAX_BOOST = 15
# Number of groups in the test file (progress-bar total only); matches the
# `max_instances` value reported by the grader in this notebook.
N_TEST_GROUPS = 7087738

# `with` guarantees the output file is flushed and closed even when this long
# loop is interrupted or raises part-way through.
with open('pred_ftrl_argmax15.txt', 'w') as f_out:
    for gid, group in tqdm(it_test, total=N_TEST_GROUPS):
        cols = []
        vals = []

        # Single pass over the group, in case it is a one-shot iterator.
        for line in group:
            cols.append(line.idx)
            vals.append(line.val)

        # Use a dedicated name: the original reused `X_val`, which silently
        # replaced the validation matrix loaded at the top of the notebook.
        X_group = u.to_csr(cols, vals)

        pred = model_full.predict(X_group)
        top = pred.argmax()
        pred[top] += ARGMAX_BOOST

        f_out.write(u.to_prediction_str(gid, pred))
        f_out.write('\n')




In [14]:
# Compress the prediction file for upload (gzip replaces it with a .gz file).
# NOTE(review): the prediction cell above writes 'pred_ftrl_argmax15.txt', not
# 'pred_ftrl2.txt'; this command only works if 'pred_ftrl2.txt' is left over
# from an earlier run — confirm which file is meant.
!gzip pred_ftrl2.txt

[1m[36mCrowdAI.Event.Authentication[0m : Authenticating for challenge = [4m[1m[34mCriteoAdPlacementNIPS2017[0m
[1m[32mCrowdAI.Event.Connection.CONNECTED[0m[1m[32m[0m
[1m[32mCrowdAI.Event.Authentication.SUCCESS[0m : [1m[32mAuthentication Successful[0m


In [16]:
import getpass
import os

import crowdai

# Never keep the API key in the notebook: read it from the environment, or
# prompt for it interactively when the variable is not set.
# NOTE: the key that was previously embedded here was stored in plain text and
# should be revoked.
api_key = os.environ.get('CROWDAI_API_KEY') or getpass.getpass('crowdAI API key: ')

challenge = crowdai.Challenge("CriteoAdPlacementNIPS2017", api_key)
scores = challenge.submit('pred_ftrl2.txt.gz')

[1m[34mCrowdAI.Event.Misc.FILE_UPLOAD : Preparing for file upload[0m


  0% |          |[00:00<00:40]   2.45% /s] 

[1m[34mCrowdAI.Event.Misc.FILE_UPLOAD : Uploading file[0m


100% |██████████|[00:08<00:00]  12.23% /s] 
  0% |          |[00:00<?]  ?% /s] 

[1m[33mCrowdAI.Event.Job.ENQUEUED[0m : 7760cc63-c4fe-49dd-b994-e0ee3485de3b


                      0% |          |[00:00<?]  ?% /s]                     [1m[32m7760cc63-c4fe-49dd-b994-e0ee3485de3b[0m:   0% |          |[00:00<?]  ?% /s] 

[1m[34mCrowdAI.Event.Job.RUNNING[0m : 7760cc63-c4fe-49dd-b994-e0ee3485de3b
[1m[36mCrowdAI.Event.Job.INFO[0m : (7760cc63-c4fe-49dd-b994-e0ee3485de3b) Beginning grading of the submission


[1m[32m7760cc63-c4fe-49dd-b994-e0ee3485de3b[0m: 100% |█████████▉|[1:26:38<00:00]  52.22s/% ] 

[1m[36mCrowdAI.Event.Job.INFO[0m : (7760cc63-c4fe-49dd-b994-e0ee3485de3b) Scores Computed Successfully !!
[1m[36mCrowdAI.Event.Job.INFO[0m : (7760cc63-c4fe-49dd-b994-e0ee3485de3b) ImpWt_std: 0.0134043441367
[1m[36mCrowdAI.Event.Job.INFO[0m : (7760cc63-c4fe-49dd-b994-e0ee3485de3b) Uploading scores to the leaderboard....
[1m[36mCrowdAI.Event.Job.INFO[0m : (7760cc63-c4fe-49dd-b994-e0ee3485de3b) SNIPS_std: 0.000993291621552
[1m[36mCrowdAI.Event.Job.INFO[0m : (7760cc63-c4fe-49dd-b994-e0ee3485de3b) SNIPS : 53.0539863777
[1m[36mCrowdAI.Event.Job.INFO[0m : (7760cc63-c4fe-49dd-b994-e0ee3485de3b) ImpWt : 0.990946687511
[1m[36mCrowdAI.Event.Job.INFO[0m : (7760cc63-c4fe-49dd-b994-e0ee3485de3b) IPS_std: 2.53459631776
[1m[36mCrowdAI.Event.Job.INFO[0m : (7760cc63-c4fe-49dd-b994-e0ee3485de3b) IPS : 52.5736720603


                    [1m[32m7760cc63-c4fe-49dd-b994-e0ee3485de3b[0m: 100% |█████████▉|[1:26:39<00:00]  52.22s/% ] [1m[32m7760cc63-c4fe-49dd-b994-e0ee3485de3b[0m: 100% |██████████|[1:26:39<00:00]  257.11s/% ] 

[1m[36mCrowdAI.Event.Job.INFO[0m : (7760cc63-c4fe-49dd-b994-e0ee3485de3b) Scores Submited Successfully !!! 
[1m[32mCrowdAI.Event.Job.COMPLETE[0m : [1m[32m7760cc63-c4fe-49dd-b994-e0ee3485de3b[0m	   🍺 


                    [1m[32m7760cc63-c4fe-49dd-b994-e0ee3485de3b[0m: 100% |██████████|[1:26:39<00:00]  257.11s/% ] 


In [17]:
scores

{'impwt': 0.9909466875114865,
 'impwt_std': 0.013404344136707162,
 'ips': 52.57367206026822,
 'ips_std': 2.5345963177595756,
 'max_instances': 7087738,
 'message': '',
 'snips': 53.05398637770695,
 'snips_std': 0.0009932916215519697}

In [154]:
scores

{'impwt': 0.983055672490548,
 'impwt_std': 0.015840261443422165,
 'ips': 46.11893916159966,
 'ips_std': 1.885006865357641,
 'max_instances': 7087738,
 'message': '',
 'snips': 46.91386302136728,
 'snips_std': 0.0007714217927742332}

In [29]:
def pred_to_array(pred):
    """Parse the score part of a prediction line into a float32 array.

    Parameters
    ----------
    pred : str
        Comma-separated ``"<id>:<score>"`` entries, e.g. ``"0:1.5,1:-2.25"``.
        An empty string yields an empty array.

    Returns
    -------
    numpy.ndarray
        1-D float32 array with one score per entry, in input order.

    Raises
    ------
    ValueError
        If an entry does not contain exactly one ``:`` or its score is not a
        valid float.
    """
    # Plain Python on purpose: the body is string parsing, which numba's @jit
    # cannot compile to native code (it falls back to object mode at best and
    # fails outright on recent numba releases), so the decorator only added
    # compile overhead and a dependency on a later import cell.
    entries = pred.split(',') if pred else []
    out = np.zeros(len(entries), dtype=np.float32)

    for i, entry in enumerate(entries):
        _, score = entry.split(':')
        out[i] = float(score)

    return out

In [61]:
# Constant added to the highest-scoring candidate of every group.
ARGMAX_BOOST = 15
# Number of lines expected in the prediction file (progress-bar total only);
# matches the `max_instances` value reported by the grader in this notebook.
N_TEST_GROUPS = 7087738

# Both files are managed by `with`, so they are closed even when the loop is
# interrupted or a malformed line raises. The input is opened in text mode,
# which replaces the per-line bytes -> str decode.
with gzip.open('pred_ftrl2.txt.gz', 'rt', encoding='utf-8') as f_in, \
        open('pred_ftrl2_argmax15.txt', 'w') as f_out:
    for line in tqdm(f_in, total=N_TEST_GROUPS):
        line = line.strip()
        if not line:
            # Tolerate blank lines (e.g. a trailing newline at end of file).
            continue

        # Each line is "<group id>;<id:score,id:score,...>".
        gid_text, scores_text = line.split(';')
        gid = int(gid_text)

        pred = pred_to_array(scores_text)
        top = pred.argmax()
        pred[top] += ARGMAX_BOOST

        f_out.write(u.to_prediction_str(gid, pred))
        f_out.write('\n')




In [62]:
# Compress the post-processed predictions written by the cell above; gzip
# replaces the file with 'pred_ftrl2_argmax15.txt.gz', which is what gets
# submitted below.
!gzip pred_ftrl2_argmax15.txt

In [63]:
import getpass
import os

import crowdai

# Never keep the API key in the notebook: read it from the environment, or
# prompt for it interactively when the variable is not set.
# NOTE: the key that was previously embedded here was stored in plain text and
# should be revoked.
api_key = os.environ.get('CROWDAI_API_KEY') or getpass.getpass('crowdAI API key: ')

challenge = crowdai.Challenge("CriteoAdPlacementNIPS2017", api_key)
scores = challenge.submit('pred_ftrl2_argmax15.txt.gz')

[1m[36mCrowdAI.Event.Authentication[0m : Authenticating for challenge = [4m[1m[34mCriteoAdPlacementNIPS2017[0m
[1m[32mCrowdAI.Event.Connection.CONNECTED[0m[1m[32m[0m
[1m[32mCrowdAI.Event.Authentication.SUCCESS[0m : [1m[32mAuthentication Successful[0m
[1m[34mCrowdAI.Event.Misc.FILE_UPLOAD : Preparing for file upload[0m


  0% |          |[00:00<00:31]   3.21% /s] 

[1m[34mCrowdAI.Event.Misc.FILE_UPLOAD : Uploading file[0m


100% |██████████|[00:05<00:00]  18.70% /s] 
  0% |          |[00:00<?]  ?% /s] 

[1m[33mCrowdAI.Event.Job.ENQUEUED[0m : 6458ac6a-ce18-4fd0-b5f8-c9ac7d17613b


                      0% |          |[00:00<?]  ?% /s]                     [1m[32m6458ac6a-ce18-4fd0-b5f8-c9ac7d17613b[0m:   0% |          |[00:00<?]  ?% /s] 

[1m[34mCrowdAI.Event.Job.RUNNING[0m : 6458ac6a-ce18-4fd0-b5f8-c9ac7d17613b
[1m[36mCrowdAI.Event.Job.INFO[0m : (6458ac6a-ce18-4fd0-b5f8-c9ac7d17613b) Beginning grading of the submission


[1m[32m6458ac6a-ce18-4fd0-b5f8-c9ac7d17613b[0m: 100% |█████████▉|[1:28:35<00:00]  52.44s/% ] 

[1m[36mCrowdAI.Event.Job.INFO[0m : (6458ac6a-ce18-4fd0-b5f8-c9ac7d17613b) Scores Computed Successfully !!
[1m[36mCrowdAI.Event.Job.INFO[0m : (6458ac6a-ce18-4fd0-b5f8-c9ac7d17613b) ImpWt : 1.03342193533
[1m[36mCrowdAI.Event.Job.INFO[0m : (6458ac6a-ce18-4fd0-b5f8-c9ac7d17613b) SNIPS_std: 0.00155018258196
[1m[36mCrowdAI.Event.Job.INFO[0m : (6458ac6a-ce18-4fd0-b5f8-c9ac7d17613b) Uploading scores to the leaderboard....
[1m[36mCrowdAI.Event.Job.INFO[0m : (6458ac6a-ce18-4fd0-b5f8-c9ac7d17613b) SNIPS : 53.8029788715
[1m[36mCrowdAI.Event.Job.INFO[0m : (6458ac6a-ce18-4fd0-b5f8-c9ac7d17613b) ImpWt_std: 0.0159915074304
[1m[36mCrowdAI.Event.Job.INFO[0m : (6458ac6a-ce18-4fd0-b5f8-c9ac7d17613b) IPS_std: 4.19923000659
[1m[36mCrowdAI.Event.Job.INFO[0m : (6458ac6a-ce18-4fd0-b5f8-c9ac7d17613b) IPS : 55.6011785517


                    [1m[32m6458ac6a-ce18-4fd0-b5f8-c9ac7d17613b[0m: 100% |█████████▉|[1:28:36<00:00]  52.44s/% ] [1m[32m6458ac6a-ce18-4fd0-b5f8-c9ac7d17613b[0m: 100% |██████████|[1:28:36<00:00]  262.74s/% ] 

[1m[36mCrowdAI.Event.Job.INFO[0m : (6458ac6a-ce18-4fd0-b5f8-c9ac7d17613b) Scores Submited Successfully !!! 
[1m[32mCrowdAI.Event.Job.COMPLETE[0m : [1m[32m6458ac6a-ce18-4fd0-b5f8-c9ac7d17613b[0m	   🍺 


                    [1m[32m6458ac6a-ce18-4fd0-b5f8-c9ac7d17613b[0m: 100% |██████████|[1:28:36<00:00]  262.74s/% ] 


In [58]:
scores

{'impwt': 1.0291488188127471,
 'impwt_std': 0.015837512738002178,
 'ips': 55.363350975417525,
 'ips_std': 4.1683298472588195,
 'max_instances': 7087738,
 'message': '',
 'snips': 53.79528204607583,
 'snips_std': 0.0015450146469609676}

In [54]:
# raw & argmax + 15
scores

{'impwt': 1.0158197613114301,
 'impwt_std': 0.013330339899660247,
 'ips': 54.92215088227358,
 'ips_std': 4.284789023838347,
 'max_instances': 7087738,
 'message': '',
 'snips': 54.06682659074156,
 'snips_std': 0.0015984071121676571}

In [50]:
scores

{'impwt': 1.0157875564946188,
 'impwt_std': 0.013324275777335002,
 'ips': 54.9180220697687,
 'ips_std': 4.28310809259753,
 'max_instances': 7087738,
 'message': '',
 'snips': 54.06447609900372,
 'snips_std': 0.0015978265195827558}

In [46]:
scores

{'impwt': 1.011598465072558,
 'impwt_std': 0.01252083399726181,
 'ips': 54.3528817065464,
 'ips_std': 4.050219880773622,
 'max_instances': 7087738,
 'message': '',
 'snips': 53.72969966166159,
 'snips_std': 0.0015167550204357677}