## Preliminaries: mount Google Drive and import libraries

In [2]:
!apt-get install -y -qq software-properties-common python-software-properties module-init-tools 
!add-apt-repository -y ppa:alessandro-strada/ppa 2>&1 > /dev/null 
!apt-get update -qq 2>&1 > /dev/null 
!apt-get -y install -qq google-drive-ocamlfuse fuse 
from google.colab import auth 
auth.authenticate_user() 
from oauth2client.client import GoogleCredentials 
creds = GoogleCredentials.get_application_default() 
import getpass 
!google-drive-ocamlfuse -headless -id={creds.client_id} -secret={creds.client_secret} < /dev/null 2>&1 | grep URL 
vcode = getpass.getpass() 
!echo {vcode} | google-drive-ocamlfuse -headless -id={creds.client_id} -secret={creds.client_secret}

E: Package 'python-software-properties' has no installation candidate
Please, open the following URL in a web browser: https://accounts.google.com/o/oauth2/auth?client_id=32555940559.apps.googleusercontent.com&redirect_uri=urn%3Aietf%3Awg%3Aoauth%3A2.0%3Aoob&scope=https%3A%2F%2Fwww.googleapis.com%2Fauth%2Fdrive&response_type=code&access_type=offline&approval_prompt=force
··········
Please, open the following URL in a web browser: https://accounts.google.com/o/oauth2/auth?client_id=32555940559.apps.googleusercontent.com&redirect_uri=urn%3Aietf%3Awg%3Aoauth%3A2.0%3Aoob&scope=https%3A%2F%2Fwww.googleapis.com%2Fauth%2Fdrive&response_type=code&access_type=offline&approval_prompt=force
Please enter the verification code: Access token retrieved correctly.


In [3]:
# List the current working directory to see what is available before mounting Drive.
!ls

adc.json  sample_data


In [0]:
# Create the mount point (-p: no error if it already exists), then mount
# Google Drive on it with google-drive-ocamlfuse.
!mkdir -p driver
!google-drive-ocamlfuse driver

In [6]:
import os
os.chdir("driver/loyalty_prediction_data")
!ls

submission_xgboost.csv	test.csv  train.csv


In [0]:
import numpy as np
import pandas as pd
import xgboost
import lightgbm as lgb
import math

## Data loading

In [0]:
def load_data(filename, is_train=True):
    """Read a CSV file and turn it into a feature matrix.

    The first CSV column is used as the row index. The 'first_active_month'
    and 'card_id' columns (and 'target' when ``is_train`` is true) are removed
    from the features, and every missing value in the remaining columns is
    replaced by the median of that column within this same file.

    Args:
        filename: Path or file-like object accepted by ``pd.read_csv``.
        is_train: When true, the file must contain a 'target' column, which is
            returned separately as the label vector.

    Returns:
        ``(x, y, card_id)`` when ``is_train`` is true, otherwise
        ``(x, card_id)``; all elements are numpy arrays.
    """
    frame = pd.read_csv(filename, index_col=0)
    ids = frame['card_id'].values
    labels = frame['target'].values if is_train else None

    non_feature_cols = ['first_active_month', 'card_id']
    if is_train:
        non_feature_cols.append('target')
    features = frame.drop(columns=non_feature_cols)

    # Series.median() ignores NaN, so each value is the median of the observed entries.
    medians = {col: features[col].median() for col in features.columns}
    features = features.fillna(value=medians)

    matrix = features.values
    if not is_train:
        return matrix, ids
    return matrix, labels, ids
    
# Load both splits from the current working directory. The training card ids
# are not used afterwards, hence the `_` placeholder.
x_train, y_train, _ = load_data('train.csv')
x_test, card_id_test = load_data('test.csv', is_train=False)

## model

### XGBoost

In [0]:
# Baseline regressor: gradient-boosted trees with fixed hyper-parameters,
# fitted on the full training matrix.
model = xgboost.XGBRegressor(
    n_estimators=100,
    learning_rate=0.1,
    max_depth=3,
)
model.fit(x_train, y_train)

XGBRegressor(base_score=0.5, booster='gbtree', colsample_bylevel=1,
       colsample_bytree=1, gamma=0, learning_rate=0.1, max_delta_step=0,
       max_depth=3, min_child_weight=1, missing=None, n_estimators=100,
       n_jobs=1, nthread=None, objective='reg:linear', random_state=0,
       reg_alpha=0, reg_lambda=1, scale_pos_weight=1, seed=None,
       silent=True, subsample=1)

In [0]:
# Root-mean-squared error of the fitted model on the rows it was trained on.
pred_train = model.predict(x_train)
loss_train = math.sqrt(
    np.square(pred_train - y_train).sum() / len(y_train)
)
print('Training RMSE loss: {}'.format(loss_train))

Training RMSE loss: 3.658398357883663


In [0]:
# Predict the target for every test row with the XGBoost model, pair each
# prediction with its card id, and write the result to
# 'submission_xgboost.csv' (without the DataFrame index) in the current
# working directory.
pred_test = model.predict(x_test)
df_test = pd.DataFrame({'card_id': card_id_test, 'target': pred_test})
df_test.to_csv('submission_xgboost.csv', index=False)

### LGBM

In [0]:
# Hyper-parameters handed to lgb.train for the LightGBM regressor.
params = dict(
    num_leaves=51,
    min_data_in_leaf=35,
    objective='regression',
    max_depth=-1,  # LightGBM convention: no explicit depth limit
    learning_rate=0.008,
    boosting='gbdt',
    feature_fraction=0.85,
    bagging_freq=1,
    bagging_fraction=0.82,
    bagging_seed=42,
    metric='rmse',
    lambda_l1=0.11,
    verbosity=-1,
    nthread=4,
    random_state=2019,
)

# Number of boosting iterations requested from lgb.train.
num_boost_round = 10000

In [0]:
# Wrap the training matrix and labels in a LightGBM Dataset, then train a
# booster for the configured number of rounds.
dataset_train = lgb.Dataset(data=x_train, label=y_train)
clf = lgb.train(
    params=params,
    train_set=dataset_train,
    num_boost_round=num_boost_round,
)

In [0]:
# Predict the target for every test row with the LightGBM booster, pair each
# prediction with its card id, and write the result to 'submission_lgbm.csv'
# (without the DataFrame index) in the current working directory.
pred = clf.predict(x_test)
df_test = pd.DataFrame({'card_id': card_id_test, 'target': pred})
df_test.to_csv('submission_lgbm.csv', index=False)