## LightGBM model trained on one-hot-encoded categorical features.

In [None]:
import ast
import os

import numpy as np
import pandas as pd
from numpy.random import seed

# Seed NumPy's global RNG before the project import, keeping the original order.
seed(1)
from stacking_folds import train_lgb

In [None]:
# Directory containing the preprocessed train/test CSV archives loaded below.
DATA_PREPROCESSED_PATH = "../data/processed"

In [None]:
# Load the preprocessed train and test tables from gzip-compressed CSV files.
# The paths are built with os.path.join, which requires the `os` module to be
# imported (the original notebook used it without importing it).
train_concat = pd.read_csv(os.path.join(DATA_PREPROCESSED_PATH, "train_concat.csv.gz"))
test_concat = pd.read_csv(os.path.join(DATA_PREPROCESSED_PATH, "test_concat.csv.gz"))

In [None]:
# Load the list of best features from the first line of train_features.txt,
# which is expected to hold a Python list literal of column names.
# ast.literal_eval is used instead of eval(): it only parses literals, so the
# file's contents can never be executed as arbitrary code.
with open("train_features.txt", "r") as features_file:
    train_features = ast.literal_eval(features_file.readline())

In [None]:
# Categorical feature names: the four MatchedHit_TYPE[k] columns, k = 0..3.
cat_features = ['MatchedHit_TYPE[%d]' % idx for idx in range(4)]

## Feature generation

In [None]:
# Derive two extra columns on both tables:
#   P_PT   = P - PT
#   P_PT/P = (P - PT) / P
for frame in (train_concat, test_concat):
    frame['P_PT'] = frame['P'] - frame['PT']
    frame['P_PT/P'] = frame['P_PT'] / frame['P']

In [None]:
# Add the names of the two derived columns to the feature list.
train_features.extend(['P_PT', 'P_PT/P'])

In [None]:
# For every MatchedHit_X[k] / MatchedHit_Y[k] column (k = 0..3), add an
# absolute-value copy named abs_<column> to both tables and append the new
# column name to the feature list so it is available for feature selection.
hit_coordinate_columns = ['MatchedHit_%s[%d]' % (axis, idx)
                          for axis in ('X', 'Y')
                          for idx in range(4)]
for source_column in hit_coordinate_columns:
    abs_column = 'abs_' + source_column
    train_concat[abs_column] = train_concat[source_column].abs()
    test_concat[abs_column] = test_concat[source_column].abs()
    train_features.append(abs_column)

In [None]:
# Generate one-hot-encoded features for the categorical columns of both tables.
# When `prefix` is a single string, pandas applies it to every encoded column,
# so the four categorical columns produced identically named dummies (several
# columns all called e.g. "dummy__1"). Passing one prefix per source column
# keeps every dummy label unique: "dummy_<source column>_<value>".
dummy_prefixes = ['dummy_%s' % column for column in cat_features]
train_concat = pd.get_dummies(train_concat, columns=cat_features, prefix=dummy_prefixes)
test_concat = pd.get_dummies(test_concat, columns=cat_features, prefix=dummy_prefixes)
# NOTE(review): train and test are encoded independently, so a category value
# that occurs in only one of them yields a dummy column missing from the
# other — confirm whether the two tables need to be aligned.
# Names of all one-hot columns present in the test table.
dummies = [column for column in test_concat.columns if column.startswith('dummy_')]

## Fit and predict

In [None]:
# Number of folds handed to train_lgb below.
n_folds = 5

In [None]:
# Train through train_lgb (imported from stacking_folds) and unpack its three
# results: validation targets, validation predictions and test predictions
# (meaning taken from the variable names — verify against train_lgb).
# NOTE(review): `params` is not defined anywhere in the visible notebook;
# confirm where the model parameters are supposed to come from.
# NOTE(review): the cat_features columns were replaced by dummy columns in the
# pd.get_dummies step above — verify that train_lgb still expects these names.
y_validation, validation_predictions, test_predictions = train_lgb(
    train_concat,
    test_concat,
    train_features,
    cat_features,
    params,
    n_folds,
)

In [None]:
# Write the validation and test prediction arrays to text files, one file each.
prediction_outputs = {
    "../predictions/lgbm_dummies_val": validation_predictions,
    "../predictions/lgbm_dummies_private": test_predictions,
}
for output_path, predictions in prediction_outputs.items():
    np.savetxt(output_path, predictions)