# https://github.com/ChenglongChen/tensorflow-DeepFM

In [2]:
import numpy as np
import pandas as pd
import datetime
import itertools

%matplotlib inline
import matplotlib.pyplot as plt
import seaborn as sns

import lightgbm as lgb

from sklearn.model_selection import StratifiedKFold, train_test_split
from sklearn.metrics import mean_squared_error, roc_auc_score, confusion_matrix, f1_score
from sklearn.preprocessing import MinMaxScaler

import warnings
# Notebook-wide configuration: mute all warnings, fix the legacy NumPy global
# seed so random draws repeat across runs, and set pandas display limits.
warnings.filterwarnings("ignore")
np.random.seed(4590)

pd.set_option("display.precision", 7)
pd.set_option("display.max_rows", 50)

This means that in case of installing LightGBM from PyPI via the ``pip install lightgbm`` command, you don't need to install the gcc compiler anymore.
Instead of that, you need to install the OpenMP library, which is required for running LightGBM on the system with the Apple Clang compiler.
You can install the OpenMP library by the following command: ``brew install libomp``.


In [4]:
# Read the base train and test tables from the local ./input directory.
df_train, df_test = map(pd.read_csv, ("./input/train.csv", "./input/test.csv"))

In [5]:
# Flag rows whose target is below -30 with 1, every other row with 0.
df_train["outliers"] = np.where(df_train["target"].lt(-30), 1, 0)

In [6]:
def merge_new_feature(path, train=None, test=None):
    """Left-join one feature CSV onto the train and test frames by ``card_id``.

    Parameters
    ----------
    path : str
        CSV file read with ``pd.read_csv``; it must contain a ``card_id``
        column to join on.
    train, test : pandas.DataFrame, optional
        Frames to extend. When omitted, the notebook-level ``df_train`` and
        ``df_test`` are used, which is what the one-argument call always did.

    Returns
    -------
    tuple of pandas.DataFrame
        ``(train_merged, test_merged)``. The inputs are not modified; the
        caller must rebind the results to keep them.

    Notes
    -----
    The joins use ``how="left"``, so rows without a match in the feature file
    are kept and get NaN in the new columns. Column names present on both
    sides (other than ``card_id``) receive pandas' default ``_x`` / ``_y``
    suffixes.
    """
    # Fall back to the notebook globals only when no frame is passed, so
    # existing `merge_new_feature(path)` calls behave exactly as before.
    if train is None:
        train = df_train
    if test is None:
        test = df_test

    df_new_feature = pd.read_csv(path)

    df_tr = pd.merge(train, df_new_feature, on="card_id", how="left")
    df_te = pd.merge(test, df_new_feature, on="card_id", how="left")

    return df_tr, df_te

In [8]:
# Feature tables to attach, listed in merge order. Each call rebinds
# df_train / df_test, so the next merge builds on the previous result.
feature_paths = [
    "./input/feature_categorical_count_rate.csv",
    "./input/feature_categorical_rete__by__categorical_count.csv",
    "./input/feature_various_time_data.csv",
    "./input/merchants_nmf.csv",
    "./input/merchants_category3_A_nmf.csv",
    "./input/feature_catogorical_count_weighted_elapsed.csv",
    "./input/feature_latest_transaction_data__per__card_id.csv",
    "./input/feature_term_from_second_latest_to_latest.csv",
    "./input/feature_nunique_marchant__per__card_id.csv",
    "./input/feature_purchase_amount_mean_max_increase.csv",
    "./input/feature_frequency_mean_increase_rate.csv",
    "./input/feature_card_count_increase_rate.csv",
    "./input/feature_new_merchant_trs_card_flag.csv",
    "./input/feature_count_null_in_h_trs.csv",
    "./input/feature_card_valid_term.csv",
    "./input/feature_most_used_city_id.csv",
    "./input/feature_most_used_state_id.csv",
]

for feature_path in feature_paths:
    df_train, df_test = merge_new_feature(feature_path)

In [28]:
a = """
first_active_month                                           201917 non-null object
card_id                                                      201917 non-null object
feature_1                                                    201917 non-null int64
feature_2                                                    201917 non-null int64
feature_3                                                    201917 non-null int64
target                                                       201917 non-null float64
outliers                                                     201917 non-null int64
card_id_count                                                201917 non-null int64
authorized_flag_ture                                         201917 non-null int64
authorized_flag_ture_rate                                    201917 non-null float64
category_1_true                                              201917 non-null int64
category_1_true_rate_x                                       201917 non-null float64
category_2_1                                                 201917 non-null int64
category_2_2                                                 201917 non-null int64
category_2_3                                                 201917 non-null int64
category_2_4                                                 201917 non-null int64
category_2_5                                                 201917 non-null int64
category_2_1_rate_x                                          201917 non-null float64
category_2_2_rate_x                                          201917 non-null float64
category_2_3_rate_x                                          201917 non-null float64
category_2_4_rate_x                                          201917 non-null float64
category_2_5_rate_x                                          201917 non-null float64
category_3_A                                                 201917 non-null int64
category_3_B                                                 201917 non-null int64
category_3_C                                                 201917 non-null int64
category_3_A_rate_x                                          201917 non-null float64
category_3_B_rate_x                                          201917 non-null float64
category_3_C_rate_x                                          201917 non-null float64
category_4_true_x                                            201917 non-null int64
category_4_true_rate                                         201917 non-null float64
installments_N                                               201917 non-null int64
installments_0                                               201917 non-null int64
installments_1                                               201917 non-null int64
installments_2                                               201917 non-null int64
installments_3                                               201917 non-null int64
installments_4                                               201917 non-null int64
installments_5                                               201917 non-null int64
installments_6                                               201917 non-null int64
installments_7_                                              201917 non-null int64
installments_N_rate_x                                        201917 non-null float64
installments_0_rate_x                                        201917 non-null float64
installments_1_rate_x                                        201917 non-null float64
installments_2_rate_x                                        201917 non-null float64
installments_3_rate_x                                        201917 non-null float64
installments_4_rate_x                                        201917 non-null float64
installments_5_rate_x                                        201917 non-null float64
installments_6_rate_x                                        201917 non-null float64
installments_7__rate_x                                       201917 non-null float64
month_lag_N13                                                201917 non-null int64
month_lag_N12                                                201917 non-null int64
month_lag_N11                                                201917 non-null int64
month_lag_N10                                                201917 non-null int64
month_lag_N9                                                 201917 non-null int64
month_lag_N8                                                 201917 non-null int64
month_lag_N7                                                 201917 non-null int64
month_lag_N6                                                 201917 non-null int64
month_lag_N5                                                 201917 non-null int64
month_lag_N4                                                 201917 non-null int64
month_lag_N3                                                 201917 non-null int64
month_lag_N2                                                 201917 non-null int64
month_lag_N1                                                 201917 non-null int64
month_lag_0                                                  201917 non-null int64
month_lag_1                                                  201917 non-null int64
month_lag_2                                                  201917 non-null int64
month_lag_N13_rate_x                                         201917 non-null float64
month_lag_N12_rate_x                                         201917 non-null float64
month_lag_N11_rate_x                                         201917 non-null float64
month_lag_N10_rate_x                                         201917 non-null float64
month_lag_N9_rate_x                                          201917 non-null float64
month_lag_N8_rate_x                                          201917 non-null float64
month_lag_N7_rate_x                                          201917 non-null float64
month_lag_N6_rate_x                                          201917 non-null float64
month_lag_N5_rate_x                                          201917 non-null float64
month_lag_N4_rate_x                                          201917 non-null float64
month_lag_N3_rate_x                                          201917 non-null float64
month_lag_N2_rate_x                                          201917 non-null float64
month_lag_N1_rate_x                                          201917 non-null float64
month_lag_0_rate_x                                           201917 non-null float64
month_lag_1_rate                                             201917 non-null float64
month_lag_2_rate                                             201917 non-null float64
most_recent_sales_range_A                                    201917 non-null int64
most_recent_sales_range_B                                    201917 non-null int64
most_recent_sales_range_C                                    201917 non-null int64
most_recent_sales_range_D                                    201917 non-null int64
most_recent_sales_range_E                                    201917 non-null int64
most_recent_sales_range_A_rate_x                             201917 non-null float64
most_recent_sales_range_B_rate_x                             201917 non-null float64
most_recent_sales_range_C_rate_x                             201917 non-null float64
most_recent_sales_range_D_rate_x                             201917 non-null float64
most_recent_sales_range_E_rate_x                             201917 non-null float64
most_recent_purchases_range_A                                201917 non-null int64
most_recent_purchases_range_B                                201917 non-null int64
most_recent_purchases_range_C                                201917 non-null int64
most_recent_purchases_range_D                                201917 non-null int64
most_recent_purchases_range_E                                201917 non-null int64
most_recent_purchases_range_A_rate_x                         201917 non-null float64
most_recent_purchases_range_B_rate_x                         201917 non-null float64
most_recent_purchases_range_C_rate_x                         201917 non-null float64
most_recent_purchases_range_D_rate_x                         201917 non-null float64
most_recent_purchases_range_E_rate_x                         201917 non-null float64
authorized_flag_true_rate                                    201917 non-null float64
category_1_true_rate_y                                       201917 non-null float64
category_2_1_rate_y                                          201917 non-null float64
category_2_2_rate_y                                          201917 non-null float64
category_2_3_rate_y                                          201917 non-null float64
category_2_4_rate_y                                          201917 non-null float64
category_2_5_rate_y                                          201917 non-null float64
category_3_A_rate_y                                          201917 non-null float64
category_3_B_rate_y                                          201917 non-null float64
category_3_C_rate_y                                          201917 non-null float64
category_4_true_y                                            201917 non-null float64
installments_N_rate_y                                        201917 non-null float64
installments_0_rate_y                                        201917 non-null float64
installments_1_rate_y                                        201917 non-null float64
installments_2_rate_y                                        201917 non-null float64
installments_3_rate_y                                        201917 non-null float64
installments_4_rate_y                                        201917 non-null float64
installments_5_rate_y                                        201917 non-null float64
installments_6_rate_y                                        201917 non-null float64
installments_7__rate_y                                       201917 non-null float64
installments_999_rate                                        201917 non-null float64
month_lag_N13_rate_y                                         201917 non-null float64
month_lag_N12_rate_y                                         201917 non-null float64
month_lag_N11_rate_y                                         201917 non-null float64
month_lag_N10_rate_y                                         201917 non-null float64
month_lag_N9_rate_y                                          201917 non-null float64
month_lag_N8_rate_y                                          201917 non-null float64
month_lag_N7_rate_y                                          201917 non-null float64
month_lag_N6_rate_y                                          201917 non-null float64
month_lag_N5_rate_y                                          201917 non-null float64
month_lag_N4_rate_y                                          201917 non-null float64
month_lag_N3_rate_y                                          201917 non-null float64
month_lag_N2_rate_y                                          201917 non-null float64
month_lag_N1_rate_y                                          201917 non-null float64
month_lag_0_rate_y                                           201917 non-null float64
most_recent_sales_range_A_rate_y                             201917 non-null float64
most_recent_sales_range_B_rate_y                             201917 non-null float64
most_recent_sales_range_C_rate_y                             201917 non-null float64
most_recent_sales_range_D_rate_y                             201917 non-null float64
most_recent_sales_range_E_rate_y                             201917 non-null float64
most_recent_purchases_range_A_rate_y                         201917 non-null float64
most_recent_purchases_range_B_rate_y                         201917 non-null float64
most_recent_purchases_range_C_rate_y                         201917 non-null float64
most_recent_purchases_range_D_rate_y                         201917 non-null float64
most_recent_purchases_range_E_rate_y                         201917 non-null float64
feature                                                      201917 non-null int64
elapsed_from_feature_oldest_active_to_first_active           201917 non-null float64
elapsed_from_oldest_purchase_date_to_first_active            201917 non-null float64
elapsed_from_oldest_purchase_date_to_latest_purchase_date    201917 non-null int64
elapsed_from_latest_purchase_date                            201917 non-null int64
NMF_comp_1                                                   201917 non-null float64
NMF_comp_2                                                   201917 non-null float64
NMF_comp_3                                                   201917 non-null float64
NMF_comp_4                                                   201917 non-null float64
NMF_comp_5                                                   201917 non-null float64
NMF_comp_6                                                   201917 non-null float64
NMF_comp_7                                                   201917 non-null float64
NMF_comp_8                                                   201917 non-null float64
NMF_comp_9                                                   201917 non-null float64
NMF_comp_10                                                  201917 non-null float64
NMF_comp_11                                                  201917 non-null float64
NMF_comp_12                                                  201917 non-null float64
NMF_comp_13                                                  201917 non-null float64
NMF_comp_14                                                  201917 non-null float64
NMF_comp_15                                                  201917 non-null float64
NMF_comp_16                                                  201917 non-null float64
NMF_comp_17                                                  201917 non-null float64
NMF_comp_18                                                  201917 non-null float64
NMF_comp_19                                                  201917 non-null float64
NMF_comp_20                                                  201917 non-null float64
NMF_category3_comp_1                                         101619 non-null float64
NMF_category3_comp_2                                         101619 non-null float64
NMF_category3_comp_3                                         101619 non-null float64
NMF_category3_comp_4                                         101619 non-null float64
NMF_category3_comp_5                                         101619 non-null float64
NMF_category3_comp_6                                         101619 non-null float64
NMF_category3_comp_7                                         101619 non-null float64
NMF_category3_comp_8                                         101619 non-null float64
NMF_category3_comp_9                                         101619 non-null float64
NMF_category3_comp_10                                        101619 non-null float64
NMF_category3_comp_11                                        101619 non-null float64
NMF_category3_comp_12                                        101619 non-null float64
NMF_category3_comp_13                                        101619 non-null float64
NMF_category3_comp_14                                        101619 non-null float64
NMF_category3_comp_15                                        101619 non-null float64
NMF_category3_comp_16                                        101619 non-null float64
NMF_category3_comp_17                                        101619 non-null float64
NMF_category3_comp_18                                        101619 non-null float64
NMF_category3_comp_19                                        101619 non-null float64
NMF_category3_comp_20                                        101619 non-null float64
authorized_flag_ture_weighted_elapsed                        201917 non-null int64
category_1_true_weighted_elapsed                             201917 non-null int64
category_2_1_weighted_elapsed                                201917 non-null float64
category_2_2_weighted_elapsed                                201917 non-null float64
category_2_3_weighted_elapsed                                201917 non-null float64
category_2_4_weighted_elapsed                                201917 non-null float64
category_2_5_weighted_elapsed                                201917 non-null float64
category_3_A_weighted_elapsed                                201917 non-null float64
category_3_B_weighted_elapsed                                201917 non-null float64
category_3_C_weighted_elapsed                                201917 non-null float64
category_4_true_weighted_elapsed                             201917 non-null float64
installments_N_weighted_elapsed                              201917 non-null float64
installments_0_weighted_elapsed                              201917 non-null float64
installments_1_weighted_elapsed                              201917 non-null float64
installments_2_weighted_elapsed                              201917 non-null float64
installments_3_weighted_elapsed                              201917 non-null float64
installments_4_weighted_elapsed                              201917 non-null float64
installments_5_weighted_elapsed                              201917 non-null float64
installments_6_weighted_elapsed                              201917 non-null float64
installments_7__weighted_elapsed                             201917 non-null float64
installments_999_weighted_elapsed                            201917 non-null float64
month_lag_N13_weighted_elapsed                               201917 non-null float64
month_lag_N12_weighted_elapsed                               201917 non-null float64
month_lag_N11_weighted_elapsed                               201917 non-null float64
month_lag_N10_weighted_elapsed                               201917 non-null float64
month_lag_N9_weighted_elapsed                                201917 non-null float64
month_lag_N8_weighted_elapsed                                201917 non-null float64
month_lag_N7_weighted_elapsed                                201917 non-null float64
month_lag_N6_weighted_elapsed                                201917 non-null float64
month_lag_N5_weighted_elapsed                                201917 non-null float64
month_lag_N4_weighted_elapsed                                201917 non-null float64
month_lag_N3_weighted_elapsed                                201917 non-null float64
month_lag_N2_weighted_elapsed                                201917 non-null float64
month_lag_N1_weighted_elapsed                                201917 non-null float64
month_lag_0_weighted_elapsed                                 201917 non-null float64
most_recent_sales_range_A_weighted_elapsed                   201917 non-null float64
most_recent_sales_range_B_weighted_elapsed                   201917 non-null float64
most_recent_sales_range_C_weighted_elapsed                   201917 non-null float64
most_recent_sales_range_D_weighted_elapsed                   201917 non-null float64
most_recent_sales_range_E_weighted_elapsed                   201917 non-null float64
most_recent_purchases_range_A_weighted_elapsed               201917 non-null float64
most_recent_purchases_range_B_weighted_elapsed               201917 non-null float64
most_recent_purchases_range_C_weighted_elapsed               201917 non-null float64
most_recent_purchases_range_D_weighted_elapsed               201917 non-null float64
most_recent_purchases_range_E_weighted_elapsed               201917 non-null float64
authorized_flag_latest_trs                                   201917 non-null float64
category_1_true_latest_trs                                   201917 non-null float64
category_2_latest_trs                                        180831 non-null float64
category_3_A_latest_trs                                      201917 non-null float64
category_3_B_latest_trs                                      201917 non-null float64
category_3_C_latest_trs                                      201917 non-null float64
category_4_true_latest_trs                                   201917 non-null float64
category_4_null_latest_trs                                   201917 non-null float64
installments_latest_trs                                      201917 non-null float64
numerical_1_latest_trs                                       111282 non-null float64
numerical_1_null_latest_trs                                  201917 non-null float64
month_lag_latest_trs                                         201917 non-null int64
purchase_amount_latest_trs                                   201917 non-null float64
most_recent_sales_range_A_latest_trs                         201917 non-null float64
most_recent_sales_range_B_latest_trs                         201917 non-null float64
most_recent_sales_range_C_latest_trs                         201917 non-null float64
most_recent_sales_range_D_latest_trs                         201917 non-null float64
most_recent_sales_range_E_latest_trs                         201917 non-null float64
most_recent_sales_range_null_latest_trs                      201917 non-null float64
most_recent_purchases_range_A_latest_trs                     201917 non-null float64
most_recent_purchases_range_B_latest_trs                     201917 non-null float64
most_recent_purchases_range_C_latest_trs                     201917 non-null float64
most_recent_purchases_range_D_latest_trs                     201917 non-null float64
most_recent_purchases_range_E_latest_trs                     201917 non-null float64
most_recent_purchases_range_null_latest_trs                  201917 non-null float64
elapsed_from_second_latest_to_latest                         201917 non-null int64
nunique_merchant                                             201917 non-null int64
mean_all_purchase_amount                                     201917 non-null float64
max_all_purchase_amount                                      201917 non-null float64
mean_latest3_purchase_amount                                 160717 non-null float64
max_latest3_purchase_amount                                  160717 non-null float64
mean_latest6_purchase_amount                                 160717 non-null float64
max_latest6_purchase_amount                                  160717 non-null float64
increase_rate_from_all_to_latest3                            160717 non-null float64
increase_rate_from_latest6_to_latest3                        160717 non-null float64
increase_rate_from_all_to_latest6                            160717 non-null float64
frequency_all                                                201917 non-null float64
frequency_latest3                                            160717 non-null float64
frequency_latest6                                            183905 non-null float64
frequency_increase_rate_from_all_to_latest3                  160717 non-null float64
frequency_increase_rate_from_all_to_latest6                  183905 non-null float64
frequency_increase_rate_from_latest6_to_latest3              181633 non-null float64
count_latest3_card_in_trs                                    160717 non-null float64
count_latest6_card_in_trs                                    183905 non-null float64
count_increase_rate_from_all_to_latest3                      160717 non-null float64
count_increase_rate_from_all_to_latest6                      183905 non-null float64
count_increase_rate_from_latest6_to_latest3                  160717 non-null float64
new_merchant_trs_flag                                        201917 non-null int64
count_null_category_2                                        201917 non-null int64
rate_null_category_2                                         201917 non-null float64
count_null_category_3                                        201917 non-null int64
rate_null_category_3                                         201917 non-null float64
count_null_merchant_id                                       201917 non-null int64
rate_null_merchant_id                                        201917 non-null float64
card_valid_term                                              201917 non-null float64
most_used_city_id                                            201917 non-null int64
most_used_state_id                                           201917 non-null int64
"""

In [29]:
# Turn the pasted `DataFrame.info()` text into "<column>---<dtype>" strings.
# Each non-blank row reads "<column> <count> non-null <dtype>", so take the
# first and last whitespace-separated fields rather than matching the literal
# count 201917: rows with missing values (e.g. "101619 non-null") carry a
# different count and were previously left unparsed.
# Blank rows are skipped, so no empty entry ends up in the list.
# The isinstance guard makes the cell safe to re-run once `a` is already a list.
if isinstance(a, str):
    a = [
        fields[0] + "---" + fields[-1]
        for fields in (row.split() for row in a.splitlines())
        if fields
    ]
a

['first_active_month---object',
 'card_id---object',
 'feature_1---int64',
 'feature_2---int64',
 'feature_3---int64',
 'target---float64',
 'outliers---int64',
 'card_id_count---int64',
 'authorized_flag_ture---int64',
 'authorized_flag_ture_rate---float64',
 'category_1_true---int64',
 'category_1_true_rate_x---float64',
 'category_2_1---int64',
 'category_2_2---int64',
 'category_2_3---int64',
 'category_2_4---int64',
 'category_2_5---int64',
 'category_2_1_rate_x---float64',
 'category_2_2_rate_x---float64',
 'category_2_3_rate_x---float64',
 'category_2_4_rate_x---float64',
 'category_2_5_rate_x---float64',
 'category_3_A---int64',
 'category_3_B---int64',
 'category_3_C---int64',
 'category_3_A_rate_x---float64',
 'category_3_B_rate_x---float64',
 'category_3_C_rate_x---float64',
 'category_4_true_x---int64',
 'category_4_true_rate---float64',
 'installments_N---int64',
 'installments_0---int64',
 'installments_1---int64',
 'installments_2---int64',
 'installments_3---int64',
 'i

In [None]:
# NOTE(review): this loop was left without a body, which is a SyntaxError and
# stops "Restart & Run All" at this cell. `pass` keeps the notebook runnable
# until the per-entry logic is written.
for i in a:
    pass  # TODO: process each entry of `a`

In [None]:
# Column-name groupings by role.
# NOTE(review): these lists look unfinished; presumably they are meant to
# partition the merged frame's columns for a downstream model. Confirm and
# complete them before use.
ignore_cols = [
    "first_active_month",
    "card_id",
    "target",
    "outliers",
    "feature"
]


# The placeholder "" entry was removed: it is not a column name and would
# raise KeyError as soon as this list is used to select columns.
numeric_cols = [
    "card_id_count",
]


categorical_cols = []