In [1]:
import warnings
import time
import sys
import datetime
import pickle
import os

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import lightgbm as lgb
from sklearn.model_selection import KFold
from sklearn.metrics import mean_squared_error

warnings.simplefilter(action='ignore', category=FutureWarning)
pd.set_option('display.max_columns', 500)

%matplotlib inline

In [8]:
# データの読み込み
path_train_prepro = '../../../../data/processed/train_processed_0618.csv'
train = pd.read_csv(path_train_prepro)
train.columns

Index(['first_active_month', 'card_id', 'feature_1', 'feature_2', 'feature_3',
       'target', 'elapsed_time', 'hist_frequency', 'hist_amount_total',
       'hist_amount_mean', 'hist_merchant_category_nu',
       'hist_merchant_category_mode', 'hist_city_nu', 'hist_city_mode',
       'hist_state_nu', 'hist_state_mode', 'hist_date_min', 'hist_date_max',
       'hist_installments_mean', 'hist_installments_max',
       'hist_installments_min', 'hist_month_lag_mean', 'hist_month_lag_max',
       'hist_month_lag_min', 'hist_category_1_mod', 'hist_category_2_mod',
       'hist_category_3_mod', 'hist_ratio_No', 'hist_ratio_Yes',
       'hist_count_No', 'hist_count_Yes', 'hist_duration'],
      dtype='object')

In [9]:
train.drop(columns={'first_active_month','card_id','hist_date_min','hist_date_max'}).corr()

Unnamed: 0,feature_1,feature_2,feature_3,target,elapsed_time,hist_frequency,hist_amount_total,hist_amount_mean,hist_merchant_category_nu,hist_merchant_category_mode,hist_city_nu,hist_city_mode,hist_state_nu,hist_state_mode,hist_installments_mean,hist_installments_max,hist_installments_min,hist_month_lag_mean,hist_month_lag_max,hist_month_lag_min,hist_category_1_mod,hist_category_2_mod,hist_category_3_mod,hist_ratio_No,hist_ratio_Yes,hist_count_No,hist_count_Yes,hist_duration
feature_1,1.0,-0.130969,0.583092,-0.014251,0.117144,0.016334,-1.5e-05,-0.000154,-0.001235,-0.062191,0.035548,-0.002169,0.00068,-0.008908,0.038405,0.006874,0.022564,-0.092372,-0.013975,-0.100992,0.00134,-0.007831,0.015557,0.052071,-0.052071,0.080817,0.007531,0.098268
feature_2,-0.130969,1.0,0.060925,-0.006242,0.15815,-0.001444,-0.002228,-0.002277,-0.022814,0.009207,-0.068914,0.037559,-0.032445,0.049198,-0.149567,-0.03936,-0.027329,-0.082886,-0.047471,-0.087052,-0.064085,0.102278,-0.202244,0.012033,-0.012033,0.015293,-0.003355,0.079725
feature_3,0.583092,0.060925,1.0,-0.008125,0.196046,-0.019213,0.001853,0.00186,-0.064716,-0.03021,-0.05622,0.056173,-0.081851,0.043557,-0.19837,-0.048545,0.010398,-0.098449,-0.034171,-0.11197,-0.090561,0.023708,-0.282022,0.127352,-0.127352,0.096593,-0.031827,0.105359
target,-0.014251,-0.006242,-0.008125,1.0,-0.050453,-0.01164,0.000154,0.000295,-0.013517,-0.011544,-0.00632,0.010941,-0.011204,0.007871,-0.01391,-0.0062,-0.03912,-0.004,-0.017872,0.001677,-0.022791,0.011265,-0.01924,-0.042127,0.042127,-0.048008,-0.006516,-0.007183
elapsed_time,0.117144,0.15815,0.196046,-0.050453,1.0,0.12603,-0.000825,-0.000635,0.097903,0.026759,0.034233,0.012706,-0.000554,0.019926,-0.067094,-0.011186,0.071657,-0.529314,-0.087346,-0.580974,0.005598,-0.00592,-0.093191,0.018283,-0.018283,0.116563,0.11887,0.572402
hist_frequency,0.016334,-0.001444,-0.019213,-0.01164,0.12603,1.0,-0.004046,-0.001759,0.793448,-0.014476,0.561965,0.013258,0.396431,0.055679,-0.162168,0.013461,-0.118709,-0.257466,0.122953,-0.32464,-0.091496,0.02302,-0.142707,-0.149331,0.149331,0.496163,0.994572,0.371001
hist_amount_total,-1.5e-05,-0.002228,0.001853,0.000154,-0.000825,-0.004046,1.0,0.996139,-0.004161,0.001835,-0.003418,0.0025,-0.003604,-0.000497,-0.001769,-0.000411,-0.000107,0.003555,4.9e-05,0.003357,-0.0006,-0.001148,-0.00257,0.000701,-0.000701,-0.001441,-0.004092,-0.003459
hist_amount_mean,-0.000154,-0.002277,0.00186,0.000295,-0.000635,-0.001759,0.996139,1.0,-0.002519,0.001624,-0.001927,0.00218,-0.002567,-0.000495,-0.001677,-0.000415,-0.000266,0.00332,0.000481,0.002867,-0.000614,-0.001581,-0.002137,-0.000208,0.000208,-0.001237,-0.001706,-0.002835
hist_merchant_category_nu,-0.001235,-0.022814,-0.064716,-0.013517,0.097903,0.793448,-0.004161,-0.002519,1.0,-0.052626,0.645214,0.032471,0.508204,0.09215,-0.126353,0.034394,-0.231776,-0.234808,0.149143,-0.308606,-0.197417,0.02835,-0.094955,-0.21345,0.21345,0.344081,0.795085,0.35895
hist_merchant_category_mode,-0.062191,0.009207,-0.03021,-0.011544,0.026759,-0.014476,0.001835,0.001624,-0.052626,1.0,-0.113257,-0.054672,-0.062192,-0.102295,0.05947,0.010144,0.035275,-0.019771,0.024326,-0.032836,0.201547,-0.047212,0.057039,0.085342,-0.085342,0.058934,-0.022321,0.036588


In [5]:
train.corr()

ValueError: could not convert string to float: '2017-06-01'