In [1]:
import os
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import cohen_kappa_score, accuracy_score, mean_absolute_error, f1_score
from sklearn.model_selection import GroupKFold, KFold, StratifiedKFold
from tqdm import tqdm
import lightgbm as lgb
import numpy as np
import pandas as pd
import math
import matplotlib.pyplot as plt
import seaborn as sns
import warnings
import gc
from datetime import datetime
from sklearn.preprocessing import PolynomialFeatures
from itertools import combinations

warnings.filterwarnings('ignore')
pd.set_option('display.max_columns', None)
pd.set_option('display.max_rows', None)

In [2]:
seed = 2020

In [3]:
df_train = pd.read_csv('./data/used_car_train_20200313.csv', sep=' ')
df_test = pd.read_csv('./data/used_car_testB_20200421.csv', sep=' ')
df_sub = pd.read_csv('./data/used_car_sample_submit.csv', sep=' ')

In [4]:
df_feature = pd.concat([df_train, df_test], sort=False)

In [5]:
df_feature['regionCode'].max()

8120

In [6]:
df_feature.head()

Unnamed: 0,SaleID,name,regDate,model,brand,bodyType,fuelType,gearbox,power,kilometer,notRepairedDamage,regionCode,seller,offerType,creatDate,price,v_0,v_1,v_2,v_3,v_4,v_5,v_6,v_7,v_8,v_9,v_10,v_11,v_12,v_13,v_14
0,0,736,20040402,30.0,6,1.0,0.0,0.0,60,12.5,0.0,1046,0,0,20160404,1850.0,43.357796,3.966344,0.050257,2.159744,1.143786,0.235676,0.101988,0.129549,0.022816,0.097462,-2.881803,2.804097,-2.420821,0.795292,0.914762
1,1,2262,20030301,40.0,1,2.0,0.0,0.0,0,15.0,-,4366,0,0,20160309,3600.0,45.305273,5.236112,0.137925,1.380657,-1.422165,0.264777,0.121004,0.135731,0.026597,0.020582,-4.900482,2.096338,-1.030483,-1.722674,0.245522
2,2,14874,20040403,115.0,15,1.0,0.0,0.0,163,12.5,0.0,2806,0,0,20160402,6222.0,45.978359,4.823792,1.319524,-0.998467,-0.996911,0.25141,0.114912,0.165147,0.062173,0.027075,-4.846749,1.803559,1.56533,-0.832687,-0.229963
3,3,71865,19960908,109.0,10,0.0,0.0,1.0,193,15.0,0.0,434,0,0,20160312,2400.0,45.687478,4.492574,-0.050616,0.8836,-2.228079,0.274293,0.1103,0.121964,0.033395,0.0,-4.509599,1.28594,-0.501868,-2.438353,-0.478699
4,4,111080,20120103,110.0,5,1.0,0.0,0.0,68,5.0,0.0,6977,0,0,20160313,5200.0,44.383511,2.031433,0.572169,-1.571239,2.246088,0.228036,0.073205,0.09188,0.078819,0.121534,-1.89624,0.910783,0.93111,2.834518,1.923482


In [7]:
from scipy import stats

cols = ['bodyType', 'fuelType', 'gearbox']
df_feature['gp'] = df_feature['brand'].astype(
    'str') + df_feature['model'].astype('str')
gp_col = 'gp'

df_na = df_feature[cols].isna()
# 根据分组计算众数
df_mode = df_feature.groupby(gp_col)[cols].agg(
    lambda x: stats.mode(x)[0][0])

for col in cols:
    na_series = df_na[col]
    names = list(df_feature.loc[na_series, gp_col])

    t = df_mode.loc[names, col]
    t.index = df_feature.loc[na_series, col].index

    df_feature.loc[na_series, col] = t

del df_feature['gp']
df_feature[cols].isnull().sum()

bodyType    15
fuelType    41
gearbox      3
dtype: int64

In [8]:
df_feature['notRepairedDamage'] = df_feature['notRepairedDamage'].replace(
    '-', 2)
df_feature['notRepairedDamage'] = df_feature['notRepairedDamage'].astype(
    'float')

In [9]:
df_feature.tail()

Unnamed: 0,SaleID,name,regDate,model,brand,bodyType,fuelType,gearbox,power,kilometer,notRepairedDamage,regionCode,seller,offerType,creatDate,price,v_0,v_1,v_2,v_3,v_4,v_5,v_6,v_7,v_8,v_9,v_10,v_11,v_12,v_13,v_14
49995,249995,111443,20041005,4.0,4,0.0,0.0,1.0,150,15.0,2.0,5564,0,0,20160309,,46.321013,-3.304401,0.073363,-0.622359,-0.778349,0.263668,0.000292,0.141804,0.076393,0.039272,2.072901,-2.531869,1.716978,-1.063437,0.326587
49996,249996,152834,20130409,65.0,1,0.0,0.0,0.0,179,4.0,0.0,5220,0,0,20160323,,48.086547,-3.318641,0.965881,-2.67216,0.35744,0.25531,0.000991,0.155868,0.108425,0.067841,1.358504,-3.290295,4.269809,0.140524,0.556221
49997,249997,132531,20041211,4.0,4,0.0,0.0,1.0,147,12.5,0.0,3795,0,0,20160316,,46.145279,-3.305263,-0.015283,-0.288329,-0.687112,0.262933,0.000318,0.141872,0.071968,0.042966,2.165658,-2.417885,1.370612,-1.073133,0.270602
49998,249998,143405,20020702,40.0,1,4.0,0.0,1.0,176,15.0,0.0,61,0,0,20160327,,45.507088,-3.197006,-1.141252,-0.43493,-1.84504,0.282106,2.3e-05,0.067483,0.067526,0.009006,2.030114,-2.939244,0.569078,-1.718245,0.316379
49999,249999,78202,20090708,32.0,8,1.0,0.0,0.0,0,3.0,0.0,4158,0,0,20160401,,44.289471,4.181452,0.547068,-0.775841,1.789601,0.231449,0.103947,0.096027,0.062328,0.11018,-3.68909,2.032376,0.109157,2.202828,0.847469


In [10]:
del df_feature['seller']
del df_feature['offerType']

In [11]:
df_feature['price'] = np.log1p(df_feature['price'])

# feature engine

In [12]:
v_cols = ['v_'+str(i) for i in range(15)]

df_feature['v_mean'] = df_feature[v_cols].mean(axis=1)
df_feature['v_max'] = df_feature[v_cols].max(axis=1)
df_feature['v_min'] = df_feature[v_cols].min(axis=1)
df_feature['v_std'] = df_feature[v_cols].std(axis=1)

for col in ['v_mean', 'v_max', 'v_min', 'v_std']:
    df_feature[f'name_{col}_mean'] = df_feature.groupby('name')[
        col].transform('mean')
    df_feature[f'name_{col}_std'] = df_feature.groupby('name')[
        col].transform('std')
    df_feature[f'name_{col}_max'] = df_feature.groupby('name')[
        col].transform('max')
    df_feature[f'name_{col}_min'] = df_feature.groupby('name')[
        col].transform('min')

In [13]:
# # 时序特征
# df_sort = df_feature[['brand', 'model', 'creatDate', 'price']]
# df_sort = df_sort.groupby(['brand', 'model', 'creatDate'])[
#     'price'].mean().reset_index()
# df_sort.rename(columns={'price': 'brand_model_day_price_mean'}, inplace=True)
# df_sort = df_sort.sort_values(['brand', 'model', 'creatDate'])
# df_sort['brand_model_day_shift1_price_mean'] = df_sort.groupby(
#     ['brand', 'model'])['brand_model_day_price_mean'].shift(1)
# del df_sort['brand_model_day_price_mean']
# # # df_sort['brand_model_price_rolling3_mean'] = df_sort.groupby(
# # #     ['brand', 'model'])['price'].shift().rolling(window=3, min_periods=3).mean()
# # df_sort['brand_model_price_shift1'] = df_sort.groupby(
# #     ['brand', 'model'])['price'].shift()
# # df_sort.head()
# # print(df_feature.shape)
# df_feature = df_feature.merge(df_sort, how='left')
# # print(df_feature.shape)

In [14]:
df_feature['name_count'] = df_feature.groupby(
    ['name'])['SaleID'].transform('count')

In [15]:
def date_parse(x):
    year = int(str(x)[:4])
    month = int(str(x)[4:6])
    day = int(str(x)[6:8])

    if month < 1:
        month = 1

    date = datetime(year, month, day)
    return date


df_feature['regDate'] = df_feature['regDate'].apply(date_parse)
df_feature['creatDate'] = df_feature['creatDate'].apply(date_parse)
df_feature['regDate_year'] = df_feature['regDate'].dt.year
df_feature['creatDate_year'] = df_feature['creatDate'].dt.year
df_feature['creatDate_month'] = df_feature['creatDate'].dt.month

In [16]:
df_feature['car_age_day'] = (
    df_feature['creatDate'] - df_feature['regDate']).dt.days
df_feature['car_age_year'] = round(df_feature['car_age_day'] / 365, 1)

In [17]:
# 简单统计
def stat(df, df_merge, group_by, agg):
    group = df.groupby(group_by).agg(agg)

    columns = []
    for on, methods in agg.items():
        for method in methods:
            columns.append('{}_{}_{}'.format('_'.join(group_by), on, method))
    group.columns = columns
    group.reset_index(inplace=True)
    df_merge = df_merge.merge(group, on=group_by, how='left')

    del (group)
    gc.collect()

    return df_merge

In [18]:
l = ['name', 'model', 'brand', 'bodyType']
for f1 in tqdm(l):
    for f2 in v_cols:
        df_feature = stat(df_feature, df_feature, [f1], {
            f2: ['mean', 'max', 'min', 'std']})

100%|██████████| 4/4 [00:51<00:00, 12.76s/it]


In [19]:
def statis_feat(df_know, df_unknow):
    l = ['name', 'model', 'brand', 'bodyType']

    combs = list(combinations(l, 2))
    for t1, t2 in combs:
        df_unknow = stat(df_know, df_unknow, [t1, t2], {
                         'price': ['mean', 'max', 'min', 'std', 'median']})

    for f in tqdm(l):
        df_unknow = stat(df_know, df_unknow, [f], {
                         'price': ['mean', 'max', 'min', 'std']})

    return df_unknow

In [20]:
# 5折交叉
df_train = df_feature[~df_feature['price'].isnull()]
df_train = df_train.reset_index(drop=True)
df_test = df_feature[df_feature['price'].isnull()]

df_stas_feat = None
kf = KFold(n_splits=5, random_state=seed, shuffle=True)
for train_index, val_index in kf.split(df_train):
    df_fold_train = df_train.iloc[train_index]
    df_fold_val = df_train.iloc[val_index]

    df_fold_val = statis_feat(df_fold_train, df_fold_val)
    df_stas_feat = pd.concat([df_stas_feat, df_fold_val], axis=0)

    del(df_fold_train)
    del(df_fold_val)
    gc.collect()

df_test = statis_feat(df_train, df_test)
df_feature = pd.concat([df_stas_feat, df_test], axis=0)

del(df_stas_feat)
del(df_train)
del(df_test)
gc.collect()

100%|██████████| 4/4 [00:01<00:00,  3.41it/s]
100%|██████████| 4/4 [00:01<00:00,  2.66it/s]
100%|██████████| 4/4 [00:01<00:00,  2.33it/s]
100%|██████████| 4/4 [00:01<00:00,  2.96it/s]
100%|██████████| 4/4 [00:01<00:00,  3.93it/s]
100%|██████████| 4/4 [00:01<00:00,  2.63it/s]


27

In [21]:
df_feature['v_0_add_v_4'] = df_feature['v_0'] + df_feature['v_4']
df_feature['v_0_add_v_8'] = df_feature['v_0'] + df_feature['v_8']
df_feature['v_1_add_v_3'] = df_feature['v_1'] + df_feature['v_3']
df_feature['v_1_add_v_4'] = df_feature['v_1'] + df_feature['v_4']
df_feature['v_1_add_v_5'] = df_feature['v_1'] + df_feature['v_5']
df_feature['v_1_add_v_12'] = df_feature['v_1'] + df_feature['v_12']
df_feature['v_2_add_v_3'] = df_feature['v_2'] + df_feature['v_3']
df_feature['v_4_add_v_11'] = df_feature['v_4'] + df_feature['v_11']
df_feature['v_4_add_v_12'] = df_feature['v_4'] + df_feature['v_12']
df_feature['v_0_add_v_12_add_v_14'] = df_feature['v_0'] + \
    df_feature['v_12'] + df_feature['v_14']

In [22]:
df_feature['v_4_add_v_9_minu_v_13'] = df_feature['v_4'] + \
    df_feature['v_9'] - df_feature['v_13']
df_feature['v_2_add_v_4_minu_v_11'] = df_feature['v_2'] + \
    df_feature['v_4'] - df_feature['v_11']
df_feature['v_2_add_v_3_minu_v_11'] = df_feature['v_2'] + \
    df_feature['v_3'] - df_feature['v_11']

In [23]:
df_feature.head()

Unnamed: 0,SaleID,name,regDate,model,brand,bodyType,fuelType,gearbox,power,kilometer,notRepairedDamage,regionCode,creatDate,price,v_0,v_1,v_2,v_3,v_4,v_5,v_6,v_7,v_8,v_9,v_10,v_11,v_12,v_13,v_14,v_mean,v_max,v_min,v_std,name_v_mean_mean,name_v_mean_std,name_v_mean_max,name_v_mean_min,name_v_max_mean,name_v_max_std,name_v_max_max,name_v_max_min,name_v_min_mean,name_v_min_std,name_v_min_max,name_v_min_min,name_v_std_mean,name_v_std_std,name_v_std_max,name_v_std_min,name_count,regDate_year,creatDate_year,creatDate_month,car_age_day,car_age_year,name_v_0_mean,name_v_0_max,name_v_0_min,name_v_0_std,name_v_1_mean,name_v_1_max,name_v_1_min,name_v_1_std,name_v_2_mean,name_v_2_max,name_v_2_min,name_v_2_std,name_v_3_mean,name_v_3_max,name_v_3_min,name_v_3_std,name_v_4_mean,name_v_4_max,name_v_4_min,name_v_4_std,name_v_5_mean,name_v_5_max,name_v_5_min,name_v_5_std,name_v_6_mean,name_v_6_max,name_v_6_min,name_v_6_std,name_v_7_mean,name_v_7_max,name_v_7_min,name_v_7_std,name_v_8_mean,name_v_8_max,name_v_8_min,name_v_8_std,name_v_9_mean,name_v_9_max,name_v_9_min,name_v_9_std,name_v_10_mean,name_v_10_max,name_v_10_min,name_v_10_std,name_v_11_mean,name_v_11_max,name_v_11_min,name_v_11_std,name_v_12_mean,name_v_12_max,name_v_12_min,name_v_12_std,name_v_13_mean,name_v_13_max,name_v_13_min,name_v_13_std,name_v_14_mean,name_v_14_max,name_v_14_min,name_v_14_std,model_v_0_mean,model_v_0_max,model_v_0_min,model_v_0_std,model_v_1_mean,model_v_1_max,model_v_1_min,model_v_1_std,model_v_2_mean,model_v_2_max,model_v_2_min,model_v_2_std,model_v_3_mean,model_v_3_max,model_v_3_min,model_v_3_std,model_v_4_mean,model_v_4_max,model_v_4_min,model_v_4_std,model_v_5_mean,model_v_5_max,model_v_5_min,model_v_5_std,model_v_6_mean,model_v_6_max,model_v_6_min,model_v_6_std,model_v_7_mean,model_v_7_max,model_v_7_min,model_v_7_std,model_v_8_mean,model_v_8_max,model_v_8_min,model_v_8_std,model_v_9_mean,model_v_9_max,model_v_9_min,model_v_9_std,model_v_10_mean,model_v_10_max,model_v_10_min,model_v_10_std,model_v_11_mean,model_v_11_max,model_v_11_min,model_v_11_std,model_v_12_mean,model_v_12_max,model_v_12_min,model_v_12_std,model_v_13_mean,model_v_13_max,model_v_13_min,model_v_13_std,model_v_14_mean,model_v_14_max,model_v_14_min,model_v_14_std,brand_v_0_mean,brand_v_0_max,brand_v_0_min,brand_v_0_std,brand_v_1_mean,brand_v_1_max,brand_v_1_min,brand_v_1_std,brand_v_2_mean,brand_v_2_max,brand_v_2_min,brand_v_2_std,brand_v_3_mean,brand_v_3_max,brand_v_3_min,brand_v_3_std,brand_v_4_mean,brand_v_4_max,brand_v_4_min,brand_v_4_std,brand_v_5_mean,brand_v_5_max,brand_v_5_min,brand_v_5_std,brand_v_6_mean,brand_v_6_max,brand_v_6_min,brand_v_6_std,brand_v_7_mean,brand_v_7_max,brand_v_7_min,brand_v_7_std,brand_v_8_mean,brand_v_8_max,brand_v_8_min,brand_v_8_std,brand_v_9_mean,brand_v_9_max,brand_v_9_min,brand_v_9_std,brand_v_10_mean,brand_v_10_max,brand_v_10_min,brand_v_10_std,brand_v_11_mean,brand_v_11_max,brand_v_11_min,brand_v_11_std,brand_v_12_mean,brand_v_12_max,brand_v_12_min,brand_v_12_std,brand_v_13_mean,brand_v_13_max,brand_v_13_min,brand_v_13_std,brand_v_14_mean,brand_v_14_max,brand_v_14_min,brand_v_14_std,bodyType_v_0_mean,bodyType_v_0_max,bodyType_v_0_min,bodyType_v_0_std,bodyType_v_1_mean,bodyType_v_1_max,bodyType_v_1_min,bodyType_v_1_std,bodyType_v_2_mean,bodyType_v_2_max,bodyType_v_2_min,bodyType_v_2_std,bodyType_v_3_mean,bodyType_v_3_max,bodyType_v_3_min,bodyType_v_3_std,bodyType_v_4_mean,bodyType_v_4_max,bodyType_v_4_min,bodyType_v_4_std,bodyType_v_5_mean,bodyType_v_5_max,bodyType_v_5_min,bodyType_v_5_std,bodyType_v_6_mean,bodyType_v_6_max,bodyType_v_6_min,bodyType_v_6_std,bodyType_v_7_mean,bodyType_v_7_max,bodyType_v_7_min,bodyType_v_7_std,bodyType_v_8_mean,bodyType_v_8_max,bodyType_v_8_min,bodyType_v_8_std,bodyType_v_9_mean,bodyType_v_9_max,bodyType_v_9_min,bodyType_v_9_std,bodyType_v_10_mean,bodyType_v_10_max,bodyType_v_10_min,bodyType_v_10_std,bodyType_v_11_mean,bodyType_v_11_max,bodyType_v_11_min,bodyType_v_11_std,bodyType_v_12_mean,bodyType_v_12_max,bodyType_v_12_min,bodyType_v_12_std,bodyType_v_13_mean,bodyType_v_13_max,bodyType_v_13_min,bodyType_v_13_std,bodyType_v_14_mean,bodyType_v_14_max,bodyType_v_14_min,bodyType_v_14_std,name_model_price_mean,name_model_price_max,name_model_price_min,name_model_price_std,name_model_price_median,name_brand_price_mean,name_brand_price_max,name_brand_price_min,name_brand_price_std,name_brand_price_median,name_bodyType_price_mean,name_bodyType_price_max,name_bodyType_price_min,name_bodyType_price_std,name_bodyType_price_median,model_brand_price_mean,model_brand_price_max,model_brand_price_min,model_brand_price_std,model_brand_price_median,model_bodyType_price_mean,model_bodyType_price_max,model_bodyType_price_min,model_bodyType_price_std,model_bodyType_price_median,brand_bodyType_price_mean,brand_bodyType_price_max,brand_bodyType_price_min,brand_bodyType_price_std,brand_bodyType_price_median,name_price_mean,name_price_max,name_price_min,name_price_std,model_price_mean,model_price_max,model_price_min,model_price_std,brand_price_mean,brand_price_max,brand_price_min,brand_price_std,bodyType_price_mean,bodyType_price_max,bodyType_price_min,bodyType_price_std,v_0_add_v_4,v_0_add_v_8,v_1_add_v_3,v_1_add_v_4,v_1_add_v_5,v_1_add_v_12,v_2_add_v_3,v_4_add_v_11,v_4_add_v_12,v_0_add_v_12_add_v_14,v_4_add_v_9_minu_v_13,v_2_add_v_4_minu_v_11,v_2_add_v_3_minu_v_11
0,3,71865,1996-09-08,109.0,10,0.0,0.0,1.0,193,15.0,0.0,434,2016-03-12,7.783641,45.687478,4.492574,-0.050616,0.8836,-2.228079,0.274293,0.1103,0.121964,0.033395,0.0,-4.509599,1.28594,-0.501868,-2.438353,-0.478699,2.845489,45.687478,-4.509599,12.012923,2.865196,0.02787,2.884903,2.845489,45.746371,0.083287,45.805264,45.687478,-4.529689,0.028412,-4.509599,-4.549779,12.021027,0.01146,12.02913,12.012923,2,1996,2016,3,7125,19.5,45.746371,45.805264,45.687478,0.083287,4.490107,4.492574,4.48764,0.003489,-0.020206,0.010205,-0.050616,0.043007,0.810626,0.8836,0.737653,0.1032,-2.140025,-2.051971,-2.228079,0.124527,0.273842,0.274293,0.273391,0.000638,0.110296,0.1103,0.110292,6e-06,0.122395,0.122827,0.121964,0.00061,0.034606,0.035818,0.033395,0.001714,0.001458,0.002917,0.0,0.002063,-4.529689,-4.509599,-4.549779,0.028412,1.26342,1.28594,1.2409,0.031848,-0.419655,-0.337443,-0.501868,0.116266,-2.34033,-2.242307,-2.438353,0.138625,-0.425282,-0.371864,-0.478699,0.075544,46.283459,49.593459,33.160069,1.689191,-0.399128,6.34596,-4.021618,3.87528,0.552805,17.326224,-2.079725,2.360423,-0.980554,9.386558,-5.140312,1.666109,-1.836813,0.014833,-2.950622,0.441067,0.26592,0.285229,0.0,0.03784,0.041138,0.137258,0.0,0.055093,0.154166,1.330965,0.056659,0.164224,0.0724,0.136434,0.0,0.026007,0.007868,0.058622,0.0,0.008998,-0.478734,12.073557,-7.751995,3.761121,-1.103826,16.866799,-4.063705,2.822401,1.933103,9.675053,-4.235065,1.934287,-1.800664,-0.379152,-2.848351,0.538405,0.100085,1.347366,-2.195474,0.486367,45.761933,51.499264,32.724681,1.864937,0.207616,7.073941,-4.092314,3.839039,0.193285,18.656751,-3.277947,2.44073,-0.305128,9.386558,-5.576299,1.883042,-0.969003,2.933792,-3.17606,0.63298,0.261675,0.291216,0.0,0.038163,0.04952,0.153403,0.0,0.054519,0.133461,1.350293,0.0,0.16895,0.062333,0.15971,0.0,0.028112,0.033243,0.130999,0.0,0.018226,-0.790003,12.118343,-8.436945,3.793868,-0.629856,18.672941,-5.662163,2.861841,0.92186,13.562011,-8.291868,2.176644,-1.105286,1.446954,-2.848351,0.694736,-0.210007,2.113267,-6.113291,0.912552,44.599204,51.108768,30.607394,2.406582,0.037629,7.073941,-4.066382,3.689537,0.177574,18.802118,-3.913507,3.286344,0.174863,9.386558,-5.409722,1.960461,-0.363355,4.877271,-3.687153,1.042848,0.250857,0.274907,0.0,0.050488,0.046412,0.153403,0.0,0.052394,0.135973,1.350293,0.0,0.226768,0.056481,0.160791,0.0,0.028872,0.050751,0.213617,0.0,0.031643,-0.123016,12.285299,-8.05112,3.857112,0.026115,18.672941,-4.489703,3.476407,0.094648,13.083661,-9.639552,2.529009,-0.422623,5.232337,-2.908192,1.178806,0.082971,2.521568,-4.905806,0.860968,8.682877,8.682877,8.682877,,8.682877,8.682877,8.682877,8.682877,,8.682877,8.682877,8.682877,8.682877,,8.682877,9.063339,11.396403,4.077537,0.937684,9.047939,9.068722,11.396403,4.077537,0.923498,8.993471,8.307066,11.492733,2.772589,1.078329,8.29405,8.682877,8.682877,8.682877,,9.063339,11.396403,4.077537,0.937684,8.53241,11.492733,2.772589,1.074179,8.075501,11.512925,2.564949,1.177114,43.459399,45.720873,5.376174,2.264495,4.766867,3.990706,0.832984,-0.942139,-2.729947,44.706911,0.210274,-3.564634,-0.452956
1,7,165346,1999-07-06,26.0,14,1.0,0.0,0.0,101,15.0,0.0,4000,2016-03-26,6.908755,42.255586,-3.167771,-0.676693,1.942673,0.524206,0.239506,0.0,0.122943,0.039839,0.082413,3.693829,-0.245014,-2.19281,0.236728,0.195567,2.870067,42.255586,-3.167771,11.003911,2.870067,,2.870067,2.870067,42.255586,,42.255586,42.255586,-3.167771,,-3.167771,-3.167771,11.003911,,11.003911,11.003911,1,1999,2016,3,6108,16.7,42.255586,42.255586,42.255586,,-3.167771,-3.167771,-3.167771,,-0.676693,-0.676693,-0.676693,,1.942673,1.942673,1.942673,,0.524206,0.524206,0.524206,,0.239506,0.239506,0.239506,,0.0,0.0,0.0,,0.122943,0.122943,0.122943,,0.039839,0.039839,0.039839,,0.082413,0.082413,0.082413,,3.693829,3.693829,3.693829,,-0.245014,-0.245014,-0.245014,,-2.19281,-2.19281,-2.19281,,0.236728,0.236728,0.236728,,0.195567,0.195567,0.195567,,43.23738,46.958564,32.371473,1.949115,0.097362,6.152853,-3.754205,3.501674,-0.617819,17.420581,-3.252191,2.982458,0.498879,6.64323,-4.552713,1.891532,0.394413,2.64902,-2.000164,0.419189,0.247642,0.274497,0.0,0.044562,0.045543,0.130159,0.0,0.049705,0.079859,1.226429,0.0,0.201992,0.049942,0.133253,0.0,0.027209,0.073609,0.155208,0.0,0.014437,0.234805,11.704385,-5.993948,3.569904,0.289488,18.506458,-3.855121,3.225612,-1.233676,9.850044,-7.141109,2.21509,0.618517,2.132542,-0.299557,0.586373,0.492471,1.941446,-1.016079,0.537023,42.841963,48.150986,31.59004,2.161269,-0.096975,6.152853,-3.872681,3.431446,-0.357044,17.608037,-3.913507,3.162987,0.685597,8.18264,-4.991227,1.97421,0.720324,3.666635,-2.123121,0.714119,0.239708,0.285742,0.0,0.046992,0.042813,0.130355,0.0,0.048725,0.098755,1.283486,0.0,0.215163,0.04904,0.14351,0.0,0.028246,0.082934,0.177625,0.0,0.023044,0.644306,12.034736,-6.904206,3.557552,0.654724,18.768832,-4.474906,3.411585,-1.355077,11.007403,-9.639552,2.355606,0.894896,3.462262,-1.411179,0.792532,0.413458,2.743993,-3.511124,0.985659,42.750069,48.95423,30.451976,2.226227,-0.235229,7.057914,-3.958153,3.432492,0.492417,18.181234,-2.922343,3.252226,1.00657,8.787828,-4.672469,1.907092,0.830614,4.959106,-3.403231,0.993249,0.227624,0.259065,0.0,0.047802,0.041787,0.145574,0.0,0.048671,0.162126,1.320314,0.0,0.221839,0.047869,0.148567,0.0,0.027424,0.085464,0.211769,0.0,0.030926,1.025134,12.148416,-7.389607,3.590215,1.337204,18.819042,-2.755963,3.392538,-1.070567,11.68346,-9.404106,2.417158,0.787157,5.635374,-2.268881,1.134104,0.341143,2.743993,-4.393808,0.893906,,,,,,,,,,,,,,,,7.566696,10.203629,3.044522,1.089852,7.550135,6.678482,9.392745,4.110874,0.978605,6.704245,7.075221,10.571086,2.564949,1.106034,7.003974,,,,,7.566696,10.203629,3.044522,1.089852,7.443683,10.571086,2.564949,1.121691,7.364327,10.71444,2.564949,1.106736,42.779792,42.295425,-1.225098,-2.643566,-2.928265,-5.360581,1.26598,0.279192,-1.668604,40.258344,0.36989,0.092527,1.510994
2,12,120103,2001-03-07,48.0,14,1.0,0.0,0.0,58,6.0,0.0,2753,2016-03-21,7.378384,42.309224,-3.082286,-0.604813,0.843333,0.388727,0.240775,0.000116,0.104573,0.053303,0.07425,3.477291,-0.46145,-1.442835,0.659255,1.19935,2.917254,42.309224,-3.082286,10.985315,2.917254,,2.917254,2.917254,42.309224,,42.309224,42.309224,-3.082286,,-3.082286,-3.082286,10.985315,,10.985315,10.985315,1,2001,2016,3,5493,15.0,42.309224,42.309224,42.309224,,-3.082286,-3.082286,-3.082286,,-0.604813,-0.604813,-0.604813,,0.843333,0.843333,0.843333,,0.388727,0.388727,0.388727,,0.240775,0.240775,0.240775,,0.000116,0.000116,0.000116,,0.104573,0.104573,0.104573,,0.053303,0.053303,0.053303,,0.07425,0.07425,0.07425,,3.477291,3.477291,3.477291,,-0.46145,-0.46145,-0.46145,,-1.442835,-1.442835,-1.442835,,0.659255,0.659255,0.659255,,1.19935,1.19935,1.19935,,42.10477,46.025528,31.743948,1.887885,-0.107224,5.826562,-3.740026,3.389043,-0.004472,17.119884,-3.675345,2.951997,1.47318,8.18264,-4.672469,1.853256,0.580186,2.923399,-2.115901,0.406949,0.232004,0.268385,0.0,0.043576,0.042887,0.127347,0.0,0.048137,0.137347,1.223546,0.0,0.199176,0.039326,0.137248,0.0,0.026604,0.078671,0.153368,0.0,0.014759,1.112093,12.034736,-5.583907,3.413814,1.484118,18.768832,-3.061674,3.208397,-1.963093,10.093756,-9.404106,2.184125,0.710321,2.348836,-0.255646,0.571373,1.150741,2.743993,-0.498601,0.52843,42.841963,48.150986,31.59004,2.161269,-0.096975,6.152853,-3.872681,3.431446,-0.357044,17.608037,-3.913507,3.162987,0.685597,8.18264,-4.991227,1.97421,0.720324,3.666635,-2.123121,0.714119,0.239708,0.285742,0.0,0.046992,0.042813,0.130355,0.0,0.048725,0.098755,1.283486,0.0,0.215163,0.04904,0.14351,0.0,0.028246,0.082934,0.177625,0.0,0.023044,0.644306,12.034736,-6.904206,3.557552,0.654724,18.768832,-4.474906,3.411585,-1.355077,11.007403,-9.639552,2.355606,0.894896,3.462262,-1.411179,0.792532,0.413458,2.743993,-3.511124,0.985659,42.750069,48.95423,30.451976,2.226227,-0.235229,7.057914,-3.958153,3.432492,0.492417,18.181234,-2.922343,3.252226,1.00657,8.787828,-4.672469,1.907092,0.830614,4.959106,-3.403231,0.993249,0.227624,0.259065,0.0,0.047802,0.041787,0.145574,0.0,0.048671,0.162126,1.320314,0.0,0.221839,0.047869,0.148567,0.0,0.027424,0.085464,0.211769,0.0,0.030926,1.025134,12.148416,-7.389607,3.590215,1.337204,18.819042,-2.755963,3.392538,-1.070567,11.68346,-9.404106,2.417158,0.787157,5.635374,-2.268881,1.134104,0.341143,2.743993,-4.393808,0.893906,,,,,,,,,,,,,,,,7.092135,10.571086,2.564949,1.084586,7.048386,7.084714,10.571086,2.564949,1.084097,7.048386,7.075221,10.571086,2.564949,1.106034,7.003974,,,,,7.092135,10.571086,2.564949,1.084586,7.443683,10.571086,2.564949,1.121691,7.364327,10.71444,2.564949,1.106736,42.697951,42.362527,-2.238953,-2.693559,-2.841511,-4.525121,0.23852,-0.072723,-1.054108,42.065739,-0.196279,0.245364,0.69997
3,16,10036,2011-09-11,105.0,1,0.0,1.0,1.0,239,12.5,0.0,419,2016-03-06,9.259226,48.30777,2.366464,1.160401,-1.641052,0.940788,0.251404,0.082237,0.15008,0.082606,0.088695,-3.625918,-0.621775,3.086576,0.165461,-2.192635,3.240073,48.30777,-3.625918,12.577467,3.228523,0.024834,3.240073,3.178468,48.247809,0.128911,48.30777,47.987981,-3.619016,0.014839,-3.589106,-3.625918,12.564012,0.028927,12.577467,12.505708,16,2011,2016,3,1638,4.5,48.247809,48.30777,47.987981,0.128911,2.372225,2.397188,2.366464,0.012385,1.137702,1.160401,1.039341,0.048801,-1.681938,-1.641052,-1.859112,0.087903,0.894417,0.940788,0.693474,0.099695,0.251722,0.253098,0.251404,0.000683,0.0822,0.082237,0.082039,8e-05,0.147722,0.15008,0.137505,0.005069,0.082886,0.084097,0.082606,0.000601,0.087139,0.088695,0.080393,0.003347,-3.619016,-3.589106,-3.625918,0.014839,-0.617384,-0.598353,-0.621775,0.009442,3.075434,3.086576,3.027151,0.023955,0.163382,0.165461,0.154373,0.00447,-2.196462,-2.192635,-2.213042,0.008226,48.374869,50.45201,38.251828,1.002001,0.393447,5.904699,-3.771979,4.024281,0.593181,18.055316,-0.911589,1.604058,-3.090054,2.430352,-5.123299,0.681749,0.181589,2.227868,-1.058331,0.328256,0.264394,0.274232,0.0,0.024447,0.052805,0.130449,0.0,0.057246,0.0984,1.308958,0.03349,0.111369,0.102832,0.138927,0.041355,0.015919,0.063623,0.140618,0.0,0.011158,-2.215543,8.906024,-7.348268,3.73125,-2.297084,13.250344,-4.719929,2.264113,4.011496,11.381123,-1.177435,1.002556,0.01536,0.56176,-1.45401,0.208737,-2.133525,-1.562493,-3.282606,0.185966,45.830724,50.735456,31.524342,2.083204,0.364463,7.270448,-4.072007,3.900093,0.483341,18.676414,-2.761572,2.552406,-0.441948,9.025449,-5.41133,1.995034,-0.944965,4.640739,-3.687153,0.821781,0.258438,0.290231,0.0,0.03994,0.051914,0.149585,0.0,0.055437,0.149588,1.347007,0.0,0.175599,0.064465,0.153444,0.0,0.029597,0.032663,0.193907,0.0,0.023912,-0.912696,12.177864,-8.129702,3.893576,-0.428075,18.4085,-5.161408,2.974718,1.196065,12.973057,-7.136793,2.365146,-1.066193,4.907355,-2.588877,0.844411,-0.047131,2.232303,-4.096644,0.878395,44.599204,51.108768,30.607394,2.406582,0.037629,7.073941,-4.066382,3.689537,0.177574,18.802118,-3.913507,3.286344,0.174863,9.386558,-5.409722,1.960461,-0.363355,4.877271,-3.687153,1.042848,0.250857,0.274907,0.0,0.050488,0.046412,0.153403,0.0,0.052394,0.135973,1.350293,0.0,0.226768,0.056481,0.160791,0.0,0.028872,0.050751,0.213617,0.0,0.031643,-0.123016,12.285299,-8.05112,3.857112,0.026115,18.672941,-4.489703,3.476407,0.094648,13.083661,-9.639552,2.529009,-0.422623,5.232337,-2.908192,1.178806,0.082971,2.521568,-4.905806,0.860968,9.242547,9.259226,9.159152,0.040855,9.259226,9.242547,9.259226,9.159152,0.040855,9.259226,9.242547,9.259226,9.159152,0.040855,9.259226,9.912501,10.873888,6.077642,0.410614,9.893488,9.839194,10.661743,9.159152,0.421623,9.809232,8.292365,11.338584,3.258097,1.201061,8.268988,9.242547,9.259226,9.159152,0.040855,9.912501,10.873888,6.077642,0.410614,8.601916,11.350359,3.258097,1.141594,8.075501,11.512925,2.564949,1.177114,49.248558,48.390376,0.725412,3.307252,2.617868,5.45304,-0.480651,0.319013,4.027364,49.201711,0.864022,2.722964,0.141124
4,23,8949,1994-04-01,78.0,7,5.0,0.0,0.0,105,15.0,1.0,1266,2016-03-17,6.39693,43.740185,3.408253,-1.850466,2.593211,0.749961,0.263572,0.093292,0.016425,0.013495,0.094,-2.891659,1.104114,-3.580304,0.157992,-1.133201,2.851925,43.740185,-3.580304,11.455894,3.13369,0.780446,5.719491,2.813408,42.604296,1.979051,43.740185,36.078352,-3.029818,0.770854,-0.576329,-3.580304,11.177119,0.309292,11.455894,10.177523,13,1994,2016,3,8021,22.0,42.604296,43.740185,36.078352,1.979051,3.392179,3.480768,2.894221,0.151657,0.165556,16.40863,-1.850466,4.8874,2.373647,2.767055,1.864658,0.246067,0.29045,1.093825,-1.085029,0.732318,0.235655,0.265004,0.0,0.071037,0.093348,0.093496,0.093197,9.5e-05,0.149986,1.257362,0.016425,0.333213,0.018821,0.04379,0.01121,0.008324,0.072742,0.102214,0.016223,0.02804,-2.063293,4.287075,-2.891659,1.910724,3.023681,17.703144,1.104114,4.416716,-2.640207,4.488613,-3.580304,2.153765,-0.058828,0.529896,-1.163716,0.622003,-0.652681,0.321767,-1.172999,0.597919,43.846397,47.207891,33.70643,1.714041,0.03609,5.975993,-3.734901,3.519267,-0.349101,16.908573,-2.317581,2.554114,0.295346,5.481837,-4.504541,1.777494,0.867863,2.823278,-0.494131,0.367472,0.244362,0.263572,0.0,0.037258,0.045416,0.12733,0.0,0.049972,0.090787,1.229416,0.0,0.170104,0.054963,0.12916,0.0,0.025682,0.088662,0.154929,0.02286,0.013357,0.094676,10.916593,-5.522044,3.460122,0.10712,18.442443,-3.329517,2.894771,-0.693025,7.921192,-5.775117,2.052663,0.822068,2.255984,-0.122311,0.514273,-0.544766,0.85548,-1.728086,0.499202,44.050498,49.091123,32.918002,2.134748,-0.051811,6.942287,-3.851489,3.489285,-0.346867,18.150969,-3.235665,3.024275,0.27916,6.025184,-5.285446,2.006296,0.667674,3.058497,-1.817379,0.719114,0.246729,0.286754,0.0,0.045322,0.044379,0.143141,0.0,0.049413,0.093998,1.295691,0.0,0.208701,0.05527,0.155576,0.0,0.028132,0.083012,0.157738,0.0,0.022347,0.088073,11.363643,-7.335358,3.695565,-0.045598,18.478554,-4.762878,3.224809,-0.567982,12.433202,-6.211157,2.297499,0.571843,2.705492,-1.487128,0.862699,-0.715986,1.843731,-3.576179,0.845312,45.788562,51.67111,32.447596,2.085405,-0.203546,7.270448,-4.236904,3.755332,-0.671247,19.035496,-4.470671,2.037709,-0.55869,8.12318,-5.400667,2.136883,-0.645725,4.417057,-4.233626,1.039968,0.270023,0.28704,0.0,0.029413,0.042879,0.149585,0.0,0.053424,0.07034,1.411559,0.0,0.131478,0.065,0.15422,0.0,0.031709,0.044739,0.193612,0.0,0.029946,-0.640481,11.598273,-8.650125,3.663618,-1.484074,18.655089,-5.12658,2.451685,0.599961,13.847792,-8.291868,2.672617,-0.657626,4.816058,-4.157649,1.120591,-0.278243,2.28875,-4.544762,1.082901,6.9322,7.71913,6.216606,0.532219,6.908755,6.680958,7.71913,5.70711,0.642489,6.907755,,,,,,7.744373,10.195821,4.615121,1.033657,7.71913,6.47851,6.47851,6.47851,,6.47851,7.509009,10.568775,5.01728,1.280361,7.601402,6.680958,7.71913,5.70711,0.642489,7.744373,10.195821,4.615121,1.033657,7.773644,10.568775,4.110874,1.138242,8.534409,11.512925,3.044522,1.312463,44.490145,43.75368,6.001464,4.158214,3.671825,-0.172051,0.742745,1.854074,-2.830344,39.026679,0.685968,-2.204619,-0.361369


In [24]:
df_feature.shape

(200000, 354)

In [25]:
df_feature.to_pickle('feature.pickle')