In [1]:
import time
import pandas as pd
import numpy as np
import lightgbm as lgb
import config
from sklearn.metrics import log_loss

In [2]:
###组合信息
timeFeatList = [
    ['user_id','day'],
    ['user_id','day','hour'],
    ['item_id','day'],
    ['item_id','day','hour'],
    ['shop_id','day'],
    ['shop_id','day','hour'],
    ['item_brand_id','day'],
    ['item_brand_id','day','hour'],
    ['item_city_id','day'],
    ['item_city_id','day','hour'],
]

In [3]:
def timestamp_datetime(value):
    return time.strftime('%Y-%m-%d %H:%M:%S', time.localtime(value))

def time_feat(df,featList,featName):
    tmp = df.groupby(featList).size().reset_index().rename(columns={0:featName})
    df = df.merge(tmp,'left',on=featList)
    return df

def process(df):
    df['time'] = df.context_timestamp.apply(timestamp_datetime)
    df['day'] = df.time.apply(lambda x: int(x[8:10]))
    df['hour'] = df.time.apply(lambda x: int(x[11:13]))
    for lst in timeFeatList:
        df = time_feat(df,lst,'_'.join(lst))
    df['item_property_list'] = dfTrain['item_property_list'].apply(lambda x:';'.join(sorted(set(str(x).split(';')))))
    df["missing_feat"] = np.sum((df == -1).values, axis=1)
    return df

In [20]:
dfTrain = pd.read_table(config.TRAIN_FILE,sep=' ')
dfTrain.drop_duplicates(inplace=True)
dfTest = pd.read_table(config.TEST_FILE,sep=' ')

dfTrain = process(dfTrain)
dfTest = process(dfTest)

In [39]:
train_idx = dfTrain.loc[dfTrain['day']<24].index
valid_idx = dfTrain.loc[dfTrain['day']==24].index

featList = ['item_id','item_brand_id','item_city_id','item_price_level','item_sales_level','item_collected_level','item_pv_level',
            'user_gender_id','user_age_level','user_occupation_id','user_star_level',
            'context_page_id','item_property_list',
            'shop_id','shop_review_num_level',
            'is_trade'
]

Xi_train_ = dfTrain.loc[train_idx,featList]
Xi_valid_ = dfTrain.loc[valid_idx,featList]

In [40]:
naFill = Xi_train_['is_trade'].mean()
for var in featList[:-1]:
    Xi_valid_ =Xi_valid_.merge(Xi_train_.groupby(var)['is_trade'].mean().reset_index().rename(columns={'is_trade':'is_trade_'+var}),'left',on=var).fillna(naFill-0.002)

In [41]:
for var in featList[:-1]:
    print(var)
    print(log_loss(Xi_valid_['is_trade'], Xi_valid_['is_trade_'+var]))

item_id
0.164508097503
item_brand_id
0.106458787713
item_city_id
0.0871674952809
item_price_level
0.08448929218
item_sales_level
0.0848416444282
item_collected_level
0.085556296131
item_pv_level
0.085604273448
user_gender_id
0.0856237040705
user_age_level
0.0856016794557
user_occupation_id
0.0856860338078
user_star_level
0.0856776825034
context_page_id
0.0856012697589
item_property_list
0.180038916187
shop_id
0.122164200327
shop_review_num_level
0.0855232943286


In [42]:
for i in range(len(featList[:-1])-2):
    for j in range(i+1,len(featList[:-1])-1):
        for k in range(j+1,len(featList[:-1])):
            print(featList[i],featList[j],featList[k])
            print(log_loss(Xi_valid_['is_trade'], Xi_valid_[['is_trade_'+featList[i],'is_trade_'+featList[j],'is_trade_'+featList[k]]].mean(axis=1)))

item_id item_brand_id item_city_id
0.0845891605086
item_id item_brand_id item_price_level
0.0834249547992
item_id item_brand_id item_sales_level
0.0834272891554
item_id item_brand_id item_collected_level
0.0834861195031
item_id item_brand_id item_pv_level
0.0834968990507
item_id item_brand_id user_gender_id
0.0834961278624
item_id item_brand_id user_age_level
0.0834407271629
item_id item_brand_id user_occupation_id
0.0834790304698
item_id item_brand_id user_star_level
0.0834683456873
item_id item_brand_id context_page_id
0.0834746373808
item_id item_brand_id item_property_list
0.0867137411025
item_id item_brand_id shop_id
0.1028285011
item_id item_brand_id shop_review_num_level
0.083517435933
item_id item_city_id item_price_level
0.0834011290394
item_id item_city_id item_sales_level
0.0834790723842
item_id item_city_id item_collected_level
0.0836258995569
item_id item_city_id item_pv_level
0.0836595505651
item_id item_city_id user_gender_id
0.0836515779154
item_id item_city_id user_age

0.0841211616084
item_brand_id user_age_level item_property_list
0.0842677549627
item_brand_id user_age_level shop_id
0.0833959177813
item_brand_id user_age_level shop_review_num_level
0.0841011659368
item_brand_id user_occupation_id user_star_level
0.0841841396218
item_brand_id user_occupation_id context_page_id
0.0841747105382
item_brand_id user_occupation_id item_property_list
0.0843220613117
item_brand_id user_occupation_id shop_id
0.0834380099281
item_brand_id user_occupation_id shop_review_num_level
0.0841579890293
item_brand_id user_star_level context_page_id
0.0841568774041
item_brand_id user_star_level item_property_list
0.0843057574955
item_brand_id user_star_level shop_id
0.0834265387768
item_brand_id user_star_level shop_review_num_level
0.0841414472846
item_brand_id context_page_id item_property_list
0.0842931734656
item_brand_id context_page_id shop_id
0.0834240540844
item_brand_id context_page_id shop_review_num_level
0.0841337809438
item_brand_id item_property_list shop_

0.0851495729785
item_sales_level item_collected_level user_occupation_id
0.0852035287707
item_sales_level item_collected_level user_star_level
0.0851848690363
item_sales_level item_collected_level context_page_id
0.0851735175851
item_sales_level item_collected_level item_property_list
0.0849793399233
item_sales_level item_collected_level shop_id
0.0834831654467
item_sales_level item_collected_level shop_review_num_level
0.0851186221361
item_sales_level item_pv_level user_gender_id
0.0851904431859
item_sales_level item_pv_level user_age_level
0.0851579823776
item_sales_level item_pv_level user_occupation_id
0.085210255037
item_sales_level item_pv_level user_star_level
0.0851915388263
item_sales_level item_pv_level context_page_id
0.0851795824044
item_sales_level item_pv_level item_property_list
0.0849959276898
item_sales_level item_pv_level shop_id
0.0834947162167
item_sales_level item_pv_level shop_review_num_level
0.0851231790707
item_sales_level user_gender_id user_age_level
0.085156

0.0852615725328
user_gender_id shop_id shop_review_num_level
0.083730473638
user_age_level user_occupation_id user_star_level
0.0855874239697
user_age_level user_occupation_id context_page_id
0.0855664821885
user_age_level user_occupation_id item_property_list
0.0852986890133
user_age_level user_occupation_id shop_id
0.0837214262992
user_age_level user_occupation_id shop_review_num_level
0.0855023653891
user_age_level user_star_level context_page_id
0.0855472360481
user_age_level user_star_level item_property_list
0.0852812282595
user_age_level user_star_level shop_id
0.0837103686528
user_age_level user_star_level shop_review_num_level
0.0854841891011
user_age_level context_page_id item_property_list
0.0852613231658
user_age_level context_page_id shop_id
0.0836966526969
user_age_level context_page_id shop_review_num_level
0.0854652477874
user_age_level item_property_list shop_id
0.0839722993079
user_age_level item_property_list shop_review_num_level
0.0852214033668
user_age_level shop_