In [1]:
import pandas as pd
from sklearn.feature_extraction import DictVectorizer
import time
from pyfm import pylibfm
import numpy as np
import gc



In [2]:
pd.set_option("display.max_columns", 101)


In [3]:
log = pd.read_csv("~/environment/log.tsv", sep="\t")
events = pd.read_csv("~/environment/events.tsv", sep="\t")
users = pd.read_csv("~/environment/users.tsv", sep="\t")

In [4]:
print(log.shape)
log.head()

(1452089, 7)


Unnamed: 0,user_id,event_id,time_stamp,action_type,num_of_people,payment_method,total_price
0,1,6261,2015-03-19 09:15:50,3,1.0,クレカ,4000.0
1,1,127600,2017-06-05 12:33:17,1,,,
2,1,127600,2017-06-05 12:37:09,1,,,
3,1,125296,2017-06-06 14:01:27,1,,,
4,1,125128,2017-06-06 14:03:57,1,,,


In [5]:
print(events.shape)
events.head()

(158392, 11)


Unnamed: 0,event_id,female_age_lower,female_age_upper,male_age_lower,male_age_upper,event_start_at,prefecture,first_published_at,female_price,male_price,interest
0,1,20,99.0,25,44.0,2015-03-08 18:00:00,宮城県,,1500.0,5000.0,
1,2,30,99.0,30,49.0,2015-03-01 15:00:00,富山県,,1000.0,6500.0,
2,3,22,99.0,24,39.0,2015-03-07 19:00:00,富山県,,1000.0,6000.0,
3,4,20,99.0,25,44.0,2015-03-08 18:00:00,新潟県,,1000.0,6000.0,
4,5,20,99.0,20,39.0,2015-03-14 19:00:00,新潟県,,1000.0,6000.0,


In [6]:
print(users.shape)
users.head()

(69667, 5)


Unnamed: 0,user_id,age,gender,prefecture,created_on
0,1,34,女性,静岡県,2012-06-26
1,2,31,男性,鳥取県,2012-07-12
2,3,32,男性,東京都,2012-07-15
3,4,30,男性,東京都,2012-07-17
4,5,29,女性,埼玉県,2012-07-17


In [7]:
log.loc[:,"user_id_str"] = log["user_id"].apply(lambda x:str(x))
log.loc[:,"event_id_str"] = log["event_id"].apply(lambda x:str(x))
log.head()

Unnamed: 0,user_id,event_id,time_stamp,action_type,num_of_people,payment_method,total_price,user_id_str,event_id_str
0,1,6261,2015-03-19 09:15:50,3,1.0,クレカ,4000.0,1,6261
1,1,127600,2017-06-05 12:33:17,1,,,,1,127600
2,1,127600,2017-06-05 12:37:09,1,,,,1,127600
3,1,125296,2017-06-06 14:01:27,1,,,,1,125296
4,1,125128,2017-06-06 14:03:57,1,,,,1,125128


In [8]:
data1 = pd.merge(log,events, on="event_id")
data = pd.merge(data1, users, on="user_id")
data = data.rename(columns = {"prefecture_x": "event_prefercture", "prefecture_y":"user_prefecture"})
data.head()

Unnamed: 0,user_id,event_id,time_stamp,action_type,num_of_people,payment_method,total_price,user_id_str,event_id_str,female_age_lower,female_age_upper,male_age_lower,male_age_upper,event_start_at,event_prefercture,first_published_at,female_price,male_price,interest,age,gender,user_prefecture,created_on
0,1,6261,2015-03-19 09:15:50,3,1.0,クレカ,4000.0,1,6261,27,39.0,27,39.0,2015-03-22 14:00:00,東京都,,4000.0,6500.0,,34,女性,静岡県,2012-06-26
1,1,127600,2017-06-05 12:33:17,1,,,,1,127600,24,36.0,26,38.0,2017-06-11 10:00:00,埼玉県,2017-05-01 14:57:33,2000.0,6000.0,,34,女性,静岡県,2012-06-26
2,1,127600,2017-06-05 12:37:09,1,,,,1,127600,24,36.0,26,38.0,2017-06-11 10:00:00,埼玉県,2017-05-01 14:57:33,2000.0,6000.0,,34,女性,静岡県,2012-06-26
3,1,127600,2017-06-05 12:32:04,1,,,,1,127600,24,36.0,26,38.0,2017-06-11 10:00:00,埼玉県,2017-05-01 14:57:33,2000.0,6000.0,,34,女性,静岡県,2012-06-26
4,1,127600,2017-06-06 09:37:45,1,,,,1,127600,24,36.0,26,38.0,2017-06-11 10:00:00,埼玉県,2017-05-01 14:57:33,2000.0,6000.0,,34,女性,静岡県,2012-06-26


In [9]:
data.loc[:,"event_date"] = pd.to_datetime(data["event_start_at"])
data.loc[:,"event_month"] = data.loc[:,"event_date"].apply(lambda x:str(x.month))
data.loc[:,"event_day"] = data.loc[:,"event_date"].apply(lambda x:str(x.day))
data.loc[:,"event_weekday"] = data.loc[:,"event_date"].apply(lambda x:str(x.dayofweek))
data.loc[:,"event_hour"] = data.loc[:,"event_date"].apply(lambda x:str(x.hour))

data.head()

Unnamed: 0,user_id,event_id,time_stamp,action_type,num_of_people,payment_method,total_price,user_id_str,event_id_str,female_age_lower,female_age_upper,male_age_lower,male_age_upper,event_start_at,event_prefercture,first_published_at,female_price,male_price,interest,age,gender,user_prefecture,created_on,event_date,event_month,event_day,event_weekday,event_hour
0,1,6261,2015-03-19 09:15:50,3,1.0,クレカ,4000.0,1,6261,27,39.0,27,39.0,2015-03-22 14:00:00,東京都,,4000.0,6500.0,,34,女性,静岡県,2012-06-26,2015-03-22 14:00:00,3,22,6,14
1,1,127600,2017-06-05 12:33:17,1,,,,1,127600,24,36.0,26,38.0,2017-06-11 10:00:00,埼玉県,2017-05-01 14:57:33,2000.0,6000.0,,34,女性,静岡県,2012-06-26,2017-06-11 10:00:00,6,11,6,10
2,1,127600,2017-06-05 12:37:09,1,,,,1,127600,24,36.0,26,38.0,2017-06-11 10:00:00,埼玉県,2017-05-01 14:57:33,2000.0,6000.0,,34,女性,静岡県,2012-06-26,2017-06-11 10:00:00,6,11,6,10
3,1,127600,2017-06-05 12:32:04,1,,,,1,127600,24,36.0,26,38.0,2017-06-11 10:00:00,埼玉県,2017-05-01 14:57:33,2000.0,6000.0,,34,女性,静岡県,2012-06-26,2017-06-11 10:00:00,6,11,6,10
4,1,127600,2017-06-06 09:37:45,1,,,,1,127600,24,36.0,26,38.0,2017-06-11 10:00:00,埼玉県,2017-05-01 14:57:33,2000.0,6000.0,,34,女性,静岡県,2012-06-26,2017-06-11 10:00:00,6,11,6,10


In [10]:
data.loc[:,"user_date"] = pd.to_datetime(data["created_on"])
data.loc[:,"user_year"] = data.loc[:,"user_date"].apply(lambda x:str(x.year))
data.loc[:,"user_month"] = data.loc[:,"user_date"].apply(lambda x:str(x.month))

data.head()

Unnamed: 0,user_id,event_id,time_stamp,action_type,num_of_people,payment_method,total_price,user_id_str,event_id_str,female_age_lower,female_age_upper,male_age_lower,male_age_upper,event_start_at,event_prefercture,first_published_at,female_price,male_price,interest,age,gender,user_prefecture,created_on,event_date,event_month,event_day,event_weekday,event_hour,user_date,user_year,user_month
0,1,6261,2015-03-19 09:15:50,3,1.0,クレカ,4000.0,1,6261,27,39.0,27,39.0,2015-03-22 14:00:00,東京都,,4000.0,6500.0,,34,女性,静岡県,2012-06-26,2015-03-22 14:00:00,3,22,6,14,2012-06-26,2012,6
1,1,127600,2017-06-05 12:33:17,1,,,,1,127600,24,36.0,26,38.0,2017-06-11 10:00:00,埼玉県,2017-05-01 14:57:33,2000.0,6000.0,,34,女性,静岡県,2012-06-26,2017-06-11 10:00:00,6,11,6,10,2012-06-26,2012,6
2,1,127600,2017-06-05 12:37:09,1,,,,1,127600,24,36.0,26,38.0,2017-06-11 10:00:00,埼玉県,2017-05-01 14:57:33,2000.0,6000.0,,34,女性,静岡県,2012-06-26,2017-06-11 10:00:00,6,11,6,10,2012-06-26,2012,6
3,1,127600,2017-06-05 12:32:04,1,,,,1,127600,24,36.0,26,38.0,2017-06-11 10:00:00,埼玉県,2017-05-01 14:57:33,2000.0,6000.0,,34,女性,静岡県,2012-06-26,2017-06-11 10:00:00,6,11,6,10,2012-06-26,2012,6
4,1,127600,2017-06-06 09:37:45,1,,,,1,127600,24,36.0,26,38.0,2017-06-11 10:00:00,埼玉県,2017-05-01 14:57:33,2000.0,6000.0,,34,女性,静岡県,2012-06-26,2017-06-11 10:00:00,6,11,6,10,2012-06-26,2012,6


In [11]:

def loaddata(df, feature, target):
    data = []
    y = []
    df = df.loc[:,features + target]
    for _ , row in df.iterrows():
        data.append(dict(row))
        y.append(row[target].values)
    
    return data, y

In [12]:
del users,log,events
gc.collect()

638

In [None]:
features = ["user_id_str", "event_id_str", "female_age_lower", "female_age_upper", "male_age_lower",
           "male_age_upper", "female_price", "male_price", "age", "gender", "user_prefecture", "event_month",
           "event_day","event_weekday", "event_hour", "user_year", "user_month"]
target = ["action_type"]

start = time.time() #時間計測



train_X , train_y= loaddata(data,features,target)

elapsed_time = time.time() - start #時間計測ここまで

print ("elapsed_time:{0}".format(elapsed_time) + "[sec]")

In [44]:
y_train = np.array(train_y).reshape(-1)
y_train = y_train.astype(np.float64)
print(y_train.shape)
print(y_train.dtype)

(1452089,)
float64


In [41]:
v = DictVectorizer()
X_train = v.fit_transform(train_X)

In [42]:
X_train

<1452089x195895 sparse matrix of type '<class 'numpy.float64'>'
	with 4356267 stored elements in Compressed Sparse Row format>

In [45]:
y_train

array([ 3.,  1.,  1., ...,  1.,  1.,  1.])

In [46]:
fm = pylibfm.FM(num_factors = 10, num_iter=10, verbose=True, task="regression", initial_learning_rate=0.0001, learning_rate_schedule="optimal")


In [47]:
fm.fit(X_train, y_train)

Creating validation dataset of 0.01 of training for adaptive regularization
-- Epoch 1
Training MSE: 0.00103
-- Epoch 2
Training MSE: 0.00012
-- Epoch 3
Training MSE: 0.00012
-- Epoch 4
Training MSE: 0.00012
-- Epoch 5
Training MSE: 0.00012
-- Epoch 6
Training MSE: 0.00012
-- Epoch 7
Training MSE: 0.00012
-- Epoch 8
Training MSE: 0.00012
-- Epoch 9
Training MSE: 0.00012
-- Epoch 10
Training MSE: 0.00012


In [48]:
test = pd.read_csv("~/environment/test.tsv", sep="\t")
events = pd.read_csv("~/environment/events.tsv", sep="\t")

In [51]:
user_list = test["user_id"]
event_list = events["event_id"]

In [78]:
k = 20
i = 0
recommends = []

for user in user_list:
    user_data = []
    result = []
    i = i +1
    print("user_id:"+str(user))
    print("進捗:"+str(i*100/len(user_list))+"%") 
    
    for event in event_list:
        user_data.append({"user_id":str(user), "event_id": str(event)})


    X_test = v.transform(user_data)


    preds = fm.predict(X_test)


    for pred, ids in zip(preds, user_data):
        result.append({"id":(ids["user_id"],ids["event_id"]),"estimated_value":pred})
    
    recommends_by_user = sorted(result, key=lambda x:x["estimated_value"], reverse=True)[:k]
    recommends.append(recommends_by_user)
    
    del user_data
    gc.collect()


user_id:1
進捗:0.03465003465003465%
user_id:6
進捗:0.0693000693000693%
user_id:23
進捗:0.10395010395010396%
user_id:43
進捗:0.1386001386001386%
user_id:46
進捗:0.17325017325017325%
user_id:48
進捗:0.2079002079002079%
user_id:76
進捗:0.24255024255024255%
user_id:79
進捗:0.2772002772002772%
user_id:104
進捗:0.31185031185031187%
user_id:139
進捗:0.3465003465003465%
user_id:152
進捗:0.38115038115038113%
user_id:198
進捗:0.4158004158004158%
user_id:206
進捗:0.45045045045045046%
user_id:233
進捗:0.4851004851004851%
user_id:242
進捗:0.5197505197505198%
user_id:247
進捗:0.5544005544005544%
user_id:263
進捗:0.589050589050589%
user_id:285
進捗:0.6237006237006237%
user_id:313
進捗:0.6583506583506583%
user_id:331
進捗:0.693000693000693%
user_id:333
進捗:0.7276507276507277%
user_id:346
進捗:0.7623007623007623%
user_id:387
進捗:0.796950796950797%
user_id:388
進捗:0.8316008316008316%
user_id:390
進捗:0.8662508662508662%
user_id:396
進捗:0.9009009009009009%
user_id:406
進捗:0.9355509355509356%
user_id:421
進捗:0.9702009702009702%
user_id:454
進捗:1.004851004

user_id:4705
進捗:8.142758142758142%
user_id:4714
進捗:8.177408177408177%
user_id:4759
進捗:8.212058212058212%
user_id:4762
進捗:8.246708246708247%
user_id:4776
進捗:8.281358281358282%
user_id:4850
進捗:8.316008316008316%
user_id:4864
進捗:8.350658350658351%
user_id:4898
進捗:8.385308385308385%
user_id:4951
進捗:8.41995841995842%
user_id:4956
進捗:8.454608454608454%
user_id:4966
進捗:8.48925848925849%
user_id:4976
進捗:8.523908523908524%
user_id:5003
進捗:8.558558558558559%
user_id:5007
進捗:8.593208593208594%
user_id:5009
進捗:8.627858627858627%
user_id:5014
進捗:8.662508662508662%
user_id:5059
進捗:8.697158697158697%
user_id:5086
進捗:8.731808731808732%
user_id:5123
進捗:8.766458766458767%
user_id:5128
進捗:8.801108801108802%
user_id:5144
進捗:8.835758835758837%
user_id:5158
進捗:8.87040887040887%
user_id:5168
進捗:8.905058905058905%
user_id:5169
進捗:8.93970893970894%
user_id:5181
進捗:8.974358974358974%
user_id:5182
進捗:9.00900900900901%
user_id:5208
進捗:9.043659043659044%
user_id:5274
進捗:9.07830907830908%
user_id:5275
進捗:9.11295911

user_id:9042
進捗:16.146916146916148%
user_id:9044
進捗:16.181566181566183%
user_id:9047
進捗:16.216216216216218%
user_id:9057
進捗:16.250866250866252%
user_id:9066
進捗:16.285516285516284%
user_id:9094
進捗:16.32016632016632%
user_id:9106
進捗:16.354816354816354%
user_id:9115
進捗:16.38946638946639%
user_id:9135
進捗:16.424116424116423%
user_id:9253
進捗:16.45876645876646%
user_id:9269
進捗:16.493416493416493%
user_id:9279
進捗:16.528066528066528%
user_id:9307
進捗:16.562716562716563%
user_id:9334
進捗:16.597366597366598%
user_id:9355
進捗:16.632016632016633%
user_id:9359
進捗:16.666666666666668%
user_id:9400
進捗:16.701316701316703%
user_id:9438
進捗:16.735966735966738%
user_id:9487
進捗:16.77061677061677%
user_id:9500
進捗:16.805266805266804%
user_id:9506
進捗:16.83991683991684%
user_id:9585
進捗:16.874566874566874%
user_id:9608
進捗:16.90921690921691%
user_id:9640
進捗:16.943866943866944%
user_id:9653
進捗:16.97851697851698%
user_id:9683
進捗:17.013167013167013%
user_id:9689
進捗:17.04781704781705%
user_id:9694
進捗:17.082467082467083%


user_id:13133
進捗:23.977823977823977%
user_id:13196
進捗:24.012474012474012%
user_id:13257
進捗:24.047124047124047%
user_id:13272
進捗:24.081774081774082%
user_id:13281
進捗:24.116424116424117%
user_id:13312
進捗:24.151074151074152%
user_id:13315
進捗:24.185724185724187%
user_id:13325
進捗:24.22037422037422%
user_id:13354
進捗:24.255024255024257%
user_id:13372
進捗:24.28967428967429%
user_id:13380
進捗:24.324324324324323%
user_id:13419
進捗:24.358974358974358%
user_id:13433
進捗:24.393624393624393%
user_id:13456
進捗:24.428274428274428%
user_id:13471
進捗:24.462924462924462%
user_id:13480
進捗:24.497574497574497%
user_id:13570
進捗:24.532224532224532%
user_id:13618
進捗:24.566874566874567%
user_id:13672
進捗:24.601524601524602%
user_id:13686
進捗:24.636174636174637%
user_id:13701
進捗:24.670824670824672%
user_id:13711
進捗:24.705474705474707%
user_id:13712
進捗:24.74012474012474%
user_id:13716
進捗:24.774774774774773%
user_id:13725
進捗:24.809424809424808%
user_id:13777
進捗:24.844074844074843%
user_id:13809
進捗:24.878724878724878%
user

user_id:18051
進捗:31.73943173943174%
user_id:18054
進捗:31.774081774081775%
user_id:18082
進捗:31.80873180873181%
user_id:18116
進捗:31.84338184338184%
user_id:18130
進捗:31.878031878031877%
user_id:18144
進捗:31.91268191268191%
user_id:18191
進捗:31.947331947331946%
user_id:18278
進捗:31.98198198198198%
user_id:18283
進捗:32.016632016632016%
user_id:18432
進捗:32.05128205128205%
user_id:18444
進捗:32.085932085932086%
user_id:18450
進捗:32.12058212058212%
user_id:18460
進捗:32.155232155232156%
user_id:18464
進捗:32.18988218988219%
user_id:18473
進捗:32.224532224532226%
user_id:18489
進捗:32.25918225918226%
user_id:18498
進捗:32.293832293832295%
user_id:18533
進捗:32.32848232848233%
user_id:18537
進捗:32.363132363132365%
user_id:18583
進捗:32.3977823977824%
user_id:18584
進捗:32.432432432432435%
user_id:18611
進捗:32.46708246708247%
user_id:18622
進捗:32.501732501732505%
user_id:18675
進捗:32.53638253638254%
user_id:18705
進捗:32.57103257103257%
user_id:18732
進捗:32.6056826056826%
user_id:18744
進捗:32.64033264033264%
user_id:18756
進捗:32

user_id:22901
進捗:39.604989604989605%
user_id:22902
進捗:39.63963963963964%
user_id:22912
進捗:39.674289674289675%
user_id:22937
進捗:39.70893970893971%
user_id:22952
進捗:39.743589743589745%
user_id:23017
進捗:39.77823977823978%
user_id:23044
進捗:39.812889812889814%
user_id:23062
進捗:39.84753984753985%
user_id:23085
進捗:39.882189882189884%
user_id:23089
進捗:39.91683991683992%
user_id:23159
進捗:39.951489951489954%
user_id:23180
進捗:39.98613998613999%
user_id:23193
進捗:40.020790020790024%
user_id:23195
進捗:40.05544005544006%
user_id:23209
進捗:40.090090090090094%
user_id:23221
進捗:40.12474012474012%
user_id:23245
進捗:40.159390159390156%
user_id:23252
進捗:40.19404019404019%
user_id:23264
進捗:40.228690228690226%
user_id:23315
進捗:40.26334026334026%
user_id:23323
進捗:40.297990297990296%
user_id:23358
進捗:40.33264033264033%
user_id:23359
進捗:40.367290367290366%
user_id:23380
進捗:40.4019404019404%
user_id:23382
進捗:40.436590436590436%
user_id:23405
進捗:40.47124047124047%
user_id:23429
進捗:40.505890505890505%
user_id:23451
進

user_id:28157
進捗:47.47054747054747%
user_id:28223
進捗:47.50519750519751%
user_id:28232
進捗:47.53984753984754%
user_id:28237
進捗:47.57449757449758%
user_id:28239
進捗:47.60914760914761%
user_id:28246
進捗:47.64379764379764%
user_id:28369
進捗:47.678447678447675%
user_id:28395
進捗:47.71309771309771%
user_id:28448
進捗:47.747747747747745%
user_id:28502
進捗:47.78239778239778%
user_id:28528
進捗:47.817047817047815%
user_id:28530
進捗:47.85169785169785%
user_id:28566
進捗:47.886347886347885%
user_id:28587
進捗:47.92099792099792%
user_id:28634
進捗:47.955647955647954%
user_id:28667
進捗:47.99029799029799%
user_id:28672
進捗:48.024948024948024%
user_id:28674
進捗:48.05959805959806%
user_id:28689
進捗:48.094248094248094%
user_id:28690
進捗:48.12889812889813%
user_id:28717
進捗:48.163548163548164%
user_id:28749
進捗:48.1981981981982%
user_id:28771
進捗:48.232848232848234%
user_id:28773
進捗:48.26749826749827%
user_id:28883
進捗:48.302148302148304%
user_id:28922
進捗:48.33679833679834%
user_id:29057
進捗:48.37144837144837%
user_id:29104
進捗:48

user_id:40221
進捗:55.336105336105334%
user_id:40241
進捗:55.37075537075537%
user_id:40243
進捗:55.4054054054054%
user_id:40329
進捗:55.44005544005544%
user_id:40382
進捗:55.47470547470547%
user_id:40398
進捗:55.50935550935551%
user_id:40413
進捗:55.54400554400554%
user_id:40454
進捗:55.57865557865558%
user_id:40467
進捗:55.61330561330561%
user_id:40495
進捗:55.64795564795565%
user_id:40895
進捗:55.68260568260568%
user_id:40916
進捗:55.71725571725572%
user_id:40949
進捗:55.75190575190575%
user_id:41019
進捗:55.78655578655579%
user_id:41041
進捗:55.82120582120582%
user_id:41097
進捗:55.85585585585586%
user_id:41128
進捗:55.89050589050589%
user_id:41269
進捗:55.92515592515593%
user_id:41314
進捗:55.95980595980596%
user_id:41318
進捗:55.994455994456%
user_id:41321
進捗:56.02910602910603%
user_id:41332
進捗:56.06375606375607%
user_id:41361
進捗:56.0984060984061%
user_id:41362
進捗:56.13305613305613%
user_id:41387
進捗:56.167706167706164%
user_id:41415
進捗:56.2023562023562%
user_id:41453
進捗:56.237006237006234%
user_id:41499
進捗:56.2716562716

user_id:50422
進捗:63.2016632016632%
user_id:50430
進捗:63.23631323631324%
user_id:50434
進捗:63.27096327096327%
user_id:50435
進捗:63.305613305613306%
user_id:50487
進捗:63.34026334026334%
user_id:50489
進捗:63.374913374913376%
user_id:50568
進捗:63.40956340956341%
user_id:50601
進捗:63.444213444213446%
user_id:50640
進捗:63.47886347886348%
user_id:50649
進捗:63.513513513513516%
user_id:50662
進捗:63.54816354816355%
user_id:50666
進捗:63.582813582813586%
user_id:50682
進捗:63.61746361746362%
user_id:50692
進捗:63.652113652113655%
user_id:50737
進捗:63.68676368676368%
user_id:50751
進捗:63.72141372141372%
user_id:50789
進捗:63.75606375606375%
user_id:50881
進捗:63.79071379071379%
user_id:50917
進捗:63.82536382536382%
user_id:50959
進捗:63.86001386001386%
user_id:51155
進捗:63.89466389466389%
user_id:51189
進捗:63.92931392931393%
user_id:51219
進捗:63.96396396396396%
user_id:51319
進捗:63.998613998614%
user_id:51446
進捗:64.03326403326403%
user_id:51453
進捗:64.06791406791407%
user_id:51458
進捗:64.1025641025641%
user_id:51484
進捗:64.137214

user_id:59261
進捗:71.13652113652114%
user_id:59271
進捗:71.17117117117117%
user_id:59282
進捗:71.20582120582121%
user_id:59349
進捗:71.24047124047124%
user_id:59358
進捗:71.27512127512128%
user_id:59362
進捗:71.3097713097713%
user_id:59369
進捗:71.34442134442135%
user_id:59390
進捗:71.37907137907138%
user_id:59396
進捗:71.41372141372142%
user_id:59458
進捗:71.44837144837145%
user_id:59471
進捗:71.48302148302149%
user_id:59506
進捗:71.51767151767152%
user_id:59567
進捗:71.55232155232156%
user_id:59615
進捗:71.58697158697159%
user_id:59662
進捗:71.62162162162163%
user_id:59712
進捗:71.65627165627166%
user_id:59723
進捗:71.6909216909217%
user_id:59782
進捗:71.72557172557173%
user_id:59811
進捗:71.76022176022175%
user_id:59821
進捗:71.7948717948718%
user_id:59859
進捗:71.82952182952182%
user_id:59863
進捗:71.86417186417187%
user_id:59877
進捗:71.8988218988219%
user_id:59883
進捗:71.93347193347194%
user_id:59913
進捗:71.96812196812196%
user_id:59928
進捗:72.002772002772%
user_id:59946
進捗:72.03742203742203%
user_id:59953
進捗:72.07207207207207

user_id:65844
進捗:79.07137907137907%
user_id:65871
進捗:79.10602910602911%
user_id:65875
進捗:79.14067914067914%
user_id:65879
進捗:79.17532917532918%
user_id:65885
進捗:79.20997920997921%
user_id:65892
進捗:79.24462924462925%
user_id:65947
進捗:79.27927927927928%
user_id:65981
進捗:79.31392931392931%
user_id:65982
進捗:79.34857934857935%
user_id:65989
進捗:79.38322938322938%
user_id:66010
進捗:79.41787941787942%
user_id:66032
進捗:79.45252945252945%
user_id:66034
進捗:79.48717948717949%
user_id:66057
進捗:79.52182952182952%
user_id:66147
進捗:79.55647955647956%
user_id:66156
進捗:79.59112959112959%
user_id:66202
進捗:79.62577962577963%
user_id:66210
進捗:79.66042966042966%
user_id:66231
進捗:79.6950796950797%
user_id:66263
進捗:79.72972972972973%
user_id:66306
進捗:79.76437976437977%
user_id:66310
進捗:79.7990297990298%
user_id:66331
進捗:79.83367983367984%
user_id:66338
進捗:79.86832986832987%
user_id:66346
進捗:79.90297990297991%
user_id:66372
進捗:79.93762993762994%
user_id:66393
進捗:79.97227997227998%
user_id:66394
進捗:80.0069300069

user_id:69418
進捗:87.006237006237%
user_id:69421
進捗:87.04088704088704%
user_id:69424
進捗:87.07553707553707%
user_id:69437
進捗:87.11018711018711%
user_id:69438
進捗:87.14483714483714%
user_id:69442
進捗:87.17948717948718%
user_id:69443
進捗:87.21413721413721%
user_id:69449
進捗:87.24878724878725%
user_id:69456
進捗:87.28343728343728%
user_id:69465
進捗:87.31808731808732%
user_id:69483
進捗:87.35273735273735%
user_id:69488
進捗:87.38738738738739%
user_id:69493
進捗:87.42203742203742%
user_id:69506
進捗:87.45668745668746%
user_id:69514
進捗:87.49133749133749%
user_id:69515
進捗:87.52598752598753%
user_id:69522
進捗:87.56063756063756%
user_id:69526
進捗:87.5952875952876%
user_id:69537
進捗:87.62993762993763%
user_id:69542
進捗:87.66458766458767%
user_id:69544
進捗:87.6992376992377%
user_id:69549
進捗:87.73388773388774%
user_id:69553
進捗:87.76853776853777%
user_id:69558
進捗:87.8031878031878%
user_id:69566
進捗:87.83783783783784%
user_id:69577
進捗:87.87248787248787%
user_id:69578
進捗:87.90713790713791%
user_id:69580
進捗:87.9417879417879

user_id:69854
進捗:94.94109494109495%
user_id:69855
進捗:94.97574497574497%
user_id:69856
進捗:95.01039501039502%
user_id:69857
進捗:95.04504504504504%
user_id:69858
進捗:95.07969507969509%
user_id:69859
進捗:95.11434511434511%
user_id:69860
進捗:95.14899514899516%
user_id:69861
進捗:95.18364518364518%
user_id:69862
進捗:95.21829521829522%
user_id:69863
進捗:95.25294525294525%
user_id:69864
進捗:95.28759528759528%
user_id:69865
進捗:95.32224532224532%
user_id:69866
進捗:95.35689535689535%
user_id:69867
進捗:95.39154539154539%
user_id:69868
進捗:95.42619542619542%
user_id:69869
進捗:95.46084546084546%
user_id:69870
進捗:95.49549549549549%
user_id:69871
進捗:95.53014553014553%
user_id:69872
進捗:95.56479556479556%
user_id:69873
進捗:95.5994455994456%
user_id:69874
進捗:95.63409563409563%
user_id:69875
進捗:95.66874566874567%
user_id:69876
進捗:95.7033957033957%
user_id:69877
進捗:95.73804573804574%
user_id:69878
進捗:95.77269577269577%
user_id:69879
進捗:95.80734580734581%
user_id:69880
進捗:95.84199584199584%
user_id:69881
進捗:95.8766458766

In [79]:
recommends
#1としか推測してない


[[{'estimated_value': 1.0, 'id': ('1', '1')},
  {'estimated_value': 1.0, 'id': ('1', '2')},
  {'estimated_value': 1.0, 'id': ('1', '3')},
  {'estimated_value': 1.0, 'id': ('1', '4')},
  {'estimated_value': 1.0, 'id': ('1', '5')},
  {'estimated_value': 1.0, 'id': ('1', '6')},
  {'estimated_value': 1.0, 'id': ('1', '7')},
  {'estimated_value': 1.0, 'id': ('1', '8')},
  {'estimated_value': 1.0, 'id': ('1', '9')},
  {'estimated_value': 1.0, 'id': ('1', '10')},
  {'estimated_value': 1.0, 'id': ('1', '11')},
  {'estimated_value': 1.0, 'id': ('1', '12')},
  {'estimated_value': 1.0, 'id': ('1', '13')},
  {'estimated_value': 1.0, 'id': ('1', '14')},
  {'estimated_value': 1.0, 'id': ('1', '15')},
  {'estimated_value': 1.0, 'id': ('1', '16')},
  {'estimated_value': 1.0, 'id': ('1', '17')},
  {'estimated_value': 1.0, 'id': ('1', '18')},
  {'estimated_value': 1.0, 'id': ('1', '19')},
  {'estimated_value': 1.0, 'id': ('1', '20')}],
 [{'estimated_value': 1.0, 'id': ('6', '1')},
  {'estimated_value': 

In [80]:
f = open('recommend.txt', 'w')
for x in recommends:
    f.write(str(x) + "\n")
f.close()

In [81]:
df = pd.DataFrame(recommends)
df = pd.DataFrame(df.values.flatten())
df = df.rename(columns={0: "result"})

In [82]:
df.head()

Unnamed: 0,result
0,"{'id': ('1', '1'), 'estimated_value': 1.0}"
1,"{'id': ('1', '2'), 'estimated_value': 1.0}"
2,"{'id': ('1', '3'), 'estimated_value': 1.0}"
3,"{'id': ('1', '4'), 'estimated_value': 1.0}"
4,"{'id': ('1', '5'), 'estimated_value': 1.0}"


In [83]:
df.loc[:,"user_id"] = df["result"].apply(lambda x:x["id"][0])
df.loc[:,"event_id"] = df["result"].apply(lambda x:x["id"][1])
df.loc[:,"estimated_value"] = df["result"].apply(lambda x:x["estimated_value"])
#df.loc[:,"rank"] = [20,19,18,17,16,15,14,13,12,11,10,9,8,7,6,5,4,3,2,1] * len(user_list) 
df.loc[:,"rank"] = [1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20] * len(user_list) 

In [84]:
df.head(50)

Unnamed: 0,result,user_id,event_id,estimated_value,rank
0,"{'id': ('1', '1'), 'estimated_value': 1.0}",1,1,1.0,1
1,"{'id': ('1', '2'), 'estimated_value': 1.0}",1,2,1.0,2
2,"{'id': ('1', '3'), 'estimated_value': 1.0}",1,3,1.0,3
3,"{'id': ('1', '4'), 'estimated_value': 1.0}",1,4,1.0,4
4,"{'id': ('1', '5'), 'estimated_value': 1.0}",1,5,1.0,5
5,"{'id': ('1', '6'), 'estimated_value': 1.0}",1,6,1.0,6
6,"{'id': ('1', '7'), 'estimated_value': 1.0}",1,7,1.0,7
7,"{'id': ('1', '8'), 'estimated_value': 1.0}",1,8,1.0,8
8,"{'id': ('1', '9'), 'estimated_value': 1.0}",1,9,1.0,9
9,"{'id': ('1', '10'), 'estimated_value': 1.0}",1,10,1.0,10


In [85]:
submition = df[["user_id","event_id","rank"]]
print(submition.shape)
submition.head()

(57720, 3)


Unnamed: 0,user_id,event_id,rank
0,1,1,1
1,1,2,2
2,1,3,3
3,1,4,4
4,1,5,5


In [86]:
submition.to_csv("sub.tsv",header=None,index=None,sep = "\t")