In [1]:
import pandas as pd
import numpy as np
import pickle


In [2]:
impression_df = pd.read_csv('./impressions-train.csv')
final_rating_df = pd.read_csv('./ratings-final.csv')
final_test_df = pd.read_csv('./test.csv')
    

In [3]:
with open('./rec_on_impression_5_both', 'rb') as f:
    rec_on_impression = pickle.load(f)
with open('./rec_on_final_rating_5', 'rb') as f:
    rec_on_final_rating = pickle.load(f)

In [4]:
impression_cleaned_df = impression_df.drop_duplicates(subset=['reviewerid', 'movie-code'], keep=False)
final_rating_cleaned_df = final_rating_df.drop_duplicates(subset=['reviewerid', 'movie-code'], keep=False)

In [5]:
# for test
# impression_cleaned_df_test = final_test_df

# for validation
impression_cleaned_df_test = impression_cleaned_df.sample(frac=0.2, random_state=7, axis=0)
impression_cleaned_df = impression_cleaned_df[~impression_cleaned_df.index.isin(impression_cleaned_df_test.index)]

# Feature Engineering

In [6]:
impression_rating_mean = impression_cleaned_df.groupby(['reviewerid']).mean()
impression_rating_mean = impression_rating_mean.reset_index()
impression_rating_mean.rename(columns={'rating':'reviewer_impression_mean'},inplace=True) 

In [7]:
impression_cleaned_df_feature = pd.merge(impression_cleaned_df, impression_rating_mean[['reviewerid', 'reviewer_impression_mean']], how='left', on=['reviewerid'])

In [8]:
movie_rating_mean = impression_cleaned_df.groupby(['movie-code']).mean()
movie_rating_mean = movie_rating_mean.reset_index()
movie_rating_mean.rename(columns={'rating':'movie_impression_mean'},inplace=True) 

In [9]:
impression_cleaned_df_feature = pd.merge(impression_cleaned_df_feature, movie_rating_mean[['movie-code', 'movie_impression_mean']], how='left', on=['movie-code'])

In [10]:
impression_rating_max = impression_cleaned_df.groupby(['reviewerid']).max()
impression_rating_max = impression_rating_max.reset_index()
impression_rating_max.rename(columns={'rating':'reviewer_impression_max'},inplace=True) 

In [11]:
impression_cleaned_df_feature = pd.merge(impression_cleaned_df_feature, impression_rating_max[['reviewerid', 'reviewer_impression_max']], how='left', on=['reviewerid'])

In [12]:
impression_rating_min = impression_cleaned_df.groupby(['reviewerid']).min()
impression_rating_min = impression_rating_min.reset_index()
impression_rating_min.rename(columns={'rating':'reviewer_impression_min'},inplace=True) 

In [13]:
impression_cleaned_df_feature = pd.merge(impression_cleaned_df_feature, impression_rating_min[['reviewerid', 'reviewer_impression_min']], how='left', on=['reviewerid'])

In [14]:
user_rating_count = impression_cleaned_df.groupby(['reviewerid']).count()
user_rating_count = user_rating_count.reset_index()
user_rating_count.rename(columns={'rating':'reviewer_rating_count'},inplace=True) 

In [15]:
impression_cleaned_df_feature = pd.merge(impression_cleaned_df_feature, user_rating_count[['reviewerid', 'reviewer_rating_count']], how='left', on=['reviewerid'])

In [16]:
movie_rating_count = impression_cleaned_df.groupby(['movie-code']).count()
movie_rating_count = movie_rating_count.reset_index()
movie_rating_count.rename(columns={'rating':'movie_rating_count'},inplace=True) 

In [17]:
impression_cleaned_df_feature = pd.merge(impression_cleaned_df_feature, movie_rating_count[['movie-code', 'movie_rating_count']], how='left', on=['movie-code'])

In [18]:
impression_rating_std = impression_cleaned_df.groupby(['reviewerid']).std()
impression_rating_std = impression_rating_std.reset_index()
impression_rating_std.rename(columns={'rating':'reviewer_impression_std'},inplace=True) 

In [19]:
impression_cleaned_df_feature = pd.merge(impression_cleaned_df_feature, impression_rating_std[['reviewerid', 'reviewer_impression_std']], how='left', on=['reviewerid'])

In [20]:
movie_rating_std = impression_cleaned_df.groupby(['movie-code']).std()
movie_rating_std = movie_rating_std.reset_index()
movie_rating_std.rename(columns={'rating':'movie_impression_std'},inplace=True) 

In [21]:
impression_cleaned_df_feature = pd.merge(impression_cleaned_df_feature, movie_rating_std[['movie-code', 'movie_impression_std']], how='left', on=['movie-code'])

In [22]:
reviewer_each_rating_count = impression_cleaned_df.groupby(['reviewerid','rating']).count()
reviewer_each_rating_count = reviewer_each_rating_count.reset_index()
reviewer_each_rating_count.rename(columns={'movie-code':'reviewer_each_rating_count'},inplace=True)

In [23]:
impression_cleaned_df_feature = pd.merge(impression_cleaned_df_feature, 
                                         reviewer_each_rating_count[reviewer_each_rating_count['rating']==0][['reviewerid', 'reviewer_each_rating_count']], 
                                         how='left', on=['reviewerid'])
impression_cleaned_df_feature.rename(columns={'reviewer_each_rating_count':'reviewer_rating_count_0'},inplace=True)

impression_cleaned_df_feature = pd.merge(impression_cleaned_df_feature, 
                                         reviewer_each_rating_count[reviewer_each_rating_count['rating']==1][['reviewerid', 'reviewer_each_rating_count']], 
                                         how='left', on=['reviewerid'])
impression_cleaned_df_feature.rename(columns={'reviewer_each_rating_count':'reviewer_rating_count_1'},inplace=True)

impression_cleaned_df_feature = pd.merge(impression_cleaned_df_feature, 
                                         reviewer_each_rating_count[reviewer_each_rating_count['rating']==2][['reviewerid', 'reviewer_each_rating_count']], 
                                         how='left', on=['reviewerid'])
impression_cleaned_df_feature.rename(columns={'reviewer_each_rating_count':'reviewer_rating_count_2'},inplace=True)

In [24]:
impression_cleaned_df_feature[['reviewer_rating_count_0','reviewer_rating_count_1','reviewer_rating_count_2']] = impression_cleaned_df_feature[['reviewer_rating_count_0','reviewer_rating_count_1','reviewer_rating_count_2']].fillna(0)

In [25]:
impression_cleaned_df_feature['reviewer_rating_percent_0'] = impression_cleaned_df_feature.apply(lambda row: row['reviewer_rating_count_0']/row['reviewer_rating_count'], axis=1)
impression_cleaned_df_feature['reviewer_rating_percent_1'] = impression_cleaned_df_feature.apply(lambda row: row['reviewer_rating_count_1']/row['reviewer_rating_count'], axis=1)
impression_cleaned_df_feature['reviewer_rating_percent_2'] = impression_cleaned_df_feature.apply(lambda row: row['reviewer_rating_count_2']/row['reviewer_rating_count'], axis=1)

In [26]:
movie_each_rating_count = impression_cleaned_df.groupby(['movie-code','rating']).count()
movie_each_rating_count = movie_each_rating_count.reset_index()
movie_each_rating_count.rename(columns={'reviewerid':'movie_each_rating_count'},inplace=True)

In [27]:
impression_cleaned_df_feature = pd.merge(impression_cleaned_df_feature, 
                                         movie_each_rating_count[movie_each_rating_count['rating']==0][['movie-code', 'movie_each_rating_count']], 
                                         how='left', on=['movie-code'])
impression_cleaned_df_feature.rename(columns={'movie_each_rating_count':'movie_rating_count_0'},inplace=True)

impression_cleaned_df_feature = pd.merge(impression_cleaned_df_feature, 
                                         movie_each_rating_count[movie_each_rating_count['rating']==1][['movie-code', 'movie_each_rating_count']], 
                                         how='left', on=['movie-code'])
impression_cleaned_df_feature.rename(columns={'movie_each_rating_count':'movie_rating_count_1'},inplace=True)

impression_cleaned_df_feature = pd.merge(impression_cleaned_df_feature, 
                                         movie_each_rating_count[movie_each_rating_count['rating']==2][['movie-code', 'movie_each_rating_count']], 
                                         how='left', on=['movie-code'])
impression_cleaned_df_feature.rename(columns={'movie_each_rating_count':'movie_rating_count_2'},inplace=True)

In [28]:
impression_cleaned_df_feature[['movie_rating_count_0','movie_rating_count_1','movie_rating_count_2']] = impression_cleaned_df_feature[['movie_rating_count_0','movie_rating_count_1','movie_rating_count_2']].fillna(0)

In [29]:
impression_cleaned_df_feature['movie_rating_percent_0'] = impression_cleaned_df_feature.apply(lambda row: row['movie_rating_count_0']/row['movie_rating_count'], axis=1)
impression_cleaned_df_feature['movie_rating_percent_1'] = impression_cleaned_df_feature.apply(lambda row: row['movie_rating_count_1']/row['movie_rating_count'], axis=1)
impression_cleaned_df_feature['movie_rating_percent_2'] = impression_cleaned_df_feature.apply(lambda row: row['movie_rating_count_2']/row['movie_rating_count'], axis=1)

In [30]:
movie_final_rating_mean = final_rating_cleaned_df.groupby(['movie-code']).mean()
movie_final_rating_mean = movie_final_rating_mean.reset_index()
movie_final_rating_mean.rename(columns={'rating-final':'movie_final_mean'},inplace=True) 

In [31]:
impression_cleaned_df_feature = pd.merge(impression_cleaned_df_feature, movie_final_rating_mean[['movie-code', 'movie_final_mean']], how='left', on=['movie-code'])

In [32]:
movie_final_rating_std = final_rating_cleaned_df.groupby(['movie-code']).std()
movie_final_rating_std = movie_final_rating_std.reset_index()
movie_final_rating_std.rename(columns={'rating-final':'movie_final_std'},inplace=True) 

In [33]:
impression_cleaned_df_feature = pd.merge(impression_cleaned_df_feature, movie_final_rating_std[['movie-code', 'movie_final_std']], how='left', on=['movie-code'])

In [34]:
movie_final_rating_count = final_rating_cleaned_df.groupby(['movie-code']).count()
movie_final_rating_count = movie_final_rating_count.reset_index()
movie_final_rating_count.rename(columns={'rating-final':'movie_final_count'},inplace=True) 

In [35]:
impression_cleaned_df_feature = pd.merge(impression_cleaned_df_feature, movie_final_rating_count[['movie-code', 'movie_final_count']], how='left', on=['movie-code'])

In [36]:
reviewer_final_rating_mean = final_rating_cleaned_df.groupby(['reviewerid']).mean()
reviewer_final_rating_mean = reviewer_final_rating_mean.reset_index()
reviewer_final_rating_mean.rename(columns={'rating-final':'reviewer_final_mean'},inplace=True) 

In [37]:
impression_cleaned_df_feature = pd.merge(impression_cleaned_df_feature, reviewer_final_rating_mean[['reviewerid', 'reviewer_final_mean']], how='left', on=['reviewerid'])

In [38]:
reviewer_final_rating_std = final_rating_cleaned_df.groupby(['reviewerid']).std()
reviewer_final_rating_std = reviewer_final_rating_std.reset_index()
reviewer_final_rating_std.rename(columns={'rating-final':'reviewer_final_std'},inplace=True) 

In [39]:
impression_cleaned_df_feature = pd.merge(impression_cleaned_df_feature, reviewer_final_rating_std[['reviewerid', 'reviewer_final_std']], how='left', on=['reviewerid'])

In [40]:
reviewer_final_rating_count = final_rating_cleaned_df.groupby(['reviewerid']).count()
reviewer_final_rating_count = reviewer_final_rating_count.reset_index()
reviewer_final_rating_count.rename(columns={'rating-final':'reviewer_final_count'},inplace=True) 

In [41]:
impression_cleaned_df_feature = pd.merge(impression_cleaned_df_feature, reviewer_final_rating_count[['reviewerid', 'reviewer_final_count']], how='left', on=['reviewerid'])

In [42]:
reviewer_final_each_rating_count = final_rating_cleaned_df.groupby(['reviewerid','rating-final']).count()
reviewer_final_each_rating_count = reviewer_final_each_rating_count.reset_index()
reviewer_final_each_rating_count.rename(columns={'movie-code':'reviewer_each_rating_count'},inplace=True)

In [43]:
impression_cleaned_df_feature = pd.merge(impression_cleaned_df_feature, 
                                         reviewer_final_each_rating_count[reviewer_final_each_rating_count['rating-final']==0][['reviewerid', 'reviewer_each_rating_count']], 
                                         how='left', on=['reviewerid'])
impression_cleaned_df_feature.rename(columns={'reviewer_each_rating_count':'reviewer_final_rating_count_0'},inplace=True)

impression_cleaned_df_feature = pd.merge(impression_cleaned_df_feature, 
                                         reviewer_final_each_rating_count[reviewer_final_each_rating_count['rating-final']==1][['reviewerid', 'reviewer_each_rating_count']], 
                                         how='left', on=['reviewerid'])
impression_cleaned_df_feature.rename(columns={'reviewer_each_rating_count':'reviewer_final_rating_count_1'},inplace=True)

impression_cleaned_df_feature = pd.merge(impression_cleaned_df_feature, 
                                         reviewer_final_each_rating_count[reviewer_final_each_rating_count['rating-final']==2][['reviewerid', 'reviewer_each_rating_count']], 
                                         how='left', on=['reviewerid'])
impression_cleaned_df_feature.rename(columns={'reviewer_each_rating_count':'reviewer_final_rating_count_2'},inplace=True)

In [44]:
impression_cleaned_df_feature[['reviewer_final_rating_count_0','reviewer_final_rating_count_1','reviewer_final_rating_count_2']] = impression_cleaned_df_feature[['reviewer_final_rating_count_0','reviewer_final_rating_count_1','reviewer_final_rating_count_2']].fillna(0)

In [45]:
impression_cleaned_df_feature['reviewer_final_rating_percent_0'] = impression_cleaned_df_feature.apply(lambda row: row['reviewer_final_rating_count_0']/row['reviewer_final_count'], axis=1)
impression_cleaned_df_feature['reviewer_final_rating_percent_1'] = impression_cleaned_df_feature.apply(lambda row: row['reviewer_final_rating_count_1']/row['reviewer_final_count'], axis=1)
impression_cleaned_df_feature['reviewer_final_rating_percent_2'] = impression_cleaned_df_feature.apply(lambda row: row['reviewer_final_rating_count_2']/row['reviewer_final_count'], axis=1)

In [46]:
movie_final_each_rating_count = final_rating_cleaned_df.groupby(['movie-code','rating-final']).count()
movie_final_each_rating_count = movie_final_each_rating_count.reset_index()
movie_final_each_rating_count.rename(columns={'reviewerid':'movie_each_rating_count'},inplace=True)

In [47]:
impression_cleaned_df_feature = pd.merge(impression_cleaned_df_feature, 
                                         movie_final_each_rating_count[movie_final_each_rating_count['rating-final']==0][['movie-code', 'movie_each_rating_count']], 
                                         how='left', on=['movie-code'])
impression_cleaned_df_feature.rename(columns={'movie_each_rating_count':'movie_final_rating_count_0'},inplace=True)

impression_cleaned_df_feature = pd.merge(impression_cleaned_df_feature, 
                                         movie_final_each_rating_count[movie_final_each_rating_count['rating-final']==1][['movie-code', 'movie_each_rating_count']], 
                                         how='left', on=['movie-code'])
impression_cleaned_df_feature.rename(columns={'movie_each_rating_count':'movie_final_rating_count_1'},inplace=True)

impression_cleaned_df_feature = pd.merge(impression_cleaned_df_feature, 
                                         movie_final_each_rating_count[movie_final_each_rating_count['rating-final']==2][['movie-code', 'movie_each_rating_count']], 
                                         how='left', on=['movie-code'])
impression_cleaned_df_feature.rename(columns={'movie_each_rating_count':'movie_final_rating_count_2'},inplace=True)

In [48]:
impression_cleaned_df_feature[['movie_final_rating_count_0','movie_final_rating_count_1','movie_final_rating_count_2']] = impression_cleaned_df_feature[['movie_final_rating_count_0','movie_final_rating_count_1','movie_final_rating_count_2']].fillna(0)

In [49]:
impression_cleaned_df_feature['movie_final_rating_percent_0'] = impression_cleaned_df_feature.apply(lambda row: row['movie_final_rating_count_0']/row['movie_final_count'], axis=1)
impression_cleaned_df_feature['movie_final_rating_percent_1'] = impression_cleaned_df_feature.apply(lambda row: row['movie_final_rating_count_1']/row['movie_final_count'], axis=1)
impression_cleaned_df_feature['movie_final_rating_percent_2'] = impression_cleaned_df_feature.apply(lambda row: row['movie_final_rating_count_2']/row['movie_final_count'], axis=1)

In [50]:
impression_cleaned_df_feature['movie_mean_difference'] = impression_cleaned_df_feature.apply(lambda row: row['movie_final_mean'] - row['movie_impression_mean'], axis=1)

In [51]:
impression_cleaned_df_feature['reviewerid'].map(rec_on_impression)

0           [(21, 2), (131, 2), (77, 2), (88, 2), (90, 2)]
1        [(147, 2), (129, 2), (154, 2), (104, 2), (114,...
2          [(131, 2), (77, 2), (88, 2), (90, 2), (122, 2)]
3         [(179, 2), (165, 2), (21, 2), (131, 2), (77, 2)]
4         [(179, 2), (165, 2), (21, 2), (131, 2), (77, 2)]
                               ...                        
21700      [(124, 2), (60, 2), (44, 2), (35, 2), (168, 2)]
21701     [(179, 2), (165, 2), (131, 2), (77, 2), (88, 2)]
21702      [(52, 2), (133, 2), (91, 2), (55, 2), (149, 2)]
21703     [(179, 2), (165, 2), (131, 2), (88, 2), (90, 2)]
21704    [(165, 2), (21, 2), (131, 2), (122, 2), (102, 2)]
Name: reviewerid, Length: 21705, dtype: object

In [52]:
rec_1 = impression_cleaned_df_feature['reviewerid'].map(rec_on_impression)
rec_2 = impression_cleaned_df_feature['reviewerid'].map(rec_on_final_rating)

rec_1_0 = [r[0][0] for r in rec_1]
rec_1_1 = [r[1][0] for r in rec_1]
rec_1_2 = [r[2][0] for r in rec_1]
rec_1_3 = [r[3][0] for r in rec_1]
rec_1_4 = [r[4][0] for r in rec_1]

rec_1_0_s = [r[0][1] for r in rec_1]
rec_1_1_s = [r[1][1] for r in rec_1]
rec_1_2_s = [r[2][1] for r in rec_1]
rec_1_3_s = [r[3][1] for r in rec_1]
rec_1_4_s = [r[4][1] for r in rec_1]

rec_2_0 = [r[0] for r in rec_2]
rec_2_1 = [r[1] for r in rec_2]
rec_2_2 = [r[2] for r in rec_2]
rec_2_3 = [r[3] for r in rec_2]
rec_2_4 = [r[4] for r in rec_2]


In [53]:
impression_cleaned_df_feature['impression_rec_0'] = rec_1_0
impression_cleaned_df_feature['impression_rec_0_score'] = rec_1_0_s

impression_cleaned_df_feature['impression_rec_1'] = rec_1_1
impression_cleaned_df_feature['impression_rec_1_score'] = rec_1_1_s

impression_cleaned_df_feature['impression_rec_2'] = rec_1_2
impression_cleaned_df_feature['impression_rec_2_score'] = rec_1_2_s

impression_cleaned_df_feature['impression_rec_3'] = rec_1_3
impression_cleaned_df_feature['impression_rec_3_score'] = rec_1_3_s

impression_cleaned_df_feature['impression_rec_4'] = rec_1_4
impression_cleaned_df_feature['impression_rec_4_score'] = rec_1_4_s

impression_cleaned_df_feature['final_rating_rec_0'] = rec_2_0
impression_cleaned_df_feature['final_rating_rec_1'] = rec_2_1
impression_cleaned_df_feature['final_rating_rec_2'] = rec_2_2
impression_cleaned_df_feature['final_rating_rec_3'] = rec_2_3
impression_cleaned_df_feature['final_rating_rec_4'] = rec_2_4

In [54]:
impression_cleaned_df_feature

Unnamed: 0,reviewerid,movie-code,rating,reviewer_impression_mean,movie_impression_mean,reviewer_impression_max,reviewer_impression_min,reviewer_rating_count,movie_rating_count,reviewer_impression_std,...,impression_rec_2_score,impression_rec_3,impression_rec_3_score,impression_rec_4,impression_rec_4_score,final_rating_rec_0,final_rating_rec_1,final_rating_rec_2,final_rating_rec_3,final_rating_rec_4
0,116,150,0,0.070175,0.298780,1,0,57,164,0.257713,...,2,88,2,90,2,191,53,64,187,57
1,328,56,2,0.447368,0.468750,2,0,38,96,0.601681,...,2,104,2,114,2,167,181,144,152,194
2,181,181,0,0.440000,0.324468,2,0,50,188,0.611455,...,2,90,2,122,2,79,182,39,32,108
3,131,6,0,0.666667,0.546218,2,0,21,238,0.658281,...,2,131,2,77,2,32,13,68,20,27
4,236,24,2,0.547619,1.055556,2,0,42,162,0.771517,...,2,131,2,77,2,133,32,30,13,74
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
21700,405,14,1,0.500000,0.441176,2,0,18,136,0.618347,...,2,35,2,168,2,99,73,39,32,108
21701,59,51,0,0.593750,0.559441,2,0,32,143,0.712079,...,2,77,2,88,2,51,177,183,1,102
21702,421,58,2,1.194444,0.503650,2,0,36,137,0.668450,...,2,55,2,149,2,132,155,73,32,108
21703,263,57,0,0.361702,0.397959,2,0,47,98,0.528556,...,2,88,2,90,2,72,112,97,31,64


In [55]:
columns = impression_cleaned_df_feature.columns

In [56]:
columns

Index(['reviewerid', 'movie-code', 'rating', 'reviewer_impression_mean',
       'movie_impression_mean', 'reviewer_impression_max',
       'reviewer_impression_min', 'reviewer_rating_count',
       'movie_rating_count', 'reviewer_impression_std', 'movie_impression_std',
       'reviewer_rating_count_0', 'reviewer_rating_count_1',
       'reviewer_rating_count_2', 'reviewer_rating_percent_0',
       'reviewer_rating_percent_1', 'reviewer_rating_percent_2',
       'movie_rating_count_0', 'movie_rating_count_1', 'movie_rating_count_2',
       'movie_rating_percent_0', 'movie_rating_percent_1',
       'movie_rating_percent_2', 'movie_final_mean', 'movie_final_std',
       'movie_final_count', 'reviewer_final_mean', 'reviewer_final_std',
       'reviewer_final_count', 'reviewer_final_rating_count_0',
       'reviewer_final_rating_count_1', 'reviewer_final_rating_count_2',
       'reviewer_final_rating_percent_0', 'reviewer_final_rating_percent_1',
       'reviewer_final_rating_percent_2', '

In [57]:
reviewer_based_feature = ['reviewerid',
 'reviewer_impression_mean',
 'reviewer_impression_max',
 'reviewer_impression_min',
 'reviewer_rating_count',
 'reviewer_impression_std',
 'reviewer_rating_count_0',
 'reviewer_rating_count_1',
 'reviewer_rating_count_2',
 'reviewer_rating_percent_0',
 'reviewer_rating_percent_1',
 'reviewer_rating_percent_2',
 'reviewer_final_mean',
 'reviewer_final_std',
 'reviewer_final_count',
 'reviewer_final_rating_count_0',
 'reviewer_final_rating_count_1',
 'reviewer_final_rating_count_2',
 'reviewer_final_rating_percent_0',
 'reviewer_final_rating_percent_1',
 'reviewer_final_rating_percent_2',
 'impression_rec_0',
 'impression_rec_0_score',
 'impression_rec_1',
 'impression_rec_1_score',
 'impression_rec_2',
 'impression_rec_2_score',
 'impression_rec_3',
 'impression_rec_3_score',
 'impression_rec_4',
 'impression_rec_4_score',
 'final_rating_rec_0',
 'final_rating_rec_1',
 'final_rating_rec_2',
 'final_rating_rec_3',
 'final_rating_rec_4']

movie_based_feature = ['movie-code',
 'movie_impression_mean',
 'movie_rating_count',
 'movie_impression_std',
 'movie_rating_count_0',
 'movie_rating_count_1',
 'movie_rating_count_2',
 'movie_rating_percent_0',
 'movie_rating_percent_1',
 'movie_rating_percent_2',
 'movie_final_mean',
 'movie_final_std',
 'movie_final_count',
 'movie_final_rating_count_0',
 'movie_final_rating_count_1',
 'movie_final_rating_count_2',
 'movie_final_rating_percent_0',
 'movie_final_rating_percent_1',
 'movie_final_rating_percent_2',
 'movie_mean_difference']

feature_columns = ['reviewer_impression_mean',
 'movie_impression_mean',
 'reviewer_impression_max',
 'reviewer_impression_min',
 'reviewer_rating_count',
 'movie_rating_count',
 'reviewer_impression_std',
 'movie_impression_std',
 'reviewer_rating_count_0',
 'reviewer_rating_count_1',
 'reviewer_rating_count_2',
 'reviewer_rating_percent_0',
 'reviewer_rating_percent_1',
 'reviewer_rating_percent_2',
 'movie_rating_count_0',
 'movie_rating_count_1',
 'movie_rating_count_2',
 'movie_rating_percent_0',
 'movie_rating_percent_1',
 'movie_rating_percent_2',
 'movie_final_mean',
 'movie_final_std',
 'movie_final_count',
 'reviewer_final_mean',
 'reviewer_final_std',
 'reviewer_final_count',
 'reviewer_final_rating_count_0',
 'reviewer_final_rating_count_1',
 'reviewer_final_rating_count_2',
 'reviewer_final_rating_percent_0',
 'reviewer_final_rating_percent_1',
 'reviewer_final_rating_percent_2',
 'movie_final_rating_count_0',
 'movie_final_rating_count_1',
 'movie_final_rating_count_2',
 'movie_final_rating_percent_0',
 'movie_final_rating_percent_1',
 'movie_final_rating_percent_2',
 'movie_mean_difference',
 'impression_rec_0',
 'impression_rec_0_score',
 'impression_rec_1',
 'impression_rec_1_score',
 'impression_rec_2',
 'impression_rec_2_score',
 'impression_rec_3',
 'impression_rec_3_score',
 'impression_rec_4',
 'impression_rec_4_score',
 'final_rating_rec_0',
 'final_rating_rec_1',
 'final_rating_rec_2',
 'final_rating_rec_3',
 'final_rating_rec_4']

std_columns = ['reviewer_impression_max',
 'reviewer_impression_min',
 'reviewer_rating_count',
 'movie_rating_count',
 'movie_final_count',
 'reviewer_final_count']

In [58]:
print(len(feature_columns))
print(len(movie_based_feature)+len(reviewer_based_feature))

54
56


# training data prepare

In [59]:
impression_cleaned_df_test_feature = pd.merge(impression_cleaned_df_test, impression_cleaned_df_feature[reviewer_based_feature].drop_duplicates(subset=['reviewerid']), how='left', on=['reviewerid'])
impression_cleaned_df_test_feature = pd.merge(impression_cleaned_df_test_feature, impression_cleaned_df_feature[movie_based_feature].drop_duplicates(subset=['movie-code']), how='left', on=['movie-code'])

In [60]:
impression_cleaned_df_test_feature

Unnamed: 0,reviewerid,movie-code,rating,reviewer_impression_mean,reviewer_impression_max,reviewer_impression_min,reviewer_rating_count,reviewer_impression_std,reviewer_rating_count_0,reviewer_rating_count_1,...,movie_final_mean,movie_final_std,movie_final_count,movie_final_rating_count_0,movie_final_rating_count_1,movie_final_rating_count_2,movie_final_rating_percent_0,movie_final_rating_percent_1,movie_final_rating_percent_2,movie_mean_difference
0,68,111,0,0.142857,2,0,35,0.493657,32.0,1.0,...,1.381818,0.693812,165,20,62,83,0.121212,0.375758,0.503030,0.923255
1,524,28,0,0.553191,2,0,47,0.746252,28.0,12.0,...,1.519663,0.611768,356,22,127,207,0.061798,0.356742,0.581461,1.075218
2,54,37,1,0.545455,2,0,22,0.800433,14.0,4.0,...,1.264368,0.738631,87,15,34,38,0.172414,0.390805,0.436782,0.701261
3,60,59,0,0.191176,2,0,68,0.432188,56.0,11.0,...,1.667464,0.585143,418,25,89,304,0.059809,0.212919,0.727273,1.152313
4,409,119,1,0.678571,2,0,56,0.690379,25.0,24.0,...,1.438596,0.665657,114,11,42,61,0.096491,0.368421,0.535088,0.806118
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
5421,182,174,0,0.468750,2,0,32,0.717719,21.0,7.0,...,1.409231,0.658684,325,31,130,164,0.095385,0.400000,0.504615,0.834763
5422,237,143,0,0.269231,1,0,26,0.452344,19.0,7.0,...,1.394366,0.714170,142,19,48,75,0.133803,0.338028,0.528169,0.727700
5423,368,131,0,0.244898,2,0,49,0.630166,42.0,2.0,...,1.116279,0.697250,43,8,22,13,0.186047,0.511628,0.302326,0.646130
5424,89,8,1,0.727273,2,0,44,0.788394,21.0,14.0,...,1.269231,0.789824,156,33,48,75,0.211538,0.307692,0.480769,0.706206


In [61]:
train_set = np.zeros((21705, 2+len(feature_columns)))
valid_set = np.zeros((5426, 2+len(feature_columns)))

In [62]:
for index, row in impression_cleaned_df_feature.iterrows():
    train_set[index, 0] = row['reviewerid']
    train_set[index, 1] = row['movie-code']
    # train_set[index, 2:2+23] = movie_genre_map[row['movie-code']]
    # train_set[index, 573+201:573+201+768] = movie_bert_map[row['movie-code']]
    for col_idx, col_name in enumerate(feature_columns):
        train_set[index, 2+col_idx] = row[col_name]
for index, row in impression_cleaned_df_test_feature.iterrows():
    valid_set[index, 0] = row['reviewerid']
    valid_set[index, 1] = row['movie-code']
    # valid_set[index, 2:2+23] = movie_genre_map[row['movie-code']]
    # valid_set[index, 573+201:573+201+768] = movie_bert_map[row['movie-code']]
    
    for col_idx, col_name in enumerate(feature_columns):
        valid_set[index, 2+col_idx] = row[col_name]

In [63]:
train_y_label = impression_cleaned_df_feature['rating'].values

In [64]:
valid_y_label = impression_cleaned_df_test_feature['rating'].values

# XGBOOST

In [65]:
import xgboost as xgb
xg_train = xgb.DMatrix(train_set, label=train_y_label)
xg_valid = xgb.DMatrix(valid_set, label=valid_y_label)

In [66]:
# for final training
# xg_test_final = xgb.DMatrix(valid_set)

In [67]:
def point_cal(preds, dtrain):
    points = 0
    
    test_label = dtrain.get_label().tolist()
    pred = preds.tolist()
    
    for i in range(len(test_label)):
        if test_label[i] == pred[i]:
            points += 2
        else:
            if (test_label[i] == 1.0 and pred[i] == 2.0) or (test_label[i] == 2.0 and pred[i] == 1.0):
                points += 1
    return 'POINTS',points

In [70]:
params = {'booster':'gbtree',
         'objective':'multi:softmax',
         'gamma':0.1,
         'min_child_weight':1,
         'max_depth':6,
         'lambda':1,
         'subsample':0.5,
         'colsample_bytree':0.5,
         'colsample_bylevel':0.5,
         'eta':0.01,
         'tree_method':'exact',
         'num_class': 3,
         'gpu_id': 0,
         'tree_method': 'gpu_hist'}

watchlist = [ (xg_train,'train'), (xg_valid, 'test') ]
# watchlist = [ (xg_train,'train')]
num_round = 1400
bst = xgb.train(params, xg_train, num_round, watchlist, feval = point_cal)

[0]	train-merror:0.36945	test-merror:0.40435	train-POINTS:28864.00000	test-POINTS:6942.00000
[1]	train-merror:0.36554	test-merror:0.39845	train-POINTS:29016.00000	test-POINTS:6982.00000
[2]	train-merror:0.36222	test-merror:0.39919	train-POINTS:29194.00000	test-POINTS:6978.00000
[3]	train-merror:0.36065	test-merror:0.39569	train-POINTS:29234.00000	test-POINTS:7002.00000
[4]	train-merror:0.36070	test-merror:0.39643	train-POINTS:29195.00000	test-POINTS:6979.00000
[5]	train-merror:0.36029	test-merror:0.39514	train-POINTS:29202.00000	test-POINTS:6987.00000
[6]	train-merror:0.35950	test-merror:0.39569	train-POINTS:29230.00000	test-POINTS:6990.00000
[7]	train-merror:0.35853	test-merror:0.39348	train-POINTS:29302.00000	test-POINTS:7006.00000
[8]	train-merror:0.35840	test-merror:0.39274	train-POINTS:29308.00000	test-POINTS:7012.00000
[9]	train-merror:0.35775	test-merror:0.39200	train-POINTS:29327.00000	test-POINTS:7021.00000
[10]	train-merror:0.35853	test-merror:0.39237	train-POINTS:29286.00000

[88]	train-merror:0.35264	test-merror:0.39366	train-POINTS:29536.00000	test-POINTS:7025.00000
[89]	train-merror:0.35254	test-merror:0.39403	train-POINTS:29540.00000	test-POINTS:7022.00000
[90]	train-merror:0.35241	test-merror:0.39477	train-POINTS:29543.00000	test-POINTS:7016.00000
[91]	train-merror:0.35241	test-merror:0.39366	train-POINTS:29545.00000	test-POINTS:7025.00000
[92]	train-merror:0.35222	test-merror:0.39384	train-POINTS:29550.00000	test-POINTS:7022.00000
[93]	train-merror:0.35231	test-merror:0.39384	train-POINTS:29551.00000	test-POINTS:7024.00000
[94]	train-merror:0.35213	test-merror:0.39421	train-POINTS:29556.00000	test-POINTS:7020.00000
[95]	train-merror:0.35222	test-merror:0.39403	train-POINTS:29551.00000	test-POINTS:7024.00000
[96]	train-merror:0.35222	test-merror:0.39403	train-POINTS:29557.00000	test-POINTS:7021.00000
[97]	train-merror:0.35176	test-merror:0.39366	train-POINTS:29573.00000	test-POINTS:7028.00000
[98]	train-merror:0.35181	test-merror:0.39403	train-POINTS:2

[175]	train-merror:0.34831	test-merror:0.39569	train-POINTS:29698.00000	test-POINTS:7020.00000
[176]	train-merror:0.34821	test-merror:0.39587	train-POINTS:29703.00000	test-POINTS:7020.00000
[177]	train-merror:0.34803	test-merror:0.39606	train-POINTS:29708.00000	test-POINTS:7019.00000
[178]	train-merror:0.34826	test-merror:0.39587	train-POINTS:29701.00000	test-POINTS:7019.00000
[179]	train-merror:0.34812	test-merror:0.39532	train-POINTS:29705.00000	test-POINTS:7024.00000
[180]	train-merror:0.34826	test-merror:0.39532	train-POINTS:29698.00000	test-POINTS:7023.00000
[181]	train-merror:0.34835	test-merror:0.39550	train-POINTS:29694.00000	test-POINTS:7022.00000
[182]	train-merror:0.34798	test-merror:0.39532	train-POINTS:29709.00000	test-POINTS:7024.00000
[183]	train-merror:0.34821	test-merror:0.39550	train-POINTS:29699.00000	test-POINTS:7019.00000
[184]	train-merror:0.34812	test-merror:0.39587	train-POINTS:29703.00000	test-POINTS:7016.00000
[185]	train-merror:0.34812	test-merror:0.39624	tra

[262]	train-merror:0.34324	test-merror:0.39458	train-POINTS:29912.00000	test-POINTS:7032.00000
[263]	train-merror:0.34301	test-merror:0.39458	train-POINTS:29925.00000	test-POINTS:7032.00000
[264]	train-merror:0.34324	test-merror:0.39458	train-POINTS:29913.00000	test-POINTS:7032.00000
[265]	train-merror:0.34324	test-merror:0.39440	train-POINTS:29912.00000	test-POINTS:7034.00000
[266]	train-merror:0.34315	test-merror:0.39477	train-POINTS:29916.00000	test-POINTS:7030.00000
[267]	train-merror:0.34301	test-merror:0.39495	train-POINTS:29920.00000	test-POINTS:7028.00000
[268]	train-merror:0.34319	test-merror:0.39477	train-POINTS:29914.00000	test-POINTS:7029.00000
[269]	train-merror:0.34329	test-merror:0.39458	train-POINTS:29912.00000	test-POINTS:7031.00000
[270]	train-merror:0.34315	test-merror:0.39495	train-POINTS:29918.00000	test-POINTS:7027.00000
[271]	train-merror:0.34282	test-merror:0.39495	train-POINTS:29928.00000	test-POINTS:7027.00000
[272]	train-merror:0.34269	test-merror:0.39495	tra

[349]	train-merror:0.33923	test-merror:0.39495	train-POINTS:30080.00000	test-POINTS:7032.00000
[350]	train-merror:0.33914	test-merror:0.39514	train-POINTS:30081.00000	test-POINTS:7031.00000
[351]	train-merror:0.33891	test-merror:0.39477	train-POINTS:30088.00000	test-POINTS:7035.00000
[352]	train-merror:0.33900	test-merror:0.39495	train-POINTS:30084.00000	test-POINTS:7033.00000
[353]	train-merror:0.33882	test-merror:0.39458	train-POINTS:30092.00000	test-POINTS:7035.00000
[354]	train-merror:0.33882	test-merror:0.39440	train-POINTS:30091.00000	test-POINTS:7036.00000
[355]	train-merror:0.33872	test-merror:0.39440	train-POINTS:30096.00000	test-POINTS:7036.00000
[356]	train-merror:0.33882	test-merror:0.39440	train-POINTS:30089.00000	test-POINTS:7036.00000
[357]	train-merror:0.33882	test-merror:0.39421	train-POINTS:30087.00000	test-POINTS:7037.00000
[358]	train-merror:0.33859	test-merror:0.39421	train-POINTS:30096.00000	test-POINTS:7036.00000
[359]	train-merror:0.33849	test-merror:0.39458	tra

[436]	train-merror:0.33527	test-merror:0.39403	train-POINTS:30239.00000	test-POINTS:7042.00000
[437]	train-merror:0.33513	test-merror:0.39440	train-POINTS:30242.00000	test-POINTS:7039.00000
[438]	train-merror:0.33522	test-merror:0.39458	train-POINTS:30239.00000	test-POINTS:7038.00000
[439]	train-merror:0.33536	test-merror:0.39458	train-POINTS:30234.00000	test-POINTS:7038.00000
[440]	train-merror:0.33513	test-merror:0.39421	train-POINTS:30244.00000	test-POINTS:7040.00000
[441]	train-merror:0.33513	test-merror:0.39421	train-POINTS:30244.00000	test-POINTS:7040.00000
[442]	train-merror:0.33527	test-merror:0.39403	train-POINTS:30240.00000	test-POINTS:7042.00000
[443]	train-merror:0.33508	test-merror:0.39384	train-POINTS:30247.00000	test-POINTS:7043.00000
[444]	train-merror:0.33485	test-merror:0.39384	train-POINTS:30255.00000	test-POINTS:7044.00000
[445]	train-merror:0.33485	test-merror:0.39366	train-POINTS:30255.00000	test-POINTS:7045.00000
[446]	train-merror:0.33485	test-merror:0.39403	tra

[523]	train-merror:0.33098	test-merror:0.39384	train-POINTS:30404.00000	test-POINTS:7045.00000
[524]	train-merror:0.33075	test-merror:0.39366	train-POINTS:30415.00000	test-POINTS:7046.00000
[525]	train-merror:0.33061	test-merror:0.39421	train-POINTS:30424.00000	test-POINTS:7040.00000
[526]	train-merror:0.33075	test-merror:0.39421	train-POINTS:30419.00000	test-POINTS:7040.00000
[527]	train-merror:0.33057	test-merror:0.39403	train-POINTS:30423.00000	test-POINTS:7041.00000
[528]	train-merror:0.33038	test-merror:0.39421	train-POINTS:30430.00000	test-POINTS:7041.00000
[529]	train-merror:0.33052	test-merror:0.39403	train-POINTS:30426.00000	test-POINTS:7043.00000
[530]	train-merror:0.33025	test-merror:0.39421	train-POINTS:30435.00000	test-POINTS:7040.00000
[531]	train-merror:0.33011	test-merror:0.39421	train-POINTS:30442.00000	test-POINTS:7039.00000
[532]	train-merror:0.33015	test-merror:0.39421	train-POINTS:30443.00000	test-POINTS:7039.00000
[533]	train-merror:0.33002	test-merror:0.39440	tra

[610]	train-merror:0.32522	test-merror:0.39163	train-POINTS:30646.00000	test-POINTS:7070.00000
[611]	train-merror:0.32522	test-merror:0.39200	train-POINTS:30644.00000	test-POINTS:7067.00000
[612]	train-merror:0.32509	test-merror:0.39219	train-POINTS:30651.00000	test-POINTS:7066.00000
[613]	train-merror:0.32541	test-merror:0.39182	train-POINTS:30637.00000	test-POINTS:7069.00000
[614]	train-merror:0.32509	test-merror:0.39200	train-POINTS:30647.00000	test-POINTS:7067.00000
[615]	train-merror:0.32504	test-merror:0.39219	train-POINTS:30651.00000	test-POINTS:7065.00000
[616]	train-merror:0.32490	test-merror:0.39163	train-POINTS:30656.00000	test-POINTS:7070.00000
[617]	train-merror:0.32518	test-merror:0.39182	train-POINTS:30647.00000	test-POINTS:7068.00000
[618]	train-merror:0.32522	test-merror:0.39145	train-POINTS:30646.00000	test-POINTS:7073.00000
[619]	train-merror:0.32490	test-merror:0.39182	train-POINTS:30658.00000	test-POINTS:7069.00000
[620]	train-merror:0.32486	test-merror:0.39163	tra

[697]	train-merror:0.32053	test-merror:0.39126	train-POINTS:30825.00000	test-POINTS:7069.00000
[698]	train-merror:0.32020	test-merror:0.39182	train-POINTS:30840.00000	test-POINTS:7063.00000
[699]	train-merror:0.32025	test-merror:0.39163	train-POINTS:30840.00000	test-POINTS:7065.00000
[700]	train-merror:0.32025	test-merror:0.39145	train-POINTS:30836.00000	test-POINTS:7066.00000
[701]	train-merror:0.32020	test-merror:0.39145	train-POINTS:30838.00000	test-POINTS:7067.00000
[702]	train-merror:0.32011	test-merror:0.39163	train-POINTS:30842.00000	test-POINTS:7065.00000
[703]	train-merror:0.32016	test-merror:0.39200	train-POINTS:30836.00000	test-POINTS:7061.00000
[704]	train-merror:0.32034	test-merror:0.39219	train-POINTS:30829.00000	test-POINTS:7059.00000
[705]	train-merror:0.32025	test-merror:0.39200	train-POINTS:30832.00000	test-POINTS:7061.00000
[706]	train-merror:0.32020	test-merror:0.39219	train-POINTS:30837.00000	test-POINTS:7059.00000
[707]	train-merror:0.32016	test-merror:0.39219	tra

[784]	train-merror:0.31578	test-merror:0.39237	train-POINTS:31002.00000	test-POINTS:7061.00000
[785]	train-merror:0.31564	test-merror:0.39255	train-POINTS:31006.00000	test-POINTS:7060.00000
[786]	train-merror:0.31550	test-merror:0.39255	train-POINTS:31010.00000	test-POINTS:7059.00000
[787]	train-merror:0.31541	test-merror:0.39255	train-POINTS:31013.00000	test-POINTS:7059.00000
[788]	train-merror:0.31541	test-merror:0.39219	train-POINTS:31016.00000	test-POINTS:7062.00000
[789]	train-merror:0.31523	test-merror:0.39219	train-POINTS:31023.00000	test-POINTS:7062.00000
[790]	train-merror:0.31537	test-merror:0.39237	train-POINTS:31016.00000	test-POINTS:7060.00000
[791]	train-merror:0.31550	test-merror:0.39219	train-POINTS:31012.00000	test-POINTS:7063.00000
[792]	train-merror:0.31569	test-merror:0.39219	train-POINTS:31004.00000	test-POINTS:7063.00000
[793]	train-merror:0.31573	test-merror:0.39219	train-POINTS:31002.00000	test-POINTS:7062.00000
[794]	train-merror:0.31560	test-merror:0.39219	tra

[871]	train-merror:0.31034	test-merror:0.39108	train-POINTS:31219.00000	test-POINTS:7075.00000
[872]	train-merror:0.31039	test-merror:0.39126	train-POINTS:31214.00000	test-POINTS:7074.00000
[873]	train-merror:0.31025	test-merror:0.39108	train-POINTS:31221.00000	test-POINTS:7076.00000
[874]	train-merror:0.31039	test-merror:0.39108	train-POINTS:31215.00000	test-POINTS:7077.00000
[875]	train-merror:0.31016	test-merror:0.39053	train-POINTS:31223.00000	test-POINTS:7083.00000
[876]	train-merror:0.30998	test-merror:0.39053	train-POINTS:31231.00000	test-POINTS:7081.00000
[877]	train-merror:0.30988	test-merror:0.38997	train-POINTS:31234.00000	test-POINTS:7088.00000
[878]	train-merror:0.30988	test-merror:0.39053	train-POINTS:31235.00000	test-POINTS:7082.00000
[879]	train-merror:0.31030	test-merror:0.39034	train-POINTS:31223.00000	test-POINTS:7084.00000
[880]	train-merror:0.31002	test-merror:0.39034	train-POINTS:31228.00000	test-POINTS:7084.00000
[881]	train-merror:0.30998	test-merror:0.39034	tra

[958]	train-merror:0.30564	test-merror:0.39053	train-POINTS:31390.00000	test-POINTS:7080.00000
[959]	train-merror:0.30564	test-merror:0.39053	train-POINTS:31390.00000	test-POINTS:7080.00000
[960]	train-merror:0.30578	test-merror:0.39053	train-POINTS:31382.00000	test-POINTS:7080.00000
[961]	train-merror:0.30569	test-merror:0.39034	train-POINTS:31385.00000	test-POINTS:7082.00000
[962]	train-merror:0.30560	test-merror:0.39016	train-POINTS:31391.00000	test-POINTS:7083.00000
[963]	train-merror:0.30564	test-merror:0.38979	train-POINTS:31387.00000	test-POINTS:7087.00000
[964]	train-merror:0.30555	test-merror:0.38961	train-POINTS:31389.00000	test-POINTS:7088.00000
[965]	train-merror:0.30555	test-merror:0.38997	train-POINTS:31391.00000	test-POINTS:7085.00000
[966]	train-merror:0.30523	test-merror:0.38979	train-POINTS:31405.00000	test-POINTS:7087.00000
[967]	train-merror:0.30505	test-merror:0.38997	train-POINTS:31410.00000	test-POINTS:7087.00000
[968]	train-merror:0.30486	test-merror:0.38942	tra

[1044]	train-merror:0.29970	test-merror:0.38997	train-POINTS:31624.00000	test-POINTS:7088.00000
[1045]	train-merror:0.29975	test-merror:0.38961	train-POINTS:31623.00000	test-POINTS:7092.00000
[1046]	train-merror:0.29984	test-merror:0.38961	train-POINTS:31619.00000	test-POINTS:7092.00000
[1047]	train-merror:0.29947	test-merror:0.38924	train-POINTS:31631.00000	test-POINTS:7095.00000
[1048]	train-merror:0.29952	test-merror:0.38924	train-POINTS:31628.00000	test-POINTS:7095.00000
[1049]	train-merror:0.29915	test-merror:0.38905	train-POINTS:31641.00000	test-POINTS:7096.00000
[1050]	train-merror:0.29906	test-merror:0.38905	train-POINTS:31647.00000	test-POINTS:7096.00000
[1051]	train-merror:0.29896	test-merror:0.38887	train-POINTS:31652.00000	test-POINTS:7098.00000
[1052]	train-merror:0.29892	test-merror:0.38961	train-POINTS:31653.00000	test-POINTS:7090.00000
[1053]	train-merror:0.29887	test-merror:0.38961	train-POINTS:31653.00000	test-POINTS:7090.00000
[1054]	train-merror:0.29896	test-merror:

[1130]	train-merror:0.29330	test-merror:0.39016	train-POINTS:31861.00000	test-POINTS:7083.00000
[1131]	train-merror:0.29311	test-merror:0.39016	train-POINTS:31866.00000	test-POINTS:7084.00000
[1132]	train-merror:0.29307	test-merror:0.38979	train-POINTS:31869.00000	test-POINTS:7086.00000
[1133]	train-merror:0.29293	test-merror:0.38997	train-POINTS:31875.00000	test-POINTS:7085.00000
[1134]	train-merror:0.29288	test-merror:0.38961	train-POINTS:31874.00000	test-POINTS:7089.00000
[1135]	train-merror:0.29265	test-merror:0.38997	train-POINTS:31881.00000	test-POINTS:7086.00000
[1136]	train-merror:0.29270	test-merror:0.38942	train-POINTS:31881.00000	test-POINTS:7092.00000
[1137]	train-merror:0.29265	test-merror:0.38942	train-POINTS:31884.00000	test-POINTS:7092.00000
[1138]	train-merror:0.29251	test-merror:0.38979	train-POINTS:31888.00000	test-POINTS:7088.00000
[1139]	train-merror:0.29242	test-merror:0.38961	train-POINTS:31891.00000	test-POINTS:7091.00000
[1140]	train-merror:0.29256	test-merror:

[1216]	train-merror:0.28648	test-merror:0.38813	train-POINTS:32119.00000	test-POINTS:7105.00000
[1217]	train-merror:0.28639	test-merror:0.38868	train-POINTS:32127.00000	test-POINTS:7100.00000
[1218]	train-merror:0.28615	test-merror:0.38868	train-POINTS:32131.00000	test-POINTS:7102.00000
[1219]	train-merror:0.28620	test-merror:0.38868	train-POINTS:32129.00000	test-POINTS:7101.00000
[1220]	train-merror:0.28606	test-merror:0.38868	train-POINTS:32135.00000	test-POINTS:7101.00000
[1221]	train-merror:0.28592	test-merror:0.38887	train-POINTS:32139.00000	test-POINTS:7100.00000
[1222]	train-merror:0.28634	test-merror:0.38924	train-POINTS:32123.00000	test-POINTS:7096.00000
[1223]	train-merror:0.28606	test-merror:0.38850	train-POINTS:32134.00000	test-POINTS:7103.00000
[1224]	train-merror:0.28583	test-merror:0.38850	train-POINTS:32143.00000	test-POINTS:7104.00000
[1225]	train-merror:0.28569	test-merror:0.38868	train-POINTS:32149.00000	test-POINTS:7100.00000
[1226]	train-merror:0.28583	test-merror:

[1302]	train-merror:0.28086	test-merror:0.38850	train-POINTS:32327.00000	test-POINTS:7101.00000
[1303]	train-merror:0.28081	test-merror:0.38850	train-POINTS:32329.00000	test-POINTS:7100.00000
[1304]	train-merror:0.28076	test-merror:0.38832	train-POINTS:32331.00000	test-POINTS:7101.00000
[1305]	train-merror:0.28049	test-merror:0.38813	train-POINTS:32343.00000	test-POINTS:7102.00000
[1306]	train-merror:0.28044	test-merror:0.38795	train-POINTS:32346.00000	test-POINTS:7103.00000
[1307]	train-merror:0.28044	test-merror:0.38813	train-POINTS:32345.00000	test-POINTS:7103.00000
[1308]	train-merror:0.28017	test-merror:0.38832	train-POINTS:32355.00000	test-POINTS:7101.00000
[1309]	train-merror:0.28017	test-merror:0.38868	train-POINTS:32357.00000	test-POINTS:7098.00000
[1310]	train-merror:0.27994	test-merror:0.38868	train-POINTS:32365.00000	test-POINTS:7098.00000
[1311]	train-merror:0.27984	test-merror:0.38868	train-POINTS:32370.00000	test-POINTS:7098.00000
[1312]	train-merror:0.27998	test-merror:

[1388]	train-merror:0.27519	test-merror:0.38832	train-POINTS:32542.00000	test-POINTS:7102.00000
[1389]	train-merror:0.27547	test-merror:0.38832	train-POINTS:32530.00000	test-POINTS:7103.00000
[1390]	train-merror:0.27542	test-merror:0.38850	train-POINTS:32529.00000	test-POINTS:7103.00000
[1391]	train-merror:0.27528	test-merror:0.38813	train-POINTS:32535.00000	test-POINTS:7107.00000
[1392]	train-merror:0.27533	test-merror:0.38868	train-POINTS:32534.00000	test-POINTS:7102.00000
[1393]	train-merror:0.27514	test-merror:0.38905	train-POINTS:32542.00000	test-POINTS:7098.00000
[1394]	train-merror:0.27533	test-merror:0.38887	train-POINTS:32532.00000	test-POINTS:7099.00000
[1395]	train-merror:0.27510	test-merror:0.38887	train-POINTS:32545.00000	test-POINTS:7099.00000
[1396]	train-merror:0.27496	test-merror:0.38868	train-POINTS:32548.00000	test-POINTS:7101.00000
[1397]	train-merror:0.27473	test-merror:0.38905	train-POINTS:32561.00000	test-POINTS:7098.00000
[1398]	train-merror:0.27450	test-merror:

In [69]:
pred = bst.predict( xg_test )

NameError: name 'xg_test' is not defined