In [1]:

import warnings
warnings.filterwarnings('ignore')
# load libraries
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import matplotlib.font_manager as fm
import os
plt.rcParams['font.sans-serif'] = ['Heiti TC'] # to show the chinese characters
from sklearn.linear_model import LogisticRegression
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler
from sklearn.compose import ColumnTransformer
from sklearn.preprocessing import OneHotEncoder
from sklearn.impute import SimpleImputer
from sklearn.metrics import classification_report
from sklearn.metrics import confusion_matrix
from sklearn.metrics import accuracy_score
from sklearn.metrics import roc_auc_score
from sklearn.metrics import precision_score, recall_score, f1_score

from lightfm import LightFM
from lightfm.data import Dataset
from lightfm.evaluation import precision_at_k, auc_score

# display settings
pd.set_option('display.max_columns', None)
pd.set_option('display.max_rows', None)
# removing warnings


In [11]:
# load the train data # save to csv
train_data = pd.read_csv("train_data.csv")
train_data.head()
# load train user data
train_users = pd.read_csv("train_users.csv")
#load train item data
train_items = pd.read_csv("train_items.csv")
# load interaction data
train_interaction = pd.read_csv("train_interactions_filter.csv")


# load train_items_features
train_item_features = pd.read_csv("train_items_feature.csv")
# load user profile
user_profile = pd.read_csv("user_profile.csv")


In [12]:
train_item_features.columns


Index(['item_ID', 'item_name', 'brand', 'channel', 'size', 'gender',
       'class_map', 'price_range', 'style_50', 'color_50', 'unit_price',
       'discount', 'sale_price'],
      dtype='object')

In [13]:
# need drop columns that are not important for computation issues.
train_item_features = train_item_features.drop(columns=['item_name'], axis=1)

In [6]:
user_profile.dtypes, train_items.dtypes

(user_id                          int64
 num_items_bought                 int64
 num_unique_items_bought          int64
 num_unique_sizes_bought          int64
 num_unique_class_map_bought      int64
 most_like_size                  object
 most_like_class_map             object
 most_like_price_range          float64
 most_like_style_50              object
 most_like_color_50              object
 avg_unit_price                 float64
 avg_discount                   float64
 avg_sale_price                 float64
 dtype: object,
 item_ID                 int64
 item_name              object
 brand                  object
 channel                 int64
 unit_price            float64
 category               object
 size                   object
 color                  object
 discount              float64
 gender                 object
 class_map              object
 bk_sku_id             float64
 category.1             object
 style                  object
 brand_location_map     object

In [14]:
train_interaction.head()

Unnamed: 0,user_id,item_ID,order_time
0,209227,1595587,2024-03-14
1,243637,1639001,2024-03-14
2,265503,1675222,2024-03-14
3,183614,1662640,2024-03-14
4,188753,1662540,2024-03-14


In [15]:
# check item_ID 140793 is in the train_items
train_items[train_items['item_ID'] == 1409730]

Unnamed: 0,item_ID,item_name,brand,channel,unit_price,category,size,color,discount,gender,class_map,bk_sku_id,category.1,style,brand_location_map,materials_map,sex,country_size,country_in_stock,tag,price_range,country,style_50,color_50
8150,1409730,MMI赠品,MR&MRS ITALY,30,0.0,手拿包,,,,未知,箱包,,,,,,,,,豪华&现代&都市,,,奢华,


In [16]:
# drop the item_ID 1409730
train_interaction = train_interaction[train_interaction['item_ID'] != 1409730]

In [17]:
# check if user_id of 88405, 146365, 221475] are in the user_profile
user_profile[user_profile['user_id'].isin([88405, 146365, 221475])]

Unnamed: 0,user_id,num_items_bought,num_unique_items_bought,num_unique_sizes_bought,num_unique_class_map_bought,most_like_size,most_like_class_map,most_like_price_range,most_like_style_50,most_like_color_50,avg_unit_price,avg_discount,avg_sale_price


In [18]:
# check user_id 138716 in the user_profile and train_users
train_users[train_users['user_id'].isin([138716])]
# user_profile[user_profile['user_id'].isin([138716])]

Unnamed: 0,user_id
131486,138716


In [19]:
# check if user_id of 88405, 146365, 221475] are in the train_users
train_users[train_users['user_id'].isin([88405, 146365, 221475])]

Unnamed: 0,user_id
83803,88405
138716,146365
209906,221475


In [20]:
# drop item_ID 1409730 from train_interaction
train_interaction = train_interaction[train_interaction['item_ID'] != 1409730]

In [21]:
# drop the user_id of 88405, 146365, 221475] from the train_users
train_users = train_users[~train_users['user_id'].isin([88405, 146365, 221475])]

In [22]:
# check if item_id of 88405, 146365, 221475] are in the train_interaction
train_interaction[train_interaction['user_id'].isin([88405, 146365, 221475])].shape

(28667, 3)

In [23]:
# drop the rows with user_id of 88405, 146365, 221475] in the train_interaction
train_interaction = train_interaction[~train_interaction['user_id'].isin([88405, 146365, 221475])]

In [24]:
pos_samples = train_interaction.drop(["order_time", ], axis=1)

In [25]:
pos_samples["label"] = 1

In [26]:
import random

# Assuming pos_samples is already defined
pos_num = len(pos_samples)
random.seed(2024)  # set random seed

neg_samples = {"user_id": [], "item_ID": [], "label": []}
for i in range(pos_num):
    uid = random.choice(train_users["user_id"].values)
    neg_samples["user_id"].append(uid)

    iid = random.choice(train_items["item_ID"].values)
    neg_samples["item_ID"].append(iid)

    # Add label for negative samples
    neg_samples["label"].append(0)

neg_samples = pd.DataFrame(neg_samples)

# Assuming pos_samples also has a 'label' column
pos_samples["label"] = 1

# Combine pos_samples and neg_samples to a new train interaction data
train_interaction = pd.concat([pos_samples, neg_samples])
train_interaction.head(), train_interaction.shape

(   user_id  item_ID  label
 0   209227  1595587      1
 1   243637  1639001      1
 2   265503  1675222      1
 3   183614  1662640      1
 4   188753  1662540      1,
 (1169676, 3))

In [27]:
# check item_ID 1409730 is in the train_interaction
train_interaction[train_interaction['item_ID'] == 1409730]

Unnamed: 0,user_id,item_ID,label
118345,240385,1409730,0
123949,24509,1409730,0
398540,37002,1409730,0
459157,194965,1409730,0
485974,189207,1409730,0
517590,49039,1409730,0
540557,245443,1409730,0


In [28]:
# drop the item_ID 1409730 from the train_interaction
train_interaction = train_interaction[train_interaction['item_ID'] != 1409730]


In [29]:


# Drop the user_id column before encoding
user_ids = user_profile['user_id']
user_profile = user_profile.drop(columns=['user_id'])

# Identify numerical and categorical features
numerical_features = user_profile.select_dtypes(include=['int64', 'float64']).columns
categorical_features = user_profile.select_dtypes(include=['object']).columns

# Pipeline for numerical features
numerical_pipeline = Pipeline([
    ('scaler', StandardScaler())
])

# Pipeline for categorical features
categorical_pipeline = Pipeline([
    ('encoder', OneHotEncoder(handle_unknown='ignore'))
])

# Full pipeline
preprocessor = ColumnTransformer([
    ('num', numerical_pipeline, numerical_features),
    ('cat', categorical_pipeline, categorical_features)
])

# Apply the transformations to the user profile data
user_profile_processed = preprocessor.fit_transform(user_profile)

# Convert the processed data to a dense format
user_profile_processed_dense = user_profile_processed.toarray()

# Get feature names for the encoded categorical features
try:
    cat_feature_names = preprocessor.named_transformers_['cat']['encoder'].get_feature_names_out(categorical_features)
except AttributeError:
    cat_feature_names = preprocessor.named_transformers_['cat']['encoder'].get_feature_names(categorical_features)

# Convert the processed data back to a DataFrame
user_profile_encoded = pd.DataFrame(user_profile_processed_dense, columns=numerical_features.tolist() + cat_feature_names.tolist())

# Add the user_id column back to the DataFrame
user_profile_encoded['user_id'] = user_ids.values

# Display the first few rows of the encoded DataFrame
user_profile_encoded.head()

Unnamed: 0,num_items_bought,num_unique_items_bought,num_unique_sizes_bought,num_unique_class_map_bought,most_like_price_range,avg_unit_price,avg_discount,avg_sale_price,most_like_size_0,most_like_size_00,most_like_size_000,most_like_size_001,most_like_size_002,most_like_size_00F,most_like_size_02,most_like_size_035,most_like_size_036,most_like_size_037,most_like_size_038,most_like_size_039,most_like_size_04,most_like_size_042,most_like_size_044,most_like_size_046,most_like_size_048,most_like_size_050,most_like_size_052,most_like_size_06,most_like_size_08,most_like_size_08A,most_like_size_1,most_like_size_10,most_like_size_10 EEE,most_like_size_10*1*7cm,most_like_size_10*1*8cm,most_like_size_10*1*9cm,most_like_size_10*2*9.5cm,most_like_size_10.5,most_like_size_10.5*1*8.5cm,most_like_size_10A,most_like_size_10A/10岁/140cm,most_like_size_10cm*1cm,most_like_size_11,most_like_size_11*1.5*9.5cm,most_like_size_11*2*10cm,most_like_size_11*2*9,most_like_size_11*2*9.5cm,most_like_size_11*2*9cm,most_like_size_11*3*10cm,most_like_size_11.5*2*9.5cm,most_like_size_11.5*9.5*1.5cm,most_like_size_110*95*20mm,most_like_size_11cm*1cm,most_like_size_11cm*2cm,most_like_size_12,most_like_size_12*2.5*9cm,most_like_size_12A,most_like_size_12A/12岁/152cm,most_like_size_13*2.5*8.5cm,most_like_size_13.5*2.5*9.5cm,most_like_size_14,most_like_size_14A,most_like_size_14A/14岁/164cm,most_like_size_15*2*10cm,most_like_size_16,most_like_size_165/88A(S),most_like_size_16A,most_like_size_170/92A,most_like_size_170/92A(M),most_like_size_175/82A,most_like_size_180/100A,most_like_size_180/86A,most_like_size_185/104A,most_like_size_185/90A,most_like_size_19,most_like_size_190/108A,most_like_size_190/94A,most_like_size_19cm*2cm,most_like_size_1XL,most_like_size_1（160/100）,most_like_size_1（160/66）,most_like_size_1（160/68）,most_like_size_1（160/84）,most_like_size_1（165/100）,most_like_size_1（165/86）,most_like_size_1（S）,most_like_size_2,most_like_size_2\n155/64A,most_like_size_2\n155/80A,most_like_size_2(35),most_like_size_2.5,most_like_size_2.5(35.5),most_like_size_20,most_like_size_22,most_like_size_23,most_like_size_24,most_like_size_24W/32L,most_like_size_24W/34,most_like_size_25,most_like_size_25W,most_like_size_25W/32,most_like_size_25W/32L,most_like_size_25W/34,most_like_size_26,most_like_size_26W,most_like_size_26W/32,most_like_size_26W/32L,most_like_size_26W/34,most_like_size_27,most_like_size_27W,most_like_size_27W/32,most_like_size_27W/32L,most_like_size_27W/34,most_like_size_28,most_like_size_28W/32,most_like_size_28W/32L,most_like_size_29,most_like_size_29/32,most_like_size_2XL,most_like_size_2XS,most_like_size_2XS/XS,most_like_size_2（165/120）,most_like_size_2（165/70）,most_like_size_2（165/72）,most_like_size_2（165/88）,most_like_size_2（170/120）,most_like_size_2（170/90）,most_like_size_2（35）,most_like_size_2（M）,most_like_size_3,most_like_size_3(36),most_like_size_3+,most_like_size_3.5,most_like_size_3.5(36.5),most_like_size_30,most_like_size_30/32,most_like_size_30（170/76A）,most_like_size_31,most_like_size_31/32,most_like_size_32,most_like_size_33,most_like_size_34,most_like_size_34.5,most_like_size_34T,most_like_size_35,most_like_size_35.5,most_like_size_35A,most_like_size_35M,most_like_size_35cm*1cm,most_like_size_35（180/90A）,most_like_size_36,most_like_size_36+,most_like_size_36.5,most_like_size_36.5码,most_like_size_36/46,most_like_size_36A,most_like_size_36FR,most_like_size_36M,most_like_size_37,most_like_size_37 C,most_like_size_37+,most_like_size_37.5,most_like_size_37M,most_like_size_38,most_like_size_38 2/3,most_like_size_38 2/3(38.5),most_like_size_38 C,most_like_size_38+,most_like_size_38.5,most_like_size_38/48,most_like_size_38M,most_like_size_39,most_like_size_39 1/3(39),most_like_size_39 40,most_like_size_39.5,most_like_size_390,most_like_size_39M,most_like_size_39码/175/84A,most_like_size_3M,most_like_size_3XL,most_like_size_3（170/140）,most_like_size_3（170/74）,most_like_size_3（170/92）,most_like_size_3（175/140）,most_like_size_3（175/94）,most_like_size_3（36）,most_like_size_4,most_like_size_4\n160/84A,most_like_size_4(37),most_like_size_4+,most_like_size_4.5,most_like_size_4.5(37.5),most_like_size_40,most_like_size_40 2 3,most_like_size_40.5,most_like_size_40/50,most_like_size_400,most_like_size_40F,most_like_size_40M,most_like_size_40码/180/88A,most_like_size_41,most_like_size_41 1 3,most_like_size_41 1/3,most_like_size_41+,most_like_size_41.5,most_like_size_410,most_like_size_41M,most_like_size_41码/185/92A,most_like_size_42,most_like_size_42 2 3,most_like_size_42 2/3,most_like_size_42 2/3(42.5),most_like_size_42+,most_like_size_42.5,most_like_size_42/52,most_like_size_420,most_like_size_42M,most_like_size_43,most_like_size_43 1/3,most_like_size_43.5,most_like_size_430,most_like_size_43M,most_like_size_44,most_like_size_44/54,most_like_size_45,most_like_size_46,most_like_size_46（165/88A）,most_like_size_48,most_like_size_48码,most_like_size_48（170/92A）,most_like_size_4A/4岁/104cm,most_like_size_4XL,most_like_size_5,most_like_size_5 1/2,most_like_size_5 EEE,most_like_size_5+,most_like_size_5.5,most_like_size_5.5 C,most_like_size_5.5（39）,most_like_size_50,most_like_size_500（40）,most_like_size_50（175/96A）,most_like_size_52,most_like_size_52（180/100A）,most_like_size_54,most_like_size_54（185/104A）,most_like_size_56,most_like_size_6,most_like_size_6 1/2,most_like_size_6 C,most_like_size_6 EE,most_like_size_6 EEE,most_like_size_6(39),most_like_size_6(40),most_like_size_6.5,most_like_size_6.5 C,most_like_size_6.5 EEE,most_like_size_6.5(40.5),most_like_size_60,most_like_size_650（43）,most_like_size_6A/6岁/116cm,most_like_size_6M,most_like_size_6m,most_like_size_7,most_like_size_7 1/2,most_like_size_7 EE,most_like_size_7 EEE,most_like_size_7 M,most_like_size_7(41),most_like_size_7.5,most_like_size_7.5 D,most_like_size_7.5 EEE,most_like_size_7.5M,most_like_size_7A,most_like_size_7M,most_like_size_7（41）,most_like_size_8,most_like_size_8 1/2,most_like_size_8 EEE,most_like_size_8 M,most_like_size_8(42),most_like_size_8+,most_like_size_8.5,most_like_size_8.5(42.5),most_like_size_80,most_like_size_8M,most_like_size_8（42）,most_like_size_9,most_like_size_9 EEE,most_like_size_9 M,most_like_size_9*2*10.5cm,most_like_size_9+,most_like_size_9.5,most_like_size_9.5*1*7.5cm,most_like_size_9.5*11*2cm,most_like_size_9.5*11cm,most_like_size_9M,most_like_size_9（43）,most_like_size_A1 (S),most_like_size_A1 （S),most_like_size_A1(S),most_like_size_A1（S）,most_like_size_A2,most_like_size_A2 (M),most_like_size_A2 （M),most_like_size_A2(M),most_like_size_A2（M）,most_like_size_A3,most_like_size_A3 （L),most_like_size_A3(L),most_like_size_A3（L）,most_like_size_F,most_like_size_F/M均码,most_like_size_F/均码,most_like_size_L,most_like_size_L\n165/88A,most_like_size_L\n170/72A,most_like_size_L\n170/90A,most_like_size_L\n175/76A,most_like_size_L\n175/92A,most_like_size_L\n180/96A,most_like_size_L(165/72A),most_like_size_L/44,most_like_size_L/XL,most_like_size_L码,most_like_size_L码 170/84A,most_like_size_L码 175/95,most_like_size_L码 175/96A,most_like_size_L码 180/88A,most_like_size_L码/170/84A,most_like_size_L码/180/88A,most_like_size_L（175/82A）,most_like_size_M,most_like_size_M\n160/84A,most_like_size_M\n165/68A,most_like_size_M\n165/86A,most_like_size_M\n170/72A,most_like_size_M\n170/88A,most_like_size_M\n175/92A,most_like_size_M-L,most_like_size_M/10,most_like_size_M/42,most_like_size_M/L,most_like_size_ML,most_like_size_M码 165/80A,most_like_size_M码 165/80A.1,most_like_size_M码 170/92A,most_like_size_M码 170/95,most_like_size_M码 175/84A,most_like_size_M码/165/62A,most_like_size_M码/165/80A,most_like_size_M码/175/84A,most_like_size_M（160/84A）,most_like_size_S,most_like_size_S\n155/80A,most_like_size_S\n160/64A,most_like_size_S\n160/82A,most_like_size_S\n165/68A,most_like_size_S\n165/84A,most_like_size_S\n170/88A,most_like_size_S-M,most_like_size_S/40,most_like_size_S/8,most_like_size_S/M,most_like_size_SM,most_like_size_S码 160/76A,most_like_size_S码 165/88A,most_like_size_S码/160/76A,most_like_size_S码/170/80A,most_like_size_S（155/80A）,most_like_size_S（165/74A）,most_like_size_T 36,most_like_size_T 38,most_like_size_T 40,most_like_size_T 42,most_like_size_T2,most_like_size_T36,most_like_size_T38,most_like_size_T40,most_like_size_T42,most_like_size_TU,most_like_size_U,most_like_size_W31*L32,most_like_size_XL,most_like_size_XL\n175/94A,most_like_size_XL\n180/80A,most_like_size_XL\n180/96A,most_like_size_XL/2XL,most_like_size_XL码 175/100,most_like_size_XL码 180/100A,most_like_size_XL码 185/92A,most_like_size_XL码/185/92A,most_like_size_XL（180/86A）,most_like_size_XS,most_like_size_XS\n160/80A,most_like_size_XS-S,most_like_size_XS/38,most_like_size_XS/6,most_like_size_XS/S,most_like_size_XS码 165/76A,most_like_size_XS码/155/72A,most_like_size_XXL,most_like_size_XXL码/190/96A,most_like_size_XXS,most_like_size_XXS/4,most_like_size_XXS/XS,most_like_size_XXXL,most_like_size_XXXS,most_like_size_XXl 185/100A,most_like_size_m,most_like_size_s,most_like_size_xs,most_like_size_大：11*1*10\n小：10*1*7cm,most_like_size_宽11cm*高10cm*厚3cm,most_like_size_小,most_like_size_常规,most_like_size_欧洲码40,most_like_size_欧洲码42,most_like_class_map_上装,most_like_class_map_内衣,most_like_class_map_小孩,most_like_class_map_帽子,most_like_class_map_箱包,most_like_class_map_裙子,most_like_class_map_裤子,most_like_class_map_配饰,most_like_class_map_鞋靴,most_like_style_50_个性化,most_like_style_50_个性街头,most_like_style_50_休闲,most_like_style_50_优雅,most_like_style_50_优雅简约,most_like_style_50_儿童风,most_like_style_50_冬季风格,most_like_style_50_别致休闲,most_like_style_50_前卫,most_like_style_50_前卫个性,most_like_style_50_复古,most_like_style_50_奢华,most_like_style_50_实验性,most_like_style_50_户外风,most_like_style_50_极简,most_like_style_50_极简设计,most_like_style_50_波西米亚,most_like_style_50_海洋风,most_like_style_50_现代,most_like_style_50_现代艺术,most_like_style_50_甜美风格,most_like_style_50_田园风,most_like_style_50_简约,most_like_style_50_经典,most_like_style_50_自然主义,most_like_style_50_自然色调,most_like_style_50_艺术风格,most_like_style_50_街头艺术,most_like_style_50_街头风,most_like_style_50_运动休闲,most_like_style_50_运动潮流,most_like_style_50_都市时尚,most_like_style_50_高端时尚,most_like_style_50_高街风,most_like_color_50_others,most_like_color_50_几何系列,most_like_color_50_卡其色系列,most_like_color_50_咖啡系列,most_like_color_50_土黄色系列,most_like_color_50_复古风系列,most_like_color_50_多色系列,most_like_color_50_彩虹系列,most_like_color_50_无色系列,most_like_color_50_条纹系列,most_like_color_50_棕色系列,most_like_color_50_橙色系列,most_like_color_50_海洋系列,most_like_color_50_淡色系列,most_like_color_50_深蓝色系列,most_like_color_50_混合色系列,most_like_color_50_灰色系列,most_like_color_50_灰褐色系列,most_like_color_50_特殊颜色系列,most_like_color_50_白色系列,most_like_color_50_米白色,most_like_color_50_米色拼接系列,most_like_color_50_米色系列,most_like_color_50_粉色系列,most_like_color_50_紫色系列,most_like_color_50_红色系列,most_like_color_50_绿色系列,most_like_color_50_肤色系列,most_like_color_50_花色系列,most_like_color_50_蓝色系列,most_like_color_50_裸色系列,most_like_color_50_迷彩系列,most_like_color_50_迷彩色系列,most_like_color_50_透明系列,most_like_color_50_透明色系列,most_like_color_50_酒红色系列,most_like_color_50_金属色系列,most_like_color_50_金色系列,most_like_color_50_银色系列,most_like_color_50_青色系列,most_like_color_50_香槟色系列,most_like_color_50_高级灰系列,most_like_color_50_鲜艳色系列,most_like_color_50_黄色系列,most_like_color_50_黑色系列,user_id
0,0.273873,0.299104,0.897466,2.590741,-0.511307,0.038776,0.578098,0.149188,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1
1,99.061013,98.465596,34.283465,10.030719,-0.511307,-0.190379,-0.851726,-0.312725,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,2
2,3.395587,3.424053,7.698317,4.078737,2.832265,1.542674,-0.240295,0.981879,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,3
3,13.767733,14.738522,20.681762,8.542724,-0.511307,-0.257887,-1.185787,-0.400002,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,4
4,7.725706,8.273111,12.644391,7.054728,-0.511307,0.242354,-0.799994,-0.132029,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,5


In [19]:
train_item_features.head()

Unnamed: 0,item_ID,brand,channel,size,gender,class_map,price_range,style_50,color_50,unit_price,discount,sale_price
0,1001920,FENDI,others,39,女,鞋靴,5.0,奢华,others,299.0,0.059443,17.773559
1,1003191,others,15,U,女,箱包,2.0,优雅,红色系列,89.0,0.032014,2.849281
2,1003194,others,15,U,女,箱包,2.0,优雅,棕色系列,19.0,0.008085,0.153617
3,1003197,others,others,U,女,帽子,2.0,优雅,棕色系列,99.0,0.032673,3.234653
4,1003210,others,15,U,女,帽子,2.0,优雅,红色系列,29.0,0.030526,0.885263


In [30]:
# check item_ID 140793 in the train_items
train_items[train_items['item_ID'].isin([140793])]

Unnamed: 0,item_ID,item_name,brand,channel,unit_price,category,size,color,discount,gender,class_map,bk_sku_id,category.1,style,brand_location_map,materials_map,sex,country_size,country_in_stock,tag,price_range,country,style_50,color_50


In [31]:
# check item_ID 140793 in the interaction data
train_interaction[train_interaction['item_ID'].isin([140793])]

Unnamed: 0,user_id,item_ID,label


In [32]:
# apply the same pipeline to the train_items
# Drop the item_ID column before encoding
item_ids = train_item_features['item_ID']
train_item_features = train_item_features.drop(columns=['item_ID'])

# Identify numerical and categorical features
numerical_features = train_item_features.select_dtypes(include=['int64', 'float64']).columns
categorical_features = train_item_features.select_dtypes(include=['object']).columns

# Pipeline for numerical features
numerical_pipeline = Pipeline([
    ('scaler', StandardScaler())
])

# Pipeline for categorical features
categorical_pipeline = Pipeline([
    ('encoder', OneHotEncoder(handle_unknown='ignore'))
])

# Full pipeline
preprocessor = ColumnTransformer([
    ('num', numerical_pipeline, numerical_features),
    ('cat', categorical_pipeline, categorical_features)
])

# Apply the transformations to the item features data
train_item_features_processed = preprocessor.fit_transform(train_item_features)

# Convert the processed data to a dense format
train_item_features_processed_dense = train_item_features_processed.toarray()

# Get feature names for the encoded categorical features
try:
    cat_feature_names = preprocessor.named_transformers_['cat']['encoder'].get_feature_names_out(categorical_features)
except AttributeError:
    cat_feature_names = preprocessor.named_transformers_['cat']['encoder'].get_feature_names(categorical_features)

# Convert the processed data back to a DataFrame
train_item_features_encoded = pd.DataFrame(train_item_features_processed_dense, columns=numerical_features.tolist() + cat_feature_names.tolist())

# Add the item_ID column back to the DataFrame
train_item_features_encoded['item_ID'] = item_ids.values

# Display the first few rows of the encoded DataFrame
train_item_features_encoded.head()






Unnamed: 0,price_range,unit_price,discount,sale_price,brand_13DE MARZO,brand_ACLER,brand_ALEXANDER MCQUEEN,brand_ALEXIA SANDRA,brand_ALL COMES FROM NOTHING,brand_AMI ALEXANDRE MATTIUSSI,brand_ANN ANDELMAN,brand_AQUASCUTUM,brand_ARMANI,brand_ARMANI EXCHANGE,brand_BALLY,brand_BURBERRY,brand_BY FAR,brand_C/MEO,brand_CALVIN KLEIN,brand_CAMPER,brand_CANADA GOOSE,brand_CELINE,brand_CHAMPION,brand_COACH,brand_CREAZIONI,brand_DIOR,brand_EMPORIO ARMANI,brand_ETRE CECILE,brand_FENDI,brand_FINDERS KEEPERS,brand_FLONAKED,brand_GANNI,brand_GOLDEN GOOSE,brand_GUCCI,brand_HEREU,brand_HERLIAN,brand_HOLZWEILER,brand_HUGO BOSS,brand_IMMI,brand_INSPIRE&STORIES,brand_JIMMY CHOO,brand_KEEPSAKE,brand_KENZO,brand_LOEWE,brand_LONGCHAMP,brand_LOVE MOSCHINO,brand_MAISON KITSUNE,brand_MARNI,brand_MAX MARA,brand_MCM,brand_MCQ,brand_MICHAEL KORS,brand_MM6 MAISON MARGIELA,brand_MOSCHINO,brand_MR&MRS ITALY,brand_MSGM,brand_NEW ERA,brand_OFF WHITE,brand_PINKO,brand_PRADA,brand_PS PAUL SMITH,brand_RECTO,brand_RED VALENTINO,brand_REPRESENT,brand_ROCHA ROMA,brand_SALVATORE FERRAGAMO,brand_SIGNIFICANT OTHER,brand_SOFITTE,brand_STELLA MCCARTNEY,brand_STUART WEITZMAN,brand_SUNCOO,brand_THE FIFTH,brand_THE LAST REDEMPTION,brand_THOM BROWNE,brand_THREE QUARTERS,brand_TORY BURCH,brand_VEJA,brand_VERSACE,brand_VERSACE JEANS COUTURE,brand_VETEMENTS,brand_VIKTORIA CHAN,brand_WE11 DONE,brand_WGMX,brand_XANDRA,brand_ZEGNA,brand_others,brand_简白,channel_15,channel_2,channel_54,channel_others,size_0,size_0-3M,size_00,size_000,size_001,size_002,size_00F,size_02,size_034,size_035,size_036,size_037,size_038,size_039,size_04,size_040,size_041,size_042,size_043,size_044,size_046,size_048,size_050,size_052,size_06,size_06A,size_08,size_08A,size_0M,size_0（XS),size_1,size_1-3M,size_10,size_10 EEE,size_10*1*7cm,size_10*1*8cm,size_10*1*9cm,size_10*2*9.5cm,size_10*37cm,size_10+,size_10.5,size_10.5*1*8.5cm,size_104,size_104cm,size_107cm,size_10A,size_10A/10岁/140cm,size_10M,size_10Y,size_10cm*1cm,size_11,size_11*1.5*9.5cm,size_11*2*10cm,size_11*2*9,size_11*2*9.5cm,size_11*2*9cm,size_11*3*10cm,size_11.5*2*9.5cm,size_11.5*9.5*1.5cm,size_110,size_110*95*20mm,size_116,size_116cm,size_11A,size_11cm*1cm,size_11cm*2cm,size_12,size_12*2.5*9cm,size_12+,size_12-18m,size_12-24M,size_120,size_122,size_122cm,size_128cm,size_12A,size_12A/12岁/152cm,size_12M,size_12Y,size_12m,size_13*2.5*8.5cm,size_13.5*2.5*9.5cm,size_130,size_133cm,size_14,size_140cm,size_146cm,size_14A,size_14A/14岁/164cm,size_14A/164cm,size_15*2*10cm,size_15A,size_15M,size_16,size_162cm,size_165/88A(S),size_16A,size_16M,size_17,size_170/78A,size_170/92A,size_170/92A(M),size_175/82A,size_175/96A,size_18,size_18-24M,size_18-24m,size_180/100A,size_180/86A,size_185/104A,size_185/90A,size_18M,size_18m,size_19,size_190/108A,size_190/94A,size_19cm*2cm,size_1XL,size_1码,size_1（160/100）,size_1（160/66）,size_1（160/68）,size_1（160/84）,size_1（165/100）,size_1（165/86）,size_1（S）,size_2,size_2\n155/64A,size_2\n155/80A,size_2(35),size_2.5,size_2.5(35.5),size_20,size_21,size_22,size_23,size_24,size_24M,size_24W/32L,size_24W/34,size_24m,size_25,size_25W,size_25W/32,size_25W/32L,size_25W/34,size_26,size_26W,size_26W/32,size_26W/32L,size_26W/34,size_27,size_27W,size_27W/32,size_27W/32L,size_27W/34,size_28,size_28W/32,size_28W/32L,size_29,size_29/32,size_2A,size_2T,size_2XL,size_2XS,size_2XS/XS,size_2Y,size_2（165/120）,size_2（165/70）,size_2（165/72）,size_2（165/88）,size_2（170/120）,size_2（170/90）,size_2（35）,size_2（M）,size_3,size_3(36),size_3+,size_3-6M,size_3-6m,size_3.5,size_3.5(36.5),size_3/6M,size_30,size_30/32,size_30（170/76A）,size_31,size_31/32,size_31（170/78A）,size_32,size_32T,size_33,size_33/32,size_34,size_34+C,size_34.5,size_34/32,size_34T,size_34（175/86A）,size_35,size_35+,size_35.5,size_350,size_35A,size_35M,size_35cm*1cm,size_35（180/90A）,size_36,size_36 C,size_36+,size_36.5,size_36.5码,size_36/46,size_36A,size_36FR,size_36M,size_36T,size_37,size_37 1/3(37),size_37 C,size_37(欧码37),size_37+,size_37.5,size_37A,size_37M,size_38,size_38 2/3,size_38 2/3(38.5),size_38 C,size_38+,size_38+C,size_38.5,size_38/48,size_38A,size_38M,size_38码,size_39,size_39 1 3,size_39 1/3(39),size_39 40,size_39 C,size_39.5,size_390,size_39A,size_39M,size_39码/175/84A,size_3A,size_3A/3岁/98cm,size_3M,size_3XL,size_3Y,size_3m,size_3（170/140）,size_3（170/74）,size_3（170/76）,size_3（170/92）,size_3（175/140）,size_3（175/94）,size_3（36）,size_3（L）,size_4,size_4\n160/68A,size_4\n160/84A,size_4(37),size_4+,size_4.5,size_4.5(37.5),size_40,size_40 2 3,size_40 2/3,size_40 2/3(40.5),size_40+,size_40.5,size_40/50,size_400,size_40F,size_40M,size_40码,size_40码 180/88A,size_40码/180/88A,size_41,size_41 1 3,size_41 1/3,size_41 1/3(41),size_41 42,size_41#,size_41+,size_41.5,size_410,size_41M,size_41码/185/92A,size_42,size_42 2 3,size_42 2/3,size_42 2/3(42.5),size_42#,size_42+,size_42.5,size_42/52,size_420,size_42M,size_43,size_43 1 3,size_43 1/3,size_43.5,size_430,size_43M,size_44,size_44/54,size_440,size_44M,size_45,size_450（39）,size_45cm,size_46,size_46（165/88A）,size_48,size_48 175/96A,size_48码,size_48（170/92A）,size_4A,size_4A/4岁/104cm,size_4XL,size_4Y,size_4y,size_4（XL）,size_5,size_5 1/2,size_5 B,size_5 C,size_5 EE,size_5 EEE,size_5(38),size_5(39),size_5+,size_5.5,size_5.5 C,size_5.5 EEE,size_5.5（39）,size_50,size_50 180/100A,size_500（40）,size_50mm*14mm*115mm,size_50码,size_50（175/96A）,size_52,size_52 180/104A,size_52码,size_52（180/100A）,size_54,size_54 185/108A,size_54cm,size_54（185/104A）,size_56,size_56mm*14mm*120mm,size_56（190/108A）,size_5A,size_5M,size_5（38）,size_5（XXL）,size_6,size_6\n165/72A,size_6\n165/88A,size_6 1/2,size_6 C,size_6 EE,size_6 EEE,size_6(39),size_6(40),size_6+,size_6-9M,size_6-9m,size_6.5,size_6.5 C,size_6.5 EEE,size_6.5(40.5),size_60,size_60cm,size_62,size_62cm,size_65,size_650（43）,size_65cm,size_67cm,size_68,size_68cm,size_6A,size_6A/6岁/116cm,size_6M,size_6Y,size_6m,size_6y,size_6½,size_6（39）,size_7,size_7 1/2,size_7 C,size_7 EE,size_7 EEE,size_7 M,size_7(41),size_7+,size_7.5,size_7.5 C,size_7.5 D,size_7.5 EEE,size_7.5(41.5),size_7.5M,size_74,size_74cm,size_76cm,size_7A,size_7M,size_7（41）,size_8,size_8 1/2,size_8 EEE,size_8 M,size_8(42),size_8(M),size_8+,size_8-12A,size_8.5,size_8.5 EEE,size_8.5(42.5),size_8/,size_80,size_80cm,size_86cm,size_8A,size_8A/8岁/128cm,size_8M,size_8Y,size_8（42）,size_9,size_9 EEE,size_9 M,size_9(43),size_9*2*10.5cm,size_9+,size_9-12M,size_9-12m,size_9.5,size_9.5*1*7.5cm,size_9.5*11*2cm,size_9.5*11cm,size_92cm,size_98cm,size_9A,size_9M,size_9m,size_9（43）,size_A1 (S),size_A1 （S),size_A1(S),size_A1（S）,size_A2,size_A2 (M),size_A2 （M),size_A2(M),size_A2（M）,size_A3,size_A3 (L),size_A3 （L),size_A3(L),size_A3（L）,size_AF(均码）,size_AF（均码）,size_F,size_F/M均码,size_F/均码,size_II,size_III,size_L,size_L\n165/88A,size_L\n170/72A,size_L\n170/90A,size_L\n175/76A,size_L\n175/92A,size_L\n180/96A,size_L(165/72A),size_L/44,size_L/XL,size_L码,size_L码 170/84A,size_L码 175/95,size_L码 175/96A,size_L码 180/88A,size_L码/170/84A,size_L码/180/76A,size_L码/180/88A,size_L码180/88A,size_L（175/82A）,size_L（39-40）,size_M,size_M\n160/84A,size_M\n165/68A,size_M\n165/86A,size_M\n170/72A,size_M\n170/88A,size_M\n175/92A,size_M(160/68A),size_M-L,size_M/10,size_M/42,size_M/L,size_ML,size_M码,size_M码 165/80A,size_M码 165/80A.1,size_M码 170/92A,size_M码 170/95,size_M码 175/84A,size_M码/165/62A,size_M码/165/80A,size_M码/175/72A,size_M码/175/84A,size_M码175/84A,size_M码_175/84A,size_M（160/84A）,size_M（170/78A）,size_M（170/95）,size_M（175/92A）,size_M（37-38）,size_M（均码）,size_OS,size_P,size_S,size_S\n155/80A,size_S\n160/64A,size_S\n160/82A,size_S\n165/68A,size_S\n165/84A,size_S\n170/88A,size_S-M,size_S/40,size_S/8,size_S/M,size_SM,size_S码 160/76A,size_S码 165/88A,size_S码 170/80A,size_S码/160/76A,size_S码/170/68A/,size_S码/170/80A,size_S码170/80A,size_S（155/64A）,size_S（155/80A）,size_S（165/74A）,size_S（35-36）,size_T 36,size_T 38,size_T 40,size_T 42,size_T2,size_T36,size_T38,size_T40,size_T42,size_TU,size_U,size_UK 10,size_UK 6,size_UK 8,size_UNI,size_W31*L32,size_XL,size_XL\n170/76A,size_XL\n175/76A,size_XL\n175/94A,size_XL\n180/80A,size_XL\n180/96A,size_XL/2XL,size_XL/XXL,size_XL码,size_XL码 175/100,size_XL码 175/88A,size_XL码 180/100A,size_XL码 185/92A,size_XL码/185/92A,size_XL（180/86A）,size_XS,size_XS\n160/80A,size_XS-S,size_XS/38,size_XS/6,size_XS/S,size_XS码 165/76A,size_XS码/155/72A,size_XXL,size_XXL/190/96A,size_XXL码,size_XXL码/190/96A,size_XXS,size_XXS/4,size_XXS/XS,size_XXXL,size_XXXL（190/94A）,size_XXXS,size_XXl 185/100A,size_m,size_s,size_xL,size_xs,size_xxs,size_Ⅱ,size_Ⅲ,size_Ⅴ,size_大：11*1*10\n小：10*1*7cm,size_宽11cm*高10cm*厚3cm,size_小,size_常规,size_帽围51cm,size_帽围55cm,size_帽围：30cm,size_帽围：32cm,size_帽围：34cm,size_帽围：38cm,size_帽围：40cm,size_帽围：42cm,size_意大利码46,size_欧州码52,size_欧洲码36,size_欧洲码38,size_欧洲码40,size_欧洲码42,size_欧洲码44,size_欧洲码46,size_欧洲码48,size_欧洲码50,size_欧洲码52,size_欧洲码54,size_欧码35,size_欧码36,size_欧码36.5,size_欧码37,size_欧码37.5,size_欧码38,size_欧码38.5,size_欧码39,size_欧码39.5,size_欧码40,size_欧码41,size_欧码42,size_欧码43,size_欧码8.5,size_罗马尺码IV,size_美码10,size_美码2,size_美码4(XS),size_美码5.5,size_美码6,size_美码6.5,size_美码7,size_美码7.5,size_美码8,size_美码9,size_英码6,size_英码8,size_英美尺码48,size_衬衫英寸码15,size_衬衫通用码39,size_衬衫通用码40,size_衬衫通用码41,size_衬衫通用码44,size_衬衫通用码45,size_衬衫通用码46,size_身高110-116cm,size_身高122-128cm,gender_女,gender_未知,gender_男,gender_男女,class_map_上装,class_map_内衣,class_map_小孩,class_map_帽子,class_map_箱包,class_map_裙子,class_map_裤子,class_map_配饰,class_map_鞋靴,style_50_个性化,style_50_个性街头,style_50_休闲,style_50_优雅,style_50_优雅简约,style_50_儿童风,style_50_冬季风格,style_50_别致休闲,style_50_前卫,style_50_前卫个性,style_50_复古,style_50_奢华,style_50_实验性,style_50_户外风,style_50_极简,style_50_极简设计,style_50_波西米亚,style_50_海洋风,style_50_现代,style_50_现代艺术,style_50_甜美风格,style_50_田园风,style_50_简约,style_50_经典,style_50_自然主义,style_50_自然色调,style_50_艺术风格,style_50_街头艺术,style_50_街头风,style_50_运动休闲,style_50_运动潮流,style_50_都市时尚,style_50_高端时尚,style_50_高街风,color_50_others,color_50_几何系列,color_50_卡其色系列,color_50_咖啡系列,color_50_土黄色系列,color_50_复古风系列,color_50_多色系列,color_50_大地色系列,color_50_彩虹系列,color_50_无色系列,color_50_条纹系列,color_50_棕色系列,color_50_橙色系列,color_50_海洋系列,color_50_淡色系列,color_50_深色系列,color_50_深蓝色系列,color_50_混合色系列,color_50_灰色系列,color_50_灰褐色系列,color_50_特殊颜色系列,color_50_现代色系列,color_50_白色系列,color_50_米白色,color_50_米白色系列,color_50_米色拼接系列,color_50_米色系列,color_50_粉色系列,color_50_紫色系列,color_50_红色系列,color_50_绿色系列,color_50_肤色系列,color_50_艺术色系列,color_50_花色系列,color_50_蓝色系列,color_50_裸色系列,color_50_迷彩系列,color_50_迷彩色系列,color_50_透明系列,color_50_透明色系列,color_50_酒红色系列,color_50_金属色系列,color_50_金色系列,color_50_银色系列,color_50_青色系列,color_50_香槟色系列,color_50_高级灰系列,color_50_鲜艳色系列,color_50_黄色系列,color_50_黑白格子系列,color_50_黑色系列,item_ID
0,2.411184,-0.640895,-1.687119,-0.636977,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1001920
1,-0.494681,-0.750136,-1.789802,-0.647089,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1003191
2,-0.494681,-0.78655,-1.879383,-0.648916,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1003194
3,-0.494681,-0.744934,-1.787335,-0.646828,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1003197
4,-0.494681,-0.781348,-1.795373,-0.64842,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1003210


In [33]:
# shoe full list of columns names
train_item_features_encoded.columns.tolist()

['price_range',
 'unit_price',
 'discount',
 'sale_price',
 'brand_13DE MARZO',
 'brand_ACLER',
 'brand_ALEXANDER MCQUEEN',
 'brand_ALEXIA SANDRA',
 'brand_ALL COMES FROM NOTHING',
 'brand_AMI ALEXANDRE MATTIUSSI',
 'brand_ANN ANDELMAN',
 'brand_AQUASCUTUM',
 'brand_ARMANI',
 'brand_ARMANI EXCHANGE',
 'brand_BALLY',
 'brand_BURBERRY',
 'brand_BY FAR',
 'brand_C/MEO',
 'brand_CALVIN KLEIN',
 'brand_CAMPER',
 'brand_CANADA GOOSE',
 'brand_CELINE',
 'brand_CHAMPION',
 'brand_COACH',
 'brand_CREAZIONI',
 'brand_DIOR',
 'brand_EMPORIO ARMANI',
 'brand_ETRE CECILE',
 'brand_FENDI',
 'brand_FINDERS KEEPERS',
 'brand_FLONAKED',
 'brand_GANNI',
 'brand_GOLDEN GOOSE',
 'brand_GUCCI',
 'brand_HEREU',
 'brand_HERLIAN',
 'brand_HOLZWEILER',
 'brand_HUGO BOSS',
 'brand_IMMI',
 'brand_INSPIRE&STORIES',
 'brand_JIMMY CHOO',
 'brand_KEEPSAKE',
 'brand_KENZO',
 'brand_LOEWE',
 'brand_LONGCHAMP',
 'brand_LOVE MOSCHINO',
 'brand_MAISON KITSUNE',
 'brand_MARNI',
 'brand_MAX MARA',
 'brand_MCM',
 'brand_MCQ

In [34]:
user_profile_encoded.columns.tolist()   

['num_items_bought',
 'num_unique_items_bought',
 'num_unique_sizes_bought',
 'num_unique_class_map_bought',
 'most_like_price_range',
 'avg_unit_price',
 'avg_discount',
 'avg_sale_price',
 'most_like_size_0',
 'most_like_size_00',
 'most_like_size_000',
 'most_like_size_001',
 'most_like_size_002',
 'most_like_size_00F',
 'most_like_size_02',
 'most_like_size_035',
 'most_like_size_036',
 'most_like_size_037',
 'most_like_size_038',
 'most_like_size_039',
 'most_like_size_04',
 'most_like_size_042',
 'most_like_size_044',
 'most_like_size_046',
 'most_like_size_048',
 'most_like_size_050',
 'most_like_size_052',
 'most_like_size_06',
 'most_like_size_08',
 'most_like_size_08A',
 'most_like_size_1',
 'most_like_size_10',
 'most_like_size_10 EEE',
 'most_like_size_10*1*7cm',
 'most_like_size_10*1*8cm',
 'most_like_size_10*1*9cm',
 'most_like_size_10*2*9.5cm',
 'most_like_size_10.5',
 'most_like_size_10.5*1*8.5cm',
 'most_like_size_10A',
 'most_like_size_10A/10岁/140cm',
 'most_like_size

In [35]:
# check missing values
user_profile_encoded.isnull().sum().sum(), train_item_features_encoded.isnull().sum().sum(), train_interaction.isnull().sum().sum()


(0, 3, 0)

In [36]:
# drop na values
train_item_features_encoded = train_item_features_encoded.dropna()


In [37]:
# Create a LightFM dataset
dataset = Dataset()

# Fit dataset with unique user IDs and item IDs
dataset.fit(
    users=user_profile_encoded['user_id'].unique(),
    items=train_item_features_encoded['item_ID'].unique(),
    user_features=user_profile_encoded.columns.drop('user_id'),
    item_features=train_item_features_encoded.columns.drop('item_ID')
)


In [38]:
# Build interactions and features for the LightFM model
(interactions, weights) = dataset.build_interactions(
    train_interaction[['user_id', 'item_ID', 'label']].values
)

user_features = dataset.build_user_features(
    user_profile_encoded.values
)

item_features = dataset.build_item_features(
    train_item_features_encoded.values
)




ValueError: Item id 1531887 not in item id mapping. Make sure you call the fit method.

In [39]:
# Build user features matrix
# user_features = dataset.build_user_features(
#     [(row['user_id'], row.drop('user_id').values.tolist())
#      for _, row in user_profile_encoded.iterrows()]
# )

# Build item features matrix
item_features = dataset.build_item_features(
    [(row['item_ID'], row.drop('item_ID').values.tolist())
     for _, row in train_item_features.iterrows()]
)
# Build interaction matrix
(interactions, weights) = dataset.build_interactions(
    [(row['user_id'], row['item_ID'], row['label'])
     for _, row in train_interaction.iterrows()]
)

KeyError: 'item_ID'

In [40]:

# Load user profile
#user_profile = pd.read_csv("user_profile.csv")

# Drop the user_id column before encoding
user_ids = user_profile['user_id']
user_profile = user_profile.drop(columns=['user_id'])

# Identify numerical and categorical features
numerical_features = user_profile.select_dtypes(include=['int64', 'float64']).columns
categorical_features = user_profile.select_dtypes(include=['object']).columns

# Pipeline for numerical features
numerical_pipeline = Pipeline([
    ('scaler', StandardScaler())
])

# Pipeline for categorical features
categorical_pipeline = Pipeline([
    ('encoder', OneHotEncoder(handle_unknown='ignore'))
])

# Full pipeline
preprocessor = ColumnTransformer([
    ('num', numerical_pipeline, numerical_features),
    ('cat', categorical_pipeline, categorical_features)
])

# Apply the transformations to the user profile data
user_profile_processed = preprocessor.fit_transform(user_profile)

# Convert the processed data to a dense format
user_profile_processed_dense = user_profile_processed.toarray()

# Get feature names for the encoded categorical features
try:
    cat_feature_names = preprocessor.named_transformers_['cat']['encoder'].get_feature_names_out(categorical_features)
except AttributeError:
    cat_feature_names = preprocessor.named_transformers_['cat']['encoder'].get_feature_names(categorical_features)

# Convert the processed data back to a DataFrame
user_profile_encoded = pd.DataFrame(user_profile_processed_dense, columns=numerical_features.tolist() + cat_feature_names.tolist())

# Add the user_id column back to the DataFrame
user_profile_encoded['user_id'] = user_ids.values

# Load train item features
#train_item_features = pd.read_csv("train_items_feature.csv")

# Drop the item_ID column before encoding
item_ids = train_item_features['item_ID']
train_item_features = train_item_features.drop(columns=['item_ID'])

# Identify numerical and categorical features for items
numerical_features_items = train_item_features.select_dtypes(include=['int64', 'float64']).columns
categorical_features_items = train_item_features.select_dtypes(include=['object']).columns

# Pipeline for numerical features
numerical_pipeline_items = Pipeline([
    ('scaler', StandardScaler())
])

# Pipeline for categorical features
categorical_pipeline_items = Pipeline([
    ('encoder', OneHotEncoder(handle_unknown='ignore'))
])

# Full pipeline for items
preprocessor_items = ColumnTransformer([
    ('num', numerical_pipeline_items, numerical_features_items),
    ('cat', categorical_pipeline_items, categorical_features_items)
])

# Apply the transformations to the item features data
train_item_features_processed = preprocessor_items.fit_transform(train_item_features)

# Convert the processed data to a dense format
train_item_features_processed_dense = train_item_features_processed.toarray()

# Get feature names for the encoded categorical features
try:
    cat_feature_names_items = preprocessor_items.named_transformers_['cat']['encoder'].get_feature_names_out(categorical_features_items)
except AttributeError:
    cat_feature_names_items = preprocessor_items.named_transformers_['cat']['encoder'].get_feature_names(categorical_features_items)

# Convert the processed data back to a DataFrame
train_item_features_encoded = pd.DataFrame(train_item_features_processed_dense, columns=numerical_features_items.tolist() + cat_feature_names_items.tolist())

# Add the item_ID column back to the DataFrame
train_item_features_encoded['item_ID'] = item_ids.values

# Create a LightFM dataset
dataset = Dataset()

# Fit dataset with unique user IDs and item IDs
dataset.fit(
    users=user_profile_encoded['user_id'].unique(),
    items=train_item_features_encoded['item_ID'].unique(),
    user_features=user_profile_encoded.columns.drop('user_id'),
    item_features=train_item_features_encoded.columns.drop('item_ID')
)

# Build user features matrix
user_features = dataset.build_user_features(
    [(row['user_id'], row.drop('user_id').values.tolist())
     for _, row in user_profile_encoded.iterrows()]
)

# Build item features matrix
item_features = dataset.build_item_features(
    [(row['item_ID'], row.drop('item_ID').values.tolist())
     for _, row in train_item_features_encoded.iterrows()]
)

# Load interaction data
train_interaction = pd.read_csv("train_interactions.csv")

# Ensure the interaction data has a 'label' column
if 'label' not in train_interaction.columns:
    train_interaction['label'] = 1  # Assuming all interactions are positive

# Build interaction matrix
(interactions, weights) = dataset.build_interactions(
    [(row['user_id'], row['item_ID'], row['label'])
     for _, row in train_interaction.iterrows()]
)

# Initialize the model with WARP loss
model = LightFM(loss='warp')

# Train the model
model.fit(
    interactions=interactions,
    user_features=user_features,
    item_features=item_features,
    epochs=30,
    num_threads=4
)

KeyError: 'user_id'

In [38]:
user_profile_encoded.columns.tolist()

['num_items_bought',
 'num_unique_items_bought',
 'num_unique_sizes_bought',
 'num_unique_class_map_bought',
 'most_like_price_range',
 'avg_unit_price',
 'avg_discount',
 'avg_sale_price',
 'most_like_size_0',
 'most_like_size_00',
 'most_like_size_000',
 'most_like_size_001',
 'most_like_size_002',
 'most_like_size_00F',
 'most_like_size_02',
 'most_like_size_035',
 'most_like_size_036',
 'most_like_size_037',
 'most_like_size_038',
 'most_like_size_039',
 'most_like_size_04',
 'most_like_size_042',
 'most_like_size_044',
 'most_like_size_046',
 'most_like_size_048',
 'most_like_size_050',
 'most_like_size_052',
 'most_like_size_06',
 'most_like_size_08',
 'most_like_size_08A',
 'most_like_size_1',
 'most_like_size_10',
 'most_like_size_10 EEE',
 'most_like_size_10*1*7cm',
 'most_like_size_10*1*8cm',
 'most_like_size_10*1*9cm',
 'most_like_size_10*2*9.5cm',
 'most_like_size_10.5',
 'most_like_size_10.5*1*8.5cm',
 'most_like_size_10A',
 'most_like_size_10A/10岁/140cm',
 'most_like_size

In [None]:
import numpy as np
from lightfm import LightFM
from lightfm.evaluation import precision_at_k

# Initialize the model with WARP loss
model = LightFM(loss='warp')

# Train the model
model.fit(
    interactions=interactions,
    user_features=user_features,
    item_features=item_features,
    epochs=30,
    num_threads=4
)


NameError: name 'interactions' is not defined