# LFM

In [None]:
import os
import pickle
from pathlib import Path
import pandas as pd
from lightfm import LightFM # https://github.com/lyst/lightfm
from scipy import sparse


# Root directory of the competition data. Model pickles produced in this cell
# are written to its "lfm" subdirectory, which is created here if missing.
data_dir = '/home/workspace/h-and-m-personalized-fashion-recommendations'
os.makedirs(os.path.join(data_dir, 'lfm'), exist_ok=True)

# Keyword arguments forwarded to the LightFM constructor; user_item() adds
# `no_components` on a per-call copy.
# NOTE(review): LightFM documents `learning_rate` as the adagrad learning
# rate, so it may have no effect with the adadelta schedule -- confirm.
LIGHTFM_PARAMS = dict(
    learning_schedule='adadelta',
    loss='bpr',
    learning_rate=0.005,
    random_state=42,
)

# Number of epochs passed to LightFM.fit().
EPOCHS = 100

def user_item(week: int, dim: int, num_threads: int = 4):
    """Train a LightFM model on distinct (user, item) pairs and pickle it.

    Only transactions whose ``week`` column is greater than or equal to the
    ``week`` argument are used. Every distinct (user, item) pair becomes one
    interaction with value 1. The fitted model is written to
    ``{data_dir}/lfm/lfm_i_i_week{week}_dim{dim}_model.pkl``.

    Args:
        week: Inclusive lower bound on the ``week`` column of the
            transactions used for training.
        dim: Number of latent components (LightFM ``no_components``).
        num_threads: Number of threads passed to ``LightFM.fit``.
    """
    # Local import: numpy is not imported at file level.
    import numpy as np

    path_prefix = f"{data_dir}/lfm/lfm_i_i_week{week}_dim{dim}"
    print(path_prefix)

    transactions = pd.read_pickle(f"{data_dir}/transactions_train.pkl")
    users = pd.read_pickle(f"{data_dir}/users.pkl")
    items = pd.read_pickle(f"{data_dir}/items.pkl")
    n_user = len(users)
    n_item = len(items)

    # In the query string, `@week` is this function's argument and the bare
    # `week` is the DataFrame column.
    # Sorting stores the entries in row-major (user, then item) order,
    # independent of the order of the transaction log.
    pairs = (
        transactions.query("@week <= week")[['user', 'item']]
        .drop_duplicates(ignore_index=True)
        .sort_values(['user', 'item'])
    )

    # Build the interaction matrix directly in COO form from the index arrays
    # instead of assigning element-wise into a LIL matrix; float32 COO is the
    # format LightFM's documentation specifies for `interactions`.
    # Assumes `user` / `item` hold integer row / column positions smaller than
    # len(users) / len(items) -- TODO confirm against how the pickles are built.
    a_train = sparse.coo_matrix(
        (
            np.ones(len(pairs), dtype=np.float32),
            (pairs['user'].to_numpy(), pairs['item'].to_numpy()),
        ),
        shape=(n_user, n_item),
    )

    # Copy so the shared module-level parameter dict is never mutated.
    lightfm_params = LIGHTFM_PARAMS.copy()
    lightfm_params['no_components'] = dim
    model = LightFM(**lightfm_params)
    model.fit(a_train, epochs=EPOCHS, num_threads=num_threads, verbose=True)

    save_path = f"{path_prefix}_model.pkl"
    with open(save_path, 'wb') as f:
        pickle.dump(model, f)

        

# Train and save one 16-component model for each week cutoff from 1 through 13.
for week in range(1, 14):
    user_item(week=week, dim=16)

# user_features

In [None]:
import os
from pathlib import Path
import pandas as pd
import vaex 


# Root directory of the competition data. Feature pickles produced in this
# cell are written to its "user_features" subdirectory, created here if missing.
data_dir = '/home/workspace/h-and-m-personalized-fashion-recommendations'
os.makedirs(os.path.join(data_dir, 'user_features'), exist_ok=True)

def create_user_ohe_agg(week):
    """Save, per item attribute, each user's mean one-hot encoding of purchases.

    For every column of the items table whose name ends in ``_idx``, the
    column is one-hot encoded, joined onto the transactions whose ``week``
    column is greater than or equal to the ``week`` argument, averaged per
    user, and left-joined onto the full user list. One pickle per attribute
    is written to
    ``{data_dir}/user_features/user_ohe_agg_week{week}_{column}.pkl``.

    Args:
        week: Inclusive lower bound on the ``week`` column of the
            transactions that are aggregated.
    """
    transactions = pd.read_pickle(f'{data_dir}/transactions_train.pkl')[['user', 'item', 'week']]
    users = pd.read_pickle(f'{data_dir}/users.pkl')
    items = pd.read_pickle(f'{data_dir}/items.pkl')

    # In the query string, `@week` is this function's argument and the bare
    # `week` is the DataFrame column.
    tr = vaex.from_pandas(transactions.query("week >= @week")[['user', 'item']])

    # Loop-invariant: the user list that every attribute's aggregate is
    # left-joined onto. Built once so the raw user table is never rebound.
    user_ids = vaex.from_pandas(users[['user']])

    target_columns = [c for c in items.columns if c.endswith('_idx')]
    for c in target_columns:
        # One-hot encode attribute `c` per item and attach it to each transaction.
        dummies = pd.get_dummies(items[['item', c]], columns=[c])
        tmp = tr.join(vaex.from_pandas(dummies), on='item')
        tmp = tmp.drop(columns='item')

        # Per-user mean of every remaining (one-hot) column.
        tmp = tmp.groupby('user').agg(['mean'])

        # Left join keeps users that have no transaction in the selected weeks.
        features = user_ids.join(tmp, on='user', how='left').to_pandas_df()
        features = features.rename(columns={
            name: f'user_ohe_agg_{name}' for name in features.columns if name != 'user'
        })

        features = features.sort_values(by='user').reset_index(drop=True)

        save_path = f'{data_dir}/user_features/user_ohe_agg_week{week}_{c}.pkl'
        features.to_pickle(save_path)
        print("saved", save_path)


# Build and save the per-user one-hot aggregates for each week cutoff from 0 through 13.
for week in range(14):
    create_user_ohe_agg(week=week)