In [1]:
import os
import sys
sys.path.append('../../')
import math
import pandas as pd
import numpy as np
import recs.path as path
import json
from recs.cf_mf import MatrixFactorization, generate_cf_predict_df
from recs.util import id2cat, recall_evaluate, readjson2dict, convert_result_json_2_csv
import recs.path as path
from scipy.sparse import csr_matrix
from scipy.sparse.linalg import svds
from tqdm import tqdm
import time

In [2]:
# Time windows. The bounds are multiplied by 100 before being compared with
# `buy_date` below (presumably yyyymm vs. yyyymmdd integers - TODO confirm).
history_period = [201600, 202100]
best_sell_period = [201900, 202100]
current_yyyymm = 202012

# Stop right away when the target month is past the end of the history window.
if current_yyyymm > history_period[1]:
    raise SystemExit("Error: target_user_date > history_end")

# Index lookups for users and funds.
u2idx = readjson2dict("crm_idx")
i2idx = readjson2dict("fund_info_idx")

# Input file locations under the project data directory.
exist_funds_path = os.path.join(path.data_path, 'exist_funds_2021')
crm_path = os.path.join(path.data_path, "crm_diff.pkl")
trans_buy_path = os.path.join(path.data_path, "trans_buy.pkl")

# Translate every entry of the existing-funds file through `i2idx`
# (entries missing from the lookup become None).
exist_funds = readjson2dict(exist_funds_path)
exist_funds = list(map(i2idx.get, exist_funds))

trans_buy = pd.read_pickle(trans_buy_path)
crm = pd.read_pickle(crm_path)

# Users present in the CRM snapshot of the target month.
target_users = crm[crm.yyyymm == current_yyyymm].id_number.unique().tolist()

# Training set: purchases strictly inside the history window.
train_df = trans_buy[
    trans_buy.buy_date.gt(history_period[0] * 100)
    & trans_buy.buy_date.lt(history_period[1] * 100)
]

In [3]:

# Rating = number of training-window purchase rows per (user, fund) pair.
rate_df = (
    train_df
    .groupby(["id_number", "fund_id"])
    .size()
    .reset_index(name="Rate")
)

In [4]:
# Funds ranked by purchase count inside the best-seller window
# (value_counts sorts most-purchased first).
best_sells = (
    trans_buy.loc[
        (trans_buy.buy_date > best_sell_period[0] * 100)
        & (trans_buy.buy_date < best_sell_period[1] * 100),
        "fund_id",
    ]
    .value_counts()
    .rename_axis('fund_id')
    .reset_index(name='count')
)


def rec_forward(rate_df, svds_k, target_users = target_users, ignore_item=[], train_bs_funds=200):
    """Recommend funds for every target user, falling back to best sellers.

    The rating table is restricted to the first ``train_bs_funds`` funds of the
    module-level ``best_sells`` frame (after dropping ``ignore_item``) and fed
    to ``generate_cf_predict_df`` / ``MatrixFactorization``. A user that appears
    as a column of the resulting prediction matrix gets ``CF.rec_items``
    results; any other user gets the first 10 best-selling funds that the user
    has not already bought in the module-level ``train_df``.

    Args:
        rate_df: DataFrame with a ``fund_id`` column; passed (filtered) to
            ``generate_cf_predict_df``.
        svds_k: forwarded unchanged to ``generate_cf_predict_df``
            (presumably the SVD rank - confirm against ``recs.cf_mf``).
        target_users: iterable of user ids to produce recommendations for.
            The default is the module-level ``target_users`` captured when
            this function is defined.
        ignore_item: fund ids removed from ``rate_df`` before training. The
            default list is shared between calls but is only read here.
        train_bs_funds: how many top rows of ``best_sells`` are kept as the
            training fund universe.

    Returns:
        dict mapping ``str(user_id)`` to a list of recommended fund ids.
        Keys are strings for *both* the CF and the fallback branch (the
        fallback branch previously used the raw id, mixing key types).
    """
    top_funds = best_sells['fund_id'].tolist()[:train_bs_funds]
    rate_df = rate_df[~rate_df.fund_id.isin(ignore_item)]
    rate_df = rate_df[rate_df['fund_id'].isin(top_funds)]
    cf_preds_matrix = generate_cf_predict_df(rate_df, svds_k)
    CF = MatrixFactorization(cf_preds_matrix)

    # Loop-invariant: build the set of modelled users once instead of
    # re-creating and linearly scanning a list for every target user.
    known_users = set(cf_preds_matrix.columns.tolist())

    # Loop-invariant: one pass over the training purchases instead of a full
    # DataFrame scan per user. drop_duplicates keeps the first occurrence, so
    # each user's fund list keeps the same first-appearance order as
    # `train_df[train_df.id_number == u].fund_id.unique()`.
    bought = {}
    pairs = train_df[["id_number", "fund_id"]].drop_duplicates()
    for uid, fid in zip(pairs["id_number"].tolist(), pairs["fund_id"].tolist()):
        bought.setdefault(uid, []).append(fid)

    rec_dict = {}
    for u in tqdm(target_users):
        # Fresh list per user so callee-side mutation cannot leak into `bought`.
        ignore_items = list(bought.get(u, ()))
        if u in known_users:
            rec_dict[str(u)] = CF.rec_items(u, items_to_ignore=ignore_items).fund_id.tolist()
        else:
            # User unknown to the model: top 10 best sellers not yet bought.
            best_sell_funds = best_sells[~best_sells['fund_id'].isin(ignore_items)]['fund_id'].tolist()[:10]
            rec_dict[str(u)] = best_sell_funds
    return rec_dict

In [5]:
# Settings for this run: size of the best-seller fund universe used for
# training, and the value handed to rec_forward as `svds_k`.
params = dict(train_bs_funds=2000, svd_k=53)
rec_dict = rec_forward(
    rate_df,
    svds_k=params["svd_k"],
    train_bs_funds=params["train_bs_funds"],
)

100%|██████████| 79848/79848 [04:56<00:00, 269.08it/s]


In [6]:
# Number of users that received a recommendation list.
len(rec_dict)

79848

In [7]:
# Decode all groups: map each user key and each recommended fund through
# id2cat (presumably index -> original identifier; confirm in recs.util).
rec_result = {}

for user, fund_idxs in tqdm(rec_dict.items()):
    decoded_user = id2cat(u2idx, int(user))
    rec_result[decoded_user] = [id2cat(i2idx, fund_idx) for fund_idx in fund_idxs]
    

100%|██████████| 79848/79848 [04:57<00:00, 268.00it/s]


In [8]:
# Write the decoded recommendations to JSON, then convert that file to CSV.
r_name = "r_2021_12_01_mf_k53_test"
json_name = f"{r_name}.json"
json_save_path = os.path.join(path.result_path, json_name)
with open(json_save_path, 'w') as jf:
    json.dump(rec_result, jf)

# The CSV goes to <export_path>/result/<r_name>.csv.
result_path = os.path.join(path.export_path, "result")
save_path = os.path.join(result_path, r_name + ".csv")
convert_result_json_2_csv(json_save_path, save_path)

Convert /tf/recommenders/export/result/r_2021_12_01_mf_k53_test.json to /tf/recommenders/export/result/r_2021_12_01_mf_k53_test.csv success.
