# insert title of code

generic intro text

## imports

### Libraries

In [None]:
import argparse
import pickle
from pathlib import Path

import matplotlib.pyplot as plt
import numpy as np
import torch

### Custom scripts

In [None]:
# from scripts.args import *
from scripts.base_model.preprocessing import *
from scripts.base_model.train_base import *
from scripts.base_model.models import *
from scripts.evaluation.eval_model import *
from scripts.evaluation.get_results import *
from scripts.CEF_model.CEF_model import *
from scripts.CEF_model.train_CEF import *


## Args

### args preprocessing

sentires_dir    =       location of the preprocessed (Sentires) data. \
review_dir      =       location of the JSON dataset. \
user_thresh     =       minimum number of reviews a user needs to have. \
item_thresh     =       minimum number of reviews an item needs to have. \
sample_ratio    =       the (negative : positive) sample ratio used when training with the BPR loss. \
test_length     =       number of items in the test set. \
neg_length      =       number of negative samples used in evaluation. \
save_path       =       where the dataset object will be saved. \
use_pre         =       whether or not to use a previously created dataset. If this value is true, the data stored in save_path is used.

In [None]:
def arg_parser_preprocessing():
    """Build and parse the command-line options for dataset preprocessing.

    Returns:
        The ``(namespace, unknown_args)`` tuple produced by
        ``ArgumentParser.parse_known_args``; tokens the parser does not know
        are returned in ``unknown_args`` instead of aborting the parse.
    """

    def _str2bool(value):
        # argparse applies ``type`` to the raw string; ``bool("False")`` and a
        # plain ``str`` are both truthy, so boolean options are parsed by hand.
        if isinstance(value, bool):
            return value
        lowered = value.strip().lower()
        if lowered in ("true", "t", "yes", "y", "1"):
            return True
        if lowered in ("false", "f", "no", "n", "0", ""):
            return False
        raise argparse.ArgumentTypeError(f"expected a boolean value, got {value!r}")

    parser = argparse.ArgumentParser()
    parser.add_argument("--sentires_dir", dest="sentires_dir", type=str,
                        default="data/input_data/reviews_with_features.txt",
                        help="path to sentires data")
    parser.add_argument("--review_dir", dest="review_dir", type=str,
                        default="data/input_data/reviews_Electronics_5_filtered.json",
                        help="path to original review data")
    parser.add_argument("--user_thresh", dest="user_thresh", type=int, default=20,
                        help="remove users with reviews less than this threshold")
    parser.add_argument("--item_thresh", dest="item_thresh", type=int, default=10,
                        help="remove items with reviews less than this threshold")
    parser.add_argument("--sample_ratio", dest="sample_ratio", type=int, default=2,
                        help="the (negative: positive sample) ratio for training BPR loss")
    parser.add_argument("--test_length", dest="test_length", type=int, default=5,
                        help="the number of test items")
    parser.add_argument("--neg_length", dest="neg_length", type=int, default=100,
                        help="# of negative samples in evaluation")
    parser.add_argument("--save_path", dest="save_path", type=str,
                        default="data/preprocessed_data/dataset.pickle",
                        help="The path to save the preprocessed dataset object")
    parser.add_argument("--use_pre", dest="use_pre", type=_str2bool, default=False,
                        help="Whether or not to use a stored dataset object")
    parser.add_argument("--extra_filter", dest="extra_filter", type=_str2bool, default=True)
    return parser.parse_known_args()

### args Train base model

device = either cpu or cuda, depending on whether you use a GPU \
batch_size = the batch size \
lr = learning rate \
rec_k = length of the recommendation list \
weight_decay = weight decay coefficient (defaults to 0)

In [None]:
def arg_parser_training():
    """Build and parse the command-line options for training the base model.

    Returns:
        The ``(namespace, unknown_args)`` tuple produced by
        ``ArgumentParser.parse_known_args``; tokens the parser does not know
        are returned in ``unknown_args`` instead of aborting the parse.
    """

    def _str2bool(value):
        # Without an explicit converter every non-empty string -- "False"
        # included -- would be truthy, so boolean options are parsed by hand.
        if isinstance(value, bool):
            return value
        lowered = value.strip().lower()
        if lowered in ("true", "t", "yes", "y", "1"):
            return True
        if lowered in ("false", "f", "no", "n", "0", ""):
            return False
        raise argparse.ArgumentTypeError(f"expected a boolean value, got {value!r}")

    parser = argparse.ArgumentParser()
    parser.add_argument("--device", dest="device", type=str, default='cpu')
    parser.add_argument("--gpu", type=_str2bool, default=False)
    parser.add_argument("--batch_size", dest="batch_size", type=int, default=128)
    parser.add_argument("--lr", dest="lr", type=float, default=0.01)
    parser.add_argument("--rec_k", dest="rec_k", type=int, default=5, help="length of rec list")
    parser.add_argument("--weight_decay", default=0., type=float)  # not sure whether to use
    parser.add_argument("--model_path", dest="model_path", type=str, default="data/models/model.model",
                        help="The path to save the model")
    parser.add_argument("--epochs", dest="epochs", type=int, default=100)
    parser.add_argument("--use_pre", dest="use_pre", type=_str2bool, default=False,
                        help="Whether or not to use a stored model object")
    return parser.parse_known_args()

### arg CEF

In [None]:
# todo
def arg_parser_CEF():
    """Build and parse the command-line options for the CEF model.

    Returns:
        The ``(namespace, unknown_args)`` tuple produced by
        ``ArgumentParser.parse_known_args``; tokens the parser does not know
        are returned in ``unknown_args`` instead of aborting the parse.
    """

    def _str2bool(value):
        # ``type=str`` would make "--use_pre False" truthy; parse it explicitly.
        if isinstance(value, bool):
            return value
        lowered = value.strip().lower()
        if lowered in ("true", "t", "yes", "y", "1"):
            return True
        if lowered in ("false", "f", "no", "n", "0", ""):
            return False
        raise argparse.ArgumentTypeError(f"expected a boolean value, got {value!r}")

    parser = argparse.ArgumentParser()
    parser.add_argument("--device", dest="device", type=str, default='cpu')
    parser.add_argument("--rec_k", dest="rec_k", type=int, default=5, help="length of rec list")
    parser.add_argument("--ld", default=1, type=float)  # not sure whether to use
    parser.add_argument("--lr", dest="lr", type=float, default=0.01)
    parser.add_argument("--model_path", dest="model_path", type=str, default="data/models/CEF_model.model",
                        help="The path to save the model")
    parser.add_argument("--epochs", dest="epochs", type=int, default=100)
    parser.add_argument("--use_pre", dest="use_pre", type=_str2bool, default=False,
                        help="Whether or not to use a stored model object")
    return parser.parse_known_args()

### args get results

In [None]:
def arg_parser_results():
    """Build and parse the command-line options for producing the results.

    Returns:
        The ``(namespace, unknown_args)`` tuple produced by
        ``ArgumentParser.parse_known_args``; tokens the parser does not know
        are returned in ``unknown_args`` instead of aborting the parse.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument("--device", dest="device", type=str, default='cpu')
    parser.add_argument("--remove_size", dest="remove_size", type=int, default=50)
    parser.add_argument("--rec_k", dest="rec_k", type=int, default=5, help="length of rec list")
    parser.add_argument("--output_path", dest="output_path", type=str, default="results/result dicts/",
                        help="The path to save the results")
    parser.add_argument("--epochs", dest="epochs", type=int, default=1000)
    # beta defaults to a fractional value, so it must be parsed as a float:
    # with ``type=int`` a command-line value such as "0.5" is rejected.
    parser.add_argument("--beta", dest="beta", type=float, default=0.1)
    return parser.parse_known_args()

## Basemodel

In [None]:
# Fix NumPy's global random state so NumPy-based randomness repeats across runs.
np.random.seed(42)
# Device string handed to the models and the evaluation further down.
device = "cpu"

### Preprocessing

In [None]:
# Parse the preprocessing options; command-line tokens the parser does not know are ignored.
preprocessing_args, unkown = arg_parser_preprocessing()
dataset_path = preprocessing_args.save_path
if preprocessing_args.use_pre:
    # Reuse the dataset object stored by an earlier run.
    # NOTE: unpickling can execute arbitrary code -- only load files you created yourself.
    with open(dataset_path, "rb") as f:
        dataset = pickle.load(f)
else:
    # Create the target directory *before* preprocessing, so a missing folder is not
    # discovered only when the finished dataset is about to be written.
    Path(dataset_path).parent.mkdir(parents=True, exist_ok=True)
    dataset = preprocessing(preprocessing_args)
    with open(dataset_path, "wb") as f:
        pickle.dump(dataset, f)

### Train base model

In [None]:
# Parse the training options; command-line tokens the parser does not know are ignored.
train_args, _ = arg_parser_training()
model_path = train_args.model_path
if train_args.use_pre:
    # Rebuild the model and restore the weights stored at model_path.
    base_model = BaseRecModel(dataset.feature_num, dataset).to(device)
    # map_location remaps tensors saved on another device (e.g. a GPU) onto `device`,
    # so a checkpoint written on CUDA still loads on a CPU-only machine.
    base_model.load_state_dict(torch.load(model_path, map_location=device))
else:
    base_model = trainmodel(train_args, dataset)

### Results of base model

In [None]:
# Evaluate the base model; the third value returned by eval_model is not used here.
ndcg, f1, _ = eval_model(dataset, 5, base_model, device)
print(f"ndcg : {ndcg}", f"f1 : {f1}", sep="\n")

### CEF model

In [None]:
# TODO: still to do.
# Wrap the base model in a CEF model, then either restore stored weights or train it.
CEF_args, _ = arg_parser_CEF()
model_path = CEF_args.model_path
CEF_model = CEF(CEF_args, dataset, base_model).to(device)
if CEF_args.use_pre:
    # map_location remaps tensors saved on another device (e.g. a GPU) onto `device`,
    # so a checkpoint written on CUDA still loads on a CPU-only machine.
    CEF_model.load_state_dict(torch.load(model_path, map_location=device))
else:
    CEF_model = train_delta(CEF_args, CEF_model)
        

### Obtain a list of feature IDs, ranked by explainability score according to the trained CEF model (to clean)

In [None]:
# Set to True to reuse the ranking stored by an earlier run instead of recomputing it.
usepre = False
ranked_ids_path = "results/ranked_ids.pickle"
if usepre:
    with open(ranked_ids_path, "rb") as f:
        ranked_ids = pickle.load(f)
else:
    # Compute the ranking *before* opening the file: mode "wb" truncates immediately, so a
    # failure inside top_k() would otherwise leave an empty pickle that breaks later loads.
    ranked_ids = CEF_model.top_k()
    Path(ranked_ids_path).parent.mkdir(parents=True, exist_ok=True)
    with open(ranked_ids_path, "wb") as f:
        pickle.dump(ranked_ids, f)

### Plot results

In [None]:
# Parse the result options; command-line tokens the parser does not know are ignored.
result_args, _ = arg_parser_results()

# Reload the stored ranking and flip its order in place.
# NOTE(review): presumably get_results expects the opposite order of what top_k() returns -- confirm.
with open("results/ranked_ids.pickle", "rb") as ranked_file:
    CEF_delete_list = pickle.load(ranked_file)
CEF_delete_list.reverse()

results = get_results(
    dataset,
    result_args,
    base_model,
    CEF_model,
    CEF_delete_list,
)

In [None]:
def plot_results(results):
    """Draw one NDCG-versus-long-tail-rate curve per method and show the figure.

    Args:
        results: mapping from a method name to a mapping that holds the
            x values under ``"lt"`` and the y values under ``"ndcg"``.
    """
    for method, curve in results.items():
        plt.plot(curve["lt"], curve["ndcg"], label=method)

    plt.xlabel("long tail rate")
    plt.ylabel("NDCG")
    plt.legend()
    plt.show()

In [None]:
# Render the curves collected in `results`.
plot_results(results=results)