In [53]:
import warnings
# Suppress every warning raised from here on (presumably to keep cell outputs
# uncluttered). NOTE(review): this also hides deprecation notices, so consider
# narrowing the filter to specific categories.
warnings.filterwarnings("ignore")

In [3]:
import pandas as pd
import cornac
from cornac.models import EFM, MTER, NMF, BPR
from cornac.explainer import EFMExplainer, MTERExplainer, Mod_EFMExplainer
from cornac.eval_methods import RatioSplit
from cornac.data import Reader, SentimentModality
# Verbosity flag passed to the data split and to the configured EFM/MTER models.
VERBOSE = False
# Random seed passed to the data split and to the configured EFM/MTER models.
SEED = 42

### Prepare model

##### Load data

In [55]:
# Input files (paths are relative to the notebook's working directory)
sentiment_fpath = '../tests/dataset/goodreads_sentiment.txt'
rating_fpath = '../tests/dataset/goodreads_rating.txt'

# Sentiment records: 'UITup' format, comma-separated fields, ':' inside tuples
sentiment_reader = Reader()
sentiment = sentiment_reader.read(sentiment_fpath, fmt='UITup', sep=',', tup_sep=':')

# Rating records: 'UIR' format, read with an item-frequency threshold of 20
rating_reader = Reader(min_item_freq=20)
rating = rating_reader.read(rating_fpath, fmt='UIR', sep=',')

# Wrap the sentiment records so they can be attached to the split
sentiment_modality = SentimentModality(data=sentiment)

# 80/20 ratio split carrying the sentiment modality alongside the ratings
split_options = dict(
    test_size=0.2,
    exclude_unknowns=True,
    verbose=VERBOSE,
    seed=SEED,
)
rs = RatioSplit(data=rating, sentiment=sentiment_modality, **split_options)

##### Initialize models and get recommendations

In [5]:
# NOTE(review): this cell rebinds `rs`, so when the notebook is run top to
# bottom it replaces the RatioSplit built in the "load data" cell above.
from cornac.datasets.goodreads import prepare_data
rs = prepare_data(data_name="goodreads")
# Fit an EFM with its default hyperparameters. `efm` is rebound to an
# explicitly configured model in the next code cell.
efm = EFM()
efm.fit(rs.train_set)
# Explainer built from the fitted model and the model's `train_set`.
efm_explainer = EFMExplainer(efm, efm.train_set)

Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
  efm.fit(rs.train_set)
Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
  efm.fit(rs.train_set)


In [56]:
# Hyperparameters for the EFM model
efm_params = dict(
    num_explicit_factors=40,
    num_latent_factors=60,
    num_most_cared_aspects=15,
    rating_scale=5.0,
    alpha=0.85,
    lambda_x=1,
    lambda_y=1,
    lambda_u=0.01,
    lambda_h=0.01,
    lambda_v=0.01,
    max_iter=100,
)
efm = EFM(verbose=VERBOSE, seed=SEED, **efm_params)

# Hyperparameters for the MTER model
mter_params = dict(
    n_user_factors=10,
    n_item_factors=10,
    n_aspect_factors=10,
    n_opinion_factors=10,
    n_bpr_samples=1000,
    n_element_samples=50,
    lambda_reg=0.1,
    lambda_bpr=10,
    max_iter=3000,
    lr=0.5,
)
mter = MTER(verbose=VERBOSE, seed=SEED, **mter_params)

# Baselines kept for reference but currently disabled:
#nmf = NMF(k=100, max_iter=100, verbose=VERBOSE, seed=SEED)
#bpr = BPR(k=10, verbose = VERBOSE, seed = SEED)

# Metrics reported by the experiment
eval_metrics = [cornac.metrics.RMSE(), cornac.metrics.NDCG(k=50), cornac.metrics.AUC()]

# Train both models on the split and evaluate them with the metrics above
cornac.Experiment(eval_method=rs, models=[efm, mter], metrics=eval_metrics).run()


TEST:
...
     |   RMSE |    AUC | NDCG@50 | Train (s) | Test (s)
---- + ------ + ------ + ------- + --------- + --------
EFM  | 0.9013 | 0.6208 |  0.0634 |    2.4847 |   6.4599
MTER | 1.2058 | 0.7831 |  0.1389 |   22.8753 |   0.6254



### EFMExplainer

In [57]:
# User indices used throughout the explainer demos
user_idx_list = [1, 2, 3, 4, 5]

# Pair each user with the first item of the ranking EFM produces for them
recom_list_efm = []
for user_idx in user_idx_list:
    ranked_items = efm.rank(user_idx)[0]
    recom_list_efm.append([user_idx, ranked_items[0]])

# Tabular form expected by the explainers
recom_df_efm = pd.DataFrame(recom_list_efm, columns=['user_id', 'item_id'])

In [58]:
# Build the explainer from the fitted model and its training set
efm_exp = EFMExplainer(efm, efm.train_set)

# Single (user, item) pair, addressed by internal index
explanation_1 = efm_exp.explain_one_recommendation_to_user(
    index=True,
    user_id=1,
    item_id=1,
)
print(f"Recommend user {explanation_1['user_id'][0]} item {explanation_1['item_id'][0]}, because it performs well on the aspect: {explanation_1['aspect'][0]}")

# Whole recommendation table at once
efm_explanation_columns = ['user_id', 'item_id', 'aspect', 'aspect_score']
explanations = pd.DataFrame(
    efm_exp.explain_recommendations(recom_df_efm),
    columns=efm_explanation_columns,
)
print("Explain more recommendations")
print(explanations.to_string(index=False))

Recommend user 1 item 1, because it performs well on the aspect: kiddies


Computing explanations: 100%|██████████| 5/5 [00:00<00:00, 226.85it/s]

Explain more recommendations
user_id item_id   aspect  aspect_score
      1       6  kiddies      6.367883
      2     102    kings      5.694288
      3       1 claptrap      6.422020
      4       6       ed      6.387510
      5      88  workout      5.851394





In [59]:
# Raw dataset ids (rather than internal indices) are passed with index=False
raw_user_id = '000d8d73b2676e0c0858a485fe21d56c'
raw_item_id = '10048521'
explanation_1 = efm_exp.explain_one_recommendation_to_user(
    user_id=raw_user_id,
    item_id=raw_item_id,
    index=False,
)
print(f"Recommend user {explanation_1['user_id'][0]} item {explanation_1['item_id'][0]}, because it performs well on the aspect: {explanation_1['aspect'][0]}")


Recommend user 000d8d73b2676e0c0858a485fe21d56c item 10048521, because it performs well on the aspect: claptrap


### Modified_EFMExplainer

In [60]:
# Build the modified explainer from the fitted model and its training set
mod_efm_exp = Mod_EFMExplainer(efm, efm.train_set)

# Single (user, item) pair, addressed by internal index
explanation_1 = mod_efm_exp.explain_one_recommendation_to_user(
    index=True,
    user_id=3,
    item_id=1,
)
if explanation_1['recommend'][0] == True:
    recommend_or_not = "Recommend"
else:
    recommend_or_not = "Not recommend"
print(f"{recommend_or_not} item {explanation_1['item_id'][0]} to user {explanation_1['user_id'][0]} , because the score of aspect: {explanation_1['aspect'][0]} is {explanation_1['aspect_score'][0]}")

# Whole recommendation table at once
mod_efm_explanation_columns = [
    'user_id',
    'item_id',
    'recommend',
    'aspect',
    'aspect_score',
    'max_aspect_name',
    'max_aspect_score',
]
explanations = pd.DataFrame(
    mod_efm_exp.explain_recommendations(recom_df_efm),
    columns=mod_efm_explanation_columns,
)
print("Explain more recommendations")
print(explanations.to_string(index=False))

Recommend item 1 to user 3 , because the score of aspect: fools is 3.217013120651245


Computing explanations: 100%|██████████| 5/5 [00:02<00:00,  1.86it/s]

Explain more recommendations
user_id item_id recommend    aspect  aspect_score max_aspect_name  max_aspect_score
      1       6     False   stomach      2.888605         twister          4.728925
      2     102     False writingis      2.377862          trains          4.058887
      3       1      True     fools      3.217013     furnishings          4.445765
      4       6     False    bother      1.278869         twister          4.728925
      5      88     False  prospect      2.501148      accusation          4.102617





### MTERExplainer

In [61]:
# Pair each user with the first item of the ranking MTER produces for them
recom_list_mter = []
for user_idx in user_idx_list:
    ranked_items = mter.rank(user_idx)[0]
    recom_list_mter.append([user_idx, ranked_items[0]])

# Tabular form expected by the explainer
recom_df_mter = pd.DataFrame(recom_list_mter, columns=['user_id', 'item_id'])

In [62]:
# Build the explainer from the fitted model and its training set
mter_exp = MTERExplainer(mter, mter.train_set)

# Single (user, item) pair, addressed by internal index
explanation_1 = mter_exp.explain_one_recommendation_to_user(
    index=True,
    user_id=1,
    item_id=1,
)
print(f"Recommend user {explanation_1['user_id'][0]} item {explanation_1['item_id'][0]}, because people's opinion on the aspect: {explanation_1['aspect'][0]} is {explanation_1['opinion'][0]}")

# Whole recommendation table at once
mter_explanation_columns = ['user_id', 'item_id', 'aspect', 'opinion']
explanations = pd.DataFrame(
    mter_exp.explain_recommendations(recom_df_mter),
    columns=mter_explanation_columns,
)
print("Explain more recommendations")
print(explanations.to_string(index=False))

Recommend user 1 item 1, because people's opinion on the aspect: circlejerk is odd


Computing explanations: 100%|██████████| 5/5 [00:00<00:00, 367.83it/s]

Explain more recommendations
user_id item_id     aspect   opinion
      1       1 circlejerk       odd
      2       2    overuse       ill
      3     100  converges   council
      4       1        pan  poignant
      5       5     nitwit contrived





##### Open question about recommend()
Note: where is the best place to put the sentiment_generation file?

In [63]:
from tqdm.auto import tqdm

In [64]:
class EFM(EFM):
    """EFM extended with helpers that return each user's top-ranked item.

    NOTE(review): the subclass shadows the imported ``EFM`` name, so re-running
    this cell derives from the previously defined subclass rather than from the
    library class. Restart the kernel (or rename the subclass) to avoid
    stacking subclasses.
    """

    def __init__(self, name="EFM",
                 num_explicit_factors=40, num_latent_factors=60, num_most_cared_aspects=15,
                 rating_scale=5.0, alpha=0.85,
                 lambda_x=1, lambda_y=1, lambda_u=0.01, lambda_h=0.01, lambda_v=0.01,
                 use_item_aspect_popularity=True, max_iter=100,
                 num_threads=0, trainable=True, verbose=False, init_params=None, seed=None):
        """Create the model, forwarding every hyperparameter to the parent EFM."""
        # Forward everything by keyword. Passing `self` positionally (as was
        # done before) bound it to `name` and dropped all other arguments, so
        # any non-default setting was silently ignored.
        super().__init__(
            name=name,
            num_explicit_factors=num_explicit_factors,
            num_latent_factors=num_latent_factors,
            num_most_cared_aspects=num_most_cared_aspects,
            rating_scale=rating_scale,
            alpha=alpha,
            lambda_x=lambda_x,
            lambda_y=lambda_y,
            lambda_u=lambda_u,
            lambda_h=lambda_h,
            lambda_v=lambda_v,
            use_item_aspect_popularity=use_item_aspect_popularity,
            max_iter=max_iter,
            num_threads=num_threads,
            trainable=trainable,
            verbose=verbose,
            init_params=init_params,
            seed=seed,
        )

    def recommend_to_one_user(self, user_idx):
        """
        Provide the top-1 recommendation for a single user
        user_idx: index of the user

        return: a list [user_idx, item_idx, score] for the top-ranked item
        """
        item_rank, item_scores = self.rank(user_idx)
        top_item = item_rank[0]
        # Cornac documents `item_scores` as ordered by item index rather than
        # by rank, so the top item's score is looked up through its index.
        # NOTE(review): confirm against the installed Cornac version.
        return [user_idx, top_item, item_scores[top_item]]

    def recommend_to_all_users(self, user_list=None):
        """
        Provide the top-1 recommendation for each requested user
        user_list: optional list of user indices; entries that are not valid
            training-set user indices are skipped. When None, all users of
            the training set are used.

        return: a DataFrame with columns ['user_id', 'item_id'], one row per
            user (the values are user/item indices)
        """
        known_users = range(self.train_set.num_users)
        if user_list is None:
            self.user_list = list(known_users)
        else:
            self.user_list = [idx for idx in user_list if idx in known_users]

        # One ranking call per user; the progress bar wraps the iteration.
        recommendations = [
            self.recommend_to_one_user(user_idx)
            for user_idx in tqdm(self.user_list, desc="Computing recommendations: ")
        ]

        # The score is computed per user but not part of the returned frame.
        recommendations = pd.DataFrame(recommendations, columns=['user_id', 'item_id', 'score'])
        return recommendations[['user_id', 'item_id']]

In [68]:
user_idx_list = [1, 2, 3, 4, 5]

# Train the extended EFM (default hyperparameters) on the training split
efm = EFM().fit(rs.train_set)

# Earlier, manual way of building the same table (kept for reference):
#recom_list_efm = [[idx, efm.rank(idx)[0][0]] for idx in user_idx_list]
#recom_df_efm = pd.DataFrame(recom_list_efm, columns=['user_id', 'item_id'])

# Only the first user of the list is requested here
first_user_only = user_idx_list[:1]
recom_df_efm = efm.recommend_to_all_users(user_list=first_user_only)

recom_df_efm

Computing recommendations: 100%|██████████| 1/1 [00:00<00:00, 421.75it/s]


Unnamed: 0,user_id,item_id
0,1,50
