In [3]:
import pandas as pd
from customrec_engine import engines_list, engines, RecommendationAbstract, PRODUCT_DATAS
import pprint
from sklearn.model_selection import train_test_split
from typing import List, Tuple
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, confusion_matrix, roc_auc_score
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score

product_datas = PRODUCT_DATAS


In [18]:


results = [] 
for product_data in product_datas[2:]:
    pprint.pprint(product_data)
    # print("looking at", "../" + product_data["product_filepath"])

    productdf =  pd.read_csv("../" + product_data["product_filepath"])
    transactiondf = pd.read_csv("../" + product_data["transactions_filepath"])
    
    training_df_arr = []
    
    
    # join transactions by same user_id. into a dict of user_id: [transactions]
    user_transactions = {}
    for row in transactiondf.iterrows():
    # for row in transactiondf.iterrows():
        training_df_arr.append(row[1])
        user_id = row[1]["user_id"]
        if user_id not in user_transactions:
            user_transactions[user_id] = []
        user_transactions[user_id].append(row[1]['product_id'])
    
    # create df from transactionsdf
    transactiondf = pd.DataFrame(training_df_arr)
    
    past_transactions, test_transactions = train_test_split(list(user_transactions.values()), test_size=.2, random_state=42)
    
    # for each engine rec. Train, test:
    for rec_engine_class in engines_list:
        print("=========", rec_engine_class.strategy_name, "=========")
        rec_engine: RecommendationAbstract  = rec_engine_class(products=productdf, product_data=product_data, transactions = transactiondf)
        rec_engine.train(auto_save=False)
        hits = []
        true_values = []  # Actual values
        predicted_values = []  # Predicted values
        failures = 0
        
        for user_transactions in test_transactions:
            try:
                if len(user_transactions) < 2:
                    failures += 1
                    # print("skipping user with less than 2 transactions")
                    continue
                
                past_transactions, pred_transactions = train_test_split(user_transactions, test_size=.25, random_state=42)
                recs: List[Tuple[dict, float]] = rec_engine.recommend_from_past(past_transactions)
                if len(recs) == 0:
                    failures += 1
                    print("skipping user with no recommendations")
                    continue
                
                recommendation_ids = [rec[0]['product_id'] for rec in recs]
                hit = 0
                for rec in recommendation_ids:
                    if rec in pred_transactions:
                        hit = 1
                        break
                hits.append(hit)
                true_values.append(1)  # Assuming 1 represents a hit
                predicted_values.append(hit)
            except Exception as e:
                failures += 1
                print(e)
                        
        accuracy = accuracy_score(true_values, predicted_values)
        precision = precision_score(true_values, predicted_values)
        recall = recall_score(true_values, predicted_values)
        # assert(len(true_values) == len(predicted_values))
        
        results.append({
            "recommender_model": rec_engine_class.strategy_name,
            "unique_name": product_data['unique_name'],
            "hits": sum(hits),
            "out of": len(hits),
            "data_context": product_data["data_context"],
            "accuracy": accuracy,
            "precision": precision,
            "recall": recall,
            "failures": failures,
            "count_unique_users_tested": len(test_transactions),
            "count_unique_users_train": len(training_df_arr),
        })
        
        try:
            
            f1 = f1_score(true_values, predicted_values)
            
            mae = mean_absolute_error(true_values, predicted_values)
            mse = mean_squared_error(true_values, predicted_values)
            r2 = r2_score(true_values, predicted_values)
            results["f1"] = f1
            results["mae"] = mae
            results["mse"] = mse
            results["r2"] = r2
        except Exception as e:
            print(e)
            pass
df_results = pd.DataFrame(results)
# store results
df_results.to_csv("results.csv")

{'data_context': 'games',
 'product_count': 2274,
 'product_features': ['id',
                      'product_title',
                      'product_price',
                      'product_soup',
                      'product_tags',
                      'count'],
 'product_filepath': 'data/products_games_v2_t6_p12.csv',
 'product_unique_count': 2274,
 'transaction_count': 1698830,
 'transaction_features': ['id', 'user_id', 'product_id', 'rate'],
 'transactions_filepath': 'data/transactions_games_v2_t6_p12.csv',
 'unique_name': '_games_v2_t6_p12',
 'user_count': 149434,
 'user_features': ['id', 'details_soup'],
 'user_filepath': 'data/users_games_v2_t6_p12.csv',
 'user_unique_count': 149434,
 'version': '2'}


In [None]:
df_results.head(40)

Unnamed: 0,recommender_model,unique_name,hits,out of,data_context,accuracy,precision,recall,failures,count_unique_users_tested,count_unique_users_train
0,WordVec,_games_v2_t6_p12,2,7,games,0.285714,1.0,0.285714,585,592,3000
1,TitleWordVec,_games_v2_t6_p12,0,0,games,,0.0,0.0,592,592,3000
2,KNN Basic,_games_v2_t6_p12,2,7,games,0.285714,1.0,0.285714,585,592,3000
3,KNN With Means,_games_v2_t6_p12,2,7,games,0.285714,1.0,0.285714,585,592,3000
4,KNN With ZScore,_games_v2_t6_p12,2,7,games,0.285714,1.0,0.285714,585,592,3000
5,KNN With Means,_games_v2_t6_p12,2,7,games,0.285714,1.0,0.285714,585,592,3000
6,Matrix Basic,_games_v2_t6_p12,1,7,games,0.142857,1.0,0.142857,585,592,3000
7,SVD Factorization,_games_v2_t6_p12,2,7,games,0.285714,1.0,0.285714,585,592,3000
8,SVD PP Matrix Factorization,_games_v2_t6_p12,2,7,games,0.285714,1.0,0.285714,585,592,3000
9,NMF Matrix Factorization,_games_v2_t6_p12,0,7,games,0.0,0.0,0.0,585,592,3000


Unnamed: 0,product_id,product_title,product_image,product_price,product_soup,product_tags
0,250460,Bridge Constructor,,19.99,Bridge Constructor Released on 2013-10-16 Wind...,"Windows, Mac, Linux, Steam Deck,"
1,321290,Dandelion - Wishes brought to you -,,29.99,Dandelion - Wishes brought to you - Released o...,"Windows, Mac, Steam Deck,"
2,367670,Controller Companion,,2.99,Controller Companion Released on 2015-05-04 Wi...,"Windows, Steam Deck,"
3,371970,Barony,,0.0,Barony Released on 2015-06-23 Windows Mac Linu...,"Windows, Mac, Linux, Steam Deck,"
4,35000,Mini Ninjas,,9.99,Mini Ninjas Released on 2009-09-08 Windows Ste...,"Windows, Steam Deck,"
