In [1]:
import pandas as pd
from customrec_engine import engines_list, engines, RecommendationAbstract
import pprint
from sklearn.model_selection import train_test_split
from typing import List, Tuple
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, confusion_matrix, roc_auc_score
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score

product_datas = [{
    "data_context": "books",
    "product_filepath": "data/products_books_v1_10_10.csv",
    "transactions_filepath": "data/transactions_books_v1_10_10.csv",
    "features": ["product_title", "product_image", "product_soup", "product_images"],
    "version": "1.0",
    "unique_name": "_books_v1_10_10",
}]


In [4]:


results = [] # {data_context: [{strategy_name, accuracy, precision, recall, f1_score, are_roc}]}

for product_data in product_datas:
    pprint.pprint(product_data)
    print("looking at", "../" + product_data["product_filepath"])

    productdf =  pd.read_csv("../" + product_data["product_filepath"])
    transactiondf = pd.read_csv("../" + product_data["transactions_filepath"])
    
    training_df_arr = []
    
    
    # join transactions by same user_id. into a dict of user_id: [transactions]
    user_transactions = {}
    for row in transactiondf[:3000].iterrows():
    # for row in transactiondf.iterrows():
        training_df_arr.append(row[1])
        user_id = row[1]["user_id"]
        if user_id not in user_transactions:
            user_transactions[user_id] = []
        user_transactions[user_id].append(row[1]['product_id'])
    
    # create df from transactionsdf
    transactiondf = pd.DataFrame(training_df_arr)
    
    past_transactions, test_transactions = train_test_split(list(user_transactions.values()), test_size=.2, random_state=42)
    
    # for each engine rec. Train, test:
    for rec_engine_class in engines_list:
        print(rec_engine_class.strategy_name)
        rec_engine: RecommendationAbstract  = rec_engine_class(products=productdf, product_data=product_data, transactions = transactiondf)
        rec_engine.train(auto_save=False)
        hits = []
        true_values = []  # Actual values
        predicted_values = []  # Predicted values
        
        for user_transactions in test_transactions:
            if len(user_transactions) < 2:
                continue
            past_transactions, pred_transactions = train_test_split(user_transactions, test_size=.25, random_state=42)
            recs: List[Tuple[dict, float]] = rec_engine.recommend_from_past(past_transactions)
            recommendation_ids = [rec[0]['product_id'] for rec in recs]
            hit = 0
            for rec in recommendation_ids:
                if rec in pred_transactions:
                    hit = 1
                    break
            hits.append(hit)
            true_values.append(1)  # Assuming 1 represents a hit
            predicted_values.append(hit)
        
        accuracy = accuracy_score(true_values, predicted_values)
        precision = precision_score(true_values, predicted_values)
        recall = recall_score(true_values, predicted_values)
        f1 = f1_score(true_values, predicted_values)
        
        mae = mean_absolute_error(true_values, predicted_values)
        mse = mean_squared_error(true_values, predicted_values)
        r2 = r2_score(true_values, predicted_values)

        results.append({
            "recommender_model": rec_engine_class.strategy_name,
            "hits": sum(hits),
            "out of": len(hits),
            "data_context": product_data["data_context"],
            "accuracy": accuracy,
            "precision": precision,
            "recall": recall,
            "f1": f1,
            "mse": mse,
            "r^2": r2
        })

        
df_results = pd.DataFrame(results)
df_results.head()

{'data_context': 'books',
 'features': ['product_title',
              'product_image',
              'product_soup',
              'product_images'],
 'product_filepath': 'data/products_books_v1_10_10.csv',
 'transactions_filepath': 'data/transactions_books_v1_10_10.csv',
 'unique_name': '_books_v1_10_10',
 'version': '1.0'}
looking at ../data/products_books_v1_10_10.csv
WordVec


  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


TitleWordVec
KNN Basic
Estimating biases using als...
Computing the pearson_baseline similarity matrix...
Done computing similarity matrix.
KNN With Means
Estimating biases using als...
Computing the pearson_baseline similarity matrix...
Done computing similarity matrix.
KNN With ZScore
Estimating biases using als...
Computing the pearson_baseline similarity matrix...
Done computing similarity matrix.
KNN With Means
Estimating biases using als...
Computing the pearson_baseline similarity matrix...
Done computing similarity matrix.
Matrix Basic
Computing the pearson similarity matrix...
Done computing similarity matrix.
SVD Factorization
Computing the pearson similarity matrix...
Done computing similarity matrix.
SVD PP Matrix Factorization
Computing the pearson similarity matrix...
Done computing similarity matrix.
NMF Matrix Factorization
Computing the pearson similarity matrix...
Done computing similarity matrix.
Slope One Recommender
Computing the pearson similarity matrix...
Done c

Unnamed: 0,recommender_model,hits,out of,data_context,accuracy,precision,recall,f1,mse,r^2
0,WordVec,0,14,books,0.0,0.0,0.0,0.0,1.0,0.0
1,TitleWordVec,2,14,books,0.142857,1.0,0.142857,0.25,0.857143,0.0
2,KNN Basic,2,14,books,0.142857,1.0,0.142857,0.25,0.857143,0.0
3,KNN With Means,2,14,books,0.142857,1.0,0.142857,0.25,0.857143,0.0
4,KNN With ZScore,2,14,books,0.142857,1.0,0.142857,0.25,0.857143,0.0
