In [88]:
import os
import sys
from pathlib import Path
import pandas as pd
import numpy as np
import ast
from sklearn.decomposition import TruncatedSVD
from sklearn.metrics.pairwise import cosine_similarity
from sklearn.preprocessing import OneHotEncoder,StandardScaler
from sklearn.neighbors import NearestNeighbors
from scipy.sparse import csr_matrix, hstack

project_dir = Path("C:/Users/adbou/source/repos/KFHXRelatedAi/")
os.chdir(project_dir)

from Configs.GeneralPaths import SOURCEDATA

In [89]:
user_transactions = pd.read_excel(Path(SOURCEDATA / "Transaction_User.xlsx"))
new_user_transaction = user_transactions.drop(columns=['TrxId'])

deals_data = pd.read_excel(Path(SOURCEDATA / "Rdepemtion_Cleaned_Deals.xlsx"))
deals_data = deals_data.drop(columns=['Unnamed: 0.1'])

deals_embeddings = pd.read_csv(Path(SOURCEDATA / "Deals_Embeddings_test.csv"))
deals_embeddings['ada_embedding'] = deals_embeddings['ada_embedding'].apply(ast.literal_eval)

new_user_transaction = new_user_transaction.merge(deals_data[['ContentId', 'Categories','Deal Type']], left_on='FK_ContentId', right_on='ContentId', how='left')
new_user_transaction = new_user_transaction.drop(columns=['ContentId'])

In [90]:
new_user_transaction["Categories"] = new_user_transaction["Categories"].apply(lambda x: "Food and Beverage" if x == "F&B" else x)

In [91]:
new_user_transaction.head()

Unnamed: 0,FK_BusinessUserId,PointsRedeemed,FK_ContentId,Categories,Deal Type
0,976480,1000,113923,Health and Medical,Discount
1,976480,10000,113853,Electronics and Appliances,Discount
2,976480,10000,113853,Electronics and Appliances,Discount
3,976921,10,113851,Restaurants and Food Services,Subscription
4,976480,50,113835,Ground Transportation,Other


In [92]:
#new_user_transaction['LogPointsRedeemed'] = np.log1p(new_user_transaction['PointsRedeemed'])

In [93]:
user_item_matrix = new_user_transaction.pivot_table(index='FK_BusinessUserId', columns='FK_ContentId', values='PointsRedeemed',aggfunc="count",fill_value=0)


In [94]:
user_item_matrix.head()

FK_ContentId,113816,113817,113819,113823,113824,113829,113830,113833,113834,113835,...,115217,115218,115221,115223,115225,115227,115229,115231,115244,115259
FK_BusinessUserId,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
976480,0,0,0,0,0,0,0,0,0,1,...,0,0,0,0,0,0,0,0,0,0
976481,0,0,0,0,0,1,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
976482,0,0,0,0,0,1,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
976484,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
976485,0,0,0,0,0,1,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [95]:
sparse_user_item = csr_matrix(user_item_matrix)


In [96]:
from implicit.als import AlternatingLeastSquares

model = AlternatingLeastSquares(factors=30, regularization=0.3, iterations=80)

In [97]:
alpha_val = 10
data_conf = (sparse_user_item * alpha_val).astype('double')
model.fit(data_conf)

100%|██████████| 80/80 [00:03<00:00, 22.22it/s]


In [98]:

# def recommend_items(user_id, user_item_matrix, model, deal_embeddings, n_similar_items=10):
#     user_index = list(user_item_matrix.index).index(user_id)
#     user_interactions = sparse_user_item[user_index]
    
    
#     interacted_items_indices = user_interactions.indices
    
    
#     item_factors = model.item_factors
    
    
#     filtered_embeddings = deal_embeddings[deal_embeddings['ContentId'].isin(user_item_matrix.columns)]
    
    
#     deal_embeddings_array = np.array(filtered_embeddings['ada_embedding'].tolist())
    
    
#     similarity_matrix_factors = cosine_similarity(item_factors)
    
    
#     similarity_matrix_embeddings = cosine_similarity(deal_embeddings_array)
    
    
#     min_shape = min(similarity_matrix_factors.shape[0], similarity_matrix_embeddings.shape[0])
#     similarity_matrix_factors = similarity_matrix_factors[:min_shape, :min_shape]
#     similarity_matrix_embeddings = similarity_matrix_embeddings[:min_shape, :min_shape]
    
    
#     combined_similarity_matrix = (similarity_matrix_factors + similarity_matrix_embeddings) / 2.0
    
#     unique_similar_items = set()
#     similar_items_with_scores = []
    
#     for item_index in interacted_items_indices:
#         similar_items = combined_similarity_matrix[item_index].argsort()[::-1][1:n_similar_items+1]
#         for similar_item in similar_items:
#             if similar_item not in unique_similar_items and similar_item not in interacted_items_indices:
#                 unique_similar_items.add(similar_item)
#                 similar_items_with_scores.append((similar_item, combined_similarity_matrix[item_index][similar_item]))
    
    
#     similar_items_with_scores = sorted(similar_items_with_scores, key=lambda x: x[1], reverse=True)
    
    
#     similar_item_indices = [item for item, score in similar_items_with_scores[:n_similar_items]]
    
   
#     similar_item_ids = [user_item_matrix.columns[item_id] for item_id in similar_item_indices]
#     similar_item_scores = [score for item, score in similar_items_with_scores[:n_similar_items]]
    
#     return similar_item_ids, similar_item_scores

In [99]:
# def recommend_items(user_id, user_item_matrix, model, deal_embeddings, n_similar_items=10, new_deal_boost=0.4, popular_deal_penalty=0.4):
#     user_index = list(user_item_matrix.index).index(user_id)
#     user_interactions = sparse_user_item[user_index]
    
    
#     interacted_items_indices = user_interactions.indices
    

#     item_factors = model.item_factors
    
    
#     filtered_embeddings = deal_embeddings[deal_embeddings['ContentId'].isin(user_item_matrix.columns)]
    
    
#     deal_embeddings_array = np.array(filtered_embeddings['ada_embedding'].tolist())
    
    
#     similarity_matrix_factors = cosine_similarity(item_factors)
    
    
#     similarity_matrix_embeddings = cosine_similarity(deal_embeddings_array)
    
    
#     min_shape = min(similarity_matrix_factors.shape[0], similarity_matrix_embeddings.shape[0])
#     similarity_matrix_factors = similarity_matrix_factors[:min_shape, :min_shape]
#     similarity_matrix_embeddings = similarity_matrix_embeddings[:min_shape, :min_shape]
    
#     combined_similarity_matrix = (similarity_matrix_factors + similarity_matrix_embeddings) / 2.0
    
#     unique_similar_items = {}
    
#     for item_index in interacted_items_indices:
#         similar_items = combined_similarity_matrix[item_index].argsort()[::-1][1:n_similar_items+1]
#         for similar_item in similar_items:
#             if similar_item not in interacted_items_indices:
#                 score = combined_similarity_matrix[item_index][similar_item]
#                 if similar_item in unique_similar_items:
#                     unique_similar_items[similar_item] = max(unique_similar_items[similar_item], score)
#                 else:
#                     unique_similar_items[similar_item] = score

#     all_deal_indices = set(range(combined_similarity_matrix.shape[0]))
#     non_redeemed_deals = all_deal_indices - set(interacted_items_indices)
#     for deal in non_redeemed_deals:
#         if deal in unique_similar_items:
#             unique_similar_items[deal] = max(unique_similar_items[deal], new_deal_boost)
#         else:
#             unique_similar_items[deal] = new_deal_boost
    
#     redeemed_counts = user_item_matrix.sum(axis=0)
#     max_redeemed_count = redeemed_counts.max()
#     for item in unique_similar_items.keys():
#         if redeemed_counts[user_item_matrix.columns[item]] > 0:
#             penalty = popular_deal_penalty * (redeemed_counts[user_item_matrix.columns[item]] / max_redeemed_count)
#             unique_similar_items[item] -= penalty
    
#     similar_items_with_scores = sorted(unique_similar_items.items(), key=lambda x: x[1], reverse=True)
    
#     similar_item_indices = [item for item, score in similar_items_with_scores[:n_similar_items]]
    
#     similar_item_ids = [user_item_matrix.columns[item_id] for item_id in similar_item_indices]
#     similar_item_scores = [score for item, score in similar_items_with_scores[:n_similar_items]]
    
#     return similar_item_ids, similar_item_scores


Recommendations using Implicit ALS with penalties 

In [130]:
def recommend_items(user_id, user_item_matrix, model, deal_embeddings, deal_data, n_similar_items=10, new_deal_boost=0.0, popular_deal_penalty=0.0, category_penalty=0.0):
    user_index = list(user_item_matrix.index).index(user_id)
    user_interactions = sparse_user_item[user_index]
    
    
    interacted_items_indices = user_interactions.indices

    
    item_factors = model.item_factors

    
    filtered_embeddings = deal_embeddings[deal_embeddings['ContentId'].isin(user_item_matrix.columns)]


    deal_embeddings_array = np.array(filtered_embeddings['ada_embedding'].tolist())

    
    similarity_matrix_factors = cosine_similarity(item_factors)

    
    similarity_matrix_embeddings = cosine_similarity(deal_embeddings_array)

    min_shape = min(similarity_matrix_factors.shape[0], similarity_matrix_embeddings.shape[0])
    similarity_matrix_factors = similarity_matrix_factors[:min_shape, :min_shape]
    similarity_matrix_embeddings = similarity_matrix_embeddings[:min_shape, :min_shape]

    combined_similarity_matrix = (similarity_matrix_factors + similarity_matrix_embeddings) / 2.0

    unique_similar_items = {}

    for item_index in interacted_items_indices:
        similar_items = combined_similarity_matrix[item_index].argsort()[::-1][1:n_similar_items+1]
        for similar_item in similar_items:
            if similar_item not in interacted_items_indices:
                score = combined_similarity_matrix[item_index][similar_item]
                if similar_item in unique_similar_items:
                    unique_similar_items[similar_item] = max(unique_similar_items[similar_item], score)
                else:
                    unique_similar_items[similar_item] = score

    all_deal_indices = set(range(combined_similarity_matrix.shape[0]))
    non_redeemed_deals = all_deal_indices - set(interacted_items_indices)
    for deal in non_redeemed_deals:
        if deal in unique_similar_items:
            unique_similar_items[deal] = max(unique_similar_items[deal], new_deal_boost)
        else:
            unique_similar_items[deal] = new_deal_boost

    redeemed_counts = user_item_matrix.sum(axis=0)
    max_redeemed_count = redeemed_counts.max()
    for item in unique_similar_items.keys():
        if redeemed_counts[user_item_matrix.columns[item]] > 0:
            penalty = popular_deal_penalty * (redeemed_counts[user_item_matrix.columns[item]] / max_redeemed_count)
            unique_similar_items[item] -= penalty

    # Category-based penalty (to consider the category that the user have redeemed)
    user_redeemed_categories = deal_data[deal_data['ContentId'].isin(user_item_matrix.columns[interacted_items_indices])]['Categories'].unique()
    for item in unique_similar_items.keys():
        item_category = deal_data.loc[deal_data['ContentId'] == user_item_matrix.columns[item], 'Categories'].values[0]
        if item_category not in user_redeemed_categories:
            unique_similar_items[item] -= category_penalty

    similar_items_with_scores = sorted(unique_similar_items.items(), key=lambda x: x[1], reverse=True)

    similar_item_indices = [item for item, score in similar_items_with_scores[:n_similar_items]]

    similar_item_ids = [user_item_matrix.columns[item_id] for item_id in similar_item_indices]
    similar_item_scores = [score for item, score in similar_items_with_scores[:n_similar_items]]

    return similar_item_ids, similar_item_scores

In [131]:
user_id = 1110688
recommended_items = recommend_items(user_id, user_item_matrix, model,deals_embeddings,deals_data)
print(f"Recommended items for user {user_id}: {recommended_items[0]}")
print(f"Scores items for user {user_id}: {recommended_items[1]}")

Recommended items for user 1110688: [113816, 115244, 115223, 115107, 113843, 113842, 115225, 115207, 115229, 115215]
Scores items for user 1110688: [0.6853907627746957, 0.49947686621453524, 0.4612428155423244, 0.382271235452077, 0.3564874754814718, 0.33799152621952805, 0.33423523546290607, 0.3315333628742438, 0.3292817916352564, 0.32527518644550957]


In [141]:
specific_content_id = 115215,
result = deals_data[deals_data['ContentId'] == specific_content_id]

result

Unnamed: 0.1,Unnamed: 0,ContentId,Title,FK_StatusId,Deal Type,Description,Location,Points,Categories
189,189,115215,$10 Google Play Gift Card,1,Gift Card,Get a $10 Google Play Gift Card to be used for...,,3500,Digital Goods-Applications


In [118]:
user_last_interactions = user_item_matrix.loc[user_id]
user_interacted_items = user_last_interactions[user_last_interactions > 0].index
user_deals = deals_data[deals_data['ContentId'].isin(user_interacted_items)]
user_deals

Unnamed: 0.1,Unnamed: 0,ContentId,Title,FK_StatusId,Deal Type,Description,Location,Points,Categories
36,36,113851,3-Months DeliverooPlus Subscription,1,Subscription,Get 3-months Deliveroo Plus subscription,https://www.instagram.com/deliveroo_kw/,10,Restaurants and Food Services
100,100,113915,Recharge your Deliveroo wallet with,1,Wallet Recharge,Recharge your Deliveroo wallet with 3KD,https://www.instagram.com/deliveroo_kw/,3000,Restaurants and Food Services
101,101,113916,Recharge your Deliveroo wallet with,1,Wallet Recharge,Recharge your Deliveroo wallet with 5KD,https://www.instagram.com/deliveroo_kw/,5000,Restaurants and Food Services
135,135,115013,10$ Gift Card from Amazon!,2,Gift Card,Get a 10$ Gift Card from Amazon,,3500,Entertainment
144,144,115099,Get wallet recharge at Oula Fuel App.,1,Wallet Recharge,Get 5KD wallet recharge at Oula Fuel App.,https://www.google.com/maps/search/%D9%85%D8%A...,5000,Automotive Services


In [119]:
user_trs = new_user_transaction[new_user_transaction["FK_BusinessUserId"] == user_id]
user_trs

Unnamed: 0,FK_BusinessUserId,PointsRedeemed,FK_ContentId,Categories,Deal Type
36488,1110688,3000,113915,Restaurants and Food Services,Wallet Recharge
36532,1110688,10,113851,Restaurants and Food Services,Subscription
36533,1110688,5000,113916,Restaurants and Food Services,Wallet Recharge
36609,1110688,3000,113915,Restaurants and Food Services,Wallet Recharge
36720,1110688,5000,113916,Restaurants and Food Services,Wallet Recharge
36748,1110688,5000,115099,Automotive Services,Wallet Recharge
36783,1110688,3000,113915,Restaurants and Food Services,Wallet Recharge
36880,1110688,5000,113916,Restaurants and Food Services,Wallet Recharge
37016,1110688,3500,115013,Entertainment,Gift Card
37017,1110688,3500,115013,Entertainment,Gift Card


In [105]:
# def recommend_items(user_id, user_item_matrix, model, n_similar_items=10):
#     user_index = list(user_item_matrix.index).index(user_id)
#     user_interactions = sparse_user_item[user_index]
    
#     interacted_items_indices = user_interactions.indices
#     item_factors = model.item_factors
    
#     similarity_matrix = cosine_similarity(item_factors)
    
#     unique_similar_items = set()
#     similar_items_with_scores = []
    
#     for item_index in interacted_items_indices:
#         similar_items = similarity_matrix[item_index].argsort()[::-1][1:n_similar_items+1]
#         for similar_item in similar_items:
#             if similar_item not in unique_similar_items and similar_item not in interacted_items_indices:
#                 unique_similar_items.add(similar_item)
#                 similar_items_with_scores.append((similar_item, similarity_matrix[item_index][similar_item]))
    
#     similar_items_with_scores = sorted(similar_items_with_scores, key=lambda x: x[1], reverse=True)
    
#     similar_item_indices = [item for item, score in similar_items_with_scores[:n_similar_items]]
    
#     similar_item_ids = [user_item_matrix.columns[item_id] for item_id in similar_item_indices]
#     similar_item_scores = [score for item, score in similar_items_with_scores[:n_similar_items]]
   
#     return similar_item_ids, similar_item_scores