In [1630]:
import ast
import os
import sys
from pathlib import Path

import numpy as np
import pandas as pd
from implicit.als import AlternatingLeastSquares
from scipy.sparse import csr_matrix, hstack
from sklearn.decomposition import TruncatedSVD
from sklearn.metrics.pairwise import cosine_similarity
from sklearn.neighbors import NearestNeighbors
from sklearn.preprocessing import MinMaxScaler, OneHotEncoder, StandardScaler

# Project root. Defaults to the original developer checkout; set the
# KFHX_PROJECT_DIR environment variable to run the notebook from another machine.
project_dir = Path(os.environ.get("KFHX_PROJECT_DIR", "C:/Users/adbou/source/repos/KFHXRelatedAi/"))
os.chdir(project_dir)
# Make the project packages importable even when the working directory is not
# implicitly on sys.path (e.g. when the notebook is executed as a converted script).
if str(project_dir) not in sys.path:
    sys.path.insert(0, str(project_dir))

from Configs.GeneralPaths import SOURCEDATA

In [1631]:
# Raw redemption transactions (kept untouched for later inspection).
user_transactions = pd.read_excel(Path(SOURCEDATA / "Transaction_User.xlsx"))

# Deal catalogue, minus the 'Unnamed: 0' column stored in the workbook.
deals_data = pd.read_excel(Path(SOURCEDATA / "Cleaned_Deals.xlsx")).drop(columns=['Unnamed: 0'])

# The embedding column is read as text; literal_eval parses each cell back
# into a Python object (presumably a list of floats - confirm against the CSV).
deals_embeddings = pd.read_csv(Path(SOURCEDATA / "Deals_Embeddings.csv"))
deals_embeddings['ada_embedding'] = deals_embeddings['ada_embedding'].map(ast.literal_eval)

# Working transaction table: drop TrxId, left-join each transaction to its
# deal's category and type, then discard the duplicated join key.
new_user_transaction = (
    user_transactions
    .drop(columns=['TrxId'])
    .merge(
        deals_data[['ContentId', 'Categories', 'Deal Type']],
        left_on='FK_ContentId',
        right_on='ContentId',
        how='left',
    )
    .drop(columns=['ContentId'])
)
new_user_transaction

Unnamed: 0,FK_BusinessUserId,PointsRedeemed,FK_ContentId,Categories,Deal Type
0,976480,1000,113923,Health & Beauty,Discount
1,976480,10000,113853,Retail,Discount
2,976480,10000,113853,Retail,Discount
3,976921,10,113851,F&B,Subscription
4,976480,50,113835,Travel,Voucher
...,...,...,...,...,...
37946,1048474,5000,115099,Travel,Wallet Recharge
37947,1056928,6000,115217,Entertainment,Gift Card
37948,1075249,3500,115231,Entertainment,Gift Card
37949,1110587,6000,115217,Entertainment,Gift Card


In [1632]:
# log1p-transformed copy of the redeemed points, stored next to the raw amounts.
new_user_transaction = new_user_transaction.assign(
    LogPointsRedeemed=np.log1p(new_user_transaction['PointsRedeemed'])
)


In [1633]:
# Users x deals matrix of log-points. Repeated redemptions of the same deal by
# the same user are averaged (pivot_table's default aggregation, spelled out
# here); user/deal pairs with no transaction are filled with 0.
user_item_matrix = new_user_transaction.pivot_table(
    values='LogPointsRedeemed',
    index='FK_BusinessUserId',
    columns='FK_ContentId',
    aggfunc='mean',
    fill_value=0,
)


In [1634]:
# Users x deals matrix of redemption counts: the number of non-null
# PointsRedeemed rows per (user, deal) pair, 0 where the pair never occurs.
frequency_user_item_matrix = new_user_transaction.pivot_table(
    values="PointsRedeemed",
    index="FK_BusinessUserId",
    columns="FK_ContentId",
    aggfunc="count",
    fill_value=0,
)

In [1635]:
# The two pivots are blended element-wise further down, so their user and
# deal axes must line up exactly before the labels are stripped by the scaler.
assert user_item_matrix.index.equals(frequency_user_item_matrix.index), "user axes differ"
assert user_item_matrix.columns.equals(frequency_user_item_matrix.columns), "deal axes differ"

# MinMaxScaler rescales each column (i.e. each deal) to [0, 1] independently
# and returns plain NumPy arrays.
scaler = MinMaxScaler()
normalized_points_matrix = scaler.fit_transform(user_item_matrix)
# The same scaler object is refit here, so afterwards `scaler` only holds the
# statistics of the frequency matrix.
normalized_frequency_matrix = scaler.fit_transform(frequency_user_item_matrix)


In [1636]:
# Inspect the users x deals log-points matrix (0 is the fill value for
# user/deal pairs without any transaction).
user_item_matrix

FK_ContentId,113816,113817,113819,113823,113824,113829,113830,113833,113834,113835,...,115217,115218,115221,115223,115225,115227,115229,115231,115244,115259
FK_BusinessUserId,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
976480,0.0,0.0,0.0,0.0,0.0,0.000000,0.0,0.0,0.0,3.931826,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
976481,0.0,0.0,0.0,0.0,0.0,4.615121,0.0,0.0,0.0,0.000000,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
976482,0.0,0.0,0.0,0.0,0.0,4.615121,0.0,0.0,0.0,0.000000,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
976484,0.0,0.0,0.0,0.0,0.0,0.000000,0.0,0.0,0.0,0.000000,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
976485,0.0,0.0,0.0,0.0,0.0,4.615121,0.0,0.0,0.0,0.000000,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1118759,0.0,0.0,0.0,0.0,0.0,0.000000,0.0,0.0,0.0,0.000000,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1119351,0.0,0.0,0.0,0.0,0.0,0.000000,0.0,0.0,0.0,0.000000,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1120134,0.0,0.0,0.0,0.0,0.0,0.000000,0.0,0.0,0.0,0.000000,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1120159,0.0,0.0,0.0,0.0,0.0,0.000000,0.0,0.0,0.0,0.000000,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [1637]:
# Relative weight of the normalised log-points vs. the normalised redemption
# count in the blended interaction strength. Equal weights reproduce the
# original 0.5 / 0.5 mix; tune them here rather than editing the formula.
POINTS_WEIGHT = 0.5
FREQUENCY_WEIGHT = 0.5
combined_matrix = POINTS_WEIGHT * normalized_points_matrix + FREQUENCY_WEIGHT * normalized_frequency_matrix


In [1638]:
# Inspect the blended matrix; it is a plain NumPy array, so the user/deal
# labels of the pivot tables are no longer attached.
combined_matrix

array([[0., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.],
       ...,
       [0., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.]])

In [1639]:
# scaler = StandardScaler()
# new_user_transaction['PointsRedeemed'] = scaler.fit_transform(new_user_transaction[['PointsRedeemed']])

In [1640]:
# CSR version of the blended matrix. recommend_items indexes its rows and
# columns by position in user_item_matrix.index / .columns, so the ordering
# must stay identical to user_item_matrix.
sparse_user_item = csr_matrix(combined_matrix)


In [1641]:
# from implicit.evaluation import precision_at_k
# import itertools
# from sklearn.model_selection import train_test_split
# from implicit.als import AlternatingLeastSquares

# # Define hyperparameter grid
# factors_range = [10, 14, 20]
# regularization_range = [0.1, 0.5, 1.0]
# iterations_range = [30, 50, 70]

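# # Split data into training and validation sets
# # NOTE(review): the train and test pivots below are built independently, so their
# # user/item axes are not aligned with each other; reindex both to a shared
# # index/columns before calling precision_at_k if this search is re-enabled.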
# train_data, test_data = train_test_split(new_user_transaction, test_size=0.2, random_state=42)
# train_user_item_matrix = train_data.pivot_table(index='FK_BusinessUserId', columns='FK_ContentId', values='PointsRedeemed', fill_value=0)
# sparse_train_user_item = csr_matrix(train_user_item_matrix.values)

# def evaluate_model(model, train_user_item_matrix, test_data, k=10):
#     test_user_item_matrix = test_data.pivot_table(index='FK_BusinessUserId', columns='FK_ContentId', values='PointsRedeemed', fill_value=0)
#     sparse_test_user_item = csr_matrix(test_user_item_matrix.values)
    
#     # Evaluate precision@k
#     precision = precision_at_k(model, sparse_train_user_item, sparse_test_user_item, K=k)
#     return precision

# # Grid search
# best_precision = 0
# best_params = {}
# for factors, regularization, iterations in itertools.product(factors_range, regularization_range, iterations_range):
#     model = AlternatingLeastSquares(factors=factors, regularization=regularization, iterations=iterations)
#     alpha_val = 40
#     data_conf = (sparse_train_user_item * alpha_val).astype('double')
#     model.fit(data_conf)
    
#     precision = evaluate_model(model, train_user_item_matrix, test_data, k=10)
#     print(f"factors: {factors}, regularization: {regularization}, iterations: {iterations}, precision@10: {precision}")
    
#     if precision > best_precision:
#         best_precision = precision
#         best_params = {'factors': factors, 'regularization': regularization, 'iterations': iterations}

# print(f"Best params: {best_params}, Best precision@10: {best_precision}")


In [1642]:
# ALS factorisation of the user x deal interaction matrix.
# random_state pins the random factor initialisation so that a fresh
# Restart & Run All yields the same recommendations.
model = AlternatingLeastSquares(factors=10, regularization=0.5, iterations=30, random_state=42)

In [1643]:
# Scale the interaction strengths by alpha_val before fitting (presumably the
# usual implicit-feedback confidence scaling - confirm the intended value).
alpha_val = 40
data_conf = (alpha_val * sparse_user_item).astype(np.float64)
model.fit(data_conf)

100%|██████████| 30/30 [00:01<00:00, 27.92it/s]


In [1644]:
def recommend_items(user_id, user_item_matrix, model, n_recommendations=10, user_items=None, verbose=True):
    """Return the model's top recommendations for one user as content ids.

    Parameters
    ----------
    user_id
        Label of the user in ``user_item_matrix.index``.
    user_item_matrix : pandas.DataFrame
        Users x deals frame; only its index (user labels) and columns
        (content ids) are used, to translate between labels and positions.
    model
        Fitted recommender exposing ``recommend(userid, user_items, N=...)``
        and returning ``(item_indices, scores)``.
    n_recommendations : int, default 10
        Number of items requested from the model.
    user_items : scipy.sparse matrix, optional
        Interaction matrix whose rows/columns are ordered like
        ``user_item_matrix``. Defaults to the notebook-level
        ``sparse_user_item`` for backward compatibility.
    verbose : bool, default True
        Print the user's interaction row before recommending.

    Returns
    -------
    (list, array-like)
        Recommended content ids and the scores reported by the model.

    Raises
    ------
    ValueError
        If ``user_id`` is not present in ``user_item_matrix.index``.
    """
    if user_items is None:
        # Fall back to the matrix built earlier in the notebook.
        user_items = sparse_user_item

    # Vectorised positional lookup; the first match is used, as with list.index().
    positions = np.flatnonzero(user_item_matrix.index == user_id)
    if positions.size == 0:
        raise ValueError(f"{user_id} is not in the user-item matrix index")
    user_index = int(positions[0])

    user_interactions = user_items[user_index]
    if verbose:
        print(f"User Interactions for user {user_id}: {user_interactions}")

    recommended_item_indices, recommended_scores = model.recommend(
        user_index, user_interactions, N=n_recommendations
    )
    # Translate the model's column positions back into content ids.
    recommended_item_ids = user_item_matrix.columns[recommended_item_indices].tolist()

    return recommended_item_ids, recommended_scores

In [1645]:
# Example user for the walkthrough; recommended_items is the
# (content ids, scores) pair returned by recommend_items.
user_id = 994853
recommended_items = recommend_items(
    user_id=user_id,
    user_item_matrix=user_item_matrix,
    model=model,
)
print(f"Recommended items for user {user_id}: {recommended_items[0]}")
print(f"Scores items for user {user_id}: {recommended_items[1]}")

User Interactions for user 994853: <Compressed Sparse Row sparse matrix of dtype 'float64'
	with 1 stored elements and shape (1, 69)>
  Coords	Values
  (0, 36)	1.0
Recommended items for user 994853: [113829, 113851, 115094, 113819, 115211, 115207, 113837, 115095, 113823, 113835]
Scores items for user 994853: [0.01744395 0.00971442 0.00800538 0.00519081 0.00467844 0.0046722
 0.00458154 0.00455483 0.00430739 0.0042228 ]


In [1646]:
def calculate_cosine_similarity(embedding, embeddings):
    """Cosine similarity between one vector and each row of ``embeddings``.

    ``embedding`` is wrapped into a single-row batch for ``cosine_similarity``;
    the only row of the resulting matrix is returned, i.e. one similarity per
    row of ``embeddings``.
    """
    single_row_batch = [embedding]
    return cosine_similarity(single_row_batch, embeddings)[0]

In [1647]:
def rerank_recommendations(user_id, recommended_item_ids, recommended_scores, deals_embeddings, user_item_matrix,
                           score_weight=1.0, similarity_weight=1.0, verbose=True):
    """Re-rank recommendations by adding content similarity to the model scores.

    A user profile is the mean embedding of the deals the user has a positive
    entry for in ``user_item_matrix``. Each recommended deal's final score is
    ``score_weight * model_score + similarity_weight * cosine(profile, deal)``.

    Note: in the run recorded in this notebook the model scores are roughly
    0.004-0.017 while the cosine similarities are roughly 0.24-0.55, so with
    the default equal weights the similarity term dominates the ordering.
    Raise ``score_weight`` to let the model scores matter more.

    Parameters
    ----------
    user_id
        Label of the user in ``user_item_matrix.index``.
    recommended_item_ids : sequence
        Content ids to re-rank.
    recommended_scores : array-like
        Model scores, aligned with ``recommended_item_ids``.
    deals_embeddings : pandas.DataFrame
        Must provide ``ContentId`` and ``ada_embedding`` columns.
    user_item_matrix : pandas.DataFrame
        Users x deals frame; positive entries mark deals the user interacted with.
    score_weight, similarity_weight : float, default 1.0
        Multipliers for the two score components (defaults keep the original sum).
    verbose : bool, default True
        Print the computed user profile embedding.

    Returns
    -------
    (list, list)
        Content ids and combined scores, both sorted by descending score.

    Deals without an embedding contribute a similarity of 0 instead of raising,
    and a user with no usable interacted deals is ranked on model scores alone.
    """
    recommended_item_ids = list(recommended_item_ids)
    recommended_scores = np.asarray(recommended_scores, dtype=float)

    # Build the id -> embedding lookup once instead of scanning the frame per
    # item; the first row wins for a duplicated ContentId, like `.values[0]` did.
    embedding_by_id = {}
    for content_id, embedding in zip(deals_embeddings['ContentId'], deals_embeddings['ada_embedding']):
        embedding_by_id.setdefault(content_id, embedding)

    user_row = user_item_matrix.loc[user_id]
    user_interacted_items = user_row[user_row > 0].index.tolist()
    profile_vectors = [embedding_by_id[item] for item in user_interacted_items if item in embedding_by_id]

    # Positions of the recommended deals that can actually be compared.
    comparable = [pos for pos, item in enumerate(recommended_item_ids) if item in embedding_by_id]

    cosine_similarities = np.zeros(len(recommended_item_ids), dtype=float)
    if profile_vectors and comparable:
        user_profile_embedding = np.mean(np.array(profile_vectors), axis=0)
        if verbose:
            print(f"user_profile_embedding for user {user_id}: {user_profile_embedding}")
        recommended_embeddings = np.array([embedding_by_id[recommended_item_ids[pos]] for pos in comparable])
        cosine_similarities[comparable] = calculate_cosine_similarity(user_profile_embedding, recommended_embeddings)

    combined_scores = score_weight * recommended_scores + similarity_weight * cosine_similarities

    reranked_indices = np.argsort(combined_scores)[::-1]
    reranked_recommendations = [recommended_item_ids[i] for i in reranked_indices]
    reranked_scores = [combined_scores[i] for i in reranked_indices]

    return reranked_recommendations, reranked_scores

In [1648]:
# Re-rank the ALS output for the example user using the deal embeddings.
reranked_items, reranked_scores = rerank_recommendations(
    user_id=user_id,
    recommended_item_ids=recommended_items[0],
    recommended_scores=recommended_items[1],
    deals_embeddings=deals_embeddings,
    user_item_matrix=user_item_matrix,
)
print(f"Recommended items for user {user_id}: {reranked_items}")
print(f"Recommendation scores: {reranked_scores}")

user_profile_embedding for user 994853: [-0.00369272 -0.03283423 -0.03024087 ... -0.01188437 -0.02081456
 -0.00286538]
Recommended items for user 994853: [115211, 113823, 113819, 113835, 113829, 113837, 115094, 113851, 115207, 115095]
Recommendation scores: [0.5526092965598131, 0.5129111980211456, 0.47580579574048754, 0.45401790219006155, 0.4209301187343092, 0.3644506892766111, 0.3142203415717267, 0.3022400279471126, 0.2934740443842204, 0.24329796742813867]


In [1649]:
# Catalogue row of one of the recommended deals, for a manual sanity check.
specific_content_id = 113837
result = deals_data.loc[deals_data['ContentId'].eq(specific_content_id)]

result

Unnamed: 0,ContentId,Title,FK_StatusId,Deal Type,Description,Location,Points,Categories
22,113837,Off from Cavaraty,1,Voucher,Get 10KD discount from Cavaraty,https://reach.link/cavaraty/,10000,Electronics


In [1650]:
# Catalogue rows of the deals the example user has a positive matrix entry for.
user_last_interactions = user_item_matrix.loc[user_id]
user_interacted_items = user_last_interactions.loc[user_last_interactions.gt(0)].index
user_deals = deals_data.loc[deals_data['ContentId'].isin(user_interacted_items)]
user_deals

Unnamed: 0,ContentId,Title,FK_StatusId,Deal Type,Description,Location,Points,Categories
144,115099,Get wallet recharge at Oula Fuel App.,1,Wallet Recharge,Get 5KD wallet recharge at Oula Fuel App.,https://www.google.com/maps/search/%D9%85%D8%A...,5000,Travel


In [1651]:
# Every transaction row recorded for the example user.
user_trs = new_user_transaction.loc[new_user_transaction["FK_BusinessUserId"].eq(user_id)]
user_trs

Unnamed: 0,FK_BusinessUserId,PointsRedeemed,FK_ContentId,Categories,Deal Type,LogPointsRedeemed
724,994853,5000,115099,Travel,Wallet Recharge,8.517393
725,994853,5000,115099,Travel,Wallet Recharge,8.517393
735,994853,5000,115099,Travel,Wallet Recharge,8.517393
737,994853,5000,115099,Travel,Wallet Recharge,8.517393
837,994853,5000,115099,Travel,Wallet Recharge,8.517393
839,994853,5000,115099,Travel,Wallet Recharge,8.517393
987,994853,5000,115099,Travel,Wallet Recharge,8.517393
3275,994853,5000,115099,Travel,Wallet Recharge,8.517393
4166,994853,5000,115099,Travel,Wallet Recharge,8.517393
4167,994853,5000,115099,Travel,Wallet Recharge,8.517393


In [1652]:
# Scratch cell: a bare content id (it appears in the ALS recommendation list
# printed earlier). Evaluating it only echoes the number; no state changes.
115094

115094