In [23]:
import os
import sys
from pathlib import Path
import pandas as pd
import numpy as np
import ast
from sklearn.decomposition import TruncatedSVD
from sklearn.metrics.pairwise import cosine_similarity
from sklearn.preprocessing import OneHotEncoder,StandardScaler
from sklearn.neighbors import NearestNeighbors
from sklearn.cluster import KMeans
from scipy.sparse import csr_matrix, hstack


project_dir = Path("C:/Users/adbou/source/repos/KFHXRelatedAi/")
os.chdir(project_dir)

from Configs.GeneralPaths import SOURCEDATA

In [24]:
user_transactions = pd.read_excel(Path(SOURCEDATA / "Transaction_User.xlsx"))
new_user_transaction = user_transactions.drop(columns=['TrxId'])

deals_data = pd.read_excel(Path(SOURCEDATA / "Cleaned_Deals.xlsx"))
deals_data = deals_data.drop(columns=['Unnamed: 0'])

new_user_transaction = new_user_transaction.merge(deals_data[['ContentId', 'Categories' , 'Deal Type']], left_on='FK_ContentId', right_on='ContentId', how='left')
new_user_transaction = new_user_transaction.drop(columns=['ContentId'])


In [25]:
new_user_transaction

Unnamed: 0,FK_BusinessUserId,PointsRedeemed,FK_ContentId,Categories,Deal Type
0,976480,1000,113923,Health & Beauty,Discount
1,976480,10000,113853,Retail,Discount
2,976480,10000,113853,Retail,Discount
3,976921,10,113851,F&B,Subscription
4,976480,50,113835,Travel,Voucher
...,...,...,...,...,...
37946,1048474,5000,115099,Travel,Wallet Recharge
37947,1056928,6000,115217,Entertainment,Gift Card
37948,1075249,3500,115231,Entertainment,Gift Card
37949,1110587,6000,115217,Entertainment,Gift Card


In [26]:
user_item_matrix = new_user_transaction.pivot_table(index='FK_BusinessUserId', columns='FK_ContentId', values='PointsRedeemed', fill_value=0)


In [27]:
user_item_matrix

FK_ContentId,113816,113817,113819,113823,113824,113829,113830,113833,113834,113835,...,115217,115218,115221,115223,115225,115227,115229,115231,115244,115259
FK_BusinessUserId,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
976480,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,50.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
976481,0.0,0.0,0.0,0.0,0.0,100.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
976482,0.0,0.0,0.0,0.0,0.0,100.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
976484,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
976485,0.0,0.0,0.0,0.0,0.0,100.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1118759,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1119351,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1120134,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1120159,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [28]:
user_item_matrix = new_user_transaction.pivot_table(index='FK_BusinessUserId', columns='FK_ContentId', values='PointsRedeemed', fill_value=0)


In [29]:
onehot_encoder = OneHotEncoder()
categories_encoded = onehot_encoder.fit_transform(new_user_transaction[['Categories']])

categories_df = pd.DataFrame(categories_encoded.toarray(), columns=onehot_encoder.get_feature_names_out(['Categories']))
new_user_transaction = pd.concat([new_user_transaction, categories_df], axis=1)


item_feature_matrix = new_user_transaction.groupby('FK_ContentId').agg({
    'PointsRedeemed': 'mean',
    **{col: 'mean' for col in categories_df.columns}
}).fillna(0)

item_feature_matrix = item_feature_matrix.reindex(user_item_matrix.columns).fillna(0)

# Normalize the item-feature matrix
scaler = StandardScaler()
item_feature_matrix_normalized = scaler.fit_transform(item_feature_matrix)


In [30]:
# Apply K-means clustering
kmeans = KMeans(n_clusters=4, random_state=42)
item_clusters = kmeans.fit_predict(item_feature_matrix_normalized)

user_item_sparse = csr_matrix(user_item_matrix.values)
combined_matrix = hstack([user_item_sparse.T, csr_matrix(item_feature_matrix.values)])

combined_matrix = combined_matrix.tocsr()


model_knn = NearestNeighbors(metric='cosine', algorithm='brute')
model_knn.fit(combined_matrix)

In [31]:
def get_similar_items(item_id, n=10):
    """Function to get top n similar items for a given item_id"""
    item_idx = user_item_matrix.columns.get_loc(item_id)
    distances, indices = model_knn.kneighbors(combined_matrix[item_idx], n_neighbors=n+1)
    similar_items = sorted(zip(indices.squeeze().tolist(), distances.squeeze().tolist()), key=lambda x: x[1])[1:]
    similar_items = [(user_item_matrix.columns[i], dist) for i, dist in similar_items]
    return similar_items

In [32]:
def recommend_items(user_id, n=10):
    """Function to recommend top n items for a given user_id"""
    user_interactions = user_item_matrix.loc[user_id]
    interacted_items = user_interactions[user_interactions > 0].index
    recommendations = pd.Series(dtype=np.float64)

    for item in interacted_items:
        similar_items = get_similar_items(item, n)
        for similar_item, score in similar_items:
            if similar_item in recommendations.index:
                recommendations[similar_item] += score
            else:
                recommendations[similar_item] = score

    recommendations = recommendations.drop(interacted_items, errors='ignore')
    recommendations = recommendations.sort_values(ascending=True).head(n)  

    recommended_items_df = deals_data[deals_data['ContentId'].isin(recommendations.index)]

    return recommended_items_df

In [8]:
user_id = 994799
recommended_content_ids = recommend_items(user_id, 5)
recommended_content_ids

Unnamed: 0,ContentId,Title,FK_StatusId,Deal Type,Description,Location,Points,Categories
14,113829,Get 15% discount from Ghaseel,1,Discount,Get 15% discount from Ghaseel,https://www.instagram.com/ghaseel/,50,Automotive
49,113864,Off from Talabat,1,Voucher,Get 5KD discount from Talabat,https://goo.gl/maps/5uoJEdD95W1YSpDt9,5000,F&B
100,113915,Recharge your Deliveroo wallet with,1,Wallet Recharge,Recharge your Deliveroo wallet with 3KD,https://www.instagram.com/deliveroo_kw/,3000,On-demand
101,113916,Recharge your Deliveroo wallet with,1,Wallet Recharge,Recharge your Deliveroo wallet with 5KD,https://www.instagram.com/deliveroo_kw/,5000,F&B
184,115191,Create your memory with TAM & BBT souvenir!,1,Voucher,Redeem your point and get your TAM and BBT sou...,https://eur03.safelinks.protection.outlook.com...,1,F&B
