In [1]:
import os
import sys
from pathlib import Path
import pandas as pd
import numpy as np
import ast
from sklearn.decomposition import TruncatedSVD
from sklearn.metrics.pairwise import cosine_similarity
from sklearn.preprocessing import OneHotEncoder,StandardScaler
from sklearn.neighbors import NearestNeighbors
from sklearn.cluster import KMeans
from scipy.sparse import csr_matrix, hstack



project_dir = Path("C:/Users/adbou/source/repos/KFHXRelatedAi/")
os.chdir(project_dir)

from Configs.GeneralPaths import SOURCEDATA


user_transactions = pd.read_excel(Path(SOURCEDATA / "Transaction_User.xlsx"))
new_user_transaction = user_transactions.drop(columns=['TrxId'])

deals_data = pd.read_excel(Path(SOURCEDATA / "Cleaned_Deals.xlsx"))
deals_data = deals_data.drop(columns=['Unnamed: 0'])

new_user_transaction = new_user_transaction.merge(deals_data[['ContentId', 'Categories']], left_on='FK_ContentId', right_on='ContentId', how='left')
new_user_transaction = new_user_transaction.drop(columns=['ContentId'])


user_item_matrix = new_user_transaction.pivot_table(index='FK_BusinessUserId', columns='FK_ContentId', values='PointsRedeemed', fill_value=0)


onehot_encoder = OneHotEncoder()
categories_encoded = onehot_encoder.fit_transform(new_user_transaction[['Categories']])

categories_df = pd.DataFrame(categories_encoded.toarray(), columns=onehot_encoder.get_feature_names_out(['Categories']))
new_user_transaction = pd.concat([new_user_transaction, categories_df], axis=1)


item_feature_matrix = new_user_transaction.groupby('FK_ContentId').agg({
    'PointsRedeemed': 'mean',
    **{col: 'mean' for col in categories_df.columns}
}).fillna(0)

item_feature_matrix = item_feature_matrix.reindex(user_item_matrix.columns).fillna(0)

# Normalize the item-feature matrix
scaler = StandardScaler()
item_feature_matrix_normalized = scaler.fit_transform(item_feature_matrix)


# Apply K-means clustering
kmeans = KMeans(n_clusters=4, random_state=42)
item_clusters = kmeans.fit_predict(item_feature_matrix_normalized)

user_item_sparse = csr_matrix(user_item_matrix.values)
combined_matrix = hstack([user_item_sparse.T, csr_matrix(item_feature_matrix.values)])

combined_matrix = combined_matrix.tocsr()


model_knn = NearestNeighbors(metric='cosine', algorithm='brute')
model_knn.fit(combined_matrix)



deals_embeddings = pd.read_csv(Path(SOURCEDATA / "Deals_Embeddings.csv"))
deals_embeddings['ada_embedding'] = deals_embeddings['ada_embedding'].apply(ast.literal_eval)

def calculate_cosine_similarity(embedding, embeddings):
    similarities = cosine_similarity([embedding], embeddings)
    return similarities[0]

def get_similar_items(item_id, n=10):
    """Function to get top n similar items for a given item_id"""
    item_idx = user_item_matrix.columns.get_loc(item_id)
    distances, indices = model_knn.kneighbors(combined_matrix[item_idx], n_neighbors=n+1)
    similar_items = sorted(zip(indices.squeeze().tolist(), distances.squeeze().tolist()), key=lambda x: x[1])[1:]
    similar_items = [(user_item_matrix.columns[i], dist) for i, dist in similar_items]
    return similar_items

def recommend_items(user_id, n=10):
    """Function to recommend top n items for a given user_id"""
    user_interactions = user_item_matrix.loc[user_id]
    interacted_items = user_interactions[user_interactions > 0].index
    recommendations = pd.Series(dtype=np.float64)

    for item in interacted_items:
        similar_items = get_similar_items(item, n)
        for similar_item, score in similar_items:
            if similar_item in recommendations.index:
                recommendations[similar_item] += score
            else:
                recommendations[similar_item] = score

    recommendations = recommendations.drop(interacted_items, errors='ignore')
    recommendations = recommendations.sort_values(ascending=True).head(n)  

    recommended_items_df = deals_data[deals_data['ContentId'].isin(recommendations.index)]

    return recommended_items_df

def recommend_deals_for_user(user_id, user_deals_df, deals_embeddings_new, num_recommendations=5):
    redeemed_content_ids = user_deals_df[user_deals_df['FK_BusinessUserId'] == user_id]['FK_ContentId'].unique()
    
    redeemed_embeddings = deals_embeddings_new[deals_embeddings_new['ContentId'].isin(redeemed_content_ids)]
    
    if redeemed_embeddings.empty:
        print("No redeemed deals found for this user.")
        return pd.DataFrame()
    
    all_embeddings = deals_embeddings_new['ada_embedding'].tolist()
    
    similarities = []
    for embedding in redeemed_embeddings['ada_embedding']:
        similarity = calculate_cosine_similarity(embedding, all_embeddings)
        similarities.append(similarity)
    
    average_similarities = np.mean(similarities, axis=0)
    deals_embeddings_new['similarity'] = average_similarities
    
    recommended_deals = deals_embeddings_new[~deals_embeddings_new['ContentId'].isin(redeemed_content_ids)]
    top_recommendations = recommended_deals.sort_values(by='similarity', ascending=False).head(num_recommendations)
    
    top_deals_df = top_recommendations[['ContentId', 'similarity']].rename(columns={'similarity': 'score'})
    return top_deals_df

def recommend_top_popular(deals_data, num_recommendations=5):
    """Function to recommend top popular deals"""
    popular_deals = deals_data['ContentId'].value_counts().head(num_recommendations)
    return popular_deals.index.tolist()

def recommend(user_id, combined_matrix, user_transactions, deals_embeddings, deals_data, num_recommendations=5):
    user_transaction_count = user_transactions[user_transactions['FK_BusinessUserId'] == user_id].shape[0]
    
    if user_transaction_count == 1:
        content_based_recommendations = recommend_deals_for_user(user_id, user_transactions, deals_embeddings, num_recommendations)
        top_popular_recommendations = recommend_top_popular(deals_data, num_recommendations)
        
        
        content_based_ids = content_based_recommendations['ContentId'].sample(n=3).tolist()
        popular_ids = np.random.choice(top_popular_recommendations, 2, replace=False).tolist()
        final_recommendations_ids = content_based_ids + popular_ids
        final_recommendations = deals_data[deals_data['ContentId'].isin(final_recommendations_ids)]
    else:
        content_based_recommendations = recommend_deals_for_user(user_id, user_transactions, deals_embeddings, num_recommendations)
        collaborative_recommendations = recommend_items(user_id, n=num_recommendations)
        
        # Ensuring unique ContentIds
        collaborative_recommendations = collaborative_recommendations[~collaborative_recommendations['ContentId'].isin(content_based_recommendations['ContentId'])]
        
        
        top_collaborative_recommendations = collaborative_recommendations.head(4)
        top_content_based_recommendations = content_based_recommendations.head(1)
            
        combined_recommendations = pd.concat([top_collaborative_recommendations, top_content_based_recommendations])
        combined_recommendations = combined_recommendations.sort_values(by='score', ascending=False)
        
        final_recommendations = deals_data[deals_data['ContentId'].isin(combined_recommendations['ContentId'])]
    
    return final_recommendations


user_id = 994799
recommended_content_ids = recommend(user_id, combined_matrix, new_user_transaction, deals_embeddings, deals_data, num_recommendations=5)
recommended_content_ids


Unnamed: 0,ContentId,Title,FK_StatusId,Deal Type,Description,Location,Points,Categories
4,113819,Off from Talabat,1,Voucher,Get 3KD discount from Talabat.,https://goo.gl/maps/5uoJEdD95W1YSpDt9,3000,F&B
14,113829,Get 15% discount from Ghaseel,1,Discount,Get 15% discount from Ghaseel,https://www.instagram.com/ghaseel/,50,Automotive
49,113864,Off from Talabat,1,Voucher,Get 5KD discount from Talabat,https://goo.gl/maps/5uoJEdD95W1YSpDt9,5000,F&B
101,113916,Recharge your Deliveroo wallet with,1,Wallet Recharge,Recharge your Deliveroo wallet with 5KD,https://www.instagram.com/deliveroo_kw/,5000,F&B


In [2]:
user_last_interactions = user_item_matrix.loc[user_id]
user_interacted_items = user_last_interactions[user_last_interactions > 0].index
user_deals = deals_data[deals_data['ContentId'].isin(user_interacted_items)]
user_deals


Unnamed: 0,ContentId,Title,FK_StatusId,Deal Type,Description,Location,Points,Categories
36,113851,3-Months DeliverooPlus Subscription,1,Subscription,Get 3-months Deliveroo Plus subscription,https://www.instagram.com/deliveroo_kw/,10,F&B
144,115099,Get wallet recharge at Oula Fuel App.,1,Wallet Recharge,Get 5KD wallet recharge at Oula Fuel App.,https://www.google.com/maps/search/%D9%85%D8%A...,5000,Travel


In [3]:
user_trs = new_user_transaction[new_user_transaction["FK_BusinessUserId"] == user_id]
user_trs

Unnamed: 0,FK_BusinessUserId,PointsRedeemed,FK_ContentId,Categories,Categories_Automotive,Categories_Electronics,Categories_Entertainment,Categories_F&B,Categories_Health & Beauty,Categories_On-demand,Categories_Retail,Categories_Travel
10256,994799,10,113851,F&B,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0
14711,994799,5000,115099,Travel,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0
14712,994799,5000,115099,Travel,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0
14717,994799,5000,115099,Travel,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0
15640,994799,5000,115099,Travel,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0
15641,994799,5000,115099,Travel,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0
15642,994799,5000,115099,Travel,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0
17244,994799,5000,115099,Travel,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0
19103,994799,5000,115099,Travel,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0
21221,994799,5000,115099,Travel,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0
