In [5]:
import os
import sys
from pathlib import Path
import pandas as pd
import numpy as np
import ast
from sklearn.decomposition import TruncatedSVD
from sklearn.metrics.pairwise import cosine_similarity

# Setting the working directory to the root of the project.
# NOTE(review): the path is hard-coded to a single machine; os.chdir raises if the
# directory does not exist, so this cell only runs where that checkout is present.
project_dir = Path("C:/Users/adbou/source/repos/KFHXRelatedAi/")
os.chdir(project_dir)

# This import is placed after the chdir above; presumably the Configs package is
# resolved relative to the project root — TODO confirm before moving it to the top.
from Configs.GeneralPaths import SOURCEDATA


# Seed for the randomized SVD solver so the latent factors (and therefore the
# recommendations derived from them) are reproducible between runs.
RANDOM_STATE = 42

# --- Load the raw inputs ------------------------------------------------------
user_transactions = pd.read_excel(Path(SOURCEDATA / "Transaction_User.xlsx"))

# Working copy of the transactions without the transaction identifier column.
new_user_transaction = user_transactions.drop(columns=['TrxId'])

deals_data = pd.read_excel(Path(SOURCEDATA / "Cleaned_Deals.xlsx"))
# NOTE(review): 'Unnamed: 0' is presumably a stray index column from an earlier
# export — confirm; the drop raises KeyError if the column is absent.
deals_data = deals_data.drop(columns=['Unnamed: 0'])

# --- Enrich transactions with the deal category -------------------------------
# Left join keeps every transaction, even when its deal is missing from deals_data
# (such rows get a missing 'Categories' value).
new_user_transaction = new_user_transaction.merge(
    deals_data[['ContentId', 'Categories']],
    left_on='FK_ContentId',
    right_on='ContentId',
    how='left',
)
# 'ContentId' duplicates 'FK_ContentId' after the join.
new_user_transaction = new_user_transaction.drop(columns=['ContentId'])

# Binary flag: 1 when the transaction has PointsRedeemed > 0, else 0.
# Vectorized comparison instead of a Python-level lambda per row.
new_user_transaction['Redeemed'] = (new_user_transaction['PointsRedeemed'] > 0).astype('int64')

# --- Build the user x item and user x category matrices -----------------------
# No aggfunc is passed here, so pandas' default aggregation (mean) is applied to
# 'Redeemed' for each (user, deal) pair.
user_item_matrix = new_user_transaction.pivot_table(
    index='FK_BusinessUserId',
    columns='FK_ContentId',
    values='Redeemed',
    fill_value=0,
)

# Sum of the 'Redeemed' flag per (user, category) pair.
user_category_matrix = new_user_transaction.pivot_table(
    index='FK_BusinessUserId',
    columns='Categories',
    values='Redeemed',
    aggfunc='sum',
    fill_value=0,
)

# Item columns first, category columns after them; recommend_items relies on
# this column order when it slices the item scores.
combined_matrix = pd.concat([user_item_matrix, user_category_matrix], axis=1).fillna(0)

# Applying SVD algorithm
n_components = 14  # Number of latent factors
svd = TruncatedSVD(n_components=n_components, random_state=RANDOM_STATE)
latent_matrix = svd.fit_transform(combined_matrix.values)
# Low-rank reconstruction: rows align with combined_matrix.index, columns with
# combined_matrix.columns.
reconstructed_matrix = np.dot(latent_matrix, svd.components_)

# --- Deal embeddings ----------------------------------------------------------
deals_embeddings = pd.read_csv(Path(SOURCEDATA / "Deals_Embeddings.csv"))
# The CSV stores each embedding as text; literal_eval parses it back into a
# Python object (presumably a list of floats — TODO confirm).
deals_embeddings['ada_embedding'] = deals_embeddings['ada_embedding'].apply(ast.literal_eval)

def calculate_cosine_similarity(embedding, embeddings):
    """Return the cosine similarity of one embedding against each of ``embeddings``.

    ``embedding`` is wrapped into a single-row batch for ``cosine_similarity``;
    the only row of the resulting matrix is returned.
    """
    query_batch = [embedding]
    similarity_matrix = cosine_similarity(query_batch, embeddings)
    first_row = similarity_matrix[0]
    return first_row

def recommend_items(user_id, combined_matrix, reconstructed_matrix, original_data, num_recommendations=5):
    """Return the best-scoring items the user has not interacted with yet.

    Scores are read from the user's row of ``reconstructed_matrix``. Only the
    leading item columns of ``combined_matrix`` are considered; any columns
    after them are ignored. Items on which the user already has a positive
    entry in ``combined_matrix`` are excluded.

    Args:
        user_id: Label of the user in ``combined_matrix.index``.
        combined_matrix: DataFrame whose leading columns are item ids.
        reconstructed_matrix: 2-D array indexed positionally like
            ``combined_matrix`` (same row and column order).
        original_data: DataFrame with an ``FK_ContentId`` column. The number of
            distinct non-null ids is used as the number of item columns.
        num_recommendations: Maximum number of rows to return.

    Returns:
        DataFrame with columns ``ContentId`` and ``score``, highest score first.
        Ties keep the column order of ``combined_matrix``.

    Raises:
        KeyError: If ``user_id`` is not present in ``combined_matrix.index``.
    """
    user_index = combined_matrix.index.get_loc(user_id)

    # nunique() ignores NaN; a pivot table drops NaN column labels too, so this
    # matches the number of item columns even when some ids are missing.
    # Assumes the item columns come first in combined_matrix.
    n_items = original_data['FK_ContentId'].nunique()

    item_scores = np.asarray(reconstructed_matrix[user_index][:n_items], dtype=float)
    interactions = combined_matrix.iloc[user_index, :n_items].to_numpy()

    # Column positions of items without a positive interaction for this user.
    candidate_positions = np.flatnonzero(~(interactions > 0))

    # Stable sort on the negated scores: descending order, ties in column order.
    order = np.argsort(-item_scores[candidate_positions], kind='stable')
    top_positions = candidate_positions[order[:num_recommendations]]

    return pd.DataFrame({
        'ContentId': combined_matrix.columns[top_positions].tolist(),
        'score': item_scores[top_positions],
    })

def recommend_deals_for_user(user_id, user_deals_df, deals_embeddings_new, num_recommendations=5):
    """Recommend deals whose embeddings are closest to the user's own deals.

    The user's deals are the ``FK_ContentId`` values on the rows of
    ``user_deals_df`` that belong to ``user_id``. Every deal in
    ``deals_embeddings_new`` is scored with its mean cosine similarity to those
    deals; the user's own deals are then removed from the ranking.

    Args:
        user_id: Value to match against ``user_deals_df['FK_BusinessUserId']``.
        user_deals_df: DataFrame with ``FK_BusinessUserId`` and ``FK_ContentId``.
        deals_embeddings_new: DataFrame with ``ContentId`` and ``ada_embedding``
            (one vector per row). It is not modified by this function.
        num_recommendations: Maximum number of rows to return.

    Returns:
        DataFrame with columns ``ContentId`` and ``score``, highest score first.
        When none of the user's deals has an embedding, a message is printed
        and an empty frame with those same two columns is returned, so callers
        can still merge on ``ContentId``.
    """
    user_rows = user_deals_df['FK_BusinessUserId'] == user_id
    redeemed_content_ids = user_deals_df.loc[user_rows, 'FK_ContentId'].unique()

    is_redeemed = deals_embeddings_new['ContentId'].isin(redeemed_content_ids)
    if not is_redeemed.any():
        print("No redeemed deals found for this user.")
        return pd.DataFrame(columns=['ContentId', 'score'])

    all_embeddings = deals_embeddings_new['ada_embedding'].tolist()
    redeemed_embeddings = deals_embeddings_new.loc[is_redeemed, 'ada_embedding'].tolist()

    # One batched call: rows are the user's deals, columns are all deals.
    similarity_matrix = cosine_similarity(redeemed_embeddings, all_embeddings)
    average_similarities = similarity_matrix.mean(axis=0)

    # assign() returns a new frame, so the caller's DataFrame is left untouched.
    scored_deals = deals_embeddings_new.assign(similarity=average_similarities)

    top_recommendations = (
        scored_deals.loc[~is_redeemed]
        .sort_values(by='similarity', ascending=False)
        .head(num_recommendations)
    )

    return top_recommendations[['ContentId', 'similarity']].rename(columns={'similarity': 'score'})

def _stack_recommendations(frames):
    """Concatenate the non-empty frames; return an empty ContentId/score frame if none."""
    non_empty = [frame for frame in frames if not frame.empty]
    if not non_empty:
        return pd.DataFrame(columns=['ContentId', 'score'])
    return pd.concat(non_empty)


def recommend(user_id, combined_matrix, reconstructed_matrix, original_data, user_transactions, deals_embeddings, deals_data, num_recommendations=5):
    """Return deal recommendations for a user, joined with the deal details.

    Users with at most two rows in ``user_transactions`` get content-based
    recommendations only. For all other users the result is seeded with the top
    two collaborative and the top two content-based recommendations, and the
    remaining slots (up to ``num_recommendations``) are filled with the
    best-scoring deals left in the combined pool.

    NOTE(review): the pool is ranked on a single ``score`` column although the
    content-based scores are cosine similarities and the collaborative scores
    are reconstructed-matrix values — confirm that comparing them is intended.

    Args:
        user_id: User to recommend for.
        combined_matrix: Passed through to ``recommend_items``.
        reconstructed_matrix: Passed through to ``recommend_items``.
        original_data: Passed through to ``recommend_items``.
        user_transactions: DataFrame with ``FK_BusinessUserId``; also passed to
            ``recommend_deals_for_user``.
        deals_embeddings: Passed through to ``recommend_deals_for_user``.
        deals_data: DataFrame with a ``ContentId`` column, left-joined onto the
            selected recommendations.
        num_recommendations: Maximum number of recommendations to return.

    Returns:
        DataFrame with ``ContentId``, ``score`` and the columns of
        ``deals_data``; each ``ContentId`` is selected at most once.
    """
    # Needed on both paths, so compute it once.
    content_based = recommend_deals_for_user(user_id, user_transactions, deals_embeddings, num_recommendations)
    if 'ContentId' not in content_based.columns:
        # A column-less empty frame would make the merges below raise KeyError.
        content_based = pd.DataFrame(columns=['ContentId', 'score'])

    user_transaction_count = user_transactions[user_transactions['FK_BusinessUserId'] == user_id].shape[0]

    if user_transaction_count <= 2:
        # Too little history for the collaborative scores: content-based only.
        return content_based.merge(deals_data, on='ContentId', how='left')

    collaborative = recommend_items(user_id, combined_matrix, reconstructed_matrix, original_data, num_recommendations)

    ranked_pool = _stack_recommendations([content_based, collaborative]).sort_values(by='score', ascending=False)

    # Seed with the two best of each recommender; a deal proposed by both is kept once.
    selected = _stack_recommendations([collaborative.head(2), content_based.head(2)])
    selected = selected.drop_duplicates(subset='ContentId')

    remaining = ranked_pool[~ranked_pool['ContentId'].isin(selected['ContentId'])]
    remaining = remaining.drop_duplicates(subset='ContentId')

    # Fill whatever is still missing to reach num_recommendations.
    open_slots = max(num_recommendations - len(selected), 0)
    selected = _stack_recommendations([selected, remaining.head(open_slots)])

    top_combined = selected.head(num_recommendations)
    return top_combined.merge(deals_data, on='ContentId', how='left')





# Example run: take the user at position 5 of the matrix index and display
# that user's recommendations joined with the deal details.
user_id = combined_matrix.index[5]
recommendations = recommend(
    user_id,
    combined_matrix,
    reconstructed_matrix,
    new_user_transaction,
    user_transactions,
    deals_embeddings,
    deals_data,
    num_recommendations=5,
)
recommendations


Unnamed: 0,ContentId,score,Title,FK_StatusId,Deal Type,Description,Location,Points,Categories
0,113916,0.755586,Recharge your Deliveroo wallet with,1,5 KD Wallet Recharge,Recharge your Deliveroo wallet with 5KD,https://www.instagram.com/deliveroo_kw/,5000,F&B
1,115244,0.737924,Recharge your Deliveroo wallet with,2,10KD Wallet Recharge,Recharge your Deliveroo wallet with 10KD,https://www.instagram.com/deliveroo_kw/,1000,F&B
2,113884,0.695977,Deliveroo Wallet recharge,2,25 KD Wallet Recharge,Recharge your Deliveroo wallet with 25KD,https://www.instagram.com/deliveroo_kw/,25000,On-demand
3,113885,0.642014,Get discount on your order from Caffeine Cafe,1,5KD discount,Get 5KD discount on your order from Caffeine Cafe,https://maps.app.goo.gl/AJ3ajc6wGTbDcbiv7,5000,F&B
4,115108,0.624737,Get discount on your next order from Carousel,2,5KD discount,\n Get 5KD\n discount on your next order fro...,https://www.google.com/maps/place/Carousel/@29...,5000,F&B
