In [1]:
import os
import sys
from pathlib import Path
# Setting the working directory to the root of the project.
# The location can be overridden with the KFHX_PROJECT_DIR environment variable so the
# notebook is not tied to a single machine; when the variable is unset the original
# hard-coded path is used, so existing behaviour is unchanged.
project_dir = Path(os.environ.get("KFHX_PROJECT_DIR", "C:/Users/adbou/source/repos/KFHXRelatedAi/"))
os.chdir(project_dir)

from Configs.GeneralPaths import SOURCEDATA
import pandas as pd
import numpy as np

In [2]:
# Load the reward transactions data
reward_transactions = pd.read_excel(Path(SOURCEDATA / "New_Rewards_Trxs_Data.xlsx"))
# `.info()` prints its summary itself, whereas `.head()` is only rendered when it is
# the final expression of the cell. It used to sit mid-cell, where its result was
# silently discarded, so it now goes last.
reward_transactions.info()
reward_transactions.head()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 365425 entries, 0 to 365424
Data columns (total 20 columns):
 #   Column                     Non-Null Count   Dtype         
---  ------                     --------------   -----         
 0   EntityId                   365425 non-null  int64         
 1   EntityName                 365425 non-null  object        
 2   TrxId                      365425 non-null  int64         
 3   TrxDate                    365425 non-null  datetime64[ns]
 4   Acronym                    365425 non-null  object        
 5   MerchantName               365425 non-null  object        
 6   Currency                   365425 non-null  object        
 7   FK_BusinessUserId          365425 non-null  int64         
 8   PointsRewarded             365425 non-null  int64         
 9   FK_TierId                  365425 non-null  int64         
 10  Tier                       365425 non-null  object        
 11  Location                   365425 non-null  object  

In [8]:
# Build a new frame without the two MCC description columns; `reward_transactions`
# itself is not modified because the drop is not done in place.
updated_reward_transactions = reward_transactions.drop(
    ["MCC Details", "MCC General Category"],
    axis="columns",
)

In [9]:
# Preview the first rows of the transactions frame
updated_reward_transactions.head()

Unnamed: 0,EntityId,EntityName,TrxId,TrxDate,Acronym,MerchantName,Currency,FK_BusinessUserId,PointsRewarded,FK_TierId,Tier,Location,TotalPaid,MCC,MCC More General Category,Month,Season,Detailed MCC
0,1594,Dose Café,18084252,2024-01-11 03:00:00,JUREAT AL SAADA CO KUWAIT KW,JUREAT AL SAADA CO,KWD,996464,150,1845,Solo,KW,1.5,5812,Restaurants and Food Services,1,Winter,Restaurants and Food Services
1,1594,Dose Café,18137458,2024-01-13 03:00:00,JUREAT AL SAADA CO KUWAIT KW,JUREAT AL SAADA CO,KWD,996464,125,1845,Solo,KW,1.25,5812,Restaurants and Food Services,1,Winter,Restaurants and Food Services
2,1594,Dose Café,18243795,2024-01-18 03:00:00,JUREAT AL SAADA CO KUWAIT KW,JUREAT AL SAADA CO,KWD,996464,125,1845,Solo,KW,1.25,5812,Restaurants and Food Services,1,Winter,Restaurants and Food Services
3,1656,Alpha Store,17819329,2024-01-01 03:00:00,ALNAHAR TECH COMPANY/A SHARQ KWT,ALNAHAR TECH COMPANY/A SHARQ,KWD,997483,77,1846,Hero,KW,3.89,5732,Retail Stores,1,Winter,Electronics and Appliances
4,1792,ZO Bakery,17820605,2024-01-02 03:00:00,ZO BAKERY WAIT KW,ZO BAKERY,KWD,1011087,166,1846,Hero,KW,1.66,5812,Restaurants and Food Services,1,Winter,Restaurants and Food Services


In [10]:
# Load the per-user aggregates data
user_aggregates = pd.read_excel(Path(SOURCEDATA / "Reward_Trxs_User_Aggregates.xlsx"))
# `.info()` prints its summary itself, whereas `.head()` is only rendered when it is
# the final expression of the cell. It used to sit mid-cell, where its result was
# silently discarded, so it now goes last.
user_aggregates.info()
user_aggregates.head()


<class 'pandas.core.frame.DataFrame'>
RangeIndex: 17828 entries, 0 to 17827
Data columns (total 85 columns):
 #   Column                   Non-Null Count  Dtype  
---  ------                   --------------  -----  
 0   FK_BusinessUserId        17828 non-null  int64  
 1   total_transactions       17828 non-null  int64  
 2   total_amount_spent       17828 non-null  float64
 3   avg_points_rewarded      17828 non-null  float64
 4   most_common_day_of_week  17828 non-null  int64  
 5   most_common_month        17828 non-null  int64  
 6   most_common_season       17828 non-null  int64  
 7   MCC_763                  17828 non-null  int64  
 8   MCC_3038                 17828 non-null  int64  
 9   MCC_3583                 17828 non-null  int64  
 10  MCC_4111                 17828 non-null  int64  
 11  MCC_4225                 17828 non-null  int64  
 12  MCC_4468                 17828 non-null  int64  
 13  MCC_4814                 17828 non-null  int64  
 14  MCC_4900              

Implicit ALS 

In [11]:
from scipy.sparse import csr_matrix
from implicit.als import AlternatingLeastSquares
from sklearn.model_selection import train_test_split


In [12]:
# Splitting the data into training and testing sets.
# This is a row-level split of transactions, so a user's transactions can land on
# both sides, and a user may also end up only in the test set.
train_data, test_data = train_test_split(updated_reward_transactions, test_size=0.2, random_state=42)

# Creating a user x MCC interaction matrix for training data.
# aggfunc='mean' is pivot_table's default and is spelled out here on purpose: each
# cell holds the user's *average* TotalPaid for that MCC, not the total.
# NOTE(review): confirm that mean (rather than sum) is the intended interaction strength.
train_interactions = train_data.pivot_table(
    index='FK_BusinessUserId',
    columns='MCC',
    values='TotalPaid',
    aggfunc='mean',
    fill_value=0,
)

# Converting the interaction matrix to a sparse format
train_interactions_sparse = csr_matrix(train_interactions.values)

# Initializing the ALS model.
# random_state pins the random initialisation of the factors so that re-running the
# notebook reproduces the same model (the split above is already seeded).
model = AlternatingLeastSquares(factors=20, regularization=0.1, iterations=40, random_state=42)

# Training the model.
# NOTE(review): the matrix has users as rows and MCCs as columns. Confirm that this
# orientation matches what the installed `implicit` version expects in `fit`
# (the expected orientation has changed between releases).
model.fit(train_interactions_sparse)

  check_blas_config()
100%|██████████| 40/40 [00:01<00:00, 31.16it/s]


In [15]:
# Lookup table: MCC code (number) -> detailed MCC name
MCC_TO_DETAILED_MCC = (
    updated_reward_transactions[['MCC', 'Detailed MCC']]
    .drop_duplicates()
    .set_index('MCC')['Detailed MCC']
    .to_dict()
)

In [16]:
# Cosine similarity between items (MCCs), computed from the transposed training interaction matrix (the ALS item factors are not used in this cell)

from sklearn.metrics.pairwise import cosine_similarity
import pandas as pd
import numpy as np

# Transpose the user x MCC training matrix so that every row describes one item (MCC)
item_interactions = train_interactions.transpose()

# Pairwise cosine similarity between the item rows
item_similarity = cosine_similarity(item_interactions)

# Wrap the similarity matrix in a DataFrame labelled by MCC on both axes
item_similarity_df = pd.DataFrame(
    data=item_similarity,
    index=item_interactions.index,
    columns=item_interactions.index,
)

In [17]:
#Recommendation Function That Recommends MCC To User Based On His Transactions

def get_item_based_recommendations(user_id, train_interactions_df: pd.DataFrame, item_similarity_df: pd.DataFrame, num_items: int = 10, mcc_names=None) -> pd.DataFrame:
    """Recommend items (MCCs) the user has not interacted with yet.

    Every candidate item is scored by summing its similarity to each item the
    user has a positive interaction value for; items the user already
    interacted with are excluded from the result.

    Parameters
    ----------
    user_id
        Row label of the user in ``train_interactions_df``.
    train_interactions_df : pd.DataFrame
        User x item interaction matrix (users as index, items as columns).
    item_similarity_df : pd.DataFrame
        Square item x item similarity matrix labelled by item on both axes.
    num_items : int, default 10
        Maximum number of recommendations to return.
    mcc_names : mapping, optional
        Item -> display name lookup (anything with a ``.get`` method). When
        omitted, the notebook-level ``MCC_TO_DETAILED_MCC`` dict is used, which
        preserves the original behaviour.

    Returns
    -------
    pd.DataFrame
        Columns ``MCC``, ``Name`` and ``Score``, ordered by descending score and
        indexed by the recommended item labels. Items missing from the name
        lookup are labelled ``'Unknown'``.

    Raises
    ------
    KeyError
        If ``user_id`` is not a row of ``train_interactions_df`` (for example a
        user that is absent from the data the matrix was built from).
    """
    if user_id not in train_interactions_df.index:
        raise KeyError(f"user_id {user_id!r} is not present in the interaction matrix")
    if mcc_names is None:
        # Resolved at call time so the default keeps using the notebook-level lookup.
        mcc_names = MCC_TO_DETAILED_MCC

    # Getting the items the user has interacted with
    user_interactions = train_interactions_df.loc[user_id]
    interacted_items = user_interactions[user_interactions > 0].index

    # Calculating scores for all items: sum of the similarity columns of the
    # interacted items, done in one vectorised step. With no interacted items
    # this yields a zero score for every candidate.
    summed_similarity = (
        item_similarity_df.loc[:, interacted_items].to_numpy(dtype=float).sum(axis=1)
    )
    scores = pd.Series(summed_similarity, index=item_similarity_df.index)

    # Excluding the items the user has already interacted with
    scores = scores.drop(interacted_items)

    # Getting top N items (already sorted by descending score)
    top_scores = scores.nlargest(num_items)
    top_items = top_scores.index

    # Creating DataFrame for recommendations; the Series supplies the index,
    # so the result stays indexed by item label as before.
    recommendations = pd.DataFrame({
        'MCC': top_items,
        'Name': [mcc_names.get(mcc, 'Unknown') for mcc in top_items],
        'Score': top_scores,
    })

    return recommendations

# Example: item-based recommendations for a single user
user_id = 976481
# One value drives both the number of recommendations requested and the number
# shown in the printed title, so the two cannot drift apart.
top_n = 10
recommendations = get_item_based_recommendations(user_id, train_interactions, item_similarity_df, num_items=top_n)
print(f'Top {top_n} item-based recommendations for user {user_id}:')
recommendations


Top 10 item-based recommendations for user 976481:


Unnamed: 0_level_0,MCC,Name,Score
MCC,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
5812,5812,Restaurants and Food Services,1.217552
5399,5399,Retail Stores,1.152849
4814,4814,Technology and Communication Services,1.150295
5047,5047,Health and Medical,1.116791
5712,5712,Home and Property Services,1.069584
5977,5977,Personal Services - Health and Beauty Shops,1.027333
5732,5732,Electronics and Appliances,0.918937
5817,5817,Digital Goods-Applications,0.917681
7230,7230,Personal Services - Hair Care,0.868155
7523,7523,Ground Transportation,0.831866


In [18]:
# Function to get a user's transaction history by MCC (for evaluation purposes)
def get_user_purchases(user_id, updated_reward_transactions: pd.DataFrame, MCC_TO_DETAILED_MCC) -> pd.DataFrame:
    """Return the distinct MCCs a user has transacted with, plus their names.

    Parameters
    ----------
    user_id
        Value to match against the ``FK_BusinessUserId`` column.
    updated_reward_transactions : pd.DataFrame
        Transactions frame; must contain ``FK_BusinessUserId`` and ``MCC``.
    MCC_TO_DETAILED_MCC : mapping
        MCC -> detailed name lookup used to fill the ``Detailed MCC`` column.

    Returns
    -------
    pd.DataFrame
        One row per distinct MCC, sorted by MCC, with a fresh 0..n-1 index and
        the columns ``MCC`` and ``Detailed MCC``. MCCs missing from the lookup
        get a missing value as their name. The input frame is not modified.
    """
    is_user = updated_reward_transactions['FK_BusinessUserId'] == user_id
    # Select the user's rows and the MCC column in a single .loc call, then build
    # the result from scratch. Nothing is assigned onto a filtered slice, so no
    # SettingWithCopyWarning can be triggered.
    user_mccs = (
        updated_reward_transactions.loc[is_user, 'MCC']
        .dropna()
        .drop_duplicates()
        .sort_values()
    )
    user_purchases = pd.DataFrame({
        'MCC': user_mccs.to_numpy(),
        'Detailed MCC': user_mccs.map(MCC_TO_DETAILED_MCC).to_numpy(),
    })
    return user_purchases

# List the distinct MCCs this user has transacted with.
# Note: this reads the full transactions frame (`updated_reward_transactions`),
# not only the training split.
user_purchases = get_user_purchases(user_id, updated_reward_transactions, MCC_TO_DETAILED_MCC)
print(f"User {user_id} purchases:")
user_purchases

User 976481 purchases:


Unnamed: 0,MCC,Detailed MCC
0,5499,Food and Beverage Stores
1,5651,Clothing and Accessories
2,5732,Electronics and Appliances
3,5940,Sports
4,7996,Entertainment
