In [None]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder

# `Path` and `SOURCEDATA` are used below but were not imported anywhere before
# this cell, so a fresh kernel (Restart & Run All) stopped here with NameError.
from pathlib import Path

# Project-local module (the same import the notebook uses further down).
# NOTE(review): it is only importable when the project root is on the import
# path, e.g. when the notebook is started from the project root - confirm.
from Configs.GeneralPaths import SOURCEDATA

# Load your data: the user transaction log and the deals table.
user_transactions = pd.read_excel(Path(SOURCEDATA / "Transaction_User.xlsx"))
deals_data = pd.read_excel(Path(SOURCEDATA / "Cleaned_Deals.xlsx"))

# Merge and preprocess data: left-join the deal attributes onto every
# transaction, then drop the duplicated join key and the transaction id.
new_user_transaction = user_transactions.merge(deals_data[['ContentId', 'Categories', 'Deal Type']], left_on='FK_ContentId', right_on='ContentId', how='left')
new_user_transaction = new_user_transaction.drop(columns=['ContentId', 'TrxId'])

# Encode user and item IDs as contiguous integers (0..n-1) so they can index
# embedding tables; the fitted encoders are kept to map back to raw IDs later.
user_encoder = LabelEncoder()
item_encoder = LabelEncoder()

new_user_transaction['FK_BusinessUserId'] = user_encoder.fit_transform(new_user_transaction['FK_BusinessUserId'])
new_user_transaction['FK_ContentId'] = item_encoder.fit_transform(new_user_transaction['FK_ContentId'])

# Split into training and test sets (80/20, fixed seed for a repeatable split)
train_data, test_data = train_test_split(new_user_transaction, test_size=0.2, random_state=42)


In [None]:
import tensorflow as tf
from tensorflow.keras.layers import Input, Embedding, Flatten, Concatenate, Dense, Dropout
from tensorflow.keras.models import Model

# Seed the Python, NumPy and TensorFlow random generators so that weight
# initialisation, dropout masks and batch shuffling are repeatable between
# runs (the data split above is already seeded with 42).
SEED = 42
tf.keras.utils.set_random_seed(SEED)

# Embedding table sizes: the ID columns were label-encoded, so the number of
# distinct encoded values is the number of rows each table needs.
num_users = new_user_transaction['FK_BusinessUserId'].nunique()
num_items = new_user_transaction['FK_ContentId'].nunique()

# Model hyperparameters
embedding_size = 50           # width of every user/item embedding vector
mlp_layers = [64, 32, 16, 8]  # units of the successive Dense layers in the MLP branch
dropout_rate = 0.2            # dropout applied after each Dense layer

# GMF part: element-wise product of a user embedding and an item embedding.
user_input = Input(shape=(1,), name='user_input')
item_input = Input(shape=(1,), name='item_input')
user_embedding_gmf = Embedding(num_users, embedding_size, name='user_embedding_gmf')(user_input)
item_embedding_gmf = Embedding(num_items, embedding_size, name='item_embedding_gmf')(item_input)
gmf_vector = tf.keras.layers.Multiply()([user_embedding_gmf, item_embedding_gmf])
gmf_vector = Flatten()(gmf_vector)

# MLP part: separate embeddings, concatenated and passed through a Dense stack.
user_embedding_mlp = Embedding(num_users, embedding_size, name='user_embedding_mlp')(user_input)
item_embedding_mlp = Embedding(num_items, embedding_size, name='item_embedding_mlp')(item_input)
mlp_vector = Concatenate()([user_embedding_mlp, item_embedding_mlp])
mlp_vector = Flatten()(mlp_vector)

for units in mlp_layers:
    mlp_vector = Dense(units)(mlp_vector)
    mlp_vector = Dropout(dropout_rate)(mlp_vector)
    mlp_vector = tf.keras.layers.ReLU()(mlp_vector)

# Concatenate GMF and MLP parts and map them to a single sigmoid score.
neumf_vector = Concatenate()([gmf_vector, mlp_vector])
output = Dense(1, activation='sigmoid', name='output')(neumf_vector)

# Compiling the model for binary classification of (user, item) pairs.
model = Model(inputs=[user_input, item_input], outputs=[output])
model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])


In [None]:
import warnings

# Preparing training data: one array per model input, in the order the model
# was built with (encoded user IDs, encoded item IDs).
X_train = [train_data['FK_BusinessUserId'].values, train_data['FK_ContentId'].values]
y_train = (train_data['PointsRedeemed'] > 0).astype(int)  # Assuming binary relevance for simplicity

# Every row is an observed transaction and no negative pairs are sampled, so
# the target can collapse to a single class. In that case the loss and the
# accuracy say nothing about ranking quality; surface it instead of training
# silently.
if y_train.nunique() < 2:
    warnings.warn(
        "y_train contains a single class (PointsRedeemed > 0 is constant); "
        "the model cannot learn to rank items without negative examples."
    )

# Training the model; Keras holds out the last 10% of the training rows for validation.
history = model.fit(X_train, y_train, batch_size=256, epochs=10, validation_split=0.1, verbose=1)


In [None]:
def recommend_items(model, user_id, num_recommendations=10):
    """Return the raw IDs of the items `model` scores highest for `user_id`.

    Args:
        model: Two-input model called as ``predict([user_indices, item_indices])``.
        user_id: Raw (un-encoded) user ID.
        num_recommendations: Maximum number of item IDs to return.

    Returns:
        Raw item IDs from ``item_encoder.inverse_transform``, best score first.
        When `user_id` is not among ``user_encoder.classes_`` a notice is
        printed and the result of ``recommend_items_for_new_user`` is returned.
    """
    is_known_user = user_id in user_encoder.classes_
    if not is_known_user:
        print(f"User ID {user_id} not found in training data. Recommending popular items.")
        return recommend_items_for_new_user(num_recommendations)

    # Pair the single encoded user index with every encoded item index.
    encoded_user = user_encoder.transform([user_id])
    candidate_items = np.arange(num_items)
    pairs = np.column_stack((np.repeat(encoded_user, num_items), candidate_items))
    user_column, item_column = pairs[:, 0], pairs[:, 1]

    # One score per candidate item, flattened from the (n, 1) model output.
    predicted = model.predict([user_column, item_column], batch_size=256, verbose=0).flatten()

    # Negating the scores makes argsort rank from highest to lowest.
    ranking = np.argsort(-predicted)
    return item_encoder.inverse_transform(ranking[:num_recommendations])

def recommend_items_for_new_user(num_recommendations=10):
    """Return the raw IDs of the most frequent items in `new_user_transaction`.

    Used as the fallback for users the encoder has never seen.

    Args:
        num_recommendations: Maximum number of item IDs to return.

    Returns:
        Raw item IDs from ``item_encoder.inverse_transform``, most frequent first.
    """
    # value_counts() orders by descending frequency; its index holds the encoded item IDs.
    occurrence_counts = new_user_transaction['FK_ContentId'].value_counts()
    most_frequent = occurrence_counts.index[:num_recommendations]
    return item_encoder.inverse_transform(most_frequent)


In [None]:
# Example: ask for the ten best-scoring items for one raw user ID.
user_id = 976480
recommendations = recommend_items(model, user_id, num_recommendations=10)
print(f'Recommendations for user {user_id}: {recommendations}')

In [None]:
# Build the held-out inputs and labels the same way as the training ones:
# encoded user IDs and encoded item IDs as inputs, PointsRedeemed > 0 as label.
held_out_users = test_data['FK_BusinessUserId'].values
held_out_items = test_data['FK_ContentId'].values
X_test = [held_out_users, held_out_items]
y_test = test_data['PointsRedeemed'].gt(0).astype(int)

# evaluate() returns the loss followed by the compiled metrics (accuracy here).
evaluation = model.evaluate(X_test, y_test, verbose=1)
test_loss, test_accuracy = evaluation
print(f'Test Loss: {test_loss}')
print(f'Test Accuracy: {test_accuracy}')


In [None]:
def get_test_interactions(test_data, user_id):
    """Return the distinct `FK_ContentId` values on the rows of `test_data` for one user.

    Args:
        test_data: DataFrame with `FK_BusinessUserId` and `FK_ContentId` columns.
        user_id: Value compared against `FK_BusinessUserId`; it must be in the
            same ID space as that column.

    Returns:
        Array of unique item IDs in order of first appearance (empty if the
        user has no rows).
    """
    belongs_to_user = test_data['FK_BusinessUserId'] == user_id
    user_items = test_data.loc[belongs_to_user, 'FK_ContentId']
    return user_items.unique()

def precision_at_k(recommendations, test_data, user_id, k):
    """Fraction of the top-`k` recommendations found among the user's test items.

    Args:
        recommendations: Ranked item IDs, best first.
        test_data: Held-out interactions, passed to `get_test_interactions`.
        user_id: User whose held-out items are looked up.
        k: Cut-off; also the denominator of the result.

    Returns:
        Number of distinct hits in the first `k` recommendations divided by
        `k`, or 0.0 when the user has no held-out items.
    """
    held_out = get_test_interactions(test_data, user_id)
    if len(held_out) == 0:
        return 0.0
    top_k = set(recommendations[:k])
    hits = top_k.intersection(held_out)
    return len(hits) / k

def recall_at_k(recommendations, test_data, user_id, k):
    """Fraction of the user's test items that appear in the top-`k` recommendations.

    Args:
        recommendations: Ranked item IDs, best first.
        test_data: Held-out interactions, passed to `get_test_interactions`.
        user_id: User whose held-out items are looked up.
        k: Cut-off applied to `recommendations`.

    Returns:
        Number of distinct hits divided by the number of distinct held-out
        items, or 0.0 when the user has no held-out items.
    """
    relevant = get_test_interactions(test_data, user_id)
    relevant_count = len(relevant)
    if relevant_count == 0:
        return 0.0
    retrieved = set(relevant).intersection(recommendations[:k])
    return len(retrieved) / relevant_count

# Example usage
k = 10

# `test_data` stores label-encoded user and item IDs, whereas `user_id` and
# `recommendations` hold raw IDs (recommend_items returns the output of
# item_encoder.inverse_transform). Comparing the two ID spaces directly never
# matches the intended rows, so translate both into the encoded space first.
if user_id in user_encoder.classes_:
    encoded_user_id = user_encoder.transform([user_id])[0]
    encoded_recommendations = item_encoder.transform(recommendations)
    precision_score = precision_at_k(encoded_recommendations, test_data, encoded_user_id, k)
    recall_score = recall_at_k(encoded_recommendations, test_data, encoded_user_id, k)
else:
    # The encoder was fitted before the split, so an unknown user has no rows
    # in test_data; the metric helpers score that case as 0.0.
    precision_score = recall_score = 0.0

print(f'Precision@{k}: {precision_score}')
print(f'Recall@{k}: {recall_score}')


New Improvements

In [1]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
import tensorflow as tf
from tensorflow.keras.layers import Input, Embedding, Flatten, Concatenate, Dense, Dropout, BatchNormalization
from tensorflow.keras.models import Model
from tensorflow.keras.callbacks import EarlyStopping
import os
import sys
from pathlib import Path
# Setting the working directory to the root of the project (presumably so the
# project-local `Configs` package imported just below resolves - confirm).
# The location can be overridden with the KFHX_PROJECT_DIR environment
# variable, so the notebook is not tied to one developer's machine; the
# original path stays the default, so existing setups behave as before.
project_dir = Path(os.environ.get("KFHX_PROJECT_DIR", "C:/Users/adbou/source/repos/KFHXRelatedAi/"))
os.chdir(project_dir)

from Configs.GeneralPaths import SOURCEDATA
# Read the two source workbooks.
user_transactions = pd.read_excel(Path(SOURCEDATA / "Transaction_User.xlsx"))
deals_data = pd.read_excel(Path(SOURCEDATA / "Cleaned_Deals.xlsx"))

# Fresh encoders, one per ID column; kept so raw IDs can be recovered later.
user_encoder, item_encoder = LabelEncoder(), LabelEncoder()

# One pipeline: left-join the deal attributes, drop the duplicated join key
# and the transaction id, then replace both ID columns by integer codes.
deal_attributes = deals_data[['ContentId', 'Categories', 'Deal Type']]
new_user_transaction = (
    user_transactions
    .merge(deal_attributes, left_on='FK_ContentId', right_on='ContentId', how='left')
    .drop(columns=['ContentId', 'TrxId'])
    .assign(
        FK_BusinessUserId=lambda frame: user_encoder.fit_transform(frame['FK_BusinessUserId']),
        FK_ContentId=lambda frame: item_encoder.fit_transform(frame['FK_ContentId']),
    )
)

# Plain random 80/20 split (no stratification), seeded for repeatability.
train_data, test_data = train_test_split(new_user_transaction, random_state=42, test_size=0.2)

# Number of distinct encoded users/items; used as embedding table sizes.
num_users = new_user_transaction['FK_BusinessUserId'].nunique()
num_items = new_user_transaction['FK_ContentId'].nunique()

# Seed the Python, NumPy and TensorFlow random generators so that weight
# initialisation, dropout masks and batch shuffling are repeatable between
# runs (the data split is already seeded with 42).
SEED = 42
tf.keras.utils.set_random_seed(SEED)

# Model hyperparameters
embedding_size = 30   # width of every user/item embedding vector
mlp_layers = [32, 16]  # units of the successive Dense layers in the MLP branch
dropout_rate = 0.5    # dropout applied inside each MLP stage

# GMF part: element-wise product of L2-regularised user and item embeddings.
user_input = Input(shape=(1,), name='user_input')
item_input = Input(shape=(1,), name='item_input')
user_embedding_gmf = Embedding(num_users, embedding_size, name='user_embedding_gmf', embeddings_regularizer=tf.keras.regularizers.l2(1e-4))(user_input)
item_embedding_gmf = Embedding(num_items, embedding_size, name='item_embedding_gmf', embeddings_regularizer=tf.keras.regularizers.l2(1e-4))(item_input)
gmf_vector = tf.keras.layers.Multiply()([user_embedding_gmf, item_embedding_gmf])
gmf_vector = Flatten()(gmf_vector)

# MLP part: separate embeddings, concatenated and passed through a Dense stack.
user_embedding_mlp = Embedding(num_users, embedding_size, name='user_embedding_mlp', embeddings_regularizer=tf.keras.regularizers.l2(1e-4))(user_input)
item_embedding_mlp = Embedding(num_items, embedding_size, name='item_embedding_mlp', embeddings_regularizer=tf.keras.regularizers.l2(1e-4))(item_input)
mlp_vector = Concatenate()([user_embedding_mlp, item_embedding_mlp])
mlp_vector = Flatten()(mlp_vector)

# Each stage: regularised Dense -> BatchNormalization -> Dropout -> ReLU.
for units in mlp_layers:
    mlp_vector = Dense(units, kernel_regularizer=tf.keras.regularizers.l2(1e-4))(mlp_vector)
    mlp_vector = BatchNormalization()(mlp_vector)
    mlp_vector = Dropout(dropout_rate)(mlp_vector)
    mlp_vector = tf.keras.layers.ReLU()(mlp_vector)

# Concatenate GMF and MLP parts and map them to a single sigmoid score.
neumf_vector = Concatenate()([gmf_vector, mlp_vector])
output = Dense(1, activation='sigmoid', name='output')(neumf_vector)

# Compile the model for binary classification of (user, item) pairs.
model = Model(inputs=[user_input, item_input], outputs=[output])
model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])

import warnings

# Prepare training data: one array per model input, in the order the model was
# built with (encoded user IDs, encoded item IDs).
X_train = [train_data['FK_BusinessUserId'].values, train_data['FK_ContentId'].values]
y_train = (train_data['PointsRedeemed'] > 0).astype(int)  # Assuming binary relevance for simplicity

# Every row is an observed transaction and no negative pairs are sampled, so
# the target can collapse to a single class.
# NOTE(review): the recorded run reports val_accuracy 1.0000 from the first
# epoch, which is what a constant target would produce - confirm on the data.
if y_train.nunique() < 2:
    warnings.warn(
        "y_train contains a single class (PointsRedeemed > 0 is constant); "
        "the model cannot learn to rank items without negative examples."
    )

# Add early stopping: stop after 3 epochs without val_loss improvement and
# roll the weights back to the best epoch.
early_stopping = EarlyStopping(monitor='val_loss', patience=3, restore_best_weights=True)

# Train the model; Keras holds out the last 10% of the training rows for validation.
history = model.fit(X_train, y_train, batch_size=256, epochs=50, validation_split=0.1, verbose=1, callbacks=[early_stopping])

# Assemble the held-out inputs and labels with the same recipe as the training
# set: encoded user/item IDs as inputs, PointsRedeemed > 0 as the label.
X_test = [test_data[id_column].values for id_column in ('FK_BusinessUserId', 'FK_ContentId')]
y_test = test_data['PointsRedeemed'].gt(0).astype(int)

# evaluate() returns the loss followed by the compiled metrics (accuracy here).
evaluation = model.evaluate(X_test, y_test, verbose=1)
test_loss, test_accuracy = evaluation
print(f'Test Loss: {test_loss}')
print(f'Test Accuracy: {test_accuracy}')

def recommend_items(model, user_id, num_recommendations=10):
    """Rank every known item for one user and return the best raw item IDs.

    Args:
        model: Two-input model called as ``predict([user_indices, item_indices])``.
        user_id: Raw (un-encoded) user ID.
        num_recommendations: Maximum number of item IDs to return.

    Returns:
        Raw item IDs from ``item_encoder.inverse_transform``, highest score
        first. For a `user_id` missing from ``user_encoder.classes_`` a notice
        is printed and ``recommend_items_for_new_user`` supplies the result.
    """
    if user_id in user_encoder.classes_:
        # Build an (num_items, 2) grid: the user's code repeated next to every item code.
        user_code = user_encoder.transform([user_id])
        item_codes = np.arange(num_items)
        grid = np.column_stack((np.repeat(user_code, num_items), item_codes))

        # Predict scores for all items and collapse the (n, 1) output to 1-D.
        raw_scores = model.predict([grid[:, 0], grid[:, 1]], batch_size=256, verbose=0)
        flat_scores = raw_scores.flatten()

        # Get top N recommendations: sort descending by negating the scores.
        best_first = np.argsort(-flat_scores)
        winners = best_first[:num_recommendations]
        return item_encoder.inverse_transform(winners)

    print(f"User ID {user_id} not found in training data. Recommending popular items.")
    return recommend_items_for_new_user(num_recommendations)

def recommend_items_for_new_user(num_recommendations=10):
    """Popularity fallback: raw IDs of the items occurring most often in `new_user_transaction`.

    Args:
        num_recommendations: Maximum number of item IDs to return.

    Returns:
        Raw item IDs from ``item_encoder.inverse_transform``, most frequent first.
    """
    # The index of value_counts() lists encoded item IDs by descending frequency.
    ranked_codes = new_user_transaction['FK_ContentId'].value_counts().index
    top_codes = ranked_codes[:num_recommendations]
    return item_encoder.inverse_transform(top_codes)

# Example usage: ten recommendations for a single raw user ID.
user_id = 976480
recommendations = recommend_items(model, user_id, num_recommendations=10)
print(f'Recommendations for user {user_id}: {recommendations}')


Epoch 1/50
107/107 ━━━━━━━━━━━━━━━━━━━━ 5s 20ms/step - accuracy: 0.4031 - loss: 0.9193 - val_accuracy: 1.0000 - val_loss: 0.6220
Epoch 2/50
107/107 ━━━━━━━━━━━━━━━━━━━━ 2s 18ms/step - accuracy: 0.9162 - loss: 0.4613 - val_accuracy: 1.0000 - val_loss: 0.4169
Epoch 3/50
107/107 ━━━━━━━━━━━━━━━━━━━━ 2s 18ms/step - accuracy: 0.9898 - loss: 0.2668 - val_accuracy: 1.0000 - val_loss: 0.2446
Epoch 4/50
107/107 ━━━━━━━━━━━━━━━━━━━━ 2s 19ms/step - accuracy: 0.9980 - loss: 0.1654 - val_accuracy: 1.0000 - val_loss: 0.1414
Epoch 5/50
107/107 ━━━━━━━━━━━━━━━━━━━━ 2s 19ms/step - accuracy: 0.9995 - loss: 0.1128 - val_accuracy: 1.0000 - val_loss: 0.0860
Epoch 6/50
107/107 ━━━━━━━━━━━━━━━━━━━━ 2s 19ms/step - accuracy: 0.9997 - loss: 0.0827 - val_accuracy: 1.0000 - val_loss: 0.0561
Epoch 7/50
[1m107/107