In [2]:
!wget -qq https://github.com/shenasa-ai/persian-book-recommender-dataset/raw/main/data.zip

In [3]:
!unzip -qq data.zip

In [None]:
!head sales.csv

In [4]:
import random
import pandas as pd
import numpy as np

sales_data = pd.read_csv('sales.csv')

In [5]:
purchased_data = sales_data[['member_id', 'good_id']].drop_duplicates()

In [6]:
purchased_data.head(10)

Unnamed: 0,member_id,good_id
0,5,15347
1,8,2833
2,8,8714
3,9,15672
4,12,15001
5,12,2718
6,12,2722
7,12,2721
8,15,698
9,15,1433


In [7]:
purchased_data['factorized_member_id'] = pd.factorize(purchased_data['member_id'])[0]
factorized_to_normal_member_id_mapping = dict(zip(purchased_data['factorized_member_id'], purchased_data['member_id']))
normal_to_factorized_member_id_mapping = dict(zip(purchased_data['member_id'], purchased_data['factorized_member_id']))

In [8]:
purchased_data['factorized_good_id'] = pd.factorize(purchased_data['good_id'])[0]
factorized_to_normal_good_id_mapping = dict(zip(purchased_data['factorized_good_id'], purchased_data['good_id']))
normal_to_factorized_good_id_mapping = dict(zip(purchased_data['good_id'], purchased_data['factorized_good_id']))

In [9]:
purchased_data.head(10)

Unnamed: 0,member_id,good_id,factorized_member_id,factorized_good_id
0,5,15347,0,0
1,8,2833,1,1
2,8,8714,1,2
3,9,15672,2,3
4,12,15001,3,4
5,12,2718,3,5
6,12,2722,3,6
7,12,2721,3,7
8,15,698,4,8
9,15,1433,4,9


In [10]:
purchased_set = set(zip(purchased_data['factorized_member_id'], purchased_data['factorized_good_id']))
len(purchased_set)

511139

In [11]:
unique_members = purchased_data['factorized_member_id'].unique()
unique_goods = purchased_data['factorized_good_id'].unique()
n_unique_members = len(unique_members)
n_unique_goods = len(unique_goods)

print(n_unique_members, n_unique_goods)

95695 45174


In [12]:
not_purchased_set = set()

while len(not_purchased_set)!=len(purchased_set):
    random_member_id = random.choice(unique_members)
    random_good_id = random.choice(unique_goods)
    random_tuple = (random_member_id, random_good_id)
    if random_tuple not in purchased_set:
        not_purchased_set.add(random_tuple)

In [13]:
x_np = np.array(list(not_purchased_set))
x_p = np.array(list(purchased_set))

X = np.concatenate([x_np, x_p], axis=0)

y = np.zeros(len(X))
y[-len(x_p):] = 1

In [14]:
from sklearn.model_selection import train_test_split

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

print(X_train.shape)
print(X_test.shape)

(817822, 2)
(204456, 2)


In [15]:
import tensorflow as tf
from tensorflow.keras.layers import Input, Embedding, Flatten, Dot, Dense, Concatenate
from tensorflow.keras.models import Model

user_input = Input(shape=[1], name="User-Input")
user_embedding = Embedding(n_unique_members, 5, name="User-Embedding")(user_input)
user_vec = Flatten(name="Flatten-User")(user_embedding)

product_input = Input(shape=[1], name="Product-Input")
product_embedding = Embedding(n_unique_goods, 5, name="Product-Embedding")(product_input)
product_vec = Flatten(name="Flatten-Product")(product_embedding)

concat = tf.keras.layers.Concatenate(name="Concatenate")([product_vec, user_vec])
Dense1 = tf.keras.layers.Dense(10, activation='relu')(concat)
drop = tf.keras.layers.Dropout(0.5)(Dense1)
out = tf.keras.layers.Dense(1)(drop)

model = Model([user_input, product_input], out)

model.compile(optimizer=tf.keras.optimizers.Adam(learning_rate=0.01),
              loss=tf.keras.losses.BinaryCrossentropy(from_logits=True),
              metrics=['accuracy'])


In [16]:
from keras.callbacks import ReduceLROnPlateau, EarlyStopping

# Define the callback
reduce_lr = ReduceLROnPlateau(monitor='val_accuracy', factor=0.1, patience=5, min_lr=0.0001)
early_stopping = EarlyStopping(
    monitor="val_accuracy", patience=10, restore_best_weights=True
)

history = model.fit([X_train[:,0],
                     X_train[:,1]],
                    y_train, epochs=50,
                    batch_size=1024,
                    validation_split=0.2,
                    callbacks=[reduce_lr],)

Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50
Epoch 19/50
Epoch 20/50
Epoch 21/50
Epoch 22/50
Epoch 23/50
Epoch 24/50
Epoch 25/50
Epoch 26/50
Epoch 27/50
Epoch 28/50
Epoch 29/50
Epoch 30/50
Epoch 31/50
Epoch 32/50
Epoch 33/50
Epoch 34/50
Epoch 35/50
Epoch 36/50
Epoch 37/50
Epoch 38/50
Epoch 39/50
Epoch 40/50
Epoch 41/50
Epoch 42/50
Epoch 43/50
Epoch 44/50
Epoch 45/50
Epoch 46/50
Epoch 47/50
Epoch 48/50
Epoch 49/50
Epoch 50/50


In [18]:
model.evaluate([X_train[:,0],X_train[:,1]], y_train)



[0.4068845212459564, 0.9007106423377991]

In [19]:
model.evaluate([X_test[:,0],X_test[:,1]], y_test)



[1.4994908571243286, 0.8104384541511536]