In [1]:
import pandas as pd
import numpy as np

In [2]:
df = pd.read_csv('../../data/project_scores.csv')

In [3]:
df

Unnamed: 0,user_id,project_id,score
0,5f5a90e2-6601-480c-bc08-7592e06beacd,45ba1903-cafe-4e57-b7ec-1de402d32cc1,1
1,5f5a90e2-6601-480c-bc08-7592e06beacd,22a9acc5-428a-41d4-b056-9bebe9183afe,1
2,0db6b893-14aa-4ae5-8e57-692ca7943ab5,37e5167f-10d9-429d-99c4-260d68e39307,2
3,0db6b893-14aa-4ae5-8e57-692ca7943ab5,467f58b6-c640-493a-aceb-36dea9881778,2
4,5f5a90e2-6601-480c-bc08-7592e06beacd,6f6235eb-a751-43f2-8bad-506c884635db,4
5,0db6b893-14aa-4ae5-8e57-692ca7943ab5,41edbc3e-2636-4806-bd87-4c19f273bb3c,4
6,0db6b893-14aa-4ae5-8e57-692ca7943ab5,45ba1903-cafe-4e57-b7ec-1de402d32cc1,4
7,0db6b893-14aa-4ae5-8e57-692ca7943ab5,6f6235eb-a751-43f2-8bad-506c884635db,4
8,5f5a90e2-6601-480c-bc08-7592e06beacd,41edbc3e-2636-4806-bd87-4c19f273bb3c,4


In [4]:
from sklearn.preprocessing import LabelEncoder

user_le = LabelEncoder()
df['user_id'] = user_le.fit_transform(df['user_id'])

project_le = LabelEncoder()
df['project_id'] = project_le.fit_transform(df['project_id'])

In [5]:
X = df[['user_id', 'project_id']]
y = df['score']

In [6]:
import tensorflow as tf
from keras.models import Model
from keras.layers import Input,Dense,Embedding,Flatten,Input,concatenate,Add
from keras.regularizers import l2

In [7]:
num_users = df['user_id'].nunique()
num_projects = df['project_id'].nunique()
k = 50
l2_lambda = 0.001

In [8]:
u_input = Input((1,), name='user_input')
u = Embedding(num_users, k, name='user_emb')(u_input)
u = Flatten(name='user_flat')(u)
u = Dense(48, activation='relu', name='user_dense')(u)

In [9]:
p_input = Input((1,), name='project_input')
p = Embedding(num_projects, k, name='project_emb')(p_input)
p = Flatten(name='project_flat')(p)
p = Dense(8, activation='relu', name='project_dense')(p)

In [10]:
x = concatenate([u, p], name='concat')
# x = Dropout(0.1, name='drop1')(x)
x = Dense(16, activation='relu', name='dense1')(x)
x = Dense(4, activation='relu', name='dense2')(x)

In [11]:
u_bias = Embedding(num_users, 1, embeddings_regularizer=l2(l2_lambda), name='user_bias_emb')(u_input)
u_bias = Flatten(name='user_bias_flat')(u_bias)

p_bias = Embedding(num_projects, 1, embeddings_regularizer=l2(l2_lambda), name='project_bias_emb')(p_input)
p_bias = Flatten(name='project_bias_flat')(p_bias)

In [12]:
o = concatenate([x, u_bias, p_bias], name='combined_features')
o = Dense(16, activation='relu', name='combined_dense1')(o)
o = Dense(4, activation='relu', name='combined_dense2')(o)
o = Dense(1, activation='linear' , name='output')(o)

In [13]:
from tensorflow.keras.metrics import MeanSquaredError, MeanAbsoluteError, RootMeanSquaredError

model = Model(inputs=[u_input, p_input], outputs=o)

metrics = [
    MeanSquaredError(name='mse'),
    MeanAbsoluteError(name='mae'),
    RootMeanSquaredError(name='rmse')
]

model.compile(optimizer=tf.keras.optimizers.legacy.Adam(0.001), loss='mean_squared_error', metrics = metrics)

In [14]:
model.summary()

Model: "model"
__________________________________________________________________________________________________
 Layer (type)                Output Shape                 Param #   Connected to                  
 user_input (InputLayer)     [(None, 1)]                  0         []                            
                                                                                                  
 project_input (InputLayer)  [(None, 1)]                  0         []                            
                                                                                                  
 user_emb (Embedding)        (None, 1, 50)                100       ['user_input[0][0]']          
                                                                                                  
 project_emb (Embedding)     (None, 1, 50)                300       ['project_input[0][0]']       
                                                                                              

In [15]:
from keras.utils import plot_model

plot_model(
    model,
    show_shapes=True,
    show_layer_names=True,
    expand_nested=True,
    show_layer_activations=True,
)

You must install pydot (`pip install pydot`) and install graphviz (see instructions at https://graphviz.gitlab.io/download/) for plot_model to work.


In [16]:
early_stopping = tf.keras.callbacks.EarlyStopping(
    monitor="val_loss",
    min_delta=0.1,
    patience=2,
    verbose=1,
    mode="min",
    baseline=None,
    restore_best_weights=True
)

In [17]:
model_checkpoint = tf.keras.callbacks.ModelCheckpoint('model.h5',monitor='val_loss', verbose=1, save_best_only=True)

In [18]:
history = model.fit(
    x=[X['user_id'], X['project_id']],
    y=y, epochs=200,
    # validation_data=([X_val['userId'], X_val['movieId']], y_val),
    # callbacks=[early_stopping, model_checkpoint, model_weights_checkpoint]
)

Epoch 1/200
Epoch 2/200
Epoch 3/200
Epoch 4/200
Epoch 5/200
Epoch 6/200
Epoch 7/200
Epoch 8/200
Epoch 9/200
Epoch 10/200
Epoch 11/200
Epoch 12/200
Epoch 13/200
Epoch 14/200
Epoch 15/200
Epoch 16/200
Epoch 17/200
Epoch 18/200
Epoch 19/200
Epoch 20/200
Epoch 21/200
Epoch 22/200
Epoch 23/200
Epoch 24/200
Epoch 25/200
Epoch 26/200
Epoch 27/200
Epoch 28/200
Epoch 29/200
Epoch 30/200
Epoch 31/200
Epoch 32/200
Epoch 33/200
Epoch 34/200
Epoch 35/200
Epoch 36/200
Epoch 37/200
Epoch 38/200
Epoch 39/200
Epoch 40/200
Epoch 41/200
Epoch 42/200
Epoch 43/200
Epoch 44/200
Epoch 45/200
Epoch 46/200
Epoch 47/200
Epoch 48/200
Epoch 49/200
Epoch 50/200
Epoch 51/200
Epoch 52/200
Epoch 53/200
Epoch 54/200
Epoch 55/200
Epoch 56/200
Epoch 57/200
Epoch 58/200
Epoch 59/200
Epoch 60/200
Epoch 61/200
Epoch 62/200
Epoch 63/200
Epoch 64/200
Epoch 65/200
Epoch 66/200
Epoch 67/200
Epoch 68/200
Epoch 69/200
Epoch 70/200
Epoch 71/200
Epoch 72/200
Epoch 73/200
Epoch 74/200
Epoch 75/200
Epoch 76/200
Epoch 77/200
Epoch 78

In [19]:
tf.keras.models.save_model(model, '../../models/projects/recommendations.h5')

  tf.keras.models.save_model(model, '../../models/projects/recommendations.h5')


In [21]:
user_embeddings = {}

counter = 0

for user_id in df['user_id'].unique():
    user_embedding = model.get_layer('user_emb')(np.array([user_id]))
    user_embedding = tf.keras.backend.flatten(user_embedding)
    user_embedding = tf.expand_dims(user_embedding, axis=0)
    user_dense = model.get_layer('user_dense')(user_embedding)
    
    user_embeddings[user_id] = user_dense.numpy()

    counter+=1

    if counter%10000==0:
        print(f"Completed {counter} Iterations")

In [24]:
project_embeddings = {}

counter = 0

for project_id in df['project_id'].unique():
    project_embedding = model.get_layer('project_emb')(np.array([movie_id]))
    project_embedding = tf.keras.backend.flatten(project_embedding)
    project_embedding = tf.expand_dims(project_embedding, axis=0)
    project_dense = model.get_layer('project_dense')(project_embedding)
    
    project_embeddings[project_id] = project_dense.numpy()

    counter+=1

    if counter%10000==0:
        print(f"Completed {counter} Iterations")

In [27]:
user_bias_embeddings = {}

counter = 0

for user_id in df['user_id'].unique():
    user_bias_embedding = model.get_layer('user_bias_emb')(np.array([user_id]))
    user_bias_embedding = tf.keras.backend.flatten(user_bias_embedding)

    user_bias_embeddings[user_id] = user_bias_embedding.numpy()

    counter+=1

    if counter%10000==0:
        print(f"Completed {counter} Iterations")

In [28]:
project_bias_embeddings = {}

counter = 0

for project_id in df['project_id'].unique():
    project_bias_embedding = model.get_layer('project_bias_emb')(np.array([project_id]))
    project_bias_embedding = tf.keras.backend.flatten(project_bias_embedding)
    
    project_bias_embeddings[project_id] = project_bias_embedding.numpy()

    counter+=1

    if counter%10000==0:
        print(f"Completed {counter} Iterations")

In [43]:
import pickle

with open('../../models/projects/user_embeddings.json', 'wb') as f:
    pickle.dump(user_embeddings, f)

with open('../../models/projects/movie_embeddings.json', 'wb') as f:
    pickle.dump(project_embeddings, f)

with open('../../models/projects/user_bias_embeddings.json', 'wb') as f:
    pickle.dump(user_bias_embeddings, f)

with open('../../models/projects/movie_bias_embeddings.json', 'wb') as f:
    pickle.dump(project_bias_embeddings, f)

In [30]:
def predict(user_id, project_id):
    user_embedding = user_embeddings[user_id]
    project_embedding = project_embeddings[project_id]

    # Passing user embedding and movie embedding through the concat layer
    concatenated_embeddings = model.get_layer('concat')([user_embedding, project_embedding])

    # Passing the concatenated embeddings through the dense layers
    x = model.get_layer('dense1')(concatenated_embeddings)
    x = model.get_layer('dense2')(x)

    user_bias_embedding = user_bias_embeddings[user_id]
    project_bias_embedding = project_bias_embeddings[project_id]

    user_bias_embedding = user_bias_embedding[:, tf.newaxis]
    project_bias_embedding = project_bias_embedding[:, tf.newaxis]

    # Combine embeddings, biases, and pass through the output layer
    input_tensors = [x, user_bias_embedding, project_bias_embedding]
    concatenated_features = model.get_layer('combined_features')(input_tensors)
    
    x = model.get_layer('combined_dense1')(concatenated_features)
    x = model.get_layer('combined_dense2')(x)
    
    x = model.get_layer('output')(x)

    predicted_rating = x[0][0]

    return predicted_rating.numpy()

In [31]:
def user_recommendations(user_id):
    user_ratings = df[df['user_id'] == user_id]
    user_ratings = df[df['project_id'] != 1]
    recommendation = df[~df['project_id'].isin(user_ratings['project_id'])][['project_id']].drop_duplicates()
    recommendation['score_predict'] = recommendation.apply(lambda x: predict(user_id, x['project_id']), axis=1)
    
    final_rec = recommendation.sort_values(by='score_predict', ascending=False)
    return final_rec

In [36]:
rec = user_recommendations(1)
rec.head()

Unnamed: 0,project_id,score_predict
2,1,1.918079


In [37]:
def test_predictions(user_id):
    user_ratings = df[df['user_id'] == user_id]
    
    user_top_ratings = user_ratings.sort_values(by='score', ascending=False)

    comparision = df[df['project_id'].isin(user_ratings['project_id'])][['project_id']].drop_duplicates()
    comparision['score_predict'] = comparision.apply(lambda x: predict(user_id, x['project_id']), axis=1)
    comparision = user_top_ratings.merge(comparision, on='project_id')

    return comparision

In [40]:
comparision = test_predictions(0)
comparision

Unnamed: 0,user_id,project_id,score,score_predict
0,0,2,4,3.007643
1,0,3,4,3.001284
2,0,5,4,3.009952
3,0,1,2,2.979149
4,0,4,2,2.986011
