In this section of the codes, a collaborative recommendation system for a small subset of users and jobs is implemented by simulating the value of number of applications by similar users variable. The sparsity of the dataset is studied and alternating least square method is applied on the dataset to obtain the recommendation list of similar jobs applied by similar users.

In [1]:
import pandas as pd

In [2]:
collab_job=pd.read_csv('collab_job.csv')

In [3]:
collab_job

Unnamed: 0,user_id,job_id,name
0,1,1,Computer Software Developer (Predictive Modeling)
1,1,2,Python Developer
2,1,3,Python Developer
3,1,4,BMI Consultant
4,1,5,Software Engineer - entry level
5,1,6,Sr. ETL/Informatica Developer
6,1,7,"Analyst, Application Developer"
7,1,8,Angular Developer
8,1,9,Services Information Developer Specialist--Pyt...
9,1,10,Software Engineer


In [4]:
import numpy as np
collab_job['events'] = np.random.randint(1, 10, collab_job.shape[0])

In [5]:
collab_job

Unnamed: 0,user_id,job_id,name,events
0,1,1,Computer Software Developer (Predictive Modeling),5
1,1,2,Python Developer,4
2,1,3,Python Developer,7
3,1,4,BMI Consultant,1
4,1,5,Software Engineer - entry level,4
5,1,6,Sr. ETL/Informatica Developer,5
6,1,7,"Analyst, Application Developer",7
7,1,8,Angular Developer,8
8,1,9,Services Information Developer Specialist--Pyt...,5
9,1,10,Software Engineer,3


In [6]:
from scipy import sparse
sparse_user_item = sparse.csr_matrix((collab_job['events'].astype(float), (collab_job['user_id'], collab_job['job_id'])))
print(sparse_user_item)

  (1, 1)	5.0
  (1, 2)	4.0
  (1, 3)	7.0
  (1, 4)	1.0
  (1, 5)	4.0
  (1, 6)	5.0
  (1, 7)	7.0
  (1, 8)	8.0
  (1, 9)	5.0
  (1, 10)	3.0
  (1, 11)	4.0
  (1, 12)	3.0
  (1, 13)	8.0
  (1, 14)	9.0
  (2, 1)	8.0
  (2, 2)	7.0
  (2, 3)	7.0
  (2, 4)	6.0
  (2, 5)	6.0
  (2, 6)	1.0
  (2, 7)	9.0
  (2, 8)	2.0
  (2, 9)	8.0
  (2, 10)	6.0
  (2, 11)	4.0
  :	:
  (13, 3)	1.0
  (13, 4)	2.0
  (13, 5)	7.0
  (13, 6)	9.0
  (13, 7)	4.0
  (13, 8)	8.0
  (13, 9)	6.0
  (13, 10)	5.0
  (14, 1)	6.0
  (14, 2)	3.0
  (14, 3)	2.0
  (14, 4)	6.0
  (14, 5)	8.0
  (14, 6)	7.0
  (14, 7)	7.0
  (14, 8)	3.0
  (14, 9)	7.0
  (15, 1)	9.0
  (15, 2)	6.0
  (15, 3)	8.0
  (15, 4)	5.0
  (15, 5)	6.0
  (15, 6)	3.0
  (15, 7)	9.0
  (15, 8)	9.0


In [7]:
# Number of possible interactions in the matrix
matrix_size = sparse_user_item.shape[0]*sparse_user_item.shape[1]

# Count of interactions
count_interactions = sparse_user_item.size

# Compute matrix sparsity
sparsity = 100*(1 - (float(count_interactions)/float(matrix_size)))

print(sparsity)

61.80555555555556


In [8]:
import implicit
# Set parameters
confidence_coef = 15
factors = 60
regularization = 0.1
iterations = 100

# Initialize model
model = implicit.als.AlternatingLeastSquares(factors=factors, regularization=regularization, iterations=iterations)

# Fit model
model.fit((sparse_user_item.T*confidence_coef).astype('double'))

# Get user and item vectors from our trained model
user_vecs = model.user_factors
item_vecs = model.item_factors



HBox(children=(IntProgress(value=0), HTML(value='')))




In [13]:
from sklearn.preprocessing import MinMaxScaler
# Get recommendations results

def recommend(user_id, sparse_user_item, user_vecs, item_vecs, num_items=10):
    user_interactions = sparse_user_item[user_id,:].toarray()
    user_interactions = user_interactions.reshape(-1) + 1
    user_interactions[user_interactions > 1] = 0
    rec_vector = user_vecs[user_id,:].dot(item_vecs.T)
    min_max = MinMaxScaler()
    rec_vector_scaled = min_max.fit_transform(rec_vector.reshape(-1,1))[:,0]
    recommend_vector = user_interactions * rec_vector_scaled
    #print(recommend_vector)
    item_idx = np.argsort(recommend_vector)[::-1][:num_items]
    job_rec_df=collab_job.iloc[item_idx]
    job_rec_df=job_rec_df.drop(['user_id', 'job_id', 'events'], axis=1)
    #return job_rec_df
    scores=[]
    for idx in item_idx:
        scores.append(recommend_vector[idx])
    job_rec_df['Scores']=scores
    return job_rec_df

In [14]:
user_id=12
recommendations = recommend(user_id, sparse_user_item, user_vecs, item_vecs)

print('APPLICATIONS HISTORY FOR USER : ' + str(user_id) + '\n')
print(collab_job[collab_job['user_id']==user_id][['name','events']])
print('\n RECOMMEND FOLLOWING JOBS \n')
print(recommendations)

APPLICATIONS HISTORY FOR USER : 12

                                                  name  events
135  Computer Software Developer (Predictive Modeling)       6
136                                   Python Developer       6
137                                   Python Developer       6

 RECOMMEND FOLLOWING JOBS 

                                                 name    Scores
5                       Sr. ETL/Informatica Developer  0.118293
4                     Software Engineer - entry level  0.108786
9                                   Software Engineer  0.033151
7                                   Angular Developer  0.028494
6                      Analyst, Application Developer  0.021730
8   Services Information Developer Specialist--Pyt...  0.020601
12                                 Investment Analyst  0.019206
15                                   Python Developer  0.018846
14  Computer Software Developer (Predictive Modeling)  0.018275
0   Computer Software Developer (Predictive