In [1]:
# Standard-library helpers first, then the Colab-specific Drive helper.
import os
import zipfile

from google.colab import drive

# Mount Google Drive so files stored there are reachable under /content/drive.
drive.mount('/content/drive')


Mounted at /content/drive


In [None]:
# Unpack the dataset archive stored on Google Drive into
# /jobrecommendationDatasets (the location every later cell reads from).
# NOTE(review): the archive name was broken across two lines in the notebook
# export; it is assumed to be "job recommendation.zip" (with a space) —
# confirm against the actual file name on Drive.
# The context manager closes the archive even if extraction raises.
with zipfile.ZipFile('/content/drive/MyDrive/job recommendation.zip', 'r') as zip_ref:
  zip_ref.extractall('/jobrecommendationDatasets')

In [None]:
!pip install
git+https://github.com/PyTorchLightning/pytorch-lightning
import pandas as pd
import numpy as np
from tqdm.notebook import tqdm


In [None]:
import torch
import torch.nn as nn
from torch.utils.data import Dataset, DataLoader
import pytorch_lightning as pl

In [None]:
# Seed every random number generator the notebook draws from: NumPy drives the
# negative sampling and evaluation sampling, torch drives weight initialisation.
SEED = 123
np.random.seed(SEED)
torch.manual_seed(SEED)

In [None]:
# Load the complete job-view log and report how many rows it contains.
job_views = pd.read_csv('/jobrecommendationDatasets/Job_Views.csv')
print(job_views.shape[0])

In [None]:
# Re-read the log keeping only the columns at positions 0, 1, 9 and 11
# (presumably Applicant.ID, Job.ID, View.Start and View.Duration, since those
# are the names used by later cells — confirm against the CSV header).
job_views_preprocess1 = pd.read_csv(
    '/jobrecommendationDatasets/Job_Views.csv',
    usecols=[0, 1, 9, 11],
)
# Largest recorded view duration; upper bound of the rescaling done later.
max_value = job_views_preprocess1['View.Duration'].max()
print("max val=", max_value)

In [None]:
# Smallest and mean view duration; the minimum is the lower bound of the
# rescaling performed by change_views_to_review.
min_value = job_views_preprocess1['View.Duration'].min()
avg_value = job_views_preprocess1['View.Duration'].mean()
print("min value = ", min_value)
print("avg val=", avg_value)

In [None]:
# Target interval that view durations are rescaled onto.
new_min, new_max = 0, 5

In [None]:
def change_views_to_review(x):
  """Turn a view duration into a binary label.

  The duration is min-max rescaled from [min_value, max_value] onto
  [new_min, new_max] using the notebook-level globals of those names; the
  result is 1 when the rescaled value is strictly positive and 0 otherwise.

  Args:
    x: A single view duration.

  Returns:
    int: 1 or 0.
  """
  input_span = max_value - min_value
  output_span = new_max - new_min
  scaled = ((x - min_value) / input_span) * output_span + new_min
  return 1 if scaled > 0 else 0

In [None]:
# Replace every raw duration with its binary label, then preview the result.
binary_duration = job_views_preprocess1['View.Duration'].apply(change_views_to_review)
job_views_preprocess1['View.Duration'] = binary_duration
print(binary_duration.head())

In [None]:
# Parse the view start column into datetimes so views can be ordered in time.
view_start = pd.to_datetime(job_views_preprocess1['View.Start'])
job_views_preprocess1['View.Start'] = view_start
print(view_start.head())

In [None]:
# Rank each applicant's views from newest (rank 1) to oldest; method='first'
# breaks ties by row order so every view gets a distinct rank.
views_by_applicant = job_views_preprocess1.groupby(['Applicant.ID'])['View.Start']
job_views_preprocess1['rank_latest'] = views_by_applicant.rank(
    method='first',
    ascending=False,
)


In [None]:
# Leave-one-out split: each applicant's most recent view (rank 1) is held out
# for testing, everything else is used for training.
is_latest_view = job_views_preprocess1['rank_latest'] == 1
test_job_views = job_views_preprocess1[is_latest_view]
train_job_views = job_views_preprocess1[~is_latest_view]

In [None]:
# Keep only the columns the model needs, then preview both splits.
kept_columns = ['Applicant.ID', 'Job.ID', 'View.Duration']
train_job_views = train_job_views[kept_columns]
test_job_views = test_job_views[kept_columns]
for length_label, split_frame in (
    ("train length=", train_job_views),
    ("test length=", test_job_views),
):
  print(split_frame.head())
  print(length_label, len(split_frame))


In [None]:
class JobTrainDataset(Dataset):
  """PyTorch dataset of (applicant, job, label) triples with sampled negatives.

  Every distinct (applicant, job) pair in the supplied frame becomes a positive
  example (label 1). For each positive, ``num_negatives`` jobs that the
  applicant has no pair with in that frame are drawn at random from
  ``all_jobIds`` and added with label 0.

  Args:
    job_views_preprocess1 (pd.DataFrame): Interactions to build the dataset
      from; must contain 'Applicant.ID' and 'Job.ID' columns.
    all_jobIds (array-like): Pool of job IDs negatives are sampled from.
    num_negatives (int): Negatives drawn per positive. Defaults to 5.
  """
  def __init__(self, job_views_preprocess1, all_jobIds, num_negatives=5):
    self.users, self.items, self.labels = self.get_dataset(
        job_views_preprocess1, all_jobIds, num_negatives)

  def __len__(self):
    """Number of (applicant, job, label) triples."""
    return len(self.users)

  def __getitem__(self, idx):
    """Return the ``idx``-th (applicant, job, label) triple as tensors."""
    return self.users[idx], self.items[idx], self.labels[idx]

  def get_dataset(self, ratings, all_jobIds, num_negatives=5):
    """Build the user, item and label tensors.

    Args:
      ratings (pd.DataFrame): Interactions with 'Applicant.ID' and 'Job.ID'.
      all_jobIds (array-like): Pool of job IDs negatives are sampled from.
      num_negatives (int): Negatives drawn per positive. Defaults to 5.

    Returns:
      tuple: (users, items, labels) as three 1-D tensors of equal length.
    """
    users, items, labels = [], [], []
    # Use the frame that was passed in, not a notebook-level global: reading
    # the full interaction frame here would put the held-out test pairs into
    # the training positives.
    user_item_set = set(zip(ratings['Applicant.ID'], ratings['Job.ID']))
    for u, i in user_item_set:
      users.append(u)
      items.append(i)
      labels.append(1)
      for _ in range(num_negatives):
        # Re-draw until the job is one this applicant has no pair with. This
        # keeps looping if the applicant is paired with every job in the pool.
        negative_item = np.random.choice(all_jobIds)
        while (u, negative_item) in user_item_set:
          negative_item = np.random.choice(all_jobIds)
        users.append(u)
        items.append(negative_item)
        labels.append(0)
    return torch.tensor(users), torch.tensor(items), torch.tensor(labels)


In [None]:
class NCF(pl.LightningModule):
  """Neural Collaborative Filtering (NCF).

  Applicants and jobs are each embedded into 10 dimensions; the two embeddings
  are concatenated and passed through five ReLU dense layers
  (20 -> 64 -> 128 -> 256 -> 128 -> 32) and a sigmoid output unit.

  Args:
    num_users (int): Number of rows in the applicant embedding table.
    num_items (int): Number of rows in the job embedding table.
    job_views_preprocess1 (pd.DataFrame): Dataframe containing the job views
      used for training.
    all_movieIds (array-like): All job IDs (train + test) that negatives are
      sampled from. The parameter keeps its historical name so existing
      callers are unaffected; it is stored as ``self.all_jobIds``.
  """
  def __init__(self, num_users, num_items, job_views_preprocess1, all_movieIds):
    super().__init__()
    self.user_embedding = nn.Embedding(num_embeddings=num_users, embedding_dim=10)
    self.item_embedding = nn.Embedding(num_embeddings=num_items, embedding_dim=10)
    self.fc1 = nn.Linear(in_features=20, out_features=64)
    self.fc2 = nn.Linear(in_features=64, out_features=128)
    self.fc3 = nn.Linear(in_features=128, out_features=256)
    self.fc4 = nn.Linear(in_features=256, out_features=128)
    self.fc5 = nn.Linear(in_features=128, out_features=32)
    self.output = nn.Linear(in_features=32, out_features=1)
    self.job_views_preprocess1 = job_views_preprocess1
    # Store the argument that was passed in rather than a same-named global.
    self.all_jobIds = all_movieIds

  def forward(self, user_input, item_input):
    """Score (applicant, job) pairs.

    Args:
      user_input (torch.Tensor): Integer applicant IDs.
      item_input (torch.Tensor): Integer job IDs, same shape as ``user_input``.

    Returns:
      torch.Tensor: Sigmoid scores with a trailing dimension of size 1.
    """
    # Pass through embedding layers
    user_embedded = self.user_embedding(user_input)
    item_embedded = self.item_embedding(item_input)
    # Concat the two embedding layers
    vector = torch.cat([user_embedded, item_embedded], dim=-1)
    # Pass through the dense layers (functional ReLU avoids building a new
    # activation module on every call)
    for dense in (self.fc1, self.fc2, self.fc3, self.fc4, self.fc5):
      vector = torch.relu(dense(vector))
    # Output layer
    return torch.sigmoid(self.output(vector))

  def training_step(self, batch, batch_idx):
    """Binary cross-entropy between predicted scores and the 0/1 labels."""
    user_input, item_input, labels = batch
    predicted_labels = self(user_input, item_input)
    loss = nn.BCELoss()(predicted_labels, labels.view(-1, 1).float())
    return loss

  def configure_optimizers(self):
    """Adam over all parameters with its default settings."""
    return torch.optim.Adam(self.parameters())

  def train_dataloader(self):
    """Build a fresh JobTrainDataset (negatives are re-sampled on each call)."""
    return DataLoader(JobTrainDataset(self.job_views_preprocess1, self.all_jobIds),
                      batch_size=512, num_workers=4)

In [None]:
# The embedding tables are indexed directly by raw ID, so each needs
# (largest ID + 1) rows. Sizes and the job pool come from the full frame.
all_jobIds = job_views_preprocess1['Job.ID'].unique()
num_users = job_views_preprocess1['Applicant.ID'].max() + 1
num_items = job_views_preprocess1['Job.ID'].max() + 1

In [None]:
# Train on the training split only; negatives are re-sampled every epoch
# because the dataloader is rebuilt each epoch.
model = NCF(num_users, num_items, train_job_views, all_jobIds)
# The Trainer arguments use the current Lightning names: the older `gpus`,
# `reload_dataloaders_every_epoch`, `progress_bar_refresh_rate` and
# `checkpoint_callback` keywords are rejected by recent releases, which is
# what the unpinned install above provides.
trainer = pl.Trainer(
    max_epochs=25,
    accelerator="gpu",
    devices=1,
    reload_dataloaders_every_n_epochs=1,
    callbacks=[pl.callbacks.TQDMProgressBar(refresh_rate=50)],
    logger=False,
    enable_checkpointing=False,
)
trainer.fit(model)

# User-item pairs for testing
test_user_item_set = set(zip(test_job_views['Applicant.ID'], test_job_views['Job.ID']))
# Dict of all items that are interacted with by each user
user_interacted_items = job_views_preprocess1.groupby('Applicant.ID')['Job.ID'].apply(list).to_dict()
# Built once: the pool of jobs does not change between users.
all_job_id_set = set(all_jobIds)
# Score on whichever device the trained model currently lives on.
device = next(model.parameters()).device
model.eval()
hits = []
cnt = 0
output = []
for (u, i) in tqdm(test_user_item_set):
  interacted_items = user_interacted_items[u]
  not_interacted_items = all_job_id_set - set(interacted_items)
  # 99 jobs the user never viewed plus the held-out job = 100 candidates.
  selected_not_interacted = list(np.random.choice(list(not_interacted_items), 99))
  test_items = selected_not_interacted + [i]
  # Inference only, so no autograd graph is needed.
  with torch.no_grad():
    scores = model(torch.tensor([u] * 100, device=device),
                   torch.tensor(test_items, device=device))
  predicted_labels = np.squeeze(scores.cpu().numpy())
  top10_items = [test_items[k] for k in np.argsort(predicted_labels)[::-1][0:10].tolist()]
  if cnt == 0:
    # Remember the first evaluated user as [user_id, top10_job_ids, held_out_job_id]
    # for the recommendation display further down.
    print("userid:", u, " recommended job id's:", top10_items, " actual jobid :", i)
    cnt += 1
    output.append(u)
    output.append(top10_items)
    output.append(i)
  # A hit means the held-out job made it into the top 10.
  hits.append(1 if i in top10_items else 0)

In [None]:
# Fraction of test users whose held-out job appeared in their top 10.
print(f"The Hit Ratio @ 10 is {np.average(hits):.2f}")

In [None]:
# List the titles of the jobs recommended to the first evaluated user.
# `output` is [user_id, top10_job_ids, held_out_job_id] from the evaluation loop.
# NOTE(review): `jobs` (a frame with 'Job.ID' and 'Title' columns) is not
# defined in the visible cells — confirm it is loaded before this cell runs.
# Capture the recommended IDs now: `output` is rebound to a widget later, and
# the button callback needs `job_ids` aligned with `job_names`.
job_ids = list(output[1])
print("Recommended jobs for user with userid:", output[0], " are ")
print("============recommendations for you==============")
job_names = []
company_names = []
for cnt, x in enumerate(job_ids, start=1):
  job = jobs.loc[jobs['Job.ID'] == x, 'Title'].iloc[0]
  # Titles look like "<role> @ <company>"; the company part is optional.
  joblist = job.split("@")
  job_names.append(joblist[0].strip())
  company_names.append(joblist[1].strip() if len(joblist) > 1 else "")
  print(cnt, ")", job)
print("=================================================")
print(job_names)
print(company_names)


In [None]:
# Stock images (taken from Google) shown next to the recommended jobs, purely
# for display purposes. The engineer picture is used for both the first and
# the tenth slot.
# NOTE(review): `Image` is not imported in the visible cells — presumably
# PIL.Image; confirm an import exists before this cell runs.
(im_job1, im_job2, im_job3, im_job4, im_job5,
 im_job6, im_job7, im_job8, im_job9, im_job10) = [
    Image.open(icon_file)
    for icon_file in (
        'engineer.png',
        'fireman.png',
        'manager.png',
        'pilot.png',
        'policeman.png',
        'priest.png',
        'singer.png',
        'waiter.png',
        'welder.png',
        'engineer.png',
    )
]

In [None]:
# Pair the first ten recommended job names with their display images, in order.
image_name = [
    {'name': job_names[slot], 'image': picture}
    for slot, picture in enumerate((
        im_job1, im_job2, im_job3, im_job4, im_job5,
        im_job6, im_job7, im_job8, im_job9, im_job10,
    ))
]

In [None]:
import ipywidgets
import pandas as pd
from google.colab import widgets
from IPython.display import clear_output, display

# Let pandas render full cell contents instead of truncating long values.
pd.set_option("max_colwidth", None)
print("following are your recommended jobs")
# create output widget
# NOTE(review): this rebinds `output`, which earlier held the evaluation
# result list; cells that read that list cannot be re-run after this one.
output = ipywidgets.Output()

In [None]:
# Skeleton of the detail table shown when a job button is clicked: one
# unnamed column of row labels. The button callback later adds a column named
# by job_desc[0] holding the job ID, role and company, in that order.
job_desc = ['description']
data_type = ["job ID", "job role", "job company"]
df = pd.DataFrame({'': data_type})

In [None]:
def on_button_clicked(b):
  """Show the details of the job whose button was clicked.

  Looks up the button's label in the notebook-level ``job_names`` list, writes
  the matching job ID, name and company into the ``job_desc[0]`` column of the
  notebook-level ``df``, and renders that frame inside the ``output`` widget.

  Args:
    b: The clicked button; only its ``description`` attribute is read.
  """
  job_name = str(b.description)
  # First job whose name equals the label; index 0 when nothing matches.
  job_index = next(
      (pos for pos, name in enumerate(job_names) if name == job_name), 0)
  # Display the message within the output widget.
  with output:
    clear_output()
    print("job description")
    df[job_desc[0]] = [job_ids[job_index], job_names[job_index], company_names[job_index]]
    print('\n', 'jobname: ', job_name, '\n')
    display(df)

In [None]:
# create a list of buttons, one for each star sign
button_list = []
for i in range(0,10):
  button =  ipywidgets.widgets.Button(description=image_name[i]['name'])
  button.on_click(on_button_clicked)
  button_list.append(button)
# arrange the job images and buttons into two rows
grid = widgets.Grid(1, 5, header_row=True, header_column=True)
newsize = (100, 100)
for (row, col) in grid:
  index = row*5+col
  print("\n")
  display(image_name[index]['image'].resize(newsize))
  display(button_list[index])
  for (row, col) in grid:
    index = row*5+col
    print("\n")
    display(image_name[index+5]['image'].resize(newsize))
    display(button_list[index+5])
    # display output
  #print('\nSelect your job Sign by clicking the button\n')
  display(output)
