In [73]:
import numpy as np
import pandas as pd
import json
from collections import Counter
from nltk.stem.snowball import SnowballStemmer


# Raise pandas' display limits so wide frames and long cell values are shown
# without truncation. Each pair is (option name, new value).
for _option, _value in (
    ('display.max_columns', 10000),
    ('display.max_rows', 10000),
    ('display.max_colwidth', 1000),
    ('display.width', 1000),
):
    pd.set_option(_option, _value)

## Data Input Function

In [74]:
######################### Importing 1st Data-set ###################
# Open each file in a `with` block so the handle is closed as soon as the
# JSON has been parsed (the bare `open(...)` form leaked the file object).
with open('Vacancies 2.json') as vacancies_file:
    vacan = json.load(vacancies_file)
########################## Importing 2nd Data-Set #################
with open('employees2.json') as employees_file:
    emp = json.load(employees_file)



In [75]:
##Code for Standardizing Dataset 2 According to format of Dataset 1
# Each employee record carries a nested 'Location' dict; its entries are
# promoted to top-level string fields (the scoring code reads 'Country' and
# 'City' directly from the record) and the nested dict is then removed.
for employee in emp:
    location = employee.get('Location')
    if not isinstance(location, dict):
        # Already flattened (e.g. this cell was re-run) or no location given:
        # skip instead of raising KeyError.
        continue
    for field, value in location.items():
        employee[str(field)] = str(value)
    del employee['Location']

Basic Functions

In [76]:

# Data Standardizing (Lower casing with one spacing in list sub-elements)
def standard_list(list_1):
    """Return a new list with every string lower-cased and whitespace-normalised.

    Leading/trailing whitespace is dropped and any internal run of whitespace
    is collapsed to a single space. The input list is not modified.

    Args:
        list_1: iterable of strings.

    Returns:
        list of normalised strings, in the same order as the input.
    """
    return [" ".join(entry.lower().split()) for entry in list_1]

### Stemming(sno) function (only standardize list) returning stemmed list

def stem_list(list_stem):
    """Stem every word of every phrase with the English Snowball stemmer.

    Each phrase is lower-cased and whitespace-normalised, split into words,
    each word is stemmed, and the stems are joined back with single spaces.

    Args:
        list_stem: iterable of strings (phrases).

    Returns:
        list of stemmed phrases, in the same order as the input.
    """
    stemmer = SnowballStemmer('english')
    stemmed_phrases = []
    for phrase in list_stem:
        normalised = " ".join(phrase.lower().split())
        words = normalised.split(" ")
        stems = [str(stemmer.stem(word)) for word in words]
        stemmed_phrases.append(' '.join(stems))
    return stemmed_phrases




In [77]:
 def standardization(input_list):
  # Standardizing soft and hard skills lists and eleminating repeated elements #
  out_list1=[]
  for i in input_list:
    c_dict=i.copy()
    for m,k in c_dict.items():
      if m=='softSkills' or m=='hardSkills':
        if isinstance(i[m],str):
          k = k.split(",")
          k = list(set(k))
          c_dict[m]=k
        else:
          k = list(set(k))
          c_dict[m]=k
      else:
        c_dict[m]=k
    out_list1.append(c_dict)

  out_list2=[]
  for b in out_list1:
    d_dict=b.copy()
    for a in range(0,2): 
        for q,r in d_dict.items():
          if isinstance(d_dict[q],list):
            r=standard_list(r)
            d_dict[q]=r
          if q== 'hardSkills':
            d_dict['hardSkills']=stem_list(r)
    
    out_list2.append(d_dict)
  return out_list2

In [78]:
### Standardizing both datasets for model input
# NOTE: both names are rebound to their standardized form, so this cell is
# meant to be executed once per kernel session, right after the data is loaded.
vacan = standardization(input_list=vacan)
emp = standardization(input_list=emp)

# Model Initiation

In [79]:
########### Hard skill matching ########
from difflib import SequenceMatcher

def sequence_similarity(a, b):
    """Return difflib's similarity ratio of the two sequences, a float in [0, 1]."""
    matcher = SequenceMatcher(isjunk=None, a=a, b=b)
    return matcher.ratio()

def equal_similarity_s(js, jd, threshold=0.70):
    """Fuzzy-match score between a job seeker's and a vacancy's soft skills.

    Every (vacancy skill, seeker skill) pair whose ``sequence_similarity``
    is strictly greater than ``threshold`` counts as one match; the number of
    matches is divided by the number of seeker skills.

    The result is capped at 1.0: several vacancy skills can fuzzy-match the
    same seeker skill, which would otherwise push the ratio above 1 while the
    other components combined in ``equal_model`` never exceed 1.

    Args:
        js: list of the job seeker's skills (strings).
        jd: list of the vacancy's skills (strings); ``None`` counts as empty.
        threshold: minimum similarity (exclusive) for a pair to match.

    Returns:
        float in [0, 1]; 0.0 when the seeker lists no skills (previously a
        ZeroDivisionError).
    """
    if not js:
        return 0.0
    matches = sum(
        1
        for wanted in (jd or ())
        for offered in js
        if sequence_similarity(wanted, offered) > threshold
    )
    return min(1.0, matches / len(js))


def equal_similarity_h(js, jd):
    """Exact-match score between a job seeker's and a vacancy's hard skills.

    Every (vacancy skill, seeker skill) pair that compares equal counts as one
    match; the number of matches is divided by the number of seeker skills.

    The result is capped at 1.0 so that repeated entries in either list cannot
    push the ratio above 1.

    Args:
        js: list of the job seeker's skills.
        jd: list of the vacancy's skills; ``None`` counts as empty.

    Returns:
        float in [0, 1]; 0.0 when the seeker lists no skills (previously a
        ZeroDivisionError).
    """
    if not js:
        return 0.0
    matches = sum(
        1
        for wanted in (jd or ())
        for offered in js
        if wanted == offered
    )
    return min(1.0, matches / len(js))

In [80]:
###Scoring With sequence Algorithm
def equal_model(job_seeker, job_description, weights=(0.15, 0.25, 0.25, 0.20, 0.15)):
    """Score how well a job seeker fits a vacancy, as a weighted percentage.

    Five components are combined with ``np.average`` and multiplied by 100:

    1. soft skills  - ``equal_similarity_s`` of the two ``softSkills`` lists;
    2. hard skills  - ``equal_similarity_h`` of the two ``hardSkills`` lists;
    3. experience   - (seeker years / required years) * hard-skill score,
       capped at 1;
    4. salary       - (offered salary / salary aim), capped at 1;
    5. location     - 1 for same country and city, 0.5 for same country only,
       -0.5 otherwise.

    A vacancy requiring no experience (``Experience`` <= 0) and a seeker with
    no salary expectation (``salaryAim`` <= 0) are treated as fully satisfied
    instead of raising ZeroDivisionError.

    Args:
        job_seeker: dict with ``softSkills``, ``hardSkills``,
            ``yearsOfExperience``, ``salaryAim``, ``Country`` and ``City``.
        job_description: dict with ``softSkills``, ``hardSkills``,
            ``Experience``, ``Salary``, ``Country`` and ``City``.
        weights: five weights, in the component order listed above.

    Returns:
        The weighted average of the five components multiplied by 100.
    """
    soft_score = equal_similarity_s(job_seeker['softSkills'], job_description['softSkills'])
    hard_score = equal_similarity_h(job_seeker['hardSkills'], job_description['hardSkills'])

    # Experience only counts in proportion to the hard-skill overlap.
    seeker_years = float(job_seeker['yearsOfExperience'])
    required_years = float(job_description['Experience'])
    if hard_score <= 0:
        experience_score = 0.0
    elif required_years <= 0:
        experience_score = 1.0
    else:
        experience_score = min(1.0, seeker_years / required_years * hard_score)

    salary_aim = float(job_seeker['salaryAim'])
    if salary_aim <= 0:
        salary_score = 1.0
    else:
        salary_score = min(1.0, float(job_description['Salary']) / salary_aim)

    same_country = job_seeker['Country'] == job_description['Country']
    same_city = job_seeker['City'] == job_description['City']
    if same_country and same_city:
        location_score = 1
    elif same_country:
        location_score = 0.5
    else:
        # A different country is penalised rather than merely scored zero.
        location_score = -0.5

    component_scores = [soft_score, hard_score, experience_score, salary_score, location_score]
    return np.average(component_scores, weights=weights) * 100



## **Prescription**

In [81]:
#################################### Job Prescription ##########################
# For every employee, score every vacancy and keep the best matches,
# highest score first.
TOP_MATCHES = 3

res_dict = {}
for employee in emp:
    scored_vacancies = []
    for vacancy in vacan:
        scored_vacancies.append({
            'ID': str(vacancy['ID']),
            'Title': str(vacancy['title']),
            'Soft Skills': str(vacancy['softSkills']),
            # Previously filled from 'softSkills' by mistake.
            'Hard Skills': str(vacancy['hardSkills']),
            # Computed once per (employee, vacancy) pair.
            'Score': equal_model(employee, vacancy),
        })

    # Ascending sort, take the tail, then reverse: best score first.
    ranked = sorted(scored_vacancies, key=lambda match: match['Score'])
    # NOTE: results are keyed by name, so two employees sharing a name
    # overwrite each other.
    res_dict[str(employee['Name'])] = ranked[-TOP_MATCHES:][::-1]




In [83]:
######### Saving Json File ############
import json

# Write the recommendations as pretty-printed JSON with keys in sorted order.
with open('out_put.json', 'w') as fp:
    fp.write(json.dumps(res_dict, sort_keys=True, indent=4))