# Краткое описание

Данный ноутбук предназначен для того, чтобы проверять соответствие резюме заданным вакансиям

Система осуществляет подбор кандидату той вакансии, где он сможет наиболее полно раскрыть свой потенциал

# Resume-vacancy analysis

In [1]:
import time
import pandas as pd
import json
import random as rd
import numpy as np
from sklearn.metrics import accuracy_score
import re 
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.metrics.pairwise import cosine_similarity


In [2]:
#!pip install dostoevsky
from dostoevsky.tokenization import RegexTokenizer
from dostoevsky.models import FastTextSocialNetworkModel



In [3]:
!python -m dostoevsky download fasttext-social-network-model

## Analysis

In [4]:
with open('drive/MyDrive/LD final/data_file_it.json') as f:
    data = json.load(f)

In [5]:
vacs = pd.read_csv('drive/MyDrive/LD final/vacancy_all_it.csv', index_col=0)
vacs = vacs['description']

## Sentiment analysis for each resume and vacancy

Delete all symbols

In [6]:
res_list = []
for i in range(len(data)):
  resume = re.sub(r"[,\'{}\\\[\]]", "", str(data[i])).replace('xa0',' ')
  res_list.append(resume)

Emotion scoring

In [7]:
tokenizer = RegexTokenizer()
tokens = tokenizer.split('всё очень плохо')  # [('всё', None), ('очень', None), ('плохо', None)]

model = FastTextSocialNetworkModel(tokenizer=tokenizer)

results = model.predict(res_list, k=2)
res_values = []
for res, sentiment in zip(res_list, results):
    res_values.append([res_list.index(res),sentiment])




In [8]:
for i in range(len(res_values)):  
  res_values[i][1] = 1 - res_values[i][1]['neutral']

In [9]:
res_values[0] #emotion score for data[0] resume

[0, 0.22269010543823242]

### Matching resumes

Some functions

In [10]:
def index(textcv, textjd):
  '''
  Resume to vacancy score
  '''

  documents = [textjd, textcv]
  count_vectorizer = CountVectorizer()
  sparse_matrix = count_vectorizer.fit_transform(documents)
  doc_term_matrix = sparse_matrix.todense()
  df = pd.DataFrame(doc_term_matrix, 
              columns=count_vectorizer.get_feature_names(), 
              index=['textjd', 'textcv'])
  answer = cosine_similarity(df, df)
  answer = pd.DataFrame(answer)
  answer = answer.iloc[[1],[0]].values[0]
  answer = round(float(answer),4)*100

  return answer

In [11]:
def best_resume_vacancy_score():
  '''
  Searching vest resume for each vacancy
  '''

  best_match = []
  for i in range(len(vacs[:15])):
    vac = vacs[i]
    max_index = 0
    for k in range(len(res_list)):
      resume = res_list[k]
      index_temp = index(resume, vac)
      if index_temp > max_index:
        max_index = index_temp
        best_resume_number = k
    best_match.append([i, best_resume_number, max_index*(1+res_values[k][1])])

  return best_match

In [31]:
def all_resumes_to_vacancies():
  '''
  Function that creates dataframe with resume scores to all vacancies
  '''
  scores = []
  for i in range(len(data[:10])):
    resume = re.sub(r"[,\'{}\\\[\]]", "", str(data[i])).replace('xa0',' ')
    indexes_temp = []
    for k in range(len(vacs[:20])):
      vac = vacs[k]
      index_temp = index(resume, vac)
      indexes_temp.append(index_temp)
    scores.append(indexes_temp)
  
  return pd.DataFrame(scores)

In [13]:
best_resume_vacancy_score()

[[0, 46, 51.673154156208035],
 [1, 282, 15.349257299900056],
 [2, 46, 53.670738875865936],
 [3, 327, 25.153494946956634],
 [4, 11, 31.50214063644409],
 [5, 11, 32.58129559993744],
 [6, 361, 26.852589995861056],
 [7, 11, 25.497906105518343],
 [8, 327, 21.296089971065523],
 [9, 28, 39.25139170408249],
 [10, 245, 30.5951912522316],
 [11, 11, 36.209093136787416],
 [12, 11, 32.58129559993744],
 [13, 28, 44.3601572227478],
 [14, 316, 30.365583813190465]]

In [32]:
all_resumes_to_vacancies()

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19
0,7.0,5.11,7.45,7.3,9.34,8.87,10.08,6.93,5.12,5.99,9.04,9.94,8.87,10.0,9.96,4.55,4.51,9.0,10.19,6.66
1,11.78,4.9,12.11,10.37,15.58,14.57,11.96,10.44,8.86,11.1,14.66,16.09,14.57,10.66,13.17,11.66,11.57,14.98,20.28,16.33
2,12.77,6.89,12.33,11.1,11.21,9.48,15.45,8.32,11.9,11.54,7.82,11.04,9.48,11.24,10.8,12.22,12.49,13.54,13.23,13.4
3,7.91,2.19,6.31,10.15,14.38,13.48,8.03,7.76,4.12,11.03,11.81,16.41,13.48,13.42,12.16,9.45,9.38,11.77,15.56,10.96
4,12.74,6.09,12.86,14.34,14.53,14.54,15.52,14.02,13.35,15.83,13.36,14.59,14.54,16.22,12.7,13.56,14.13,19.95,17.56,24.11
5,2.4,1.9,2.55,2.51,3.4,4.12,3.33,3.36,1.78,3.98,4.39,4.27,4.12,3.2,5.43,2.91,2.89,6.28,2.95,6.82
6,10.79,12.16,11.12,10.14,11.9,11.5,10.72,11.03,9.95,11.23,8.63,12.6,11.5,14.02,10.37,13.26,13.16,17.76,14.72,20.13
7,11.03,4.01,11.73,9.68,13.15,14.68,10.22,12.42,9.57,10.64,13.5,15.0,14.68,11.05,12.47,10.46,10.66,17.94,15.45,18.09
8,20.26,6.02,16.68,17.16,11.17,15.48,14.06,12.03,5.86,11.02,15.44,13.26,15.48,12.69,11.69,9.11,9.42,11.78,14.83,9.31
9,6.05,6.82,6.04,9.43,7.92,9.48,8.35,8.04,5.25,7.62,9.28,8.63,9.48,7.38,8.81,8.17,7.53,10.39,9.53,10.49
