# Metrics comparison

In [8]:
# Import Module
import os
import pandas as pd
import re
from sklearn.feature_extraction.text import CountVectorizer, TfidfVectorizer #pip install sklearn
from sklearn.metrics.pairwise import cosine_similarity
from sentence_transformers import SentenceTransformer #pip install sentence-transformers

# Folder path
# NOTE(review): `path` is not referenced by any code visible in this notebook —
# presumably the directory holding Resume_list.csv / Sublist.csv; confirm whether
# a chdir was intended.
path = "/Users/snigdharao/Desktop/project"
# Sentence-embedding model used by bert_score_calculator; created once here so
# every scoring call reuses the same instance.
model = SentenceTransformer('bert-base-nli-mean-tokens')


In [9]:
def series_to_list(series):
    """Flatten stringified list/tuple cells into one flat list of tokens.

    Every element of *series* is converted with ``str()``, stripped of
    parentheses, square brackets and single quotes, and then split on
    commas. The pieces of all elements are concatenated in iteration order.

    Whitespace around each piece is deliberately left untouched, so
    ``"['a', 'b']"`` yields ``["a", " b"]``.

    Args:
        series: Any iterable of cells (for example a pandas Series whose
            values are the ``str()`` form of a Python list or tuple).

    Returns:
        A list of strings; an empty list when *series* has no elements.
    """
    tokens = []
    for cell in series:
        # One raw-string character class removes (, ), [, ] and ' in a single
        # pass and avoids invalid escape sequences in the pattern literal.
        cleaned = re.sub(r"[()\[\]']", "", str(cell))
        # Accumulate instead of overwriting, so earlier elements are not lost
        # when the series holds more than one cell.
        tokens.extend(cleaned.split(","))
    return tokens

In [10]:
def cv_score_calculator(token, sublist):
    """Return the count-vector cosine similarity of two token lists, in percent.

    Both token sequences are joined with single spaces into one text each,
    vectorized together with a fresh ``CountVectorizer``, and compared with
    cosine similarity.

    Args:
        token: Iterable of strings for the resume side.
        sublist: Iterable of strings for the job-description side.

    Returns:
        The similarity multiplied by 100 and rounded to two decimals.
    """
    # Job-description text goes first, resume text second.
    corpus = [" ".join(sublist), " ".join(token)]
    term_counts = CountVectorizer().fit_transform(corpus)
    pairwise = cosine_similarity(term_counts)
    # Entry [0][1] is the similarity between the two documents.
    percent = pairwise[0][1] * 100
    return round(percent, 2)

In [11]:
def tf_idf_score_calculator(token, sublist):
    """Return the TF-IDF cosine similarity of two token lists, in percent.

    Each token sequence is joined with single spaces into one document; the
    two documents are fitted and transformed by a fresh ``TfidfVectorizer``
    and compared with cosine similarity.

    Args:
        token: Iterable of strings for the resume side.
        sublist: Iterable of strings for the job-description side.

    Returns:
        The similarity multiplied by 100 and rounded to two decimals.
    """
    job_text = " ".join(sublist)
    resume_text = " ".join(token)
    weighted = TfidfVectorizer().fit_transform([job_text, resume_text])
    # Row 0 is the job description, column 1 the resume.
    similarity = cosine_similarity(weighted)[0][1]
    return round(similarity * 100, 2)

In [12]:
def bert_score_calculator(token, sublist):
    """Return the sentence-embedding cosine similarity of two token lists, in percent.

    Both token sequences are joined with single spaces, encoded by the
    module-level ``model``, and the first embedding (job description) is
    compared with the second (resume) using cosine similarity.

    Args:
        token: Iterable of strings for the resume side.
        sublist: Iterable of strings for the job-description side.

    Returns:
        The similarity multiplied by 100 and rounded to two decimals.
    """
    job_text = " ".join(sublist)
    resume_text = " ".join(token)
    embeddings = model.encode([job_text, resume_text])
    # Compare the job-description vector against every remaining vector
    # (here just the resume), giving a 1x1 similarity matrix.
    similarity = cosine_similarity([embeddings[0]], embeddings[1:])
    percent = similarity[0][0] * 100
    return round(percent, 2)

In [13]:
def resume_selector():
    """Score every resume against the first job description and save the result.

    Reads ``Resume_list.csv`` (columns ``File name`` and ``Token_list``) and
    ``Sublist.csv`` (columns ``JobRole`` and ``Sublist``) from the current
    working directory. Each resume's tokens are compared with the tokens of
    row 0 of ``Sublist.csv`` using the count-vector, TF-IDF and
    sentence-embedding scorers defined in this file.

    Side effects:
        Prints a header line, one line per resume, and the final score table
        to stdout, and writes the table to ``MetricScore.csv`` with
        ``candidate_name`` as the index column and ``cv_score``,
        ``tf_idf_score`` and ``bert_score`` as data columns.

    Returns:
        None.
    """
    resume_list = pd.read_csv('Resume_list.csv')
    job_descript = pd.read_csv('Sublist.csv')

    # Read the cell value directly instead of parsing the Series' printed
    # form: splitting ``to_string()`` on runs of spaces left a leading blank
    # in the role and would cut off any role containing three spaces.
    job_role = str(job_descript.loc[0, 'JobRole']).strip()

    print(f"+++++ Processing Resumes for Job role {job_role} +++++")
    sublist = series_to_list(job_descript.loc[[0]]['Sublist'])

    cv_score = []
    tf_idf_score = []
    bert_score = []

    # Rows are addressed by label 0..n-1 (the default index produced by
    # read_csv); the one-row slice keeps the "<index>    <file name>" format
    # of the progress line.
    for j in range(len(resume_list)):
        row = resume_list.loc[[j]]
        print(row["File name"].to_string())
        token_list = series_to_list(row["Token_list"])

        cv_score.append(cv_score_calculator(token_list, sublist))
        tf_idf_score.append(tf_idf_score_calculator(token_list, sublist))
        bert_score.append(bert_score_calculator(token_list, sublist))

    # Build the result table in one step, indexed by candidate file name.
    candidate_data = pd.DataFrame(
        {
            'cv_score': cv_score,
            'tf_idf_score': tf_idf_score,
            'bert_score': bert_score,
        },
        index=pd.Index(resume_list["File name"].tolist(), name='candidate_name'),
    )
    print(candidate_data)
    candidate_data.to_csv('MetricScore.csv')

In [14]:
# Run the scoring pipeline: prints progress to stdout and writes MetricScore.csv.
resume_selector()

+++++ Processing Resumes for Job role  Business_analyst +++++
0    Abiral_Pandey_Fullstack_Java.docx
1    Achyuth Resume_8.docx
2    Adelina_Erimia_PMP1.docx
3    Adhi Gopalam - SM.docx
4    AjayKumar.docx
5    Akhil.profile.docx
6    Alekhya Resume.docx
7    Amar Sr BSA.docx
8    Ami Jape.docx
9    Amrinder Business Analyst.docx
10    Amulya Komatineni.docx
11    Anil Krishna Mogalaturthi.docx
12    AnilAgarwal.docx
13    Anudeep N_Sr Java Developer.docx
14    Ashok Jayakumar - PM.docx
15    Ashwini J2EE Developer.docx
16    Atul_Mathur_Resume.docx
17    Avathika BA-Healthcare_.docx
18    avinash G.docx
19    B Shaker-Sr BSA-Scrum Master .docx
20    B Suresh Kumar_Project Manager_1.docx
21    BA - Abhishek.docx
22    BA - Navneet.docx
23    BA Kiran.docx
24    BA with INV.docx
25    Balaji Gopalakrishnan Project Manager.docx
26    Balakrishna Sudabathula.docx
27    Bapuji Hadoop developer.docx
28    Bharat Arora_CV_PMP _ ERP1.docx
29    Bharatha BA Resume.docx
30    Brahma-Resume (BA)