# AI Matching Consistency Evaluation

In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

from sentence_transformers import SentenceTransformer

# Extend the import search path with the sibling ../src directory (resolved to an
# absolute path from the current working directory) before importing project code.
import os, sys
sys.path.append(os.path.abspath(os.path.join('..', 'src')))

# Project helpers; presumably matcher.py lives in ../src -- TODO confirm.
from matcher import read_files, resume_to_job_analysis

## Read Files

In [3]:
# Locations of the two input datasets, given relative to the working directory.
resume_path = '../data/resumes/master_resumes.jsonl'
job_postings_path = '../data/job_postings/training_data.csv'

# read_files hands back a two-item result: the resumes first, the job postings second.
loaded_inputs = read_files(resume_path, job_postings_path)
resumes, job_postings = loaded_inputs

## Resume-to-Job Scoring Analysis

In [5]:
# Embedding checkpoints to compare. Each name is used both as the dictionary key
# and as the identifier handed to SentenceTransformer.
model_names = ('all-MiniLM-L6-v2', 'all-MiniLM-L12-v2', 'paraphrase-MiniLM-L6-v2')
models = {name: SentenceTransformer(name) for name in model_names}

# Only the first five job postings are passed to the analysis.
first_postings = job_postings[0:5]
df = resume_to_job_analysis(resumes, first_postings, models)

# Write the result to disk (index column omitted), then show it.
df.to_csv('test.csv', index=False)
print(df)

    job_posting       resume  similarity_score               model_name
0         job_0    resume_12          0.493402         all-MiniLM-L6-v2
1         job_0    resume_15          0.493402         all-MiniLM-L6-v2
2         job_0    resume_45          0.493402         all-MiniLM-L6-v2
3         job_0    resume_60          0.493402         all-MiniLM-L6-v2
4         job_0    resume_80          0.424083         all-MiniLM-L6-v2
..          ...          ...               ...                      ...
145       job_4   resume_192          0.304623  paraphrase-MiniLM-L6-v2
146       job_4  resume_2822          0.301249  paraphrase-MiniLM-L6-v2
147       job_4  resume_4791          0.293399  paraphrase-MiniLM-L6-v2
148       job_4  resume_3345          0.293210  paraphrase-MiniLM-L6-v2
149       job_4  resume_1488          0.291847  paraphrase-MiniLM-L6-v2

[150 rows x 4 columns]


## Similarity Score Variance

In [13]:
# Variance, standard deviation and mean of the similarity scores for each model,
# ordered from the highest-variance model to the lowest.
scores_by_model = df.groupby('model_name')['similarity_score']
var_df = (
    scores_by_model
    .agg(['var', 'std', 'mean'])
    .sort_values('var', ascending=False)
)
print(var_df)

                              var       std      mean
model_name                                           
paraphrase-MiniLM-L6-v2  0.009470  0.097316  0.424342
all-MiniLM-L12-v2        0.009140  0.095605  0.366303
all-MiniLM-L6-v2         0.004088  0.063937  0.374703


## Top Candidate Overlap

### END