In [1]:
import _resume_eval_import_helper

In [2]:
import json 
from pathlib import Path
from tqdm.auto import tqdm

import pandas as pd

# consolidate results


## GPT-4o


In [3]:
folder_path = Path("output_20240831_1144")
json_files = list(folder_path.glob("*.json"))

gpt4o_verdict = {}

for file in tqdm(json_files, desc="Processing files"):
  jd_id, cv_id = file.stem.split("_")[:2]
  with open(file, "r") as f:
    data = json.load(f)
    try:
      verdict = data["assessment"]["suitability"]
    except:
      verdict = None
    gpt4o_verdict[(jd_id, cv_id)] = verdict
    
    
gpt4o_verdict = pd.DataFrame(gpt4o_verdict, index=["suitability"]).T.reset_index().rename(columns={"level_0": "job_id", "level_1": "cv_id", "suitability": "gpt4o"})


Processing files:   0%|          | 0/704 [00:00<?, ?it/s]

## llama3 70b


In [4]:
folder_path = Path("output_20240831_0124")
json_files = list(folder_path.glob("*.json"))

llama3_70b_verdict = {}

for file in tqdm(json_files, desc="Processing files"):
  jd_id, cv_id = file.stem.split("_")[:2]
  with open(file, "r") as f:
    data = json.load(f)
    verdict = data["assessment"]["suitability"]
    
    llama3_70b_verdict[(jd_id, cv_id)] = verdict

llama3_70b_verdict = pd.DataFrame(llama3_70b_verdict, index=["suitability"]).T.reset_index().rename(columns={"level_0": "job_id", "level_1": "cv_id", "suitability": "llama3_70b"})

Processing files:   0%|          | 0/705 [00:00<?, ?it/s]

## previous results


In [5]:
import pandas as pd
from tqdm.auto import tqdm

output_dir = Path("output_20240830_0136")

df = pd.DataFrame()
csv_files = output_dir.glob("*.csv")

for file in tqdm(csv_files, desc="Processing files"):
  
  job_id, cv_id = file.name.split("_")[:2]
  df_ = pd.read_csv(file)
  df_["job_id"] = job_id
  df_["cv_id"] = cv_id[:-4]
  df = pd.concat([df, df_], axis=0)

Processing files: 0it [00:00, ?it/s]

In [6]:
df_ = df.pivot_table(index=["job_id", "cv_id"], columns="Model", values="suitability", aggfunc="first").reset_index()

In [7]:
df_.columns, llama3_70b_verdict.columns, gpt4o_verdict.columns

(Index(['job_id', 'cv_id', 'anthropic', 'gpt', 'llama3'], dtype='object', name='Model'),
 Index(['job_id', 'cv_id', 'llama3_70b'], dtype='object'),
 Index(['job_id', 'cv_id', 'gpt4o'], dtype='object'))

In [8]:
df_.shape, llama3_70b_verdict.shape, gpt4o_verdict.shape

((692, 5), (705, 3), (704, 3))

In [9]:
final_verdict = pd.merge(llama3_70b_verdict, gpt4o_verdict, on=["job_id", "cv_id"], how="left")
final_verdict = pd.merge(final_verdict, df_, on=["job_id", "cv_id"], how="left")

final_verdict.sort_values(by=["job_id", "cv_id"], inplace=True)
final_verdict.to_csv("output/final_verdict.csv", index=False)

## add job description and cv


In [10]:
folder_path = Path("output")

job_pool = pd.read_csv("output/filtered_job_description.csv")
talent_pool = pd.read_csv("output/filtered_talent_pool.csv")

job_pool.rename(columns={"Job ID": "job_id", "Job Description": "job_description", "Job Title": "job_title"}, inplace=True)
talent_pool.rename(columns={"ID": "cv_id", "Resume": "cv", "Category": "cv_category"}, inplace=True)

In [11]:
final_verdict = pd.merge(job_pool, final_verdict, on="job_id", how="left")
final_verdict = pd.merge(talent_pool, final_verdict, on="cv_id", how="left")
final_verdict.to_csv("output/final_verdict_with_jd_cv.csv", index=False)

In [12]:
final_verdict.head(3).T

Unnamed: 0,0,1,2
cv_category,Hadoop,Hadoop,Hadoop
cv,"Skill Set: Hadoop, Map Reduce, HDFS, Hive, Sqo...","Skill Set: Hadoop, Map Reduce, HDFS, Hive, Sqo...","Skill Set: Hadoop, Map Reduce, HDFS, Hive, Sqo..."
cv_id,c48924db-2f11-40a7-96cf-152ac401e14c,c48924db-2f11-40a7-96cf-152ac401e14c,c48924db-2f11-40a7-96cf-152ac401e14c
job_title,Flutter Developer,Django Developer,Machine Learning
job_description,Job Title: Flutter Developer\nLocation: Malapp...,Job Responsibilities:\n6+ years of Hands-on ex...,About the role\nIn this role you will:\nBe wor...
job_id,fb3ec592-b093-46ca-8dfd-de3e164b49ac,7e259146-d9df-411e-8550-2fef51dc7bc9,31fde0c0-866a-48f9-b399-aad6f9c472a8
llama3_70b,no,no,no
gpt4o,no,no,no
anthropic,kiv,kiv,no
gpt,kiv,no,no


In [15]:
del job_pool, talent_pool, df_, llama3_70b_verdict, gpt4o_verdict

# EDA of results


In [14]:
print(f"total {final_verdict['job_id'].nunique()} job descriptions")

print("per jd, we have ~40 applications")
final_verdict.groupby(["job_id"])["cv_id"].nunique()

total 15 job descriptions
per jd, we have ~40 applications


job_id
2f1247bd-4c98-4357-8b8b-ac52ee8698b2    47
31fde0c0-866a-48f9-b399-aad6f9c472a8    47
40705682-6752-41f0-8a6d-b01b9d7b1746    47
5535b3b6-f919-4e04-bb23-5eb63436941f    47
66c5e115-cf87-4917-9cff-c2d72156cff6    47
769c3093-32c5-4122-ae8f-d4f99a22354a    47
7e259146-d9df-411e-8550-2fef51dc7bc9    47
83d835b5-ab57-4044-ace8-a5d8ffc0e254    47
8f52ff7e-1929-49d9-86d3-052e98986b34    47
90c1ee72-9c8d-46d4-a671-cb7cba7f75df    47
91793843-4ca9-4eb6-9d84-8c59a3d19812    47
b04cc5ce-b93e-427e-9439-0965e64779ff    47
ca0539ba-835b-48fb-bed0-705d6668c372    47
dcb1513a-460d-46f7-81b1-23294a691bfd    47
fb3ec592-b093-46ca-8dfd-de3e164b49ac    47
Name: cv_id, dtype: int64

## distribution of suitability


In [23]:
mod_list = ["llama3_70b", "gpt4o", "anthropic", "gpt", "llama3"]

for mod in mod_list:
  print(final_verdict[mod].value_counts(normalize=True))
  print()

llama3_70b
no     0.617021
kiv    0.238298
yes    0.144681
Name: proportion, dtype: float64

gpt4o
no     0.663818
kiv    0.317664
yes    0.018519
Name: proportion, dtype: float64

anthropic
kiv    0.535714
no     0.464286
Name: proportion, dtype: float64

gpt
kiv    0.764110
no     0.219971
yes    0.015919
Name: proportion, dtype: float64

llama3
kiv    0.813880
no     0.164038
yes    0.022082
Name: proportion, dtype: float64



In [24]:
final_verdict.head()

Unnamed: 0,cv_category,cv,cv_id,job_title,job_description,job_id,llama3_70b,gpt4o,anthropic,gpt,llama3
0,Hadoop,"Skill Set: Hadoop, Map Reduce, HDFS, Hive, Sqo...",c48924db-2f11-40a7-96cf-152ac401e14c,Flutter Developer,Job Title: Flutter Developer\nLocation: Malapp...,fb3ec592-b093-46ca-8dfd-de3e164b49ac,no,no,kiv,kiv,no
1,Hadoop,"Skill Set: Hadoop, Map Reduce, HDFS, Hive, Sqo...",c48924db-2f11-40a7-96cf-152ac401e14c,Django Developer,Job Responsibilities:\n6+ years of Hands-on ex...,7e259146-d9df-411e-8550-2fef51dc7bc9,no,no,kiv,no,
2,Hadoop,"Skill Set: Hadoop, Map Reduce, HDFS, Hive, Sqo...",c48924db-2f11-40a7-96cf-152ac401e14c,Machine Learning,About the role\nIn this role you will:\nBe wor...,31fde0c0-866a-48f9-b399-aad6f9c472a8,no,no,no,no,
3,Hadoop,"Skill Set: Hadoop, Map Reduce, HDFS, Hive, Sqo...",c48924db-2f11-40a7-96cf-152ac401e14c,iOS Developer,Technical Skills:\nMinimum 4 years of experien...,83d835b5-ab57-4044-ace8-a5d8ffc0e254,no,no,,kiv,kiv
4,Hadoop,"Skill Set: Hadoop, Map Reduce, HDFS, Hive, Sqo...",c48924db-2f11-40a7-96cf-152ac401e14c,Full Stack Developer,position description demonstrates up-to-date e...,2f1247bd-4c98-4357-8b8b-ac52ee8698b2,yes,kiv,,kiv,


## gpt-4o vs llama3-70b


In [25]:
final_verdict[["llama3_70b", "gpt4o"]].value_counts(normalize=True)

llama3_70b  gpt4o
no          no       0.542735
kiv         kiv      0.126781
yes         kiv      0.116809
kiv         no       0.111111
no          kiv      0.074074
yes         yes      0.015670
            no       0.009972
kiv         yes      0.001425
no          yes      0.001425
Name: proportion, dtype: float64

In [28]:
disagreement = final_verdict[final_verdict["llama3_70b"] != final_verdict["gpt4o"]]
disagreement.to_csv("output/disagreement_gpt4o_llama3_70b.csv", index=False)

In [424]:
import gradio as gr
import pandas as pd
import json
from pathlib import Path

# Load the data
final_verdict = pd.read_csv("output/final_verdict_with_jd_cv.csv")

# Global variable to keep track of the current index
current_index = 0

def load_llama3_reasoning(job_id, cv_id):
    json_path = Path(f"output_20240831_0124/{job_id}_{cv_id}_groq.json")
    if json_path.exists():
        with open(json_path, "r") as f:
            data = json.load(f)
            return json.dumps(data, indent=2)
    return "Reasoning not available"

def load_entry(index):
    entry = final_verdict.iloc[index]
    llama3_reasoning = load_llama3_reasoning(entry['job_id'], entry['cv_id'])
    return (
        entry['job_title'],
        entry['job_description'],
        entry['cv_category'],
        entry['cv'],
        f"Anthropic: {entry['anthropic']}\nGPT: {entry['gpt']}\nLlama3: {entry['llama3_70b']}",
        llama3_reasoning
    )

def update_decision(decision):
    global current_index
    final_verdict.loc[current_index, 'user_decision'] = decision
    final_verdict.to_csv("output/final_verdict_with_user_decision.csv", index=False)
    current_index += 1
    return load_entry(current_index)

def shortlist():
    return update_decision("Shortlist")

def reject():
    return update_decision("Reject")

def kiv():
    return update_decision("KIV")

def load_first_entry():
    return load_entry(0)

with gr.Blocks() as demo:
    with gr.Row():
        with gr.Column():
            job_title = gr.Textbox(label="Job Title")
            job_description = gr.Textbox(label="Job Description", lines=10)
        with gr.Column():
            cv_category = gr.Textbox(label="CV Category")
            cv = gr.Textbox(label="Resume (CV)", lines=10)
    
    model_verdicts = gr.Textbox(label="Model Verdicts", lines=3)
    llama3_reasoning = gr.Code(label="Llama3 Reasoning", language="json")
    
    with gr.Row():
        shortlist_btn = gr.Button("Shortlist", variant="primary")
        reject_btn = gr.Button("Reject", variant="stop")
        kiv_btn = gr.Button("KIV", variant="secondary")
    
    shortlist_btn.click(shortlist, outputs=[job_title, job_description, cv_category, cv, model_verdicts, llama3_reasoning])
    reject_btn.click(reject, outputs=[job_title, job_description, cv_category, cv, model_verdicts, llama3_reasoning])
    kiv_btn.click(kiv, outputs=[job_title, job_description, cv_category, cv, model_verdicts, llama3_reasoning])
    
    demo.load(load_first_entry, outputs=[job_title, job_description, cv_category, cv, model_verdicts, llama3_reasoning])

demo.launch()

Running on local URL:  http://127.0.0.1:7861
IMPORTANT: You are using gradio version 4.19.2, however version 4.29.0 is available, please upgrade.
--------

To create a public link, set `share=True` in `launch()`.




looking at the log, a number of results are missing due to parsing error (llama3) and rate limit (anthropic)


In [417]:
# log_file_dir = Path("output/evaluation_log.txt")

# # def extract_json(error_message):

# def process_log_file(log_file_path, output_file_path):
  
#   with open(log_file_path, "r") as file:
#     log_content = file.read()
    
  
  

In [418]:
# log_file_dir = Path("output/evaluation_log.txt")

# with open(log_file_dir, "r") as file:
#     log_content = file.read()

In [419]:

# import re

# error_entries = re.findall(r"ERROR - Error with llama3 for job_id:.*, cv_id:.*(?=\.)", log_content)

# uuid_pattern =  r"^[0-9a-fA-F]{8}-[0-9a-fA-F]{4}-[1-5][0-9a-fA-F]{3}-[89abAB][0-9a-fA-F]{3}-[0-9a-fA-F]{12}$"

In [420]:
# job_id = re.search(r'job_id: ([^,\s]+)', entry)
# cv_id = re.search(r'cv_id: ([^,\s]+)', entry)


# re.search(entry, log_content).group()
# # overall result

In [421]:
# error_pattern = r'^\d{4}-\d{2}-\d{2} \d{2}:\d{2}:\d{2} - ERROR -'
# info_pattern =r'\d{4}-\d{2}-\d{2} \d{2}:\d{2}:\d{2} - INFO -' 
# sections = re.split(error_pattern, log_content, flags=re.MULTILINE)
# sections = [item.strip() for item in sections if item.strip().startswith("Error with llama3")]
# sections = [re.split(info_pattern, sec)[0] for sec in sections]
# json_strs = [re.search(r'(\{.*\})', sec, re.DOTALL).group(1) for sec in sections]

In [175]:
# import json
# count = 0
# for j in tqdm(json_strs, total=len(json_strs)):
#   try:
#     json.loads(j)
#   except:
#     print(j)
#     print("-"*100)  
#     count += 1
#     break

  0%|          | 0/375 [00:00<?, ?it/s]

{
  "job_description_analysis": {
    "technical_skills": [
      {"Flutter": "essential"},
      {"Android Application Development": "essential"},
      {"Algorithms": "essential"},
      {"Restful Web Services": "essential"},
      {"Offline Storage": "advantageous"},
      {"Threading": "advantageous"},
      {"Performance Tuning": "advantageous"},
      {"Push Notifications": "essential"},
      {"API Integrations": "essential"}
    ],
    "soft_skills": [
      "Analytical",
      "Problem Solving",
      "Decision Making"
    ],
    "required_experience": [1],
    "education_qualifications": []
  },
  "resume_evaluation": {
    "original_scores": {
      "technical_skills": 0,
      "soft_skills": 0,
      "relevant_experience": 0,
      "qualifications": 0
    },
    "missing_skills": [
      {"Offline Storage": "advantageous"},
      {"Threading": "advantageous"},
      {"Performance Tuning": "advantageous"}
    ]
  },
  "deeper_analysis": {
    "inferred_experiences": [
      

# overall result


In [67]:
df.groupby(["Model"])["suitability"].value_counts(normalize=True)

Model      suitability
anthropic  kiv            0.535714
           no             0.464286
gpt        kiv            0.764110
           no             0.219971
           yes            0.015919
llama3     kiv            0.813880
           no             0.164038
           yes            0.022082
Name: proportion, dtype: float64

## does the model agree with each other?


In [185]:
# get the results o
model_results_comparison = df.groupby(["job_id", "cv_id"])[["Model", "suitability"]].value_counts().reset_index()

In [187]:
model_results_comparison.to_csv("model_results_comparison.csv", index=False)

In [230]:
df_ = df.pivot_table(index=["job_id", "cv_id"], columns="Model", values="suitability", aggfunc="first").reset_index()
df_["count"] = df_[["anthropic", "gpt", "llama3"]].count(axis=1)
# df_["agreement"] = (df_[["anthropic", "gpt", "llama3"]].nunique(dropna=True, axis=1) == 1) & (df_["count"] > 1)
df_["agreement"] = (df_[["anthropic", "gpt", "llama3"]].nunique(dropna=True, axis=1) == 1) 
df_.to_csv("model_results_per_jd_cv.csv", index=False)

In [234]:
df_[df_["count"] > 1]["agreement"].value_counts(normalize=True)

agreement
True     0.71466
False    0.28534
Name: proportion, dtype: float64

In [246]:
disagreement_df = df_[(df_["count"] > 1) & (~df_["agreement"])]
disagreement_df.to_csv("disagreement_df.csv", index=False)

# eyeballing the disagreement


In [266]:
import pandas as pd 
from IPython.display import display, Markdown

job_pool = pd.read_csv("output/filtered_job_description.csv")
talent_pool = pd.read_csv("output/filtered_talent_pool.csv")

In [293]:
job_id = "2f1247bd-4c98-4357-8b8b-ac52ee8698b2"
cv_id = "a6f20105-1395-4f20-b1c9-139ae9f0ef42"

def get_jd_cv(job_id, cv_id):
  job = job_pool[job_pool["Job ID"] == job_id]["Job Description"].values[0]
  cv = talent_pool[talent_pool["ID"] == cv_id]["Resume"].values[0]
  
  pprint((f"**Job Description:**\n{job}"))
  print()
  pprint((f"**CV:**\n{cv}"))

get_jd_cv(job_id, cv_id)

('**Job Description:**\n'
 'position description demonstrates up-to-date expertise software engineering '
 'applies development execution improvement action plan manages small '
 'medium-sized complex team project model compliance company policy procedure '
 'support company standard ethic integrity provides support implementation '
 'business solution provides support business new existing system '
 "troubleshoots business production issue minimum qualification bachelor 's "
 'degree computer science related field 4 year experience building scalable '
 'ecommerce application mobile software additional preferred qualification '
 'company summary walmart ecommerce team rapidly innovating evolve define '
 'future state shopping world ’ largest retailer mission help people save '
 'money live better help brightest mind technology merchandising marketing '
 'supply chain talent reimagining intersection digital physical shopping help '
 'achieve mission position summary walmart lab ’ reinve

In [309]:
talent_pool[talent_pool["ID"] == cv_id]["Resume"].values[0]

'Skill Set Cisco Certified Network Associates (CCNA): -  Basic knowledge of networking such as Ethernet mediums, ethernet communication, types of Ethernet communication devices etc.  IPv4 (Subnetting, Supernetting)  Basic configuration of Routing, Switching, Access lists, Network Address Translation (NAT), Virtual LANs (VLANs) etc Cisco Certified Network Associates (CCNA-Security): -  Basic security goals and need.  Different types of attacks like access attacks, reconnaissance strategies etc.  Basic configuration of firewalls.  Mitigation techniques such as access control lists, private VLANs, VLAN hopping, IP source guard, DHCP snooping, Authentication, Authorization & Accounting (AAA), IP Security (IPsec) etc. Cisco Certified Network Professional (CCNP-Routing, Switching & MPLS): -  Routing-Configuration and concept of EIGRP, OSPF and BGP.  Switching-Virtual LANs (VLANs), spanning tree protocol (STP), queuing etc.  MPLS (Multi-Protocol Label Switching) -Basic idea about working, typ

In [301]:
talent_pool[talent_pool["ID"] == cv_id]["Resume"].values[0]

'Skill Set Cisco Certified Network Associates (CCNA): -  Basic knowledge of networking such as Ethernet mediums, ethernet communication, types of Ethernet communication devices etc.  IPv4 (Subnetting, Supernetting)  Basic configuration of Routing, Switching, Access lists, Network Address Translation (NAT), Virtual LANs (VLANs) etc Cisco Certified Network Associates (CCNA-Security): -  Basic security goals and need.  Different types of attacks like access attacks, reconnaissance strategies etc.  Basic configuration of firewalls.  Mitigation techniques such as access control lists, private VLANs, VLAN hopping, IP source guard, DHCP snooping, Authentication, Authorization & Accounting (AAA), IP Security (IPsec) etc. Cisco Certified Network Professional (CCNP-Routing, Switching & MPLS): -  Routing-Configuration and concept of EIGRP, OSPF and BGP.  Switching-Virtual LANs (VLANs), spanning tree protocol (STP), queuing etc.  MPLS (Multi-Protocol Label Switching) -Basic idea about working, typ

In [294]:

for file in Path("output_20240830_0136").iterdir():
  if file.is_file() and file.name.startswith(f"{job_id}_{cv_id}"):
    if file.name.endswith(".csv"):
      df = pd.read_csv(file)
      display(df)
    if file.name.endswith(".json"):
      model_name = file.name.split("_")[2][:-5]
      with open(file, "r") as f:
        data = json.load(f)
        print("-"*100)
        print(model_name)
        print("-"*100)
        pprint(data)


Unnamed: 0,Model,original_technical_skills,recalibrated_technical_skills,original_soft_skills,recalibrated_soft_skills,original_required_experience,recalibrated_required_experience,original_qualifications,recalibrated_qualifications,suitability
0,llama3,20,30,0,0,,,0,,no
1,gpt,85,90,0,0,,,70,,yes


----------------------------------------------------------------------------------------------------
llama3
----------------------------------------------------------------------------------------------------
{'assessment': {'missing_skills': [{'Software Engineering': 'essential'},
                                   {'Development Execution Improvement Action Plan': 'essential'}],
                'potential_concerns': ['Lack of relevant experience in '
                                       'software engineering and development '
                                       'execution improvement action plan'],
                'strengths': ['Networking Experience'],
                'suitability': 'no'},
 'deeper_analysis': {'inferred_experiences': []},
 'job_description_analysis': {'education_qualifications': [],
                              'required_experience': [4],
                              'soft_skills': ['Communication',
                                              'Problem-solvin

In [307]:
# extract all json for the same jd 
for file in Path("output_20240830_0136").iterdir():
  if file.is_file() and file.name.startswith(f"{job_id}") and file.name.endswith(".json"):
      model_name = file.name.split("_")[2][:-5]
      with open(file, "r") as f:
          data = json.load(f)
          print("-"*100)
          print(model_name)
          print("-"*100)

          try:
            pprint(data["job_description_analysis"])
          except:
            print("no job description analysis")

----------------------------------------------------------------------------------------------------
gpt
----------------------------------------------------------------------------------------------------
{'education_qualifications': ["Bachelor's degree in Computer Science or "
                              'related field'],
 'required_experience': 4,
 'soft_skills': ['team management', 'problem-solving', 'communication'],
 'technical_skills': {'.Net': 'essential',
                      'C': 'essential',
                      'C++': 'essential',
                      'CSS': 'essential',
                      'HTML5': 'essential',
                      'IBM DB2': 'advantageous',
                      'IBM Rational Rose': 'advantageous',
                      'Java': 'essential',
                      'Javascript': 'advantageous',
                      'MY-SQL': 'advantageous',
                      'MapReduce': 'essential',
                      'Oracle (PL-SQL)': 'advantageous',
     

looks like ChatGPT didnt extract required skills correctly


In [274]:
from pprint import pprint

pprint(job_pool[job_pool["Job ID"] == job_id]["Job Description"].values[0])

('position description demonstrates up-to-date expertise software engineering '
 'applies development execution improvement action plan manages small '
 'medium-sized complex team project model compliance company policy procedure '
 'support company standard ethic integrity provides support implementation '
 'business solution provides support business new existing system '
 "troubleshoots business production issue minimum qualification bachelor 's "
 'degree computer science related field 4 year experience building scalable '
 'ecommerce application mobile software additional preferred qualification '
 'company summary walmart ecommerce team rapidly innovating evolve define '
 'future state shopping world ’ largest retailer mission help people save '
 'money live better help brightest mind technology merchandising marketing '
 'supply chain talent reimagining intersection digital physical shopping help '
 'achieve mission position summary walmart lab ’ reinventing world ’ leading '
 '

In [275]:
pprint(talent_pool[talent_pool["ID"] == cv_id]["Resume"].values[0])

('Education Details \n'
 'February 2006 to February 2006 TYBCOM Commerce  mumbai\n'
 'Business Analyst \n'
 'Business Analyst\n'
 'Skill Details \n'
 'Company Details \n'
 'company - Motilal Oswal\n'
 'description - Business Analyst\n'
 'Handling IT Operation for Institutional Equities \n'
 'Maintain Daily MIS in Excel for CAG, Research, Derivative, Sales team '
 'Preparing Auto Dashboard For Research, Sales, Trading team Working on Excel '
 'Macro to Create Innovative Report \n'
 'Working on Block Related Data Working on BD Fund from different GEO Working '
 'on Investors Corporate Meeting to track Corporate Block & Fund Interest in '
 'Sector\n'
 'company - FSS\n'
 'description - Project Description:\n'
 'Maintain and prepare cash indent, cash report, cash position, and cash '
 'planning\n'
 'Responsibilities:\n'
 ' Maintain Daily MIS in excel.\n'
 ' Provide complete information about MIS & ATM.\n'
 ' Maintain and prepare cash indent, cash report, cash position., cash '
 'planning\n'

In [40]:
df_count = df.groupby(["job_id", "cv_id"])["Model"].count().reset_index()

In [422]:
# one_model_tuple = df_count[df_count["Model"] < 2][["job_id", "cv_id"]].values
# one_model_df = df[df.apply(lambda row: (row["job_id"], row["cv_id"]) in one_model_tuple, axis=1)]


In [71]:
x = """{
  "job_description_analysis": {
    "technical_skills": [
      {"MS SQL Server installation, configuration, and administration": "essential"},
      {"Always on availability groups, log shipping, database mirroring, and clustering": "advantageous"},
      {"SQL server patching activity": "essential"},
      {"Performance tuning": "essential"}
    ],
    "soft_skills": ["Troubleshooting", "Communication"],
    "required_experience": [3],
    "education_qualifications": []
  },
  "resume_evaluation": {
    "original_scores": {
      "technical_skills": 85,
      "soft_skills": 90,
      "relevant_experience": 80,
      "qualifications": 0
    },
    "missing_skills": [
      {"Database performance monitoring and optimization": "essential"}
    ]
  },
  "deeper_analysis": {
    "inferred_experiences": ["Capacity planning for database growth"]
  },
  "recalibrated_scores": {
    "technical_skills": 90,
    "soft_skills": 95,
    "experience": 85,
    "education_qualifications": 0
  },
  "assessment": {
    "suitability": "yes",
    "strengths": ["Strong technical skills in MS SQL Server", "Experience with Always on availability groups"],
    "potential_concerns": ["Limited experience with database performance monitoring and optimization"],
    "missing_skills": [
      {"Database performance monitoring and optimization": "essential"}
    ]
  }
}"""