In [106]:
import pandas as pd
import json
import re
pd.set_option('display.max_colwidth', None)

In [107]:
def _percent_to_float(value):
    """Convert a percentage given as '85%', '85', 85 or 85.0 into a float."""
    # str() lets numeric JSON values go through the same path as "NN%" strings.
    return float(str(value).strip().strip('%').strip())


def parse_line(line):
    """Parse one ``resume_id,jd_id,<json>`` line into a flat row dict.

    The line is split on the first two commas only, so commas inside the
    JSON payload are left untouched. Occurrences of a backslash followed by
    an underscore in the payload are replaced by a plain underscore before
    decoding, because that sequence is not a valid JSON escape.

    Args:
        line: A single text line from the score file.

    Returns:
        A dict with ``resume_id``, ``jd_id``, ``overall_match`` and one
        ``<category>_score`` float per entry of ``score_breakdown``, or
        ``None`` when the line does not have three comma-separated parts,
        the payload is not valid JSON, or the payload lacks the expected
        structure (missing keys, non-dict values, non-numeric percentages).
    """
    parts = line.strip().split(',', 2)
    if len(parts) != 3:
        return None

    resume_id, jd_id, json_str = (part.strip() for part in parts)
    json_str = json_str.replace(r'\_', '_')

    try:
        sim_data = json.loads(json_str)
        row = {
            'resume_id': resume_id,
            'jd_id': jd_id,
            'overall_match': _percent_to_float(sim_data['overall_match_percentage']),
        }
        # Add all score breakdown components
        for category, values in sim_data['score_breakdown'].items():
            row[f'{category}_score'] = _percent_to_float(values['score'])
    except (KeyError, TypeError, AttributeError, ValueError):
        # json.JSONDecodeError is a ValueError subclass, so undecodable
        # payloads and structurally unusable ones are both skipped here
        # instead of aborting the whole file load.
        return None

    return row

In [108]:
# Load sim_score.txt line by line, keeping only the rows parse_line accepts
# (lines it rejects come back falsy and are dropped).
data = []
with open('sim_score.txt', 'r') as f:
    data.extend(row for row in map(parse_line, f) if row)

In [109]:
# Build the frame from the parsed rows, then force every column whose name
# contains '_score' or '_match' to float dtype.
df = pd.DataFrame(data)
percentage_cols = [
    name
    for name in df.columns
    if any(marker in name for marker in ('_score', '_match'))
]
df[percentage_cols] = df[percentage_cols].astype(float)

# df.head(50)

In [125]:
# df.head(50)

In [128]:
import pandas as pd
import ast

# Alternative score files (swap the active read_csv below to use one):
# df = pd.read_csv("./gold_samples/R_all_JD_all_scores_semantic.csv")
# df = pd.read_csv("./gold_samples/R_all_JD_all_scores.csv")
# df = pd.read_csv("./gold_samples/R_all_JD_all_scores_updated.csv")
# df = pd.read_csv("./gold_samples/R_all_JD_all_scores_refined.csv")
df = pd.read_csv("./gold_samples/R_all_JD_all_scores_chatgpt.csv")

# Cells that arrive as strings are evaluated as Python literals; anything
# else is kept as-is.
parsed_scores = df['score_json'].map(
    lambda cell: ast.literal_eval(cell) if isinstance(cell, str) else cell
)
df['score_json'] = parsed_scores

# Overall match: drop the '%' sign and store as float.
df['overall_match'] = parsed_scores.map(
    lambda payload: float(payload['overall_match_percentage'].strip('%'))
)

# One '<component>_score' and one '<component>_reason' column per component.
score_components = ['role_alignment', 'skills_match', 'experience_match', 'project_relevance', 'education_match']
for component in score_components:
    breakdowns = parsed_scores.map(
        lambda payload, key=component: payload['score_breakdown'][key]
    )
    df[f'{component}_score'] = breakdowns.map(
        lambda entry: float(entry['score'].strip('%'))
    )
    # A missing 'reason' key becomes an empty string.
    df[f'{component}_reason'] = breakdowns.map(
        lambda entry: entry.get('reason', '')
    )

# The raw payload column is no longer needed once it has been flattened.
df = df.drop(columns=['score_json'])

df.head(10)

Unnamed: 0,resume_id,jd_id,overall_match,role_alignment_score,role_alignment_reason,skills_match_score,skills_match_reason,experience_match_score,experience_match_reason,project_relevance_score,project_relevance_reason,education_match_score,education_match_reason
0,R1,JD1,52.0,30.0,,0.0,,20.0,,2.0,,0.0,
1,R1,JD2,55.0,30.0,,0.0,,20.0,,0.0,,5.0,
2,R1,JD3,55.0,30.0,,0.0,,20.0,,0.0,,5.0,
3,R1,JD4,56.0,30.0,,0.0,,20.0,,1.0,,5.0,
4,R1,JD5,51.0,30.0,,0.0,,20.0,,1.0,,0.0,
5,R1,JD6,20.0,0.0,,0.0,,20.0,,0.0,,0.0,
6,R1,JD7,26.0,0.0,,0.0,,20.0,,1.0,,5.0,
7,R1,JD8,26.0,0.0,,0.0,,20.0,,1.0,,5.0,
8,R1,JD9,26.0,0.0,,0.0,,20.0,,1.0,,5.0,
9,R1,JD10,42.0,15.0,,0.0,,20.0,,2.0,,5.0,


In [130]:
# Display the column labels of the current df as this cell's output.
df.columns

Index(['resume_id', 'jd_id', 'overall_match', 'role_alignment_score',
       'role_alignment_reason', 'skills_match_score', 'skills_match_reason',
       'experience_match_score', 'experience_match_reason',
       'project_relevance_score', 'project_relevance_reason',
       'education_match_score', 'education_match_reason'],
      dtype='object')

In [129]:
import pandas as pd

def print_top_k_matches(df, top_k=5, stop_after_resume_id='R2'):
    """Print the highest-scoring JDs for each resume in ``df``.

    Rows are grouped by ``resume_id``. Within each group they are sorted in
    descending order by ``overall_match``, with ties broken in turn by the
    role, skills, experience, project and education scores, and the first
    ``top_k`` rows are printed.

    Groups are visited in ``groupby``'s default sorted key order, which for
    string ids is lexicographic (e.g. 'R10' comes before 'R2').

    Args:
        df: DataFrame with ``resume_id``, ``jd_id``, ``overall_match`` and
            the five ``*_score`` columns listed in ``tie_breakers``.
        top_k: Maximum number of matches to print per resume.
        stop_after_resume_id: Resume id after whose group printing stops.
            Defaults to 'R2', which keeps the cut-off this function
            previously had hard-coded; pass ``None`` to print every resume.

    Returns:
        None. All output goes to stdout.
    """
    tie_breakers = [
        'overall_match',
        'role_alignment_score',
        'skills_match_score',
        'experience_match_score',
        'project_relevance_score',
        'education_match_score'
    ]

    for resume_id, group in df.groupby('resume_id'):
        sorted_group = group.sort_values(
            by=tie_breakers,
            ascending=[False] * len(tie_breakers)  # All descending
        )

        top_matches = sorted_group.head(top_k)

        print(f"\nResume: {resume_id}")
        print("-" * 30)
        for _, row in top_matches.iterrows():
            print(f"JD: {row['jd_id']} - {row['overall_match']:.1f}% match | Breakdown: Role {row['role_alignment_score']}%, " +
                  f"Skills {row['skills_match_score']}%, " +
                  f"Exp {row['experience_match_score']}%")

        # Report the number of rows actually printed; a resume with fewer
        # than top_k rows would otherwise be described incorrectly.
        print(f"\nShowing top {len(top_matches)} of {len(group)} matches")
        print("=" * 50)

        if stop_after_resume_id is not None and resume_id == stop_after_resume_id:
            break

print_top_k_matches(df, top_k=10)  # top_k sets how many matches are printed per resume


Resume: R1
------------------------------
JD: JD4 - 56.0% match | Breakdown: Role 30.0%, Skills 0.0%, Exp 20.0%
JD: JD2 - 55.0% match | Breakdown: Role 30.0%, Skills 0.0%, Exp 20.0%
JD: JD3 - 55.0% match | Breakdown: Role 30.0%, Skills 0.0%, Exp 20.0%
JD: JD1 - 52.0% match | Breakdown: Role 30.0%, Skills 0.0%, Exp 20.0%
JD: JD5 - 51.0% match | Breakdown: Role 30.0%, Skills 0.0%, Exp 20.0%
JD: JD10 - 42.0% match | Breakdown: Role 15.0%, Skills 0.0%, Exp 20.0%
JD: JD18 - 41.0% match | Breakdown: Role 15.0%, Skills 0.0%, Exp 20.0%
JD: JD19 - 41.0% match | Breakdown: Role 15.0%, Skills 0.0%, Exp 20.0%
JD: JD36 - 41.0% match | Breakdown: Role 15.0%, Skills 0.0%, Exp 20.0%
JD: JD47 - 41.0% match | Breakdown: Role 15.0%, Skills 0.0%, Exp 20.0%

Showing top 10 of 50 matches

Resume: R10
------------------------------
JD: JD1 - 64.0% match | Breakdown: Role 30.0%, Skills 11.0%, Exp 20.0%
JD: JD48 - 64.0% match | Breakdown: Role 30.0%, Skills 11.0%, Exp 20.0%
JD: JD49 - 64.0% match | Breakdown: