In [48]:
# Step 1: Libraries
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity
import math

In [49]:
# Step 2: Resume and job description, lower-cased up front so that
# matching does not depend on letter case.
resume_text = "I am a PHP Laravel developer.".lower()
job_text = "Looking for a Laravel expert with PHP knowledge.".lower()

In [50]:
# Step 3: List
# Two-document corpus. Order matters: index 0 is the resume and index 1 is
# the job description; the similarity step slices rows 0 and 1 accordingly.
documents = [resume_text, job_text]

In [55]:
# Step 4: TF-IDF Model
# The vectorizer is configured to drop English stop words; it is fitted on
# the two documents only, so the weights reflect just this resume/job pair.
vectorizer = TfidfVectorizer(stop_words='english')
# Fit and vectorize in one call; the result has one row per entry of `documents`.
tfidf_matrix = vectorizer.fit_transform(documents)

In [56]:
# Step 5: Calculate Similarity (0 to 1 scale)
# Row slices (0:1, 1:2) keep each vector two-dimensional, as
# cosine_similarity expects; the result is a 1x1 matrix.
resume_vector = tfidf_matrix[0:1]
job_vector = tfidf_matrix[1:2]
similarity_score = cosine_similarity(resume_vector, job_vector)[0][0]

In [57]:
# Step 6: Convert out of 10
# Scale to 0-10 and round to two decimals first, then take the ceiling:
# any remaining fractional part rounds UP (e.g. 3.01 -> 4).
scaled_similarity = round(similarity_score * 10, 2)
score_out_of_10 = math.ceil(scaled_similarity)

In [58]:
# Step 7: Result
# Report the integer score computed in step 6.
print("Matching Score (out of 10):", score_out_of_10)

Matching Score (out of 10): 4


In [61]:
import pandas as pd
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity
import math

# Step 1: CSV file load
# Expected columns (all read below): resume_id, job_id, resume_text,
# job_description.
df = pd.read_csv("resume_job_dataset.csv")

# Step 2: calculate matching resume & job
# One vectorizer object is reused, but fit_transform() re-fits it on every
# pair, so vocabulary and IDF weights always come from that pair alone.
vectorizer = TfidfVectorizer(stop_words='english')

for index, row in df.iterrows():
    # Empty CSV cells are loaded as NaN (a float), which has no .lower().
    # Treat a missing text as an empty document instead of crashing.
    resume = "" if pd.isna(row['resume_text']) else str(row['resume_text'])
    job = "" if pd.isna(row['job_description']) else str(row['job_description'])

    documents = [resume.lower(), job.lower()]
    try:
        tfidf = vectorizer.fit_transform(documents)
    except ValueError:
        # Raised when no terms are left after stop-word removal (empty
        # vocabulary). Nothing can match, so score the pair as 0 rather
        # than aborting the whole loop.
        score = 0.0
    else:
        score = cosine_similarity(tfidf[0:1], tfidf[1:2])[0][0]

    # Scale to 0-10; math.ceil rounds any fractional part up.
    print(f"Resume ID {row['resume_id']} vs Job ID {row['job_id']} → Matching Score (out of 10): {math.ceil(round(score * 10, 2))}")

Resume ID 1 vs Job ID 101 → Matching Score (out of 10): 4
Resume ID 2 vs Job ID 102 → Matching Score (out of 10): 4
Resume ID 3 vs Job ID 103 → Matching Score (out of 10): 6
Resume ID 4 vs Job ID 104 → Matching Score (out of 10): 7
Resume ID 5 vs Job ID 105 → Matching Score (out of 10): 4


In [7]:
import pandas as pd
import spacy
import math

# spaCy model load
# The similarity below relies on the word vectors shipped with this model.
nlp = spacy.load("en_core_web_md")

# CSV load
# Expected columns (all read below): resume_id, job_id, resume_text,
# job_description.
df = pd.read_csv("professional_resume_job_dataset.csv")

# score store
scores = []

# find similarity for per resume-job
for index, row in df.iterrows():
    # Empty CSV cells are loaded as NaN; str(NaN) is the literal word "nan",
    # which would be embedded and scored as if it were real text. Map
    # missing values to an empty string instead.
    resume_raw = "" if pd.isna(row['resume_text']) else str(row['resume_text'])
    job_raw = "" if pd.isna(row['job_description']) else str(row['job_description'])

    if resume_raw.strip() and job_raw.strip():
        resume_doc = nlp(resume_raw)
        job_doc = nlp(job_raw)
        similarity = resume_doc.similarity(job_doc)
    else:
        # Nothing to compare: score the pair as 0 without calling the model.
        similarity = 0.0

    # Scale to 0-10 and round to two decimals, then take the ceiling once:
    # any remaining fractional part rounds up.
    score_out_of_10 = (round(similarity * 10, 2))
    final_score = math.ceil(score_out_of_10)
    scores.append(final_score)
    print(f"Resume ID {row['resume_id']} vs Job ID {row['job_id']} → Matching Score (out of 10): {final_score}")

# add new column
df['semantic_score_out_of_10'] = scores

# store on new csv
# (left disabled, as in the original cell; uncomment to write the results)
# df.to_csv("resume_job_semantic_scores.csv", index=False)

# print("✅ Done! Matching scores saved in 'resume_job_semantic_scores.csv'")


Resume ID 1 vs Job ID 101 → Matching Score (out of 10): 10
Resume ID 2 vs Job ID 102 → Matching Score (out of 10): 9
Resume ID 3 vs Job ID 103 → Matching Score (out of 10): 10
Resume ID 4 vs Job ID 104 → Matching Score (out of 10): 10
Resume ID 5 vs Job ID 105 → Matching Score (out of 10): 9
Resume ID 6 vs Job ID 106 → Matching Score (out of 10): 10
Resume ID 7 vs Job ID 107 → Matching Score (out of 10): 9
Resume ID 8 vs Job ID 108 → Matching Score (out of 10): 10
Resume ID 9 vs Job ID 109 → Matching Score (out of 10): 10
Resume ID 10 vs Job ID 110 → Matching Score (out of 10): 9
Resume ID 11 vs Job ID 111 → Matching Score (out of 10): 9
Resume ID 12 vs Job ID 112 → Matching Score (out of 10): 9
Resume ID 13 vs Job ID 113 → Matching Score (out of 10): 10
Resume ID 14 vs Job ID 114 → Matching Score (out of 10): 9
Resume ID 15 vs Job ID 115 → Matching Score (out of 10): 9
Resume ID 16 vs Job ID 116 → Matching Score (out of 10): 10
Resume ID 17 vs Job ID 117 → Matching Score (out of 10): 