In [1]:
!pip install -q -U "torch==2.0.1" "torchvision" "torchaudio"
!pip install -q -U "sentence-transformers==2.2.2" "transformers==4.30.2" "huggingface_hub==0.25.0" "datasets" "accelerate" "peft" "diffusers"


[0m

In [15]:
# ✅ Import Necessary Libraries
import pandas as pd
import random
from sentence_transformers import SentenceTransformer, InputExample, losses
from torch.utils.data import DataLoader
import os

# ✅ Disable Weights & Biases Logging
# Sets WANDB_DISABLED so that any W&B integration stays switched off for this run.
os.environ["WANDB_DISABLED"] = "true"

# ✅ Load Dataset (Use Only One CSV)
# The CSV is read before the model is loaded so that a missing file or an
# unexpected schema fails immediately with a clear message.
file_path = "/content/UpdatedResumeDataSet.csv"
df = pd.read_csv(file_path)

# The cells below read exactly these two columns; verify them up front instead
# of failing later with a bare KeyError.
required_columns = {"Category", "Resume"}
missing_columns = required_columns - set(df.columns)
if missing_columns:
    raise ValueError(
        f"❌ Error: {file_path} is missing required column(s): {sorted(missing_columns)}"
    )

# ✅ Load Pretrained Model
model = SentenceTransformer("sentence-transformers/all-MiniLM-L6-v2")

# ✅ Create Fine-Tuning Dataset: Job Descriptions + Resume + Labels
# Every resume contributes two (job role, resume) pairs:
#   * a positive pair with its own category           -> label 1.0
#   * a negative pair with a randomly chosen other one -> label 0.0
# The list holds all positives first, then all negatives.

# Dedicated, seeded RNG so the negative pairs are identical on every run and the
# global `random` state is left untouched.
NEGATIVE_SAMPLING_SEED = 42
negative_rng = random.Random(NEGATIVE_SAMPLING_SEED)

# Rows missing either field cannot form a text pair (a NaN is not a string),
# so they are excluded here without modifying `df` itself.
labelled_resumes = df[["Category", "Resume"]].dropna()
pairs = list(zip(labelled_resumes["Category"], labelled_resumes["Resume"]))

# 🔹 Generate Positive Pairs (Matching Job Role & Resume → Label = 1.0)
# The job role string itself stands in for the job description.
train_examples = [
    InputExample(texts=[category, resume_text], label=1.0)
    for category, resume_text in pairs
]

# ✅ Ensure Dataset is Not Empty
if len(train_examples) == 0:
    raise ValueError("❌ Error: train_examples is empty! Please check dataset processing.")

# 🔹 Generate Negative Pairs (Mismatching Job Role & Resume → Label = 0.0)
categories = labelled_resumes["Category"].unique().tolist()  # all unique job roles
if len(categories) < 2:
    # With a single category there is no "wrong" role to pick; the previous
    # retry-until-different loop would never terminate in that situation.
    raise ValueError(
        "❌ Error: at least two distinct categories are required to build negative pairs."
    )

# For each category, the roles that count as a mismatch. Sampling directly from
# this list is uniform over the other categories and needs no retry loop.
mismatched_roles = {
    category: [other for other in categories if other != category]
    for category in categories
}
train_examples.extend(
    InputExample(
        texts=[negative_rng.choice(mismatched_roles[category]), resume_text],
        label=0.0,
    )
    for category, resume_text in pairs
)

# ✅ Batch the labelled pairs; order is reshuffled on every epoch.
train_dataloader = DataLoader(train_examples, batch_size=2, shuffle=True)

# ✅ Loss: CosineSimilarityLoss wrapped around the model being fine-tuned.
train_loss = losses.CosineSimilarityLoss(model)

# ✅ Fine-tune: a single (dataloader, loss) objective, 5 epochs, 10 warmup steps.
training_objective = (train_dataloader, train_loss)
fit_options = {
    "epochs": 5,  # Increase for better accuracy
    "warmup_steps": 10,
}
model.fit(train_objectives=[training_objective], **fit_options)

# ✅ Persist the fine-tuned weights to disk.
output_dir = "/content/bert_model"
model.save(output_dir)
print("✅ Fine-tuning complete!")

# ✅ Smoke-test the model that was just written to disk
from sentence_transformers import util

# Reload from the saved directory so the check exercises the persisted files,
# not the in-memory `model` object.
fine_tuned_model = SentenceTransformer("/content/bert_model")

# A hand-written job description / resume pair to score.
job_desc = "Software Engineer role, requires Python and Machine Learning"
resume = "John has 3 years of experience in Python and AI development"

# Encode each text on its own into a tensor embedding.
job_embedding, resume_embedding = (
    fine_tuned_model.encode(text, convert_to_tensor=True)
    for text in (job_desc, resume)
)

# Cosine similarity of the two embeddings, reduced to a plain Python float.
similarity_matrix = util.pytorch_cos_sim(job_embedding, resume_embedding)
similarity_score = similarity_matrix.item()
print(f"🔍 Similarity Score: {similarity_score:.2f}")

# ✅ Package the fine-tuned model for download
import shutil
from google.colab import files

# Archive the saved model directory; the ".zip" suffix is appended to the base
# name, and the resulting archive path is the value of this expression.
shutil.make_archive(
    base_name="/content/bert_model",
    format="zip",
    root_dir="/content/bert_model",
)



Epoch:   0%|          | 0/5 [00:00<?, ?it/s]

Iteration:   0%|          | 0/962 [00:00<?, ?it/s]

Iteration:   0%|          | 0/962 [00:00<?, ?it/s]

Iteration:   0%|          | 0/962 [00:00<?, ?it/s]

Iteration:   0%|          | 0/962 [00:00<?, ?it/s]

Iteration:   0%|          | 0/962 [00:00<?, ?it/s]

✅ Fine-tuning complete!
🔍 Similarity Score: 0.84


'/content/bert_model.zip'

In [16]:
# Sanity check on training data: cosine similarity for the first 10 examples.
# NOTE(review): with the construction above (positives appended first) these
# 10 are all label-1.0 pairs, so this is an average over positives only.
train_scores = [
    util.pytorch_cos_sim(
        model.encode(example.texts[0], convert_to_tensor=True),  # job role text
        model.encode(example.texts[1], convert_to_tensor=True),  # resume text
    ).item()
    for example in train_examples[:10]
]

mean_train_score = sum(train_scores) / len(train_scores)
print(f"🔍 Average Training Similarity Score: {mean_train_score:.2f}")


🔍 Average Training Similarity Score: 0.99


In [18]:
# Score one hand-written job description / resume pair with the in-memory model.
job_desc = "Software Engineer role requiring Python and AI skills"
resume = "I have 3 years of experience in Python, TensorFlow, and AI model development."

# Encode each text separately into a tensor embedding.
job_embedding, resume_embedding = (
    model.encode(text, convert_to_tensor=True) for text in (job_desc, resume)
)

# Cosine similarity of the pair as a plain float.
similarity_matrix = util.pytorch_cos_sim(job_embedding, resume_embedding)
similarity_score = similarity_matrix.item()
print(f"🔍 Real-World Resume Similarity Score: {similarity_score:.2f}")


🔍 Real-World Resume Similarity Score: 0.88


In [17]:
# Download the trained model
# `files.download` sends a single file to the browser, so it must be given the
# zip archive produced by `shutil.make_archive` ("/content/bert_model.zip"),
# not the "/content/bert_model" directory the model was saved into.
files.download("/content/bert_model.zip")
print("✅ Model downloaded successfully!")

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

✅ Model downloaded successfully!
