Required Libraries:

In [None]:
pip install transformers torch


Load Pre-trained BERT Model and Tokenizer:

In [None]:
from transformers import BertTokenizer, BertModel
import torch

# The tokenizer and the encoder are loaded from the same pre-trained
# checkpoint so the token ids produced by one are the ids the other expects.
PRETRAINED_CHECKPOINT = 'bert-base-uncased'
tokenizer = BertTokenizer.from_pretrained(PRETRAINED_CHECKPOINT)
model = BertModel.from_pretrained(PRETRAINED_CHECKPOINT)


Preprocess Text Data:

In [None]:
def preprocess_text(text):
    """Tokenize ``text`` with the module-level BERT tokenizer.

    The text is truncated and padded to exactly 512 tokens and the result is
    returned as PyTorch tensors, ready to be unpacked into the model call.
    """
    return tokenizer(
        text,
        padding='max_length',
        truncation=True,
        max_length=512,
        return_tensors='pt',
    )

# Example usage: tokenize one sample candidate description. `inputs` is reused
# by the following cells, so keep the name if you change the sample text.
text = "Software engineer with 5 years of experience in Python and machine learning."
inputs = preprocess_text(text)


Extract Features Using BERT:

In [None]:
def get_embeddings(inputs):
    """Return one fixed-size embedding per input sequence.

    Runs the module-level BERT ``model`` without gradient tracking and
    mean-pools the final hidden states over the token dimension.

    Padding positions are excluded from the average: the inputs are padded
    to a fixed length, and averaging over ``[PAD]`` positions would let the
    padding dominate the embedding of any short text.

    Args:
        inputs: Mapping of tokenizer outputs (e.g. ``input_ids``,
            ``attention_mask``) that is unpacked into the model call.

    Returns:
        Tensor with one pooled embedding row per input sequence.
    """
    with torch.no_grad():
        outputs = model(**inputs)
    token_states = outputs.last_hidden_state

    if 'attention_mask' not in inputs:
        # No mask supplied: treat every position as a real token.
        return token_states.mean(dim=1)

    # Broadcast the mask over the hidden dimension so padded positions
    # contribute zero to the sum.
    mask = inputs['attention_mask'].unsqueeze(-1).to(token_states.dtype)
    # Guard against division by zero for a (degenerate) all-padding row.
    token_counts = mask.sum(dim=1).clamp(min=1.0)
    return (token_states * mask).sum(dim=1) / token_counts

# Example usage: embed the tokenized sample text and display the resulting
# tensor. `embeddings` is reused by the training example further down.
embeddings = get_embeddings(inputs)
print(embeddings)


Build a Classifier:

In [None]:
from torch import nn, optim

class CandidateClassifier(nn.Module):
    """Logistic-regression head that maps an embedding to a probability.

    A single linear layer followed by a sigmoid, producing one score in
    ``[0, 1]`` per input row.

    Args:
        input_dim: Width of each input embedding. Defaults to 768, the
            hidden size of the BERT base model.
    """

    def __init__(self, input_dim: int = 768):
        super().__init__()
        self.fc = nn.Linear(input_dim, 1)
        self.sigmoid = nn.Sigmoid()

    def forward(self, x):
        """Return sigmoid scores of shape ``(..., 1)`` for inputs ``(..., input_dim)``."""
        return self.sigmoid(self.fc(x))

# Initialize the model, loss function, and optimizer
classifier = CandidateClassifier()
# BCELoss operates on probabilities; the classifier's forward pass already
# applies a sigmoid, so its outputs can be fed to this loss directly.
criterion = nn.BCELoss()
# Adam is given only the classifier's parameters (learning rate 1e-3).
optimizer = optim.Adam(classifier.parameters(), lr=1e-3)

# Example training step
def train_step(embeddings, labels):
    """Run one optimization step of the module-level ``classifier``.

    Uses the module-level ``criterion`` and ``optimizer``.

    Args:
        embeddings: Input tensor fed to the classifier, one row per example.
        labels: Tensor of binary targets (1: suitable, 0: not suitable),
            one per example. Any integer or float dtype is accepted, with
            or without a trailing singleton dimension.

    Returns:
        The loss for this step as a Python float.
    """
    classifier.train()
    optimizer.zero_grad()
    outputs = classifier(embeddings)
    # BCELoss requires floating-point targets with exactly the same shape as
    # the predictions; integer labels or a (N,) vs (N, 1) mismatch raise.
    targets = labels.to(device=outputs.device, dtype=outputs.dtype).reshape(outputs.shape)
    loss = criterion(outputs, targets)
    loss.backward()
    optimizer.step()
    return loss.item()

# Example usage
# Label for the single training example (1.0: suitable, 0.0: not suitable).
# BCELoss needs a float target with the same (batch, 1) shape as the
# classifier output, hence the nested list and the float literal.
labels = torch.tensor([[1.0]])
loss = train_step(embeddings, labels)
print(f"Training loss: {loss}")


Screen Candidates:

In [None]:
def screen_candidates(classifier, candidate_texts):
    """Score every candidate text and rank them from best to worst.

    Each text is tokenized, embedded and passed through ``classifier`` (put
    into eval mode first) without gradient tracking.

    Args:
        classifier: Module mapping one embedding to a single scalar score.
        candidate_texts: Iterable of candidate description strings.

    Returns:
        List of ``(text, score)`` tuples sorted by descending score.
    """
    classifier.eval()

    def _score(candidate):
        # One text at a time: tokenize, embed, then classify.
        features = get_embeddings(preprocess_text(candidate))
        with torch.no_grad():
            return classifier(features).item()

    ranking = [(candidate, _score(candidate)) for candidate in candidate_texts]
    ranking.sort(key=lambda pair: pair[1], reverse=True)
    return ranking

# Example usage: score two sample candidate descriptions and print them as
# (text, score) pairs, highest score first.
candidate_texts = ["Experienced data scientist with expertise in Python and deep learning.",
                   "Junior software developer with knowledge of Java and web development."]
ranked_candidates = screen_candidates(classifier, candidate_texts)
print(ranked_candidates)
