In [4]:
import torch
import pandas as pd
import pickle
from transformers import BertTokenizerFast, BertForSequenceClassification

# Load model and tokenizer
model_path = "job_classifier_bert"
model = BertForSequenceClassification.from_pretrained(model_path)
tokenizer = BertTokenizerFast.from_pretrained("bert-base-uncased")

# Load label encoder
with open("label_encoder.pkl", "rb") as f:
    label_encoder = pickle.load(f)

# Setup device
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model.to(device).eval()


BertForSequenceClassification(
  (bert): BertModel(
    (embeddings): BertEmbeddings(
      (word_embeddings): Embedding(30522, 768, padding_idx=0)
      (position_embeddings): Embedding(512, 768)
      (token_type_embeddings): Embedding(2, 768)
      (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
      (dropout): Dropout(p=0.1, inplace=False)
    )
    (encoder): BertEncoder(
      (layer): ModuleList(
        (0-11): 12 x BertLayer(
          (attention): BertAttention(
            (self): BertSdpaSelfAttention(
              (query): Linear(in_features=768, out_features=768, bias=True)
              (key): Linear(in_features=768, out_features=768, bias=True)
              (value): Linear(in_features=768, out_features=768, bias=True)
              (dropout): Dropout(p=0.1, inplace=False)
            )
            (output): BertSelfOutput(
              (dense): Linear(in_features=768, out_features=768, bias=True)
              (LayerNorm): LayerNorm((768,), eps=1e

In [5]:
def predict_job(text):
    inputs = tokenizer(text, return_tensors="pt", padding=True, truncation=True, max_length=512)
    inputs = {k: v.to(device) for k, v in inputs.items()}
    with torch.no_grad():
        logits = model(**inputs).logits
        pred = logits.argmax(-1).item()
    return label_encoder.inverse_transform([pred])[0]


In [3]:
import pandas as pd
# Load CSV file (adjust path as needed)
df = pd.read_csv("cv_chunk_1.csv")  # 🔁 Replace with your actual file

# Pick one row to simulate form input (e.g., index 0)
row = df.iloc[0]

# Combine all CV text fields
full_text = " ".join([
    str(row.get("work_experience_file", "")),
    str(row.get("school_experience_file", "")),
    str(row.get("org_experience_file", "")),
    str(row.get("profile_description_file", "")),
    str(row.get("other_experience_file", ""))
])

# Predict job
predicted_job = predict_job(full_text)
print("Predicted Job for:", row["full_name"])
print("→", predicted_job)


Predicted Job for: Mallory Brady
→ Admin
