In [1]:
import torch
import random
import numpy as np
import pandas as pd
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, TensorDataset
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, roc_auc_score

In [2]:
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
device

device(type='cuda')

In [3]:
df=pd.read_csv('/kaggle/input/hirings-dataset/hiring.csv')
df.shape

(1500, 11)

In [4]:
df.head()

Unnamed: 0,Age,Gender,EducationLevel,ExperienceYears,PreviousCompanies,DistanceFromCompany,InterviewScore,SkillScore,PersonalityScore,RecruitmentStrategy,HiringDecision
0,26,1,2,0,3,27,48,78,91,1,1
1,39,1,4,12,3,26,35,68,80,2,1
2,48,0,2,3,2,10,20,67,13,2,0
3,34,1,2,5,2,6,36,27,70,3,0
4,30,0,1,6,1,43,23,52,85,2,0


In [5]:
X = df.drop(['HiringDecision'], axis=1).values
y = df['HiringDecision'].values

In [6]:
# Standardize the features
scaler = StandardScaler()
X = scaler.fit_transform(X)

In [7]:
# Split the data into training and test sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

In [8]:
# Convert to PyTorch tensors
batch_size=32
X_train_tensor = torch.tensor(X_train, dtype=torch.float32)
y_train_tensor = torch.tensor(y_train, dtype=torch.float32).view(-1, 1)
X_test_tensor = torch.tensor(X_test, dtype=torch.float32)
y_test_tensor = torch.tensor(y_test, dtype=torch.float32).view(-1, 1)

In [9]:
#Dataset and Dataloader
train_dataset = torch.utils.data.TensorDataset(X_train_tensor, y_train_tensor)
train_loader = torch.utils.data.DataLoader(train_dataset, batch_size=batch_size, shuffle=True)

test_dataset = torch.utils.data.TensorDataset(X_test_tensor, y_test_tensor)
test_loader = torch.utils.data.DataLoader(test_dataset, batch_size=batch_size, shuffle=False)

In [10]:
#Model Definition
class CandidateRankingModel(nn.Module):
    def __init__(self, input_dim):
        super(CandidateRankingModel, self).__init__()
        self.fc1 = nn.Linear(input_dim, 128)
        self.bn1 = nn.BatchNorm1d(128)
        self.fc2 = nn.Linear(128, 64)
        self.bn2 = nn.BatchNorm1d(64)
        self.fc3 = nn.Linear(64, 1)  # Binary classification output
        self.dropout = nn.Dropout(0.3)
        self.relu = nn.ReLU()

    def forward(self, x):
        x = self.relu(self.bn1(self.fc1(x)))
        x = self.dropout(x)
        x = self.relu(self.bn2(self.fc2(x)))
        x = self.fc3(x)
        return x

In [11]:
# Instantiate the model
input_dim = X_train.shape[1]
model = CandidateRankingModel(input_dim).to(device)

In [12]:
model

CandidateRankingModel(
  (fc1): Linear(in_features=10, out_features=128, bias=True)
  (bn1): BatchNorm1d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (fc2): Linear(in_features=128, out_features=64, bias=True)
  (bn2): BatchNorm1d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (fc3): Linear(in_features=64, out_features=1, bias=True)
  (dropout): Dropout(p=0.3, inplace=False)
  (relu): ReLU()
)

In [13]:
# Loss function and optimizer
torch.manual_seed(42)

criterion = nn.BCEWithLogitsLoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)

num_epochs = 200
batch_size = 62

In [14]:
# Parameters
log_interval = 5
patience = 5    
min_delta = 1e-4

# Tracking variables
best_loss = float('inf')
early_stop_counter = 0

# Training loop
for epoch in range(num_epochs):
    model.train()
    running_loss = 0.0

    for batch in train_loader:
        inputs, targets = batch
        inputs, targets = inputs.to(device), targets.to(device)

        optimizer.zero_grad()

        outputs = model(inputs)
        loss = criterion(outputs, targets)
        loss.backward()
        optimizer.step()
        running_loss += loss.item()

    # Calculate average loss over the epoch
    avg_loss = running_loss / len(train_loader)

    # Print metrics at specified interval
    if (epoch + 1) % log_interval == 0:
        print(f"Epoch [{epoch + 1}/{num_epochs}], Loss: {avg_loss:.4f}")

    # Early stopping logic
    if avg_loss < best_loss - min_delta:
        best_loss = avg_loss
        early_stop_counter = 0
        print(f"Epoch----- {epoch + 1}   Loss improved :----- {best_loss:.4f}")
    else:
        early_stop_counter += 1

    # Stop training if no improvement is seen for 'patience' epochs
    if early_stop_counter >= patience:
        print(f" Early stopping triggered at epoch {epoch + 1}")
        print(f" Best Loss: {best_loss:.4f} achieved at epoch {epoch + 1 - patience}")
        break

Epoch----- 1   Loss improved :----- 0.5356
Epoch----- 2   Loss improved :----- 0.4324
Epoch----- 3   Loss improved :----- 0.3910
Epoch----- 4   Loss improved :----- 0.3842
Epoch [5/200], Loss: 0.3768
Epoch----- 5   Loss improved :----- 0.3768
Epoch----- 6   Loss improved :----- 0.3454
Epoch----- 8   Loss improved :----- 0.3225
Epoch [10/200], Loss: 0.3478
 Early stopping triggered at epoch 13
 Best Loss: 0.3225 achieved at epoch 8


In [15]:
# Model Evaluation
def evaluate(model, loader, device):
    model.eval()
    all_preds = []
    all_labels = []

    with torch.no_grad():
        for batch in loader:
            inputs, targets = batch
            inputs, targets = inputs.to(device), targets.to(device)

            outputs = model(inputs)
            probs = torch.sigmoid(outputs)  # Convert logits to probabilities
            preds = (probs >= 0.5).float()

            all_preds.extend(preds.cpu().numpy())
            all_labels.extend(targets.cpu().numpy())

    # Metrics calculation
    accuracy = accuracy_score(all_labels, all_preds)
    precision = precision_score(all_labels, all_preds)
    recall = recall_score(all_labels, all_preds)
    f1 = f1_score(all_labels, all_preds)
    auc = roc_auc_score(all_labels, all_preds)

    return {
        'Accuracy': f"{accuracy:.4f}",
        'Precision': f"{precision:.4f}",
        'Recall': f"{recall:.4f}",
        'F1-Score': f"{f1:.4f}",
        'AUC-ROC': f"{auc:.4f}"
    }
# Evaluate the model
metrics = evaluate(model, test_loader, device)
print("\nModel Evaluation Metrics:")
for metric, value in metrics.items():
    print(f"{metric}: {value}")


Model Evaluation Metrics:
Accuracy: 0.9000
Precision: 0.8161
Recall: 0.8353
F1-Score: 0.8256
AUC-ROC: 0.8804


In [16]:
#Candidate Prediction and Ranking   , Predict on the entire dataset
model.eval()
with torch.no_grad():
    all_preds = torch.sigmoid(model(torch.tensor(X, dtype=torch.float32).to(device))).cpu().numpy().flatten()

# predicted scores to the dataframe
df['predicted_score'] = all_preds

# Rank candidates
top_n = 1500
top_candidates = df.sort_values('predicted_score', ascending=False).head(top_n)
print("\nTop N Candidates Based on Predicted Scores:")
print(top_candidates[['Age', 'Gender', 'ExperienceYears', 'InterviewScore', 'predicted_score']])


Top N Candidates Based on Predicted Scores:
      Age  Gender  ExperienceYears  InterviewScore  predicted_score
1094   39       0               15              58         0.998879
1088   49       1               14              91         0.998866
863    48       1               14              80         0.997471
726    26       0               15              76         0.996712
1014   35       1               12             100         0.996436
...   ...     ...              ...             ...              ...
579    27       1                0              41         0.004020
472    33       1                0              64         0.004019
384    20       1                4              60         0.003530
466    47       1                2               9         0.003113
357    35       1                0              39         0.002415

[1500 rows x 5 columns]


In [17]:
top_candidates.head(10)

Unnamed: 0,Age,Gender,EducationLevel,ExperienceYears,PreviousCompanies,DistanceFromCompany,InterviewScore,SkillScore,PersonalityScore,RecruitmentStrategy,HiringDecision,predicted_score
1094,39,0,4,15,1,5,58,99,86,1,1,0.998879
1088,49,1,4,14,1,17,91,71,80,1,1,0.998866
863,48,1,2,14,2,35,80,99,94,1,1,0.997471
726,26,0,3,15,3,30,76,72,87,1,1,0.996712
1014,35,1,3,12,2,30,100,88,56,1,1,0.996436
86,37,1,2,8,3,40,96,96,83,1,1,0.996258
1095,44,0,4,8,3,34,69,65,98,1,1,0.996128
368,31,0,3,14,2,15,82,86,64,1,1,0.995941
516,45,0,4,10,4,29,49,69,96,1,1,0.99576
682,47,1,4,15,1,49,8,100,95,1,1,0.995754


In [18]:
top_candidates.tail(10)

Unnamed: 0,Age,Gender,EducationLevel,ExperienceYears,PreviousCompanies,DistanceFromCompany,InterviewScore,SkillScore,PersonalityScore,RecruitmentStrategy,HiringDecision,predicted_score
576,34,1,2,1,2,18,29,28,23,3,0,0.005985
853,50,1,2,1,2,18,22,2,7,2,0,0.005914
170,31,1,2,1,2,14,11,23,44,2,0,0.005904
1262,31,1,2,2,1,3,47,11,64,3,0,0.005511
763,22,1,2,2,2,25,62,8,17,2,0,0.004374
579,27,1,2,0,1,35,41,28,12,3,0,0.00402
472,33,1,1,0,1,23,64,52,14,2,0,0.004019
384,20,1,2,4,2,18,60,3,19,3,0,0.00353
466,47,1,1,2,1,22,9,3,7,2,0,0.003113
357,35,1,2,0,2,12,39,9,3,2,0,0.002415


In [19]:
model.state_dict()

OrderedDict([('fc1.weight',
              tensor([[ 0.1241,  0.1650,  0.2101,  ...,  0.2534, -0.1496, -0.4181],
                      [-0.1392,  0.3150, -0.0944,  ..., -0.1069, -0.2465,  0.1714],
                      [-0.3100, -0.1378,  0.0379,  ..., -0.3082, -0.1583,  0.2358],
                      ...,
                      [-0.0497, -0.1824, -0.2547,  ...,  0.1126,  0.1292,  0.1587],
                      [-0.2061,  0.1899,  0.2132,  ...,  0.2139, -0.0853,  0.1951],
                      [-0.0390,  0.2867,  0.1242,  ..., -0.0710, -0.1402,  0.0028]],
                     device='cuda:0')),
             ('fc1.bias',
              tensor([-0.2490,  0.2801,  0.0041, -0.0738,  0.0723,  0.1290,  0.0169, -0.0805,
                       0.2729, -0.1643, -0.2893,  0.2377,  0.1431, -0.0512,  0.0988,  0.1791,
                       0.2923,  0.2484,  0.1583,  0.1804, -0.1722,  0.3025,  0.2071, -0.1247,
                       0.0709, -0.1307,  0.0485,  0.2399, -0.2945,  0.2318,  0.0873, -0.1054

In [20]:
model_path = "CandidateRankingModel.pth"
torch.save(model, model_path)
print(f"🔥 Full model saved at: {model_path}")

🔥 Full model saved at: CandidateRankingModel.pth


# Pipeline

In [None]:
import torch
# Configurations

DEVICE = torch.device("cuda" if torch.cuda.is_available() else "cpu")
# Training parameters
BATCH_SIZE = 32
NUM_EPOCHS = 200
LEARNING_RATE = 0.001
PATIENCE = 5
MIN_DELTA = 1e-4
LOG_INTERVAL = 5

# Paths
DATA_PATH = "hiring.csv"
MODEL_PATH = "CandidateRankingModel.pth"


In [None]:
#Model Architecture

import torch
import torch.nn as nn

class CandidateRankingModel(nn.Module):
    def __init__(self, input_dim):
        super(CandidateRankingModel, self).__init__()
        self.fc1 = nn.Linear(input_dim, 128)
        self.bn1 = nn.BatchNorm1d(128)
        self.fc2 = nn.Linear(128, 64)
        self.bn2 = nn.BatchNorm1d(64)
        self.fc3 = nn.Linear(64, 1)  # Binary classification
        self.dropout = nn.Dropout(0.3)
        self.relu = nn.ReLU()

    def forward(self, x):
        x = self.relu(self.bn1(self.fc1(x)))
        x = self.dropout(x)
        x = self.relu(self.bn2(self.fc2(x)))
        x = self.fc3(x)
        return x

In [None]:
import torch
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, roc_auc_score

from torch.utils.data import DataLoader, TensorDataset

# 🚀 Load and preprocess data
def load_data(path, batch_size=32):
    df = pd.read_csv(path)

    # Split features and target
    X = df.drop(['HiringDecision'], axis=1).values
    y = df['HiringDecision'].values

    # Standardize features
    scaler = StandardScaler()
    X = scaler.fit_transform(X)

    # Split into train and test sets
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

    # Convert to PyTorch tensors
    X_train_tensor = torch.tensor(X_train, dtype=torch.float32)
    y_train_tensor = torch.tensor(y_train, dtype=torch.float32).view(-1, 1)
    X_test_tensor = torch.tensor(X_test, dtype=torch.float32)
    y_test_tensor = torch.tensor(y_test, dtype=torch.float32).view(-1, 1)

    # Create DataLoader
    train_loader = DataLoader(TensorDataset(X_train_tensor, y_train_tensor), batch_size=batch_size, shuffle=True)
    test_loader = DataLoader(TensorDataset(X_test_tensor, y_test_tensor), batch_size=batch_size, shuffle=False)

    return train_loader, test_loader, X, scaler

# 🚀 Training function
def train_model(model, train_loader, criterion, optimizer, num_epochs, device, patience=5, min_delta=1e-4):
    best_loss = float('inf')
    early_stop_counter = 0

    for epoch in range(num_epochs):
        model.train()
        running_loss = 0.0

        for batch in train_loader:
            inputs, targets = batch
            inputs, targets = inputs.to(device), targets.to(device)

            optimizer.zero_grad()
            outputs = model(inputs)
            loss = criterion(outputs, targets)
            loss.backward()
            optimizer.step()

            running_loss += loss.item()

        avg_loss = running_loss / len(train_loader)

        # Logging
        if (epoch + 1) % 5 == 0:
            print(f"Epoch [{epoch + 1}/{num_epochs}], Loss: {avg_loss:.4f}")

        # Early stopping
        if avg_loss < best_loss - min_delta:
            best_loss = avg_loss
            early_stop_counter = 0
        else:
            early_stop_counter += 1

        if early_stop_counter >= patience:
            print(f"Early stopping triggered at epoch {epoch + 1}")
            break

    print("Training complete!")
    return model

# 🚀 Evaluation function
def evaluate_model(model, loader, device):
    model.eval()
    all_preds, all_labels = [], []

    with torch.no_grad():
        for batch in loader:
            inputs, targets = batch
            inputs, targets = inputs.to(device), targets.to(device)

            outputs = model(inputs)
            probs = torch.sigmoid(outputs)
            preds = (probs >= 0.5).float()

            all_preds.extend(preds.cpu().numpy())
            all_labels.extend(targets.cpu().numpy())

    accuracy = accuracy_score(all_labels, all_preds)
    precision = precision_score(all_labels, all_preds)
    recall = recall_score(all_labels, all_preds)
    f1 = f1_score(all_labels, all_preds)
    auc = roc_auc_score(all_labels, all_preds)

    metrics = {
        'Accuracy': f"{accuracy:.4f}",
        'Precision': f"{precision:.4f}",
        'Recall': f"{recall:.4f}",
        'F1-Score': f"{f1:.4f}",
        'AUC-ROC': f"{auc:.4f}"
    }
    return metrics

In [None]:
import torch
from config import *
from model import CandidateRankingModel
from utils import load_data, train_model, evaluate_model

# 🚀 Load data
train_loader, test_loader, X, scaler = load_data(DATA_PATH, BATCH_SIZE)

# 🚀 Initialize model
input_dim = X.shape[1]
model = CandidateRankingModel(input_dim).to(DEVICE)

# 🚀 Loss and optimizer
criterion = torch.nn.BCEWithLogitsLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=LEARNING_RATE)

# 🚀 Train the model
model = train_model(model, train_loader, criterion, optimizer, NUM_EPOCHS, DEVICE, PATIENCE, MIN_DELTA)

# 🚀 Save the model
torch.save({'model_state_dict': model.state_dict()}, MODEL_PATH)
print(f"Model saved at {MODEL_PATH}")

# 🚀 Evaluate the model
metrics = evaluate_model(model, test_loader, DEVICE)
print("\nModel Evaluation Metrics:")
for metric, value in metrics.items():
    print(f"{metric}: {value}")

# 🚀 Inference and ranking candidates
model.eval()
with torch.no_grad():
    all_preds = torch.sigmoid(model(torch.tensor(X, dtype=torch.float32).to(DEVICE))).cpu().numpy().flatten()

# Attach predictions to DataFrame
df = pd.read_csv(DATA_PATH)
df['predicted_score'] = all_preds

# Rank top candidates
top_n = 1500
top_candidates = df.sort_values('predicted_score', ascending=False).head(top_n)
print("\nTop N Candidates Based on Predicted Scores:")
print(top_candidates[['Age', 'Gender', 'ExperienceYears', 'InterviewScore', 'predicted_score']])
