In [1]:
import torch
import random
import numpy as np
import pandas as pd
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, TensorDataset
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, roc_auc_score

In [2]:
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
device

device(type='cuda')

In [3]:
df=pd.read_csv('/kaggle/input/hirings-dataset/hiring.csv')
df.shape

(1500, 11)

In [4]:
df.head()

Unnamed: 0,Age,Gender,EducationLevel,ExperienceYears,PreviousCompanies,DistanceFromCompany,InterviewScore,SkillScore,PersonalityScore,RecruitmentStrategy,HiringDecision
0,26,1,2,0,3,27,48,78,91,1,1
1,39,1,4,12,3,26,35,68,80,2,1
2,48,0,2,3,2,10,20,67,13,2,0
3,34,1,2,5,2,6,36,27,70,3,0
4,30,0,1,6,1,43,23,52,85,2,0


In [5]:
X = df.drop(['HiringDecision'], axis=1).values
y = df['HiringDecision'].values

In [6]:
# Standardize the features
scaler = StandardScaler()
X = scaler.fit_transform(X)

In [7]:
# Split the data into training and test sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

In [8]:
# Convert to PyTorch tensors
batch_size=32
X_train_tensor = torch.tensor(X_train, dtype=torch.float32)
y_train_tensor = torch.tensor(y_train, dtype=torch.float32).view(-1, 1)
X_test_tensor = torch.tensor(X_test, dtype=torch.float32)
y_test_tensor = torch.tensor(y_test, dtype=torch.float32).view(-1, 1)

In [9]:
#Dataset and Dataloader
train_dataset = torch.utils.data.TensorDataset(X_train_tensor, y_train_tensor)
train_loader = torch.utils.data.DataLoader(train_dataset, batch_size=batch_size, shuffle=True)

test_dataset = torch.utils.data.TensorDataset(X_test_tensor, y_test_tensor)
test_loader = torch.utils.data.DataLoader(test_dataset, batch_size=batch_size, shuffle=False)

In [10]:
#Model Definition
class CandidateRankingModel(nn.Module):
    def __init__(self, input_dim):
        super(CandidateRankingModel, self).__init__()
        self.fc1 = nn.Linear(input_dim, 128)
        self.bn1 = nn.BatchNorm1d(128)
        self.fc2 = nn.Linear(128, 64)
        self.bn2 = nn.BatchNorm1d(64)
        self.fc3 = nn.Linear(64, 1)  # Binary classification output
        self.dropout = nn.Dropout(0.3)
        self.relu = nn.ReLU()

    def forward(self, x):
        x = self.relu(self.bn1(self.fc1(x)))
        x = self.dropout(x)
        x = self.relu(self.bn2(self.fc2(x)))
        x = self.fc3(x)
        return x

In [11]:
# Instantiate the model
input_dim = X_train.shape[1]
model = CandidateRankingModel(input_dim).to(device)

In [12]:
model

CandidateRankingModel(
  (fc1): Linear(in_features=10, out_features=128, bias=True)
  (bn1): BatchNorm1d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (fc2): Linear(in_features=128, out_features=64, bias=True)
  (bn2): BatchNorm1d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (fc3): Linear(in_features=64, out_features=1, bias=True)
  (dropout): Dropout(p=0.3, inplace=False)
  (relu): ReLU()
)

In [13]:
# Loss function and optimizer
torch.manual_seed(42)

criterion = nn.BCEWithLogitsLoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)

num_epochs = 200
batch_size = 62

In [14]:
# Parameters
log_interval = 5
patience = 5    
min_delta = 1e-4

# Tracking variables
best_loss = float('inf')
early_stop_counter = 0

# Training loop
for epoch in range(num_epochs):
    model.train()
    running_loss = 0.0

    for batch in train_loader:
        inputs, targets = batch
        inputs, targets = inputs.to(device), targets.to(device)

        optimizer.zero_grad()

        outputs = model(inputs)
        loss = criterion(outputs, targets)
        loss.backward()
        optimizer.step()
        running_loss += loss.item()

    # Calculate average loss over the epoch
    avg_loss = running_loss / len(train_loader)

    # Print metrics at specified interval
    if (epoch + 1) % log_interval == 0:
        print(f"Epoch [{epoch + 1}/{num_epochs}], Loss: {avg_loss:.4f}")

    # Early stopping logic
    if avg_loss < best_loss - min_delta:
        best_loss = avg_loss
        early_stop_counter = 0
        print(f"Epoch----- {epoch + 1}   Loss improved :----- {best_loss:.4f}")
    else:
        early_stop_counter += 1

    # Stop training if no improvement is seen for 'patience' epochs
    if early_stop_counter >= patience:
        print(f" Early stopping triggered at epoch {epoch + 1}")
        print(f" Best Loss: {best_loss:.4f} achieved at epoch {epoch + 1 - patience}")
        break

Epoch----- 1   Loss improved :----- 0.5411
Epoch----- 2   Loss improved :----- 0.4167
Epoch----- 3   Loss improved :----- 0.3859
Epoch----- 4   Loss improved :----- 0.3727
Epoch [5/200], Loss: 0.3552
Epoch----- 5   Loss improved :----- 0.3552
Epoch----- 6   Loss improved :----- 0.3363
Epoch----- 7   Loss improved :----- 0.3278
Epoch----- 8   Loss improved :----- 0.3184
Epoch [10/200], Loss: 0.3310
Epoch----- 12   Loss improved :----- 0.3172
Epoch [15/200], Loss: 0.3054
Epoch----- 15   Loss improved :----- 0.3054
Epoch----- 17   Loss improved :----- 0.3021
Epoch [20/200], Loss: 0.2959
Epoch----- 20   Loss improved :----- 0.2959
Epoch----- 24   Loss improved :----- 0.2944
Epoch [25/200], Loss: 0.3041
Epoch----- 26   Loss improved :----- 0.2908
Epoch----- 27   Loss improved :----- 0.2819
Epoch----- 28   Loss improved :----- 0.2732
Epoch [30/200], Loss: 0.2875
 Early stopping triggered at epoch 33
 Best Loss: 0.2732 achieved at epoch 28


In [15]:
# Model Evaluation
def evaluate(model, loader, device):
    model.eval()
    all_preds = []
    all_labels = []

    with torch.no_grad():
        for batch in loader:
            inputs, targets = batch
            inputs, targets = inputs.to(device), targets.to(device)

            outputs = model(inputs)
            probs = torch.sigmoid(outputs)  # Convert logits to probabilities
            preds = (probs >= 0.5).float()

            all_preds.extend(preds.cpu().numpy())
            all_labels.extend(targets.cpu().numpy())

    # Metrics calculation
    accuracy = accuracy_score(all_labels, all_preds)
    precision = precision_score(all_labels, all_preds)
    recall = recall_score(all_labels, all_preds)
    f1 = f1_score(all_labels, all_preds)
    auc = roc_auc_score(all_labels, all_preds)

    return {
        'Accuracy': f"{accuracy:.4f}",
        'Precision': f"{precision:.4f}",
        'Recall': f"{recall:.4f}",
        'F1-Score': f"{f1:.4f}",
        'AUC-ROC': f"{auc:.4f}"
    }
# Evaluate the model
metrics = evaluate(model, test_loader, device)
print("\nModel Evaluation Metrics:")
for metric, value in metrics.items():
    print(f"{metric}: {value}")


Model Evaluation Metrics:
Accuracy: 0.9100
Precision: 0.8452
Recall: 0.8353
F1-Score: 0.8402
AUC-ROC: 0.8874


In [16]:
#Candidate Prediction and Ranking   , Predict on the entire dataset
model.eval()
with torch.no_grad():
    all_preds = torch.sigmoid(model(torch.tensor(X, dtype=torch.float32).to(device))).cpu().numpy().flatten()

# predicted scores to the dataframe
df['predicted_score'] = all_preds

# Rank candidates
top_n = 1500
top_candidates = df.sort_values('predicted_score', ascending=False).head(top_n)
print("\nTop N Candidates Based on Predicted Scores:")
print(top_candidates[['Age', 'Gender', 'ExperienceYears', 'InterviewScore', 'predicted_score']])


Top N Candidates Based on Predicted Scores:
      Age  Gender  ExperienceYears  InterviewScore  predicted_score
1094   39       0               15              58         0.999885
1088   49       1               14              91         0.999872
682    47       1               15               8         0.999442
516    45       0               10              49         0.999404
1095   44       0                8              69         0.999373
...   ...     ...              ...             ...              ...
576    34       1                1              29         0.002311
466    47       1                2               9         0.002004
384    20       1                4              60         0.001833
357    35       1                0              39         0.001705
579    27       1                0              41         0.001615

[1500 rows x 5 columns]


In [17]:
top_candidates.head(10)

Unnamed: 0,Age,Gender,EducationLevel,ExperienceYears,PreviousCompanies,DistanceFromCompany,InterviewScore,SkillScore,PersonalityScore,RecruitmentStrategy,HiringDecision,predicted_score
1094,39,0,4,15,1,5,58,99,86,1,1,0.999885
1088,49,1,4,14,1,17,91,71,80,1,1,0.999872
682,47,1,4,15,1,49,8,100,95,1,1,0.999442
516,45,0,4,10,4,29,49,69,96,1,1,0.999404
1095,44,0,4,8,3,34,69,65,98,1,1,0.999373
1014,35,1,3,12,2,30,100,88,56,1,1,0.998892
253,28,1,4,9,4,37,37,91,72,1,1,0.998755
1015,42,1,3,13,3,14,55,85,82,1,1,0.998747
447,34,0,4,15,5,16,35,96,43,1,1,0.998674
368,31,0,3,14,2,15,82,86,64,1,1,0.998619


In [18]:
top_candidates.tail(10)

Unnamed: 0,Age,Gender,EducationLevel,ExperienceYears,PreviousCompanies,DistanceFromCompany,InterviewScore,SkillScore,PersonalityScore,RecruitmentStrategy,HiringDecision,predicted_score
472,33,1,1,0,1,23,64,52,14,2,0,0.002961
763,22,1,2,2,2,25,62,8,17,2,0,0.002943
1446,38,1,2,1,1,36,32,41,3,2,0,0.002781
1333,23,1,1,2,5,17,23,7,50,3,0,0.002711
1262,31,1,2,2,1,3,47,11,64,3,0,0.002663
576,34,1,2,1,2,18,29,28,23,3,0,0.002311
466,47,1,1,2,1,22,9,3,7,2,0,0.002004
384,20,1,2,4,2,18,60,3,19,3,0,0.001833
357,35,1,2,0,2,12,39,9,3,2,0,0.001705
579,27,1,2,0,1,35,41,28,12,3,0,0.001615


In [19]:
model.state_dict()

OrderedDict([('fc1.weight',
              tensor([[-0.1884,  0.0194, -0.2476,  ..., -0.0410, -0.0596, -0.1385],
                      [ 0.0873, -0.0703,  0.2718,  ...,  0.2322,  0.1968,  0.3797],
                      [ 0.1720, -0.1817,  0.3030,  ...,  0.0833,  0.2017, -0.1188],
                      ...,
                      [ 0.0151, -0.0386, -0.2914,  ...,  0.0032, -0.2766,  0.1310],
                      [ 0.3691, -0.1162, -0.1714,  ..., -0.2434,  0.1432, -0.2698],
                      [ 0.1612, -0.1337, -0.1370,  ...,  0.2311,  0.2749, -0.4003]],
                     device='cuda:0')),
             ('fc1.bias',
              tensor([ 0.2542, -0.0957,  0.2841, -0.0780,  0.2723,  0.2970, -0.2766, -0.0182,
                      -0.0265,  0.1849,  0.3073,  0.1766,  0.1742, -0.0394, -0.1471, -0.0529,
                      -0.0599, -0.0683, -0.1689,  0.1190, -0.2498, -0.2684,  0.1032, -0.2512,
                       0.2351,  0.2916, -0.3132,  0.0271, -0.2055, -0.0421, -0.0499,  0.1926

In [20]:
model_path = "CandidateRankingModel.pth"
torch.save(model, model_path)
print(f"Full model saved at: {model_path}")

Full model saved at: CandidateRankingModel.pth
