In [1]:
import torch
import random
import numpy as np
import pandas as pd
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, TensorDataset
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, roc_auc_score
# from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score, mean_absolute_percentage_error, explained_variance_score

In [2]:
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
device

device(type='cuda')

In [3]:
df=pd.read_csv('/kaggle/input/hirings-dataset/hiring.csv')
df.shape

(1500, 11)

In [4]:
df.head(10)

Unnamed: 0,Age,Gender,EducationLevel,ExperienceYears,PreviousCompanies,DistanceFromCompany,InterviewScore,SkillScore,PersonalityScore,RecruitmentStrategy,HiringDecision
0,26,1,2,0,3,27,48,78,91,1,1
1,39,1,4,12,3,26,35,68,80,2,1
2,48,0,2,3,2,10,20,67,13,2,0
3,34,1,2,5,2,6,36,27,70,3,0
4,30,0,1,6,1,43,23,52,85,2,0
5,27,0,3,14,4,32,54,50,50,1,1
6,48,0,2,6,1,17,24,52,64,3,0
7,40,0,4,13,3,11,6,3,92,3,0
8,26,1,3,6,5,29,80,78,51,1,1
9,45,1,2,2,5,30,92,16,94,3,0


In [5]:
X = df.drop(['HiringDecision'], axis=1).values
y = df['HiringDecision'].values

In [6]:
# Standardize the features
scaler = StandardScaler()
X = scaler.fit_transform(X)

In [7]:
# Split the data into training and test sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

In [8]:
# Convert to PyTorch tensors
batch_size=32
X_train_tensor = torch.tensor(X_train, dtype=torch.float32)
y_train_tensor = torch.tensor(y_train, dtype=torch.float32).view(-1, 1)
X_test_tensor = torch.tensor(X_test, dtype=torch.float32)
y_test_tensor = torch.tensor(y_test, dtype=torch.float32).view(-1, 1)

In [9]:
#Dataset and Dataloader
train_dataset = torch.utils.data.TensorDataset(X_train_tensor, y_train_tensor)
train_loader = torch.utils.data.DataLoader(train_dataset, batch_size=batch_size, shuffle=True)

test_dataset = torch.utils.data.TensorDataset(X_test_tensor, y_test_tensor)
test_loader = torch.utils.data.DataLoader(test_dataset, batch_size=batch_size, shuffle=False)

In [10]:
#Model Definition
class CandidateRankingModel(nn.Module):
    def __init__(self, input_dim):
        super(CandidateRankingModel, self).__init__()
        self.fc1 = nn.Linear(input_dim, 128)
        self.bn1 = nn.BatchNorm1d(128)
        self.fc2 = nn.Linear(128, 64)
        self.bn2 = nn.BatchNorm1d(64)
        self.fc3 = nn.Linear(64, 1)  # Binary classification output
        self.dropout = nn.Dropout(0.3)
        self.relu = nn.ReLU()

    def forward(self, x):
        x = self.relu(self.bn1(self.fc1(x)))
        x = self.dropout(x)
        x = self.relu(self.bn2(self.fc2(x)))
        x = self.fc3(x)
        return x

In [11]:
# Instantiate the model
input_dim = X_train.shape[1]
model = CandidateRankingModel(input_dim).to(device)

In [12]:
model

CandidateRankingModel(
  (fc1): Linear(in_features=10, out_features=128, bias=True)
  (bn1): BatchNorm1d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (fc2): Linear(in_features=128, out_features=64, bias=True)
  (bn2): BatchNorm1d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (fc3): Linear(in_features=64, out_features=1, bias=True)
  (dropout): Dropout(p=0.3, inplace=False)
  (relu): ReLU()
)

In [13]:
# Loss function and optimizer
torch.manual_seed(42)

criterion = nn.BCEWithLogitsLoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)

num_epochs = 200
batch_size = 62

In [14]:
# Parameters
log_interval = 5
patience = 5    
min_delta = 1e-4

# Tracking variables
best_loss = float('inf')
early_stop_counter = 0

# Training loop
for epoch in range(num_epochs):
    model.train()
    running_loss = 0.0

    for batch in train_loader:
        inputs, targets = batch
        inputs, targets = inputs.to(device), targets.to(device)

        optimizer.zero_grad()

        outputs = model(inputs)
        loss = criterion(outputs, targets)
        loss.backward()
        optimizer.step()
        running_loss += loss.item()

    # Calculate average loss over the epoch
    avg_loss = running_loss / len(train_loader)

    # Print metrics at specified interval
    if (epoch + 1) % log_interval == 0:
        print(f"Epoch [{epoch + 1}/{num_epochs}], Loss: {avg_loss:.4f}")

    # Early stopping logic
    if avg_loss < best_loss - min_delta:
        best_loss = avg_loss
        early_stop_counter = 0
        print(f"Epoch----- {epoch + 1}   Loss improved :----- {best_loss:.4f}")
    else:
        early_stop_counter += 1

    # Stop training if no improvement is seen for 'patience' epochs
    if early_stop_counter >= patience:
        print(f" Early stopping triggered at epoch {epoch + 1}")
        print(f" Best Loss: {best_loss:.4f} achieved at epoch {epoch + 1 - patience}")
        break

Epoch----- 1   Loss improved :----- 0.5541
Epoch----- 2   Loss improved :----- 0.4231
Epoch----- 3   Loss improved :----- 0.4028
Epoch----- 4   Loss improved :----- 0.3872
Epoch [5/200], Loss: 0.3595
Epoch----- 5   Loss improved :----- 0.3595
Epoch----- 6   Loss improved :----- 0.3398
Epoch----- 7   Loss improved :----- 0.3355
Epoch----- 8   Loss improved :----- 0.3281
Epoch [10/200], Loss: 0.3418
Epoch----- 12   Loss improved :----- 0.3247
Epoch [15/200], Loss: 0.3232
Epoch----- 15   Loss improved :----- 0.3232
Epoch----- 16   Loss improved :----- 0.3114
Epoch----- 19   Loss improved :----- 0.3069
Epoch [20/200], Loss: 0.3141
Epoch----- 21   Loss improved :----- 0.3039
Epoch----- 23   Loss improved :----- 0.3032
Epoch----- 24   Loss improved :----- 0.2984
Epoch [25/200], Loss: 0.3178
Epoch----- 27   Loss improved :----- 0.2855
Epoch----- 28   Loss improved :----- 0.2773
Epoch [30/200], Loss: 0.2898
Epoch----- 31   Loss improved :----- 0.2721
Epoch [35/200], Loss: 0.2966
 Early stoppin

In [15]:
# Model Evaluation
def evaluate(model, loader, device):
    model.eval()
    all_preds = []
    all_labels = []

    with torch.no_grad():
        for batch in loader:
            inputs, targets = batch
            inputs, targets = inputs.to(device), targets.to(device)

            outputs = model(inputs)
            probs = torch.sigmoid(outputs)  # Convert logits to probabilities
            preds = (probs >= 0.5).float()

            all_preds.extend(preds.cpu().numpy())
            all_labels.extend(targets.cpu().numpy())

    # Metrics calculation
    accuracy = accuracy_score(all_labels, all_preds)
    precision = precision_score(all_labels, all_preds)
    recall = recall_score(all_labels, all_preds)
    f1 = f1_score(all_labels, all_preds)
    auc = roc_auc_score(all_labels, all_preds)

    return {
        'Accuracy': f"{accuracy:.4f}",
        'Precision': f"{precision:.4f}",
        'Recall': f"{recall:.4f}",
        'F1-Score': f"{f1:.4f}",
        'AUC-ROC': f"{auc:.4f}"
    }
# Evaluate the model
metrics = evaluate(model, test_loader, device)
print("\nModel Evaluation Metrics:")
for metric, value in metrics.items():
    print(f"{metric}: {value}")


Model Evaluation Metrics:
Accuracy: 0.9067
Precision: 0.8519
Recall: 0.8118
F1-Score: 0.8313
AUC-ROC: 0.8780


In [16]:
#Candidate Prediction and Ranking   , Predict on the entire dataset
model.eval()
with torch.no_grad():
    all_preds = torch.sigmoid(model(torch.tensor(X, dtype=torch.float32).to(device))).cpu().numpy().flatten()

# predicted scores to the dataframe
df['predicted_score'] = all_preds

# Rank candidates
top_n = 1500
top_candidates = df.sort_values('predicted_score', ascending=False).head(top_n)
print("\nTop N Candidates Based on Predicted Scores:")
print(top_candidates[['Age', 'Gender', 'ExperienceYears', 'InterviewScore', 'predicted_score']])


Top N Candidates Based on Predicted Scores:
      Age  Gender  ExperienceYears  InterviewScore  predicted_score
1094   39       0               15              58         0.999232
1088   49       1               14              91         0.998937
253    28       1                9              37         0.997643
516    45       0               10              49         0.997436
682    47       1               15               8         0.997426
...   ...     ...              ...             ...              ...
466    47       1                2               9         0.002255
576    34       1                1              29         0.002109
1262   31       1                2              47         0.001337
384    20       1                4              60         0.001286
579    27       1                0              41         0.001152

[1500 rows x 5 columns]


In [17]:
top_candidates.head(20)

Unnamed: 0,Age,Gender,EducationLevel,ExperienceYears,PreviousCompanies,DistanceFromCompany,InterviewScore,SkillScore,PersonalityScore,RecruitmentStrategy,HiringDecision,predicted_score
1094,39,0,4,15,1,5,58,99,86,1,1,0.999232
1088,49,1,4,14,1,17,91,71,80,1,1,0.998937
253,28,1,4,9,4,37,37,91,72,1,1,0.997643
516,45,0,4,10,4,29,49,69,96,1,1,0.997436
682,47,1,4,15,1,49,8,100,95,1,1,0.997426
726,26,0,3,15,3,30,76,72,87,1,1,0.997172
1095,44,0,4,8,3,34,69,65,98,1,1,0.997153
991,20,1,4,5,5,14,69,61,84,1,1,0.996849
447,34,0,4,15,5,16,35,96,43,1,1,0.996649
468,24,0,4,9,5,16,81,82,9,1,1,0.9965


In [18]:
top_candidates.tail(20)

Unnamed: 0,Age,Gender,EducationLevel,ExperienceYears,PreviousCompanies,DistanceFromCompany,InterviewScore,SkillScore,PersonalityScore,RecruitmentStrategy,HiringDecision,predicted_score
883,48,1,1,12,2,2,19,2,88,3,0,0.003553
1432,42,1,1,10,1,38,12,8,16,2,1,0.003374
1144,22,1,1,12,1,7,7,26,53,2,0,0.003321
1375,43,1,2,13,2,4,30,10,20,3,0,0.003314
1118,21,1,1,6,1,17,60,38,63,2,0,0.003271
403,33,1,1,11,2,8,32,15,59,2,0,0.003268
1152,38,1,3,4,1,6,48,4,21,3,0,0.003208
170,31,1,2,1,2,14,11,23,44,2,0,0.003152
1114,47,1,2,15,1,7,28,17,28,3,0,0.003113
908,40,1,1,1,1,46,24,7,86,2,0,0.00295


In [19]:
top_candidates.info()

<class 'pandas.core.frame.DataFrame'>
Index: 1500 entries, 1094 to 579
Data columns (total 12 columns):
 #   Column               Non-Null Count  Dtype  
---  ------               --------------  -----  
 0   Age                  1500 non-null   int64  
 1   Gender               1500 non-null   int64  
 2   EducationLevel       1500 non-null   int64  
 3   ExperienceYears      1500 non-null   int64  
 4   PreviousCompanies    1500 non-null   int64  
 5   DistanceFromCompany  1500 non-null   int64  
 6   InterviewScore       1500 non-null   int64  
 7   SkillScore           1500 non-null   int64  
 8   PersonalityScore     1500 non-null   int64  
 9   RecruitmentStrategy  1500 non-null   int64  
 10  HiringDecision       1500 non-null   int64  
 11  predicted_score      1500 non-null   float32
dtypes: float32(1), int64(11)
memory usage: 146.5 KB


In [20]:
top_candidates['HiringDecision'].value_counts()

HiringDecision
0    1035
1     465
Name: count, dtype: int64

In [21]:
model.state_dict()

OrderedDict([('fc1.weight',
              tensor([[-0.0660,  0.1288,  0.1607,  ...,  0.1085, -0.0671, -0.3541],
                      [-0.0260, -0.1355, -0.2258,  ..., -0.0908, -0.0727,  0.2164],
                      [ 0.1943,  0.1328, -0.0643,  ...,  0.0850, -0.0778, -0.3931],
                      ...,
                      [ 0.1713,  0.2953,  0.0475,  ...,  0.2966,  0.3058, -0.1710],
                      [-0.0101, -0.0249, -0.2005,  ...,  0.4228, -0.1942, -0.2877],
                      [ 0.1144,  0.0412, -0.2442,  ..., -0.2331, -0.2383, -0.3882]],
                     device='cuda:0')),
             ('fc1.bias',
              tensor([ 0.0137, -0.1242, -0.1600, -0.2691,  0.2687, -0.1815, -0.0517,  0.3150,
                      -0.1171, -0.2245,  0.1647, -0.1512,  0.2686, -0.0163, -0.2489,  0.2040,
                       0.0015, -0.0779, -0.2547,  0.0116,  0.0905, -0.1486,  0.1269,  0.0272,
                       0.0089,  0.0213, -0.1652, -0.2252, -0.1641, -0.0693, -0.0478,  0.1411

In [22]:
model_path = "CandidateRankingModel.pth"
torch.save({'epoch': epoch,
            'model_state_dict': model.state_dict(),
            'optimizer_state_dict': optimizer.state_dict(),
            'train_loss': best_loss,
           },model_path)

print(f"Model saved at {model_path}")

Model saved at CandidateRankingModel.pth
