In [1]:
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
from sklearn.preprocessing import StandardScaler,LabelEncoder, MinMaxScaler


In [2]:
df=pd.read_csv('/kaggle/input/hirings-dataset/hiring.csv')
df.shape

(1500, 11)

In [3]:
df.head(20)

Unnamed: 0,Age,Gender,EducationLevel,ExperienceYears,PreviousCompanies,DistanceFromCompany,InterviewScore,SkillScore,PersonalityScore,RecruitmentStrategy,HiringDecision
0,26,1,2,0,3,27,48,78,91,1,1
1,39,1,4,12,3,26,35,68,80,2,1
2,48,0,2,3,2,10,20,67,13,2,0
3,34,1,2,5,2,6,36,27,70,3,0
4,30,0,1,6,1,43,23,52,85,2,0
5,27,0,3,14,4,32,54,50,50,1,1
6,48,0,2,6,1,17,24,52,64,3,0
7,40,0,4,13,3,11,6,3,92,3,0
8,26,1,3,6,5,29,80,78,51,1,1
9,45,1,2,2,5,30,92,16,94,3,0


In [4]:
df[df['HiringDecision']==0].head(20)

Unnamed: 0,Age,Gender,EducationLevel,ExperienceYears,PreviousCompanies,DistanceFromCompany,InterviewScore,SkillScore,PersonalityScore,RecruitmentStrategy,HiringDecision
2,48,0,2,3,2,10,20,67,13,2,0
3,34,1,2,5,2,6,36,27,70,3,0
4,30,0,1,6,1,43,23,52,85,2,0
6,48,0,2,6,1,17,24,52,64,3,0
7,40,0,4,13,3,11,6,3,92,3,0
9,45,1,2,2,5,30,92,16,94,3,0
10,38,1,1,15,2,11,93,66,29,2,0
11,42,0,3,5,5,44,70,73,56,2,0
13,30,0,1,7,5,19,70,55,81,2,0
14,43,1,2,3,3,35,23,22,98,1,0


In [5]:
df[df['HiringDecision']==1].head(20)

Unnamed: 0,Age,Gender,EducationLevel,ExperienceYears,PreviousCompanies,DistanceFromCompany,InterviewScore,SkillScore,PersonalityScore,RecruitmentStrategy,HiringDecision
0,26,1,2,0,3,27,48,78,91,1,1
1,39,1,4,12,3,26,35,68,80,2,1
5,27,0,3,14,4,32,54,50,50,1,1
8,26,1,3,6,5,29,80,78,51,1,1
12,30,0,3,12,5,5,96,46,78,3,1
21,40,1,2,8,1,15,99,58,98,2,1
25,49,1,2,12,4,36,22,36,76,1,1
32,45,1,4,11,5,6,11,92,30,1,1
33,41,0,3,13,4,28,33,89,91,3,1
37,36,1,3,9,5,42,92,32,63,2,1


In [6]:
# Average of the three scores
df['Average'] = df[['PersonalityScore', 'SkillScore', 'InterviewScore']].mean(axis=1)

In [7]:
# Weighted score combining different features
df['WeightedScore'] = (-0.05 * df['DistanceFromCompany'] +
                        0.25 * df['SkillScore'] +
                        0.35 * df['InterviewScore'] +
                        0.15 * df['PersonalityScore'] +
                        0.15 * df['ExperienceYears'] +
                        0.15 * df['PreviousCompanies'])

In [8]:
df.head(50)

Unnamed: 0,Age,Gender,EducationLevel,ExperienceYears,PreviousCompanies,DistanceFromCompany,InterviewScore,SkillScore,PersonalityScore,RecruitmentStrategy,HiringDecision,Average,WeightedScore
0,26,1,2,0,3,27,48,78,91,1,1,72.333333,49.05
1,39,1,4,12,3,26,35,68,80,2,1,61.0,42.2
2,48,0,2,3,2,10,20,67,13,2,0,33.333333,25.95
3,34,1,2,5,2,6,36,27,70,3,0,44.333333,30.6
4,30,0,1,6,1,43,23,52,85,2,0,53.333333,32.7
5,27,0,3,14,4,32,54,50,50,1,1,51.333333,40.0
6,48,0,2,6,1,17,24,52,64,3,0,46.666667,31.2
7,40,0,4,13,3,11,6,3,92,3,0,33.666667,18.5
8,26,1,3,6,5,29,80,78,51,1,1,69.666667,55.35
9,45,1,2,2,5,30,92,16,94,3,0,67.333333,49.85


In [9]:
df[df['HiringDecision']==0].head(10)

Unnamed: 0,Age,Gender,EducationLevel,ExperienceYears,PreviousCompanies,DistanceFromCompany,InterviewScore,SkillScore,PersonalityScore,RecruitmentStrategy,HiringDecision,Average,WeightedScore
2,48,0,2,3,2,10,20,67,13,2,0,33.333333,25.95
3,34,1,2,5,2,6,36,27,70,3,0,44.333333,30.6
4,30,0,1,6,1,43,23,52,85,2,0,53.333333,32.7
6,48,0,2,6,1,17,24,52,64,3,0,46.666667,31.2
7,40,0,4,13,3,11,6,3,92,3,0,33.666667,18.5
9,45,1,2,2,5,30,92,16,94,3,0,67.333333,49.85
10,38,1,1,15,2,11,93,66,29,2,0,62.666667,55.4
11,42,0,3,5,5,44,70,73,56,2,0,66.333333,50.45
13,30,0,1,7,5,19,70,55,81,2,0,68.666667,51.25
14,43,1,2,3,3,35,23,22,98,1,0,47.666667,27.4


In [10]:
df[df['HiringDecision']==1].head(10)

Unnamed: 0,Age,Gender,EducationLevel,ExperienceYears,PreviousCompanies,DistanceFromCompany,InterviewScore,SkillScore,PersonalityScore,RecruitmentStrategy,HiringDecision,Average,WeightedScore
0,26,1,2,0,3,27,48,78,91,1,1,72.333333,49.05
1,39,1,4,12,3,26,35,68,80,2,1,61.0,42.2
5,27,0,3,14,4,32,54,50,50,1,1,51.333333,40.0
8,26,1,3,6,5,29,80,78,51,1,1,69.666667,55.35
12,30,0,3,12,5,5,96,46,78,3,1,73.333333,59.1
21,40,1,2,8,1,15,99,58,98,2,1,85.0,64.45
25,49,1,2,12,4,36,22,36,76,1,1,44.666667,28.7
32,45,1,4,11,5,6,11,92,30,1,1,44.333333,33.45
33,41,0,3,13,4,28,33,89,91,3,1,71.0,48.6
37,36,1,3,9,5,42,92,32,63,2,1,62.333333,49.65


In [11]:
df.drop(columns=['Average'], inplace=True)

In [12]:
#scaling
numerical_features = ['Age', 'ExperienceYears', 'PreviousCompanies', 'DistanceFromCompany', 'InterviewScore', 'SkillScore', 'PersonalityScore']
scaler = StandardScaler()
df[numerical_features] = scaler.fit_transform(df[numerical_features])

In [13]:
df.head(10)

Unnamed: 0,Age,Gender,EducationLevel,ExperienceYears,PreviousCompanies,DistanceFromCompany,InterviewScore,SkillScore,PersonalityScore,RecruitmentStrategy,HiringDecision,WeightedScore
0,-0.989083,1,2,-1.658237,-0.001418,0.103129,-0.089598,0.916174,1.418126,1,1,49.05
1,0.416376,1,4,0.928044,-0.001418,0.034438,-0.543879,0.575386,1.043255,2,1,42.2
2,1.389387,0,2,-1.011667,-0.710538,-1.064632,-1.068049,0.541307,-1.240051,2,0,25.95
3,-0.124185,1,2,-0.58062,-0.710538,-1.339399,-0.508934,-0.821844,0.702463,3,0,30.6
4,-0.556634,0,1,-0.365097,-1.419657,1.202199,-0.963215,0.030126,1.213651,2,0,32.7
5,-0.880971,0,3,1.359091,0.707701,0.446589,0.12007,-0.038032,0.020879,1,1,40.0
6,1.389387,0,2,-0.365097,-1.419657,-0.583789,-0.92827,0.030126,0.497988,3,0,31.2
7,0.524489,0,4,1.143567,-0.001418,-0.99594,-1.557274,-1.639734,1.452205,3,0,18.5
8,-0.989083,1,3,-0.365097,1.416821,0.240513,1.028631,0.916174,0.054958,1,1,55.35
9,1.06505,1,2,-1.22719,1.416821,0.309205,1.447967,-1.19671,1.520364,3,0,49.85


In [14]:
# One-Hot Encode the column
df = pd.get_dummies(df, columns=['EducationLevel'], drop_first=True)
df = pd.get_dummies(df, columns=['RecruitmentStrategy'], drop_first=False)

In [15]:
boolean_columns = ['EducationLevel_2', 'EducationLevel_3', 'EducationLevel_4', 'RecruitmentStrategy_1', 'RecruitmentStrategy_2', 'RecruitmentStrategy_3']

# Convert the boolean columns to integers
df[boolean_columns] = df[boolean_columns].astype(int)

In [16]:
df.head(10)

Unnamed: 0,Age,Gender,ExperienceYears,PreviousCompanies,DistanceFromCompany,InterviewScore,SkillScore,PersonalityScore,HiringDecision,WeightedScore,EducationLevel_2,EducationLevel_3,EducationLevel_4,RecruitmentStrategy_1,RecruitmentStrategy_2,RecruitmentStrategy_3
0,-0.989083,1,-1.658237,-0.001418,0.103129,-0.089598,0.916174,1.418126,1,49.05,1,0,0,1,0,0
1,0.416376,1,0.928044,-0.001418,0.034438,-0.543879,0.575386,1.043255,1,42.2,0,0,1,0,1,0
2,1.389387,0,-1.011667,-0.710538,-1.064632,-1.068049,0.541307,-1.240051,0,25.95,1,0,0,0,1,0
3,-0.124185,1,-0.58062,-0.710538,-1.339399,-0.508934,-0.821844,0.702463,0,30.6,1,0,0,0,0,1
4,-0.556634,0,-0.365097,-1.419657,1.202199,-0.963215,0.030126,1.213651,0,32.7,0,0,0,0,1,0
5,-0.880971,0,1.359091,0.707701,0.446589,0.12007,-0.038032,0.020879,1,40.0,0,1,0,1,0,0
6,1.389387,0,-0.365097,-1.419657,-0.583789,-0.92827,0.030126,0.497988,0,31.2,1,0,0,0,0,1
7,0.524489,0,1.143567,-0.001418,-0.99594,-1.557274,-1.639734,1.452205,0,18.5,0,0,1,0,0,1
8,-0.989083,1,-0.365097,1.416821,0.240513,1.028631,0.916174,0.054958,1,55.35,0,1,0,1,0,0
9,1.06505,1,-1.22719,1.416821,0.309205,1.447967,-1.19671,1.520364,0,49.85,1,0,0,0,0,1


In [17]:
df.shape

(1500, 16)

In [18]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1500 entries, 0 to 1499
Data columns (total 16 columns):
 #   Column                 Non-Null Count  Dtype  
---  ------                 --------------  -----  
 0   Age                    1500 non-null   float64
 1   Gender                 1500 non-null   int64  
 2   ExperienceYears        1500 non-null   float64
 3   PreviousCompanies      1500 non-null   float64
 4   DistanceFromCompany    1500 non-null   float64
 5   InterviewScore         1500 non-null   float64
 6   SkillScore             1500 non-null   float64
 7   PersonalityScore       1500 non-null   float64
 8   HiringDecision         1500 non-null   int64  
 9   WeightedScore          1500 non-null   float64
 10  EducationLevel_2       1500 non-null   int64  
 11  EducationLevel_3       1500 non-null   int64  
 12  EducationLevel_4       1500 non-null   int64  
 13  RecruitmentStrategy_1  1500 non-null   int64  
 14  RecruitmentStrategy_2  1500 non-null   int64  
 15  Recr

In [19]:
df.head(10)

Unnamed: 0,Age,Gender,ExperienceYears,PreviousCompanies,DistanceFromCompany,InterviewScore,SkillScore,PersonalityScore,HiringDecision,WeightedScore,EducationLevel_2,EducationLevel_3,EducationLevel_4,RecruitmentStrategy_1,RecruitmentStrategy_2,RecruitmentStrategy_3
0,-0.989083,1,-1.658237,-0.001418,0.103129,-0.089598,0.916174,1.418126,1,49.05,1,0,0,1,0,0
1,0.416376,1,0.928044,-0.001418,0.034438,-0.543879,0.575386,1.043255,1,42.2,0,0,1,0,1,0
2,1.389387,0,-1.011667,-0.710538,-1.064632,-1.068049,0.541307,-1.240051,0,25.95,1,0,0,0,1,0
3,-0.124185,1,-0.58062,-0.710538,-1.339399,-0.508934,-0.821844,0.702463,0,30.6,1,0,0,0,0,1
4,-0.556634,0,-0.365097,-1.419657,1.202199,-0.963215,0.030126,1.213651,0,32.7,0,0,0,0,1,0
5,-0.880971,0,1.359091,0.707701,0.446589,0.12007,-0.038032,0.020879,1,40.0,0,1,0,1,0,0
6,1.389387,0,-0.365097,-1.419657,-0.583789,-0.92827,0.030126,0.497988,0,31.2,1,0,0,0,0,1
7,0.524489,0,1.143567,-0.001418,-0.99594,-1.557274,-1.639734,1.452205,0,18.5,0,0,1,0,0,1
8,-0.989083,1,-0.365097,1.416821,0.240513,1.028631,0.916174,0.054958,1,55.35,0,1,0,1,0,0
9,1.06505,1,-1.22719,1.416821,0.309205,1.447967,-1.19671,1.520364,0,49.85,1,0,0,0,0,1


In [20]:
# from sklearn.metrics.pairwise import cosine_similarity
# features = df.drop(['HiringDecision'], axis=1).values
# similarity_matrix = cosine_similarity(features)
# print(similarity_matrix)

In [21]:
# similarity_matrix

In [22]:
# similarity_matrix[1]

# Recommend top N similar candidates based on cosine similarity
# Args:
# N: Number of top candidates to recommend.

In [23]:
# def recommend_candidates(candidate_index, N=5):
#     # Get similarity scores for the target candidate
#     scores = list(enumerate(similarity_matrix[candidate_index]))
    
#     # Sort candidates by similarity score (descending order)
#     scores = sorted(scores, key=lambda x: x[1], reverse=True)
    
#     # Get the top N candidates
#     top_candidates = scores[1:N+1]
    
#     print(f"Top {N} candidates similar to candidate {candidate_index}:\n")
#     for i, (index, score) in enumerate(top_candidates):
#         print(f"{i+1}. Candidate {index} (Similarity Score: {score:.3f})")
# recommend_candidates(candidate_index=0, N=5)

In [24]:
# recommend_candidates(candidate_index=0, N=20)

In [25]:
# df.drop('weighted_score', axis=1, inplace=True)

# Working On Deep Learning Model

In [26]:
import torch
import numpy as np
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, TensorDataset
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score, mean_absolute_percentage_error, explained_variance_score

In [27]:
X = df.drop(['WeightedScore'], axis=1).values
y = df['WeightedScore'].values

In [28]:
# Standardize the features
scaler = StandardScaler()
X = scaler.fit_transform(X)

In [29]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

In [30]:
#tensors
X_train_tensor = torch.tensor(X_train, dtype=torch.float32)
y_train_tensor = torch.tensor(y_train, dtype=torch.float32).view(-1, 1)
X_test_tensor = torch.tensor(X_test, dtype=torch.float32)
y_test_tensor = torch.tensor(y_test, dtype=torch.float32).view(-1, 1)

In [31]:
# Use DataLoader for efficient batching
batch_size = 32
train_dataset = TensorDataset(X_train_tensor, y_train_tensor)
test_dataset = TensorDataset(X_test_tensor, y_test_tensor)

train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False)

In [32]:
class CandidateRankingModel(nn.Module):
    def __init__(self, input_dim):
        super(CandidateRankingModel, self).__init__()
        self.fc1 = nn.Linear(input_dim, 128)
        self.bn1 = nn.BatchNorm1d(128)
        self.fc2 = nn.Linear(128, 64)
        self.bn2 = nn.BatchNorm1d(64)
        self.fc3 = nn.Linear(64, 1)
        self.leaky_relu = nn.LeakyReLU(0.01)

        # Xavier Initialization
        nn.init.xavier_normal_(self.fc1.weight)
        nn.init.xavier_normal_(self.fc2.weight)
        nn.init.xavier_normal_(self.fc3.weight)

    def forward(self, x):
        x = self.leaky_relu(self.bn1(self.fc1(x)))
        x = self.leaky_relu(self.bn2(self.fc2(x)))
        x = self.fc3(x)
        return x

In [33]:
# GPU
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(f"Using device: {device}")

Using device: cuda


In [34]:
#Intsnsiate
input_dim = X_train.shape[1]
model = CandidateRankingModel(input_dim).to(device)

In [35]:
# Loss, Optimizer, and Scheduler
criterion = nn.MSELoss()
optimizer = optim.Adam(model.parameters(), lr=0.01)
scheduler = optim.lr_scheduler.StepLR(optimizer, step_size=20, gamma=0.5)

In [36]:
# Early Stopping
class EarlyStopping:
    def __init__(self, patience=10, delta=0.001):
        self.patience = patience
        self.delta = delta
        self.best_score = None
        self.epochs_no_improve = 0

    def __call__(self, val_loss):
        if self.best_score is None or val_loss < self.best_score - self.delta:
            self.best_score = val_loss
            self.epochs_no_improve = 0
        else:
            self.epochs_no_improve += 1

        return self.epochs_no_improve >= self.patience

# Initialize early stopping
early_stopping = EarlyStopping(patience=10, delta=0.001)

In [37]:
# Model Evaluation
def evaluate(model, loader, criterion, device):
    model.eval()
    mse, mae, r2 = 0.0, 0.0, 0.0
    total_samples = 0

    with torch.no_grad():
        for batch in loader:
            inputs, targets = batch
            inputs, targets = inputs.to(device), targets.to(device)

            outputs = model(inputs)

            mse += mean_squared_error(targets.cpu(), outputs.cpu())
            mae += mean_absolute_error(targets.cpu(), outputs.cpu())
            r2 += r2_score(targets.cpu(), outputs.cpu())
            
            total_samples += 1

    # Average the metrics
    mse /= total_samples
    mae /= total_samples
    r2 /= total_samples

    # Return metrics rounded to 4 decimal places
    return {
        'MSE': f"{mse:.4f}",
        'MAE': f"{mae:.4f}",
        'R2': f"{r2:.4f}"
    }

In [38]:
# Training Loop
num_epochs = 100
for epoch in range(num_epochs):
    model.train()
    total_loss = 0
    
    for batch in train_loader:
        inputs, targets = batch
        inputs, targets = inputs.to(device), targets.to(device)

        optimizer.zero_grad()

        outputs = model(inputs)
        loss = criterion(outputs, targets)

        loss.backward()
        optimizer.step()

        total_loss += loss.item()

    # Learning rate adjustment
    scheduler.step()

    # Model evaluation every 10 epochs
    if (epoch + 1) % 10 == 0:
        metrics = evaluate(model, test_loader, criterion, device)
        print(f"Epoch [{epoch+1}/{num_epochs}]")
        print(f"Training Loss: {total_loss / len(train_loader):.4f}")
        print(f"Metrics: {metrics}")

    # Early stopping check
    val_loss = total_loss / len(train_loader)
    if early_stopping(val_loss):
        print(f"Early stopping at epoch {epoch+1}")
        break

Epoch [10/100]
Training Loss: 8.6928
Metrics: {'MSE': '3.9274', 'MAE': '1.6485', 'R2': '0.9742'}
Epoch [20/100]
Training Loss: 8.9367
Metrics: {'MSE': '2.7799', 'MAE': '1.3921', 'R2': '0.9819'}
Epoch [30/100]
Training Loss: 5.8489
Metrics: {'MSE': '1.2728', 'MAE': '0.8645', 'R2': '0.9918'}
Epoch [40/100]
Training Loss: 5.5847
Metrics: {'MSE': '0.8185', 'MAE': '0.7426', 'R2': '0.9947'}
Epoch [50/100]
Training Loss: 4.8718
Metrics: {'MSE': '0.4533', 'MAE': '0.5168', 'R2': '0.9971'}
Early stopping at epoch 54


In [39]:
# Final Model Evaluation
final_metrics = evaluate(model, test_loader, criterion, device)

print("\nFinal Model Evaluation Metrics:")
print(f"  MSE: {final_metrics['MSE']}")
print(f"  MAE: {final_metrics['MAE']}")
print(f"  R2 Score: {final_metrics['R2']}")



Final Model Evaluation Metrics:
  MSE: 1.0996
  MAE: 0.8420
  R2 Score: 0.9930


In [40]:
model

CandidateRankingModel(
  (fc1): Linear(in_features=15, out_features=128, bias=True)
  (bn1): BatchNorm1d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (fc2): Linear(in_features=128, out_features=64, bias=True)
  (bn2): BatchNorm1d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (fc3): Linear(in_features=64, out_features=1, bias=True)
  (leaky_relu): LeakyReLU(negative_slope=0.01)
)

In [41]:
df.drop(['WeightedScore'], axis=1, inplace=True)

In [42]:
import torch
import random
import torch.nn as nn
import torch.optim as optim
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, roc_auc_score

In [43]:
df

Unnamed: 0,Age,Gender,ExperienceYears,PreviousCompanies,DistanceFromCompany,InterviewScore,SkillScore,PersonalityScore,HiringDecision,EducationLevel_2,EducationLevel_3,EducationLevel_4,RecruitmentStrategy_1,RecruitmentStrategy_2,RecruitmentStrategy_3
0,-0.989083,1,-1.658237,-0.001418,0.103129,-0.089598,0.916174,1.418126,1,1,0,0,1,0,0
1,0.416376,1,0.928044,-0.001418,0.034438,-0.543879,0.575386,1.043255,1,0,0,1,0,1,0
2,1.389387,0,-1.011667,-0.710538,-1.064632,-1.068049,0.541307,-1.240051,0,1,0,0,0,1,0
3,-0.124185,1,-0.580620,-0.710538,-1.339399,-0.508934,-0.821844,0.702463,0,1,0,0,0,0,1
4,-0.556634,0,-0.365097,-1.419657,1.202199,-0.963215,0.030126,1.213651,0,0,0,0,0,1,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1495,1.389387,0,-1.011667,0.707701,-1.133324,0.539406,-1.639734,1.043255,1,1,0,0,0,0,1
1496,-0.880971,1,0.496997,-0.001418,-0.721173,-0.264321,1.563671,-1.444526,0,1,0,0,0,1,0
1497,-1.205308,1,-1.442714,-0.710538,-1.476783,-0.683657,1.359198,0.293513,1,0,0,0,1,0,0
1498,1.389387,0,-0.796144,0.707701,0.721356,-1.452440,-0.481056,-0.183596,1,1,0,0,0,1,0


In [44]:
X = df.drop(['HiringDecision'], axis=1).values
y = df['HiringDecision'].values

In [45]:
# Standardize the features
scaler = StandardScaler()
X = scaler.fit_transform(X)

In [46]:
# Split the data into training and test sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

In [47]:
# Convert to PyTorch tensors
X_train_tensor = torch.tensor(X_train, dtype=torch.float32)
y_train_tensor = torch.tensor(y_train, dtype=torch.float32).view(-1, 1)
X_test_tensor = torch.tensor(X_test, dtype=torch.float32)
y_test_tensor = torch.tensor(y_test, dtype=torch.float32).view(-1, 1)

In [48]:
# Dataloader
batch_size=32
train_dataset = torch.utils.data.TensorDataset(X_train_tensor, y_train_tensor)
train_loader = torch.utils.data.DataLoader(train_dataset, batch_size=batch_size, shuffle=True)

test_dataset = torch.utils.data.TensorDataset(X_test_tensor, y_test_tensor)
test_loader = torch.utils.data.DataLoader(test_dataset, batch_size=batch_size, shuffle=False)

In [49]:
#Model Definition
class CandidateRankingModelClassification(nn.Module):
    def __init__(self, input_dim):
        super(CandidateRankingModel, self).__init__()
        self.fc1 = nn.Linear(input_dim, 128)
        self.bn1 = nn.BatchNorm1d(128)
        self.fc2 = nn.Linear(128, 64)
        self.bn2 = nn.BatchNorm1d(64)
        self.fc3 = nn.Linear(64, 1)  # Binary classification output
        self.dropout = nn.Dropout(0.3)
        self.relu = nn.ReLU()

    def forward(self, x):
        x = self.relu(self.bn1(self.fc1(x)))
        x = self.dropout(x)
        x = self.relu(self.bn2(self.fc2(x)))
        x = self.fc3(x)
        return x

In [50]:
# Instantiate the model
input_dim = X_train.shape[1]
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model = CandidateRankingModel(input_dim).to(device)

In [51]:
# Loss function and optimizer
criterion = nn.BCEWithLogitsLoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)

num_epochs = 100
batch_size = 64

In [52]:
# Training Loop with Early Stopping
patience = 10
best_loss = float('inf')
early_stop_counter = 0

for epoch in range(num_epochs):
    model.train()
    running_loss = 0.0

    for batch in train_loader:
        inputs, targets = batch
        inputs, targets = inputs.to(device), targets.to(device)

        optimizer.zero_grad()
        outputs = model(inputs)

        loss = criterion(outputs, targets)
        loss.backward()
        optimizer.step()

        running_loss += loss.item()

    # Print metrics every 10 epochs
    if (epoch + 1) % 10 == 0:
        print(f"Epoch [{epoch + 1}/{num_epochs}], Loss: {running_loss / len(train_loader):.4f}")

    # Early stopping
    if running_loss < best_loss:
        best_loss = running_loss
        early_stop_counter = 0
    else:
        early_stop_counter += 1

    if early_stop_counter >= patience:
        print(f"Early stopping at epoch {epoch + 1}")

Epoch [10/100], Loss: 0.2568
Epoch [20/100], Loss: 0.2017
Epoch [30/100], Loss: 0.1544
Epoch [40/100], Loss: 0.1160
Epoch [50/100], Loss: 0.0915
Epoch [60/100], Loss: 0.0786
Epoch [70/100], Loss: 0.0815
Epoch [80/100], Loss: 0.0633
Epoch [90/100], Loss: 0.0519
Early stopping at epoch 94
Early stopping at epoch 95
Early stopping at epoch 96
Early stopping at epoch 97
Early stopping at epoch 98
Early stopping at epoch 99
Epoch [100/100], Loss: 0.0619
Early stopping at epoch 100


In [53]:
# 4. Model Evaluation
# ===========================
def evaluate(model, loader, device):
    model.eval()
    all_preds = []
    all_labels = []

    with torch.no_grad():
        for batch in loader:
            inputs, targets = batch
            inputs, targets = inputs.to(device), targets.to(device)

            outputs = model(inputs)
            probs = torch.sigmoid(outputs)  # Convert logits to probabilities
            preds = (probs >= 0.5).float()

            all_preds.extend(preds.cpu().numpy())
            all_labels.extend(targets.cpu().numpy())

    # Metrics calculation
    accuracy = accuracy_score(all_labels, all_preds)
    precision = precision_score(all_labels, all_preds)
    recall = recall_score(all_labels, all_preds)
    f1 = f1_score(all_labels, all_preds)
    auc = roc_auc_score(all_labels, all_preds)

    return {
        'Accuracy': f"{accuracy:.4f}",
        'Precision': f"{precision:.4f}",
        'Recall': f"{recall:.4f}",
        'F1-Score': f"{f1:.4f}",
        'AUC-ROC': f"{auc:.4f}"
    }
# Evaluate the model
metrics = evaluate(model, test_loader, device)
print("\nModel Evaluation Metrics:")
for metric, value in metrics.items():
    print(f"{metric}: {value}")



Model Evaluation Metrics:
Accuracy: 0.9100
Precision: 0.8537
Recall: 0.8235
F1-Score: 0.8383
AUC-ROC: 0.8839


In [54]:
#Candidate Prediction and Ranking   , Predict on the entire dataset

model.eval()
with torch.no_grad():
    all_preds = torch.sigmoid(model(torch.tensor(X, dtype=torch.float32).to(device))).cpu().numpy().flatten()

# Add predicted scores to the dataframe
df['predicted_score'] = all_preds

# Rank candidates
top_n = 100
top_candidates = df.sort_values('predicted_score', ascending=False).head(top_n)
print("\nTop N Candidates Based on Predicted Scores:")
print(top_candidates[['Age', 'Gender', 'ExperienceYears', 'InterviewScore', 'predicted_score']])


Top N Candidates Based on Predicted Scores:
           Age  Gender  ExperienceYears  InterviewScore  predicted_score
1088  1.497499       1         1.359091        1.413022         1.000000
682   1.281275       1         1.574614       -1.487385         0.999999
1157 -0.664746       1         0.928044        1.517856         0.999998
889  -0.340410       1         1.574614        1.727524         0.999997
1292  1.605611       1         0.928044        1.273244         0.999994
...        ...     ...              ...             ...              ...
757  -0.448522       1         1.574614        1.133465         0.999465
993  -1.637757       1         0.281474        0.050181         0.999384
1171  1.065050       0         1.143567       -0.858381         0.999367
1430 -1.097195       0        -1.227190        0.539406         0.999363
801  -1.313420       1         1.574614       -0.648713         0.999332

[100 rows x 5 columns]


In [55]:
top_candidates.head(20)

Unnamed: 0,Age,Gender,ExperienceYears,PreviousCompanies,DistanceFromCompany,InterviewScore,SkillScore,PersonalityScore,HiringDecision,EducationLevel_2,EducationLevel_3,EducationLevel_4,RecruitmentStrategy_1,RecruitmentStrategy_2,RecruitmentStrategy_3,predicted_score
1088,1.497499,1,1.359091,-1.419657,-0.583789,1.413022,0.677622,1.043255,1,0,0,1,1,0,0,1.0
682,1.281275,1,1.574614,-1.419657,1.61435,-1.487385,1.665907,1.554443,1,0,0,1,1,0,0,0.999999
1157,-0.664746,1,0.928044,-0.001418,0.652664,1.517856,0.370913,1.520364,1,0,0,1,0,1,0,0.999998
889,-0.34041,1,1.574614,1.416821,0.721356,1.727524,0.848016,1.622601,1,0,0,0,0,1,0,0.999997
1292,1.605611,1,0.928044,1.416821,-1.202016,1.273244,1.495513,-1.27413,1,0,1,0,1,0,0,0.999994
173,1.173162,1,-0.149573,0.707701,-0.99594,1.447967,-1.707892,1.452205,1,0,1,0,1,0,0,0.999994
86,0.200152,1,0.06595,-0.001418,0.996123,1.587746,1.529592,1.145493,1,1,0,0,1,0,0,0.999994
863,1.389387,1,1.359091,-0.710538,0.652664,1.028631,1.631828,1.520364,1,1,0,0,1,0,0,0.999993
1137,-0.016073,1,1.359091,1.416821,-0.99594,0.329738,1.154725,-1.546764,1,0,1,0,1,0,0,0.999993
1279,0.524489,0,-0.58062,-0.001418,-0.102946,1.727524,1.29104,-1.444526,1,0,0,1,1,0,0,0.999992


In [56]:
import pandas as pd

# Sample data (use this if you're manually adding the data)
data = {
    "Age": [-0.989083, 0.416376, 1.389387, -0.124185, -0.556634],
    "Gender": [1, 1, 0, 1, 0],
    "ExperienceYears": [-1.658237, 0.928044, -1.011667, -0.580620, -0.365097],
    "PreviousCompanies": [-0.001418, -0.001418, -0.710538, -0.710538, -1.419657],
    "DistanceFromCompany": [0.103129, 0.034438, -1.064632, -1.339399, 1.202199],
    "InterviewScore": [-0.089598, -0.543879, -1.068049, -0.508934, -0.963215],
    "SkillScore": [0.916174, 0.575386, 0.541307, -0.821844, 0.030126],
    "PersonalityScore": [1.418126, 1.043255, -1.240051, 0.702463, 1.213651],
    "HiringDecision": [1, 1, 1, 1, 1],
    "EducationLevel_2": [1, 0, 1, 1, 0],
    "EducationLevel_3": [0, 0, 0, 0, 0],
    "EducationLevel_4": [0, 1, 0, 0, 0],
    "RecruitmentStrategy_1": [1, 0, 0, 0, 0],
    "RecruitmentStrategy_2": [0, 1, 1, 0, 1],
    "RecruitmentStrategy_3": [0, 0, 0, 1, 0]
}

new_df = pd.DataFrame(data)

# Display the DataFrame
print("\n Verify DataFrame Preview:")
new_df.head()


 Verify DataFrame Preview:


Unnamed: 0,Age,Gender,ExperienceYears,PreviousCompanies,DistanceFromCompany,InterviewScore,SkillScore,PersonalityScore,HiringDecision,EducationLevel_2,EducationLevel_3,EducationLevel_4,RecruitmentStrategy_1,RecruitmentStrategy_2,RecruitmentStrategy_3
0,-0.989083,1,-1.658237,-0.001418,0.103129,-0.089598,0.916174,1.418126,1,1,0,0,1,0,0
1,0.416376,1,0.928044,-0.001418,0.034438,-0.543879,0.575386,1.043255,1,0,0,1,0,1,0
2,1.389387,0,-1.011667,-0.710538,-1.064632,-1.068049,0.541307,-1.240051,1,1,0,0,0,1,0
3,-0.124185,1,-0.58062,-0.710538,-1.339399,-0.508934,-0.821844,0.702463,1,1,0,0,0,0,1
4,-0.556634,0,-0.365097,-1.419657,1.202199,-0.963215,0.030126,1.213651,1,0,0,0,0,1,0


In [57]:
features = [
    'Age', 'Gender', 'ExperienceYears', 'PreviousCompanies', 'DistanceFromCompany',
    'InterviewScore', 'SkillScore', 'PersonalityScore',
    'EducationLevel_2', 'EducationLevel_3', 'EducationLevel_4',
    'RecruitmentStrategy_1', 'RecruitmentStrategy_2', 'RecruitmentStrategy_3'
]

target = 'HiringDecision'

# Extract features and target
X_new = new_df[features].values
y_new = new_df[target].values

# Ensure the same scaling as during training
X_new_scaled = scaler.transform(X_new)

# Convert to PyTorch tensors
X_new_tensor = torch.tensor(X_new_scaled, dtype=torch.float32).to(device)

# ===========================
# 3. Model Inference
# ===========================
model.eval()
with torch.no_grad():
    logits = model(X_new_tensor)                     # Get raw logits
    probs = torch.sigmoid(logits).cpu().numpy()      # Convert logits to probabilities

# ===========================
# 4. Display Results
# ===========================

# Add predictions to the DataFrame
new_df['Predicted_Probability'] = probs.flatten()
new_df['Predicted_Label'] = (new_df['Predicted_Probability'] >= 0.5).astype(int)  # Hired if >= 0.5

# Display results
print("\nModel Predictions on the New Dataset:")
print(new_df[['Age', 'Gender', 'ExperienceYears', 'InterviewScore', 'Predicted_Probability', 'Predicted_Label']].head(10))

# Count how many are predicted as Hired and Rejected
hired_count = (new_df['Predicted_Label'] == 1).sum()
rejected_count = (new_df['Predicted_Label'] == 0).sum()

print(f"\nTotal Candidates: {len(new_df)}")
print(f"Hired: {hired_count}")
print(f"Rejected: {rejected_count}")



Model Predictions on the New Dataset:
        Age  Gender  ExperienceYears  InterviewScore  Predicted_Probability  \
0 -0.989083       1        -1.658237       -0.089598               0.999003   
1  0.416376       1         0.928044       -0.543879               0.975204   
2  1.389387       0        -1.011667       -1.068049               0.000130   
3 -0.124185       1        -0.580620       -0.508934               0.000097   
4 -0.556634       0        -0.365097       -0.963215               0.000045   

   Predicted_Label  
0                1  
1                1  
2                0  
3                0  
4                0  

Total Candidates: 5
Hired: 2
Rejected: 3
