In [1]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here's several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
for dirname, _, filenames in os.walk('/kaggle/input'):
    for filename in filenames:
        print(os.path.join(dirname, filename))

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

In [2]:
#  Import Libraries
import pandas as pd
import numpy as np
from sklearn.preprocessing import StandardScaler, OneHotEncoder
from sklearn.model_selection import train_test_split
import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
from sklearn.metrics.pairwise import cosine_similarity

In [3]:

#  Load Dataset
file_path = '/kaggle/input/original-hiring-dataset/Hiring_dataset.csv'
df = pd.read_csv(file_path)

#  Preprocessing
X = df.drop(['HiringDecision'], axis=1)
y = df['HiringDecision']

FileNotFoundError: [Errno 2] No such file or directory: '/kaggle/input/original-hiring-dataset/Hiring_dataset.csv'

In [None]:


#  Identify Numerical & Categorical Features
numerical_features = X.select_dtypes(include=['int64', 'float64']).columns.tolist()
categorical_features = X.select_dtypes(include=['object']).columns.tolist()

#  Scale Numerical Features
scaler = StandardScaler()
X_numerical = scaler.fit_transform(X[numerical_features])

#  Encode Categorical Features
encoder = OneHotEncoder()
X_categorical = encoder.fit_transform(X[categorical_features]).toarray()

#  Combine Preprocessed Features
X_preprocessed = np.hstack((X_numerical, X_categorical))

#  Split Data
X_train, X_test = train_test_split(X_preprocessed, test_size=0.2, random_state=42)

# Convert to PyTorch tensors
X_train_tensor = torch.tensor(X_train, dtype=torch.float32)
X_test_tensor = torch.tensor(X_test, dtype=torch.float32)

#  Deep Learning Model (Autoencoder-like)
class ContentBasedRecommender(nn.Module):
    def __init__(self, input_dim, embedding_dim=64):
        super(ContentBasedRecommender, self).__init__()
        self.fc1 = nn.Linear(input_dim, 128)
        self.fc2 = nn.Linear(128, 64)
        self.fc3 = nn.Linear(64, embedding_dim)
        
    def forward(self, x):
        x = torch.relu(self.fc1(x))
        x = torch.relu(self.fc2(x))
        x = self.fc3(x)
        return x

#  Initialize Model
input_dim = X_train_tensor.shape[1]
embedding_dim = 64

model = ContentBasedRecommender(input_dim, embedding_dim)
optimizer = optim.Adam(model.parameters(), lr=0.001)
criterion = nn.MSELoss()

#  Contrastive Loss Function (Fixed)
def contrastive_loss(embeddings):
    """
    Contrastive loss using cosine similarity in the embedding space.
    """
    batch_size = embeddings.shape[0]

    # Randomly select positive and negative samples
    idx = torch.randperm(batch_size)

    positive = embeddings
    negative = embeddings[idx]

    #  Cosine similarities
    sim_pos = F.cosine_similarity(positive, positive)  # Similarities with itself (should be 1)
    sim_neg = F.cosine_similarity(positive, negative)  # Similarities with random negatives

    #  Contrastive loss calculation
    loss = torch.mean(1 - sim_pos + sim_neg)
    return loss

# Training Loop
epochs = 50
batch_size = 64

for epoch in range(epochs):
    model.train()

    for i in range(0, len(X_train_tensor), batch_size):
        batch = X_train_tensor[i:i + batch_size]

        optimizer.zero_grad()

        # Forward pass
        embeddings = model(batch)

        # Contrastive loss (fixed)
        loss = contrastive_loss(embeddings)

        #  Backward pass
        loss.backward()
        optimizer.step()

    if (epoch + 1) % 10 == 0:
        print(f'Epoch [{epoch+1}/{epochs}], Loss: {loss.item():.4f}')

# Get Embeddings
model.eval()
train_embeddings = model(X_train_tensor).detach().numpy()
test_embeddings = model(X_test_tensor).detach().numpy()

print("\n Embeddings Generated Successfully!")
print("Train Embeddings Shape:", train_embeddings.shape)
print("Test Embeddings Shape:", test_embeddings.shape)

#  Recommendation System
def recommend_candidates(target_profile, top_n=5):
    """
    Recommend top-N candidates based on similarity to the target profile.
    """
    #  Preprocess Target Profile
    target_numerical = scaler.transform(target_profile[numerical_features])
    target_categorical = encoder.transform(target_profile[categorical_features]).toarray()
    target_preprocessed = np.hstack((target_numerical, target_categorical))
    
    # Convert to PyTorch tensor
    target_tensor = torch.tensor(target_preprocessed, dtype=torch.float32)

    # Generate target embedding
    with torch.no_grad():
        target_embedding = model(target_tensor).detach().numpy()

    # Calculate cosine similarities
    similarities = cosine_similarity(target_embedding, test_embeddings)[0]

    # Rank candidates by similarity
    ranked_indices = np.argsort(similarities)[::-1]
    top_candidates = ranked_indices[:top_n]

    #  Prepare Recommendation DataFrame
    recommendations = pd.DataFrame({
        'Candidate_Index': top_candidates,
        'Similarity_Score': similarities[top_candidates]
    })
    
    return recommendations

#  Sample Target Profile
sample_target = pd.DataFrame({
    'Age': [35],
    'Gender': ['Male'],
    'EducationLevel': ["Master's"],
    'ExperienceYears': [8],
    'PreviousCompanies': [3],
    'DistanceFromCompany': [20],
    'InterviewScore': [75],
    'SkillScore': [80],
    'PersonalityScore': [85],
    'RecruitmentStrategy': ['Aggressive']
})

# Recommend Candidates
recommendations = recommend_candidates(sample_target, top_n=5)
print("\ Top Recommended Candidates:")
print(recommendations)


In [None]:
recommendations