In [1]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import classification_report

In [4]:
data1 = pd.read_csv('historical_hiring_data.csv')
data1.head()

Unnamed: 0,candidate_id,resume_text,job_id,job_description_text,hired
0,1,Experienced software engineer with expertise i...,101,Looking for a software engineer with experienc...,1
1,2,Junior developer with Java and C++ experience.,102,Seeking a junior developer familiar with Java ...,0
2,3,Data scientist with strong Python skills and m...,101,Looking for a software engineer with experienc...,1
3,4,Senior developer with a focus on web developme...,103,Seeking a senior developer with web developmen...,1
4,5,Backend developer with experience in Ruby on R...,104,Hiring a backend developer with experience in ...,0


## Feature Extraction

In [7]:
tfidf = TfidfVectorizer(max_features=1000)
X_resumes = tfidf.fit_transform(data1['resume_text']).toarray()
X_jobs = tfidf.transform(data1['job_description_text']).toarray()

In [8]:
X = X_resumes + X_jobs 
y = data1['hired'] 

## Train-test split

In [9]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

## Model training

In [10]:
model = RandomForestClassifier(n_estimators=100, random_state=42)
model.fit(X_train, y_train)

##  Predictions and evaluation

In [11]:
y_pred = model.predict(X_test)
print(classification_report(y_test, y_pred))

              precision    recall  f1-score   support

           0       0.00      0.00      0.00       1.0
           1       0.00      0.00      0.00       0.0

    accuracy                           0.00       1.0
   macro avg       0.00      0.00      0.00       1.0
weighted avg       0.00      0.00      0.00       1.0



  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


## Recommendation function

In [12]:
def recommend_candidates(job_description, candidate_resumes):
    job_features = tfidf.transform([job_description]).toarray()
    candidate_features = tfidf.transform(candidate_resumes).toarray()
    combined_features = candidate_features + job_features  # Simplified combination
    scores = model.predict_proba(combined_features)[:, 1]
    recommendations = sorted(zip(candidate_resumes, scores), key=lambda x: x[1], reverse=True)
    return recommendations

## Example used

In [13]:
job_description = "Looking for a software engineer with experience in Python and machine learning."
candidate_resumes = [
    "Experienced software engineer with expertise in Python and deep learning.",
    "Junior developer with Java and C++ experience.",
    "Data scientist with strong Python skills and machine learning experience."
]
recommendations = recommend_candidates(job_description, candidate_resumes)
for resume, score in recommendations:
    print(f"Resume: {resume}\nScore: {score}\n")

Resume: Experienced software engineer with expertise in Python and deep learning.
Score: 0.94

Resume: Data scientist with strong Python skills and machine learning experience.
Score: 0.94

Resume: Junior developer with Java and C++ experience.
Score: 0.9

