# Nurse Recruitment AI Model (Healthcare)
## Use Case
Automatically shortlist nurses by matching each candidate's resume text and years of experience against a job requirement.

### Dataset

In [9]:
import pandas as pd

# ----------------------------------
# Step 1: Dummy Nurse Recruitment Data
# ----------------------------------
# One tuple per candidate, in column order:
#   (resume_text, job_description, experience_years, selected)
# `selected` is the 0/1 label used as the training target.
candidate_rows = [
    ("ICU nurse with 5 years experience", "ICU nurse required", 5, 1),
    ("Fresh graduate nurse", "Senior ICU nurse required", 0, 0),
    ("Registered nurse with ER experience", "Emergency room nurse", 4, 1),
    ("Senior nurse with leadership background", "Head nurse position", 8, 1),
    ("Junior nurse no ICU exposure", "ICU nurse required", 1, 0),
    ("Nurse trainee with internship", "ICU nurse required", 0, 0),
]
column_names = ["resume_text", "job_description", "experience_years", "selected"]

# Transpose the row-wise records into a column-name -> list-of-values mapping,
# which is the layout pd.DataFrame is built from.
data = {
    name: list(values)
    for name, values in zip(column_names, zip(*candidate_rows))
}

df = pd.DataFrame(data)
print("Dataset Preview:\n", df, "\n")

Dataset Preview:
                                resume_text            job_description  \
0        ICU nurse with 5 years experience         ICU nurse required   
1                     Fresh graduate nurse  Senior ICU nurse required   
2      Registered nurse with ER experience       Emergency room nurse   
3  Senior nurse with leadership background        Head nurse position   
4             Junior nurse no ICU exposure         ICU nurse required   
5            Nurse trainee with internship         ICU nurse required   

   experience_years  selected  
0                 5         1  
1                 0         0  
2                 4         1  
3                 8         1  
4                 1         0  
5                 0         0   



### Feature Engineering

**Model used:** Logistic Regression (with TF-IDF features)

✅ **Why this model?**
- HR needs explainable decisions
- Works very well for text classification
- Fast to train
- Easy to justify to HR and compliance teams

❗ In healthcare hiring, interpretability matters more than complexity.

In [10]:
from sklearn.feature_extraction.text import TfidfVectorizer
from scipy.sparse import hstack

# ----------------------------------
# Step 2: Combine Resume & Job Text
# ----------------------------------
# Resume and job description are concatenated into a single document per row
# so that one TF-IDF vocabulary covers both sides of the match.
df["combined_text"] = df["resume_text"] + " " + df["job_description"]

X_text = df["combined_text"]
X_exp = df[["experience_years"]]  # double brackets keep it 2-D so it stacks as one column
y = df["selected"]                # 0/1 target label

# TF-IDF Vectorization: unigrams and bigrams, English stop words removed,
# vocabulary capped at 1000 terms.
# NOTE(review): the vectorizer is fitted on every row here, before the
# train/test split that happens in a later cell, so vocabulary and IDF
# statistics from the held-out rows leak into the training features.
# Fitting on the training rows only would require splitting first.
tfidf = TfidfVectorizer(
    max_features=1000,
    stop_words="english",
    ngram_range=(1, 2)
)

X_tfidf = tfidf.fit_transform(X_text)

# Merge text + numeric features.
# Without an explicit `format`, hstack chooses the result format itself
# (typically COO when sparse and dense blocks are mixed), and COO matrices do
# not support row indexing or slicing. Requesting CSR keeps X row-selectable
# for splitting, inspection, and prediction on individual rows.
X = hstack([X_tfidf, X_exp.values], format="csr")

### Train–Test Split

In [11]:
from sklearn.model_selection import train_test_split

# ----------------------------------
# Step 3: Stratified Train-Test Split
# ----------------------------------
# Roughly a third of the rows are held out for evaluation. Stratifying on y
# keeps the class ratio the same in both partitions, and the fixed seed makes
# the partition reproducible across runs.
split_kwargs = {"test_size": 0.33, "random_state": 42, "stratify": y}
X_train, X_test, y_train, y_test = train_test_split(X, y, **split_kwargs)

### Model Training

In [12]:
from sklearn.linear_model import LogisticRegression

# ----------------------------------
# Step 4: Train Logistic Regression
# ----------------------------------
# Hyper-parameters are collected in one place so they are easy to spot and tune.
logreg_params = {
    "max_iter": 1000,            # upper bound on solver iterations
    "class_weight": "balanced",  # handles small imbalance
}
model = LogisticRegression(**logreg_params)

# Fit on the training partition only; the held-out rows are used for evaluation.
model.fit(X_train, y_train)

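### Model Evaluation

> **Caveat:** the held-out set contains only two rows (one per class), so the perfect scores reported below show that the pipeline runs end to end, not that the model generalizes.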

In [14]:
from sklearn.metrics import classification_report

# ----------------------------------
# Step 5: Evaluation
# ----------------------------------
# Predict labels for the held-out rows and summarise per-class
# precision / recall / F1 against the true labels.
y_pred = model.predict(X_test)
report_text = classification_report(y_test, y_pred)

print("Classification Report:\n")
print(report_text)

Classification Report:

              precision    recall  f1-score   support

           0       1.00      1.00      1.00         1
           1       1.00      1.00      1.00         1

    accuracy                           1.00         2
   macro avg       1.00      1.00      1.00         2
weighted avg       1.00      1.00      1.00         2



### Test Record

In [15]:
# ----------------------------------
# Step 6: Test with New Candidate
# ----------------------------------
test_candidate = dict(
    resume_text="ICU nurse with emergency care experience",
    job_description="ICU nurse required",
    experience_years=6,
)

# Build the document the same way as the training column:
# resume text, a single space, then the job description.
combined_test_text = " ".join(
    (test_candidate["resume_text"], test_candidate["job_description"])
)

# Reuse the already-fitted vectorizer (transform only, no refit) so the new
# row is expressed in the same vocabulary as the training features.
text_vec = tfidf.transform([combined_test_text])

# Experience is appended as a 1x1 column, matching the training feature layout
# (TF-IDF columns first, experience_years last).
experience_column = [[test_candidate["experience_years"]]]
features = hstack([text_vec, experience_column])

prediction = model.predict(features)[0]
# Second probability column: with 0/1 labels this is the probability of label 1.
confidence = model.predict_proba(features)[0][1]

print("Test Candidate:", test_candidate, "\nPrediction:", sep="\n")
print("Shortlisted:", bool(prediction))
print("Match Score:", round(confidence, 2))

Test Candidate:
{'resume_text': 'ICU nurse with emergency care experience', 'job_description': 'ICU nurse required', 'experience_years': 6}

Prediction:
Shortlisted: True
Match Score: 0.94
