Extracts realistic skills, education, associated countries, and years of experience from real nurse biography data.

In [1]:
import ast
import re

import pandas as pd
import spacy
from spacy.matcher import PhraseMatcher


In [3]:
# Load the spaCy pipeline "en_core_web_sm". `nlp` is used below to tokenize the
# keyword phrases (nlp.make_doc) and to parse each biography for named entities.
nlp = spacy.load("en_core_web_sm")

In [4]:
# Load the nurse biography dataset.
# NOTE(review): the variable is called `df_physician` although the file and the
# `profession` column both refer to nurses; the name is kept because later cells use it.
df_physician = pd.read_csv("Nurse_balanced_200.csv")
df_physician

Unnamed: 0,hard_text,profession,gender
0,Mr. Orallo received his Advanced Nurse Practit...,nurse,Male
1,He graduated with honors in 2012. Having more ...,nurse,Male
2,He graduated with honors in 2012. Having more ...,nurse,Male
3,He describes a typical day in the private pain...,nurse,Male
4,Edwin Auman (as his nurse) for twenty-one year...,nurse,Male
...,...,...,...
195,"Growing up in Budapest, Sacramento, and Colley...",nurse,Female
196,She is currently working at the Huntsman Cance...,nurse,Female
197,She graduated with honors in 2012. Having more...,nurse,Female
198,She graduated with honors in 2012. Having more...,nurse,Female


In [14]:
# Define keywords
# Phrases searched for in each biography. Each entry is turned into a token
# sequence for a spaCy PhraseMatcher, so an entry only matches text that
# contains that exact sequence of tokens.
# NOTE(review): compound entries such as 'ICU / CCU Experience' and
# 'Electronic Health Records (EHR)' will only match that literal wording;
# consider adding shorter variants (e.g. 'ICU', 'EHR') if recall matters.
skills = ['Patient Care',
'Vital Signs Monitoring',
'Medication Administration',
'IV Therapy',
'Wound Care',
'Pediatric Care',
'Geriatric Care',
'Critical Care',
'ICU / CCU Experience',
'Infection Control',
'Electronic Health Records (EHR)',
'Charting',
'Patient Assessment'  # was the truncated 'Patient Assessmen', which could never match
]

# Degrees, licences and advanced-practice titles, each as abbreviation + long form.
education = ["BSN", "Bachelor of Science in Nursing",
    "MSN", "Master of Science in Nursing",
    "ADN", "Associate Degree in Nursing",
    "Diploma in Nursing",
    "DNP", "Doctor of Nursing Practice",
    "RN", "Registered Nurse",
    "LPN", "Licensed Practical Nurse",
    "LVN", "Licensed Vocational Nurse",
    "RN-BSN", "BSN Bridge Program",
    "Nurse Practitioner", "NP",
    "Certified Nurse Midwife", "CNM",
    "Clinical Nurse Specialist", "CNS",
    "Certified Registered Nurse Anesthetist", "CRNA"
    ]

In [15]:
def extract_experience(text):
    """
    Extract a number of years from the first "<digits> year(s)" mention in a text.

    Recognised forms (case-insensitive): "6 years", "1 year", "5+ years",
    "10-year". The trailing word boundary prevents matching words that merely
    start with "year" (e.g. "12 yearly reviews").

    Parameters
    ----------
    text : str
        Free text to search. Non-string values (e.g. NaN from pandas) yield None.

    Returns
    -------
    int or None
        The number from the first match, or None when nothing matches.

    Notes
    -----
    Spelled-out numbers ("twenty-one years") are not recognised, and the
    function cannot tell experience from other durations or ages
    ("45 years old") -- it simply returns the first numeric match.
    """
    # Guard against missing values so one bad row does not abort a whole run.
    if not isinstance(text, str):
        return None

    # \+?     -> optional plus sign, as in "5+ years"
    # [\s-]+  -> whitespace or hyphen separator, as in "10-year"
    match = re.search(r'\b(\d+)\+?[\s-]+years?\b', text, flags=re.IGNORECASE)
    return int(match.group(1)) if match else None

In [16]:

# Build one PhraseMatcher per keyword family.
# Every keyword is tokenized with nlp.make_doc (tokenizer only) so it can be
# used as a match pattern.
# NOTE(review): both matchers are created without an `attr` argument, so they
# use spaCy's default matching attribute -- presumably exact, case-sensitive
# token text. That would explain why lower-case mentions ("patient care") are
# not picked up; confirm against the PhraseMatcher docs before changing it.
skill_patterns = list(map(nlp.make_doc, skills))
edu_patterns = list(map(nlp.make_doc, education))

skill_matcher = PhraseMatcher(nlp.vocab)
skill_matcher.add("SKILL", skill_patterns)

edu_matcher = PhraseMatcher(nlp.vocab)
edu_matcher.add("EDU", edu_patterns)


In [17]:
# ---------- Extraction loop ---------
# One entry is appended per biography, so each list stays aligned with the
# rows of df_physician.
all_skills = []
all_countries = []
all_experience = []
all_education = []

for bio in df_physician['hard_text']:
    doc = nlp(bio)

    # countries: every entity spaCy labels GPE. The saved output shows this
    # also captures cities ('Budapest', 'Sacramento') and occasional false
    # positives ('NURSE', 'neuralgia'), so treat it as "locations", not strictly countries.
    countries = [ent.text for ent in doc.ents if ent.label_ == "GPE"]

    # skills / education: surface text of every phrase-matcher hit
    skills_found = [doc[start:end].text for match_id, start, end in skill_matcher(doc)]
    edu_found = [doc[start:end].text for match_id, start, end in edu_matcher(doc)]

    # experience: first "<n> years" mention, or None
    experience = extract_experience(bio)

    # sorted(set(...)) removes duplicates AND fixes the order. A bare
    # list(set(...)) of strings is ordered by hash, which changes between
    # interpreter runs and made the saved CSV (and downstream tie-breaks)
    # non-reproducible.
    all_skills.append(sorted(set(skills_found)))
    all_education.append(sorted(set(edu_found)))
    all_countries.append(sorted(set(countries)))
    all_experience.append(experience)

# ---------- Add to dataframe ----------
df_physician['skills_extracted'] = all_skills
df_physician['education_extracted'] = all_education
df_physician['countries_extracted'] = all_countries
df_physician['experience_years'] = all_experience

# ---------- Save enhanced dataset ----------
# Note: the list-valued columns are written to CSV as their string representation.
df_physician.to_csv("nurses_extracted.csv", index=False)
print("✅ Extraction complete. Saved as nurses_extracted.csv")

✅ Extraction complete. Saved as nurses_extracted.csv


In [18]:
# Reload the enriched dataset written by the extraction cell.
# The *_extracted columns come back from CSV as strings such as "['Nurse Practitioner']",
# which is why the aggregation step has to parse them before counting.
df = pd.read_csv("nurses_extracted.csv")
df

Unnamed: 0,hard_text,profession,gender,skills_extracted,education_extracted,countries_extracted,experience_years
0,Mr. Orallo received his Advanced Nurse Practit...,nurse,Male,[],['Nurse Practitioner'],[],
1,He graduated with honors in 2012. Having more ...,nurse,Male,[],[],['NURSE'],6.0
2,He graduated with honors in 2012. Having more ...,nurse,Male,[],[],['NURSE'],4.0
3,He describes a typical day in the private pain...,nurse,Male,[],[],['neuralgia'],
4,Edwin Auman (as his nurse) for twenty-one year...,nurse,Male,[],[],[],
...,...,...,...,...,...,...,...
195,"Growing up in Budapest, Sacramento, and Colley...",nurse,Female,[],[],"['Budapest', 'Sacramento']",
196,She is currently working at the Huntsman Cance...,nurse,Female,[],[],[],
197,She graduated with honors in 2012. Having more...,nurse,Female,[],[],['NURSE'],5.0
198,She graduated with honors in 2012. Having more...,nurse,Female,[],[],"['Thedacare', 'NURSE']",4.0


Generating a Job Ad for a Nurse

In [25]:

from collections import Counter         # a special Python class from the collections module that makes it extremely easy to count the frequency of items in a list.
import random


def _parse_list_cell(value):
    """Return a DataFrame cell as a list, accepting a real list or its string repr."""
    if isinstance(value, (list, tuple, set)):
        return list(value)
    if isinstance(value, str):
        if not value.strip():
            return []
        # literal_eval only accepts Python literals, so a crafted CSV cell
        # cannot execute code the way eval() would.
        parsed = ast.literal_eval(value)
        return list(parsed) if isinstance(parsed, (list, tuple, set)) else [parsed]
    # NaN / None / anything else: treat as "nothing extracted"
    return []


def aggregate_keywords(df, ignore_countries=("NURSE", "Nursing"), default_experience=5):
    """
    Summarise the extracted keywords of all biographies.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain the columns 'skills_extracted', 'education_extracted',
        'countries_extracted' (lists, or their string form as read back from
        CSV) and 'experience_years' (number or NaN).
    ignore_countries : iterable of str, optional
        Location strings to drop before counting (known NER false positives).
    default_experience : int, optional
        Value returned for the average when no row has an experience figure.

    Returns
    -------
    tuple
        (top_skills, top_edu, top_countries, avg_exp): the 5 most frequent
        skills, the 2 most frequent education entries, the single most
        frequent location (as a list that may be empty), and the mean
        experience truncated to an int.

    Raises
    ------
    ValueError, SyntaxError
        If a string cell is not a valid Python literal.
    """
    all_skills = Counter()
    all_education = Counter()
    all_countries = Counter()

    ignored = set(ignore_countries)

    for cell in df['skills_extracted']:
        all_skills.update(_parse_list_cell(cell))

    for cell in df['education_extracted']:
        all_education.update(_parse_list_cell(cell))

    for cell in df['countries_extracted']:
        all_countries.update(c for c in _parse_list_cell(cell) if c not in ignored)

    # Rows without a detected experience figure are excluded from the average.
    experience_list = [int(years) for years in df['experience_years'] if not pd.isna(years)]

    top_skills = [s for s, _ in all_skills.most_common(5)]
    top_edu = [e for e, _ in all_education.most_common(2)]
    top_countries = [c for c, _ in all_countries.most_common(1)]
    avg_exp = int(sum(experience_list) / len(experience_list)) if experience_list else default_experience

    return top_skills, top_edu, top_countries, avg_exp

# Aggregate the reloaded dataset and show each summary value on its own line.
# NOTE(review): `skills` and `countries` re-bind names used by earlier cells
# (the keyword list and the per-bio loop variable); re-running those cells
# after this one will see the aggregated values instead.
skills, edu, countries, exp = aggregate_keywords(df)
for label, value in (
    ("Skills:", skills),
    ("Education:", edu),
    ("Country:", countries),
    ("Experience:", exp),
):
    print(label, value)


Skills: ['Critical Care']
Education: ['Nurse Practitioner', 'Master of Science in Nursing']
Country: ['New York City']
Experience: 12


In [26]:
def generate_nurse_job_ad(skills, education, country, experience):
    """
    Render a plain-text Registered Nurse job advertisement.

    Parameters
    ----------
    skills : list of str
        Preferred skills, joined with ", ". When empty, a generic line is
        used instead of an empty bullet.
    education : list of str
        Accepted qualifications, shown in parentheses after "Nursing degree".
        When empty, the parentheses are omitted.
    country : str
        Location shown in the header. When empty/None, "Not specified" is used.
    experience : int
        Approximate years of experience, rendered as "<experience>+ years".

    Returns
    -------
    str
        The advertisement text, starting and ending with a newline.
    """
    # Neutral fallbacks so empty inputs never render "()" or a bare "- " bullet.
    degree_detail = f" ({', '.join(education)})" if education else ""
    skills_line = ', '.join(skills) if skills else "Relevant clinical nursing skills"
    location = country if country else "Not specified"

    job_ad = f"""
🏥 Job Title: Registered Nurse
📍 Location: {location}
🕒 Employment Type: Full-time

About Us
Our hospital is committed to delivering compassionate and patient-focused care. We value collaboration and provide a supportive environment where nurses can grow and make a meaningful impact on patients' lives.

Position Overview
We are seeking a dedicated Registered Nurse with approximately {experience}+ years of clinical experience. The ideal candidate will demonstrate strong patient care skills, excellent communication, and the ability to work closely with the care team.

Key Responsibilities
- Provide direct patient care, assessments, and monitoring
- Administer medications and treatments safely
- Educate patients and families on care plans and wellness
- Collaborate with physicians, therapists, and other healthcare staff
- Maintain accurate patient records and documentation

Required Qualifications
- Nursing degree{degree_detail} or equivalent
- Active RN license (or eligibility to obtain)
- Strong clinical and interpersonal skills

Preferred Skills
- {skills_line}

What We Offer
- Supportive and collaborative work environment
- Competitive salary and benefits
- Professional development and continuing education
- A culture that values compassion, teamwork, and patient well-being

How to Apply
Interested applicants may submit their resume and a brief cover letter. We look forward to welcoming a new member to our nursing team.
"""
    return job_ad


In [27]:
# `countries` is the (possibly empty) top-location list from aggregate_keywords;
# indexing it directly raised IndexError when no location was extracted.
location = countries[0] if countries else "Not specified"
final_job_ad = generate_nurse_job_ad(skills, edu, location, exp)
print(final_job_ad)

output_filename = "nurse_job_ad.txt"

# Write as UTF-8 explicitly: the ad contains non-ASCII characters.
with open(output_filename, "w", encoding="utf-8") as f:
    f.write(final_job_ad)


🏥 Job Title: Registered Nurse
📍 Location: New York City
🕒 Employment Type: Full-time

About Us
Our hospital is committed to delivering compassionate and patient-focused care. We value collaboration and provide a supportive environment where nurses can grow and make a meaningful impact on patients' lives.

Position Overview
We are seeking a dedicated Registered Nurse with approximately 12+ years of clinical experience. The ideal candidate will demonstrate strong patient care skills, excellent communication, and the ability to work closely with the care team.

Key Responsibilities
- Provide direct patient care, assessments, and monitoring
- Administer medications and treatments safely
- Educate patients and families on care plans and wellness
- Collaborate with physicians, therapists, and other healthcare staff
- Maintain accurate patient records and documentation

Required Qualifications
- Nursing degree (Nurse Practitioner, Master of Science in Nursing) or equivalent
- Active